@@ -81,42 +81,73 @@ def get_requests_session():
8181 return session
8282
8383
def get_all_sources_and_licenses(session, media_type):
    """
    Collect the distinct sources and licenses listed for a media type.

    Issues a single GET against the Openverse listing endpoint for
    ``media_type`` and gathers the ``source`` and ``license`` values of the
    records found under the ``results`` key of the JSON response.

    NOTE(review): only the records of that one response are inspected; if
    the endpoint paginates, values appearing solely on later pages are not
    discovered -- confirm this is acceptable for callers.

    Args:
        session: Object exposing ``get(url)`` in the manner of a
            ``requests`` session.
        media_type: Path segment naming the media collection to query.

    Returns:
        A ``(sources, licenses)`` tuple of sorted lists holding the unique,
        non-empty values found.

    Raises:
        shared.QuantifyingException: If the request fails for any
            ``requests.RequestException`` (HTTP error status, connection
            failure, timeout, ...).
    """
    LOGGER.info("Fetching all sources and licenses")
    url = f"{OPENVERSE_BASE_URL}/{media_type}/?format=json"
    try:
        response = session.get(url)
        response.raise_for_status()
        records = response.json().get("results", [])
    except requests.RequestException as e:
        # Catch the whole requests hierarchy, not just HTTPError, so that
        # connection problems and timeouts are reported the same way.
        message = f"Failed to fetch sources and licenses: {e}"
        LOGGER.error(message)
        raise shared.QuantifyingException(message) from e

    # Skip missing/empty values: they would otherwise be turned into
    # meaningless "source=&license=" filters by the caller.
    sources = {
        record["source"] for record in records if record.get("source")
    }
    licenses = {
        record["license"] for record in records if record.get("license")
    }
    # Sort so the result order does not depend on set iteration order.
    return sorted(sources), sorted(licenses)
102+
103+
84104def query_openverse (session ):
85105 """
86106 Fetch records from Openverse API.
87107 """
88108 tally = {}
89109 for media_type in MEDIA_TYPES :
90110 LOGGER .info (f"Fetching { media_type } data..." )
91- url = f"{ OPENVERSE_BASE_URL } /{ media_type } /?page_size={ PAGE_SIZE } "
92- try :
93- response = session .get (url )
94- if response .status_code == 401 :
95- raise shared .QuantifyingException (
96- f"Unauthorized(401): Check API key for { media_type } ." ,
97- exit_code = 1 ,
98- )
99- response .raise_for_status ()
100- data = response .json ()
101- records = data .get ("results" , [])
102- for record in records :
103- key = (
104- record .get (OPENVERSE_FIELDS [0 ], "" ), # source
105- media_type ,
106- record .get (OPENVERSE_FIELDS [2 ], "" ), # license
107- record .get (OPENVERSE_FIELDS [3 ], "" ), # license version
111+ sources , licenses = get_all_sources_and_licenses (session , media_type )
112+ for source in sources :
113+ for license in licenses :
114+ url = (
115+ f"{ OPENVERSE_BASE_URL } /{ media_type } /?"
116+ f"source={ source } &license={ license } "
117+ "&format=json"
108118 )
109- tally [key ] = tally .get (key , 0 ) + 1
110- except requests .RequestException as e :
111- LOGGER .error (f"Openverse fetch failed: { e } " )
112- raise shared .QuantifyingException (f"Openverse fetch failed: { e } " )
119+ LOGGER .info (f"GETTING FOR: { url } " )
120+ try :
121+ response = session .get (url )
122+ if response .status_code == 401 :
123+ raise shared .QuantifyingException (
124+ "Unauthorized(401): Check API key for"
125+ f" { media_type } ." ,
126+ exit_code = 1 ,
127+ )
128+ response .raise_for_status ()
129+ data = response .json ()
130+ count = data .get ("result_count" , 0 )
131+ records = data .get ("results" , [])
132+ for record in records :
133+ key = (
134+ record .get (OPENVERSE_FIELDS [0 ], "" ), # source
135+ media_type ,
136+ record .get (OPENVERSE_FIELDS [2 ], "" ), # license
137+ )
138+ tally [key ] = count
139+ except requests .RequestException as e :
140+ LOGGER .error (f"Openverse fetch failed: { e } " )
141+ raise shared .QuantifyingException (
142+ f"Openverse fetch failed: { e } "
143+ )
113144 # Convert tally dictionary to a list of dicts for writing
145+ LOGGER .info ("Aggrgating the data" )
114146 aggregate = [
115147 {
116148 OPENVERSE_FIELDS [0 ]: field [0 ], # source
117149 "media_type" : field [1 ],
118150 OPENVERSE_FIELDS [2 ]: field [2 ], # license
119- OPENVERSE_FIELDS [3 ]: field [3 ], # license version
120151 "media_count" : count ,
121152 }
122153 for field , count in tally .items ()
0 commit comments