11#!/usr/bin/env python
22"""
33Fetch CC Legal Tool usage from Openverse API.
4+
5+ Note:
6+ Because anonymous Openverse API access
7+ returns a result count capped at ~240
8+ per source-license combination, this
9+ script currently provides approximate counts.
10+ It does not include pagination or a license_version
11+ breakdown.
412"""
513
614# Standard library
3442# Constants
3543FILE_PATH = os .path .join (PATHS ["data_phase" ], "openverse_fetch.csv" )
3644OPENVERSE_FIELDS = [
37- "source " ,
38- "media_type " ,
39- "CC_TOOL_IDENTIFIER " ,
40- "media_count " ,
45+ "SOURCE " ,
46+ "MEDIA_TYPE " ,
47+ "LICENSE " ,
48+ "MEDIA_COUNT " ,
4149]
4250OPENVERSE_BASE_URL = "https://api.openverse.org/v1"
4351MEDIA_TYPES = ["audio" , "images" ]
@@ -80,7 +88,10 @@ def get_requests_session():
8088
8189
8290def get_all_sources_and_licenses (session , media_type ):
83- LOGGER .info ("Fetching all sources and licenses" )
91+ """
92+ Fetch all available sources and licenses for a given media_type.
93+ """
94+ LOGGER .info (f"Fetching all sources and licenses for { media_type } " )
8495 sources = set ()
8596 licenses = set ()
8697 url = f"{ OPENVERSE_BASE_URL } /{ media_type } /?format=json"
@@ -112,7 +123,7 @@ def query_openverse(session):
112123 url = (
113124 f"{ OPENVERSE_BASE_URL } /{ media_type } /?"
114125 f"source={ source } &license={ license } "
115- "&format=json"
126+ "&format=json&page=1 "
116127 )
117128 LOGGER .info (f"Target URL: { url } " )
118129 try :
@@ -126,17 +137,8 @@ def query_openverse(session):
126137 response .raise_for_status ()
127138 data = response .json ()
128139 count = data .get ("result_count" , 0 )
129- records = data .get ("results" , [])
130- for record in records :
131- key = (
132- record .get (OPENVERSE_FIELDS [0 ], "" ), # source
133- media_type ,
134- record .get ("license" , "" ), # license
135- record .get (
136- "license_version" , ""
137- ), # license version
138- )
139- tally [key ] = count
140+ key = (source , media_type , license )
141+ tally [key ] = count
140142 except (requests .HTTPError , requests .RequestException ) as e :
141143 LOGGER .error (f"Openverse fetch failed: { e } " )
142144 raise shared .QuantifyingException (
@@ -146,14 +148,10 @@ def query_openverse(session):
146148 LOGGER .info ("Aggregating the data" )
147149 aggregate = [
148150 {
149- OPENVERSE_FIELDS [0 ]: field [0 ], # source
150- "media_type" : field [1 ],
151- # CC_TOOL_IDENTIFIER = f"CC {license.upper()} {license_version}"
152- OPENVERSE_FIELDS [2 ]: (
153- f"{ 'CC ' + field [2 ].upper () if field [2 ] not in ['cc0' , 'pdm' ] else field [2 ].upper ()} " # noqa: E501
154- f" { field [3 ]} "
155- ),
156- OPENVERSE_FIELDS [3 ]: count , # media_count
151+ OPENVERSE_FIELDS [0 ].lower (): field [0 ], # SOURCE
152+ OPENVERSE_FIELDS [1 ].lower (): field [1 ], # MEDIA_TYPE
153+ OPENVERSE_FIELDS [2 ].lower (): field [2 ], # LICENSE
154+ OPENVERSE_FIELDS [3 ].lower (): count , # MEDIA_COUNT
157155 }
158156 for field , count in tally .items ()
159157 ]
@@ -167,7 +165,7 @@ def write_data(args, data):
167165 with open (FILE_PATH , "w" , newline = "" , encoding = "utf-8" ) as f :
168166 writer = csv .DictWriter (
169167 f ,
170- fieldnames = [ header . upper () for header in OPENVERSE_FIELDS ] ,
168+ fieldnames = OPENVERSE_FIELDS ,
171169 dialect = "unix" ,
172170 )
173171 writer .writeheader ()
0 commit comments