Skip to content

Commit 1623c6a

Browse files
committed
create cc licenses
1 parent b11661b commit 1623c6a

1 file changed

Lines changed: 11 additions & 12 deletions

File tree

scripts/1-fetch/openverse_fetch.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,9 @@ def get_all_sources_and_licenses(session, media_type):
8989
"""
9090
Fetch all available sources for a given media_type.
9191
"""
92-
LOGGER.info(f"Fetching all sources and licenses for {media_type}")
92+
LOGGER.info(f"Fetching all sources for {media_type}")
9393
url = f"{OPENVERSE_BASE_URL}/{media_type}/stats/?format=json"
94-
# encoded_nc_sampling = urllib.parse.quote("nc-sampling+", safe="")
95-
# encoded_sampling = urllib.parse.quote("sampling+", safe="")
94+
# Standard /stats/ license
9695
licenses = [
9796
"by",
9897
"by-nc",
@@ -117,8 +116,8 @@ def get_all_sources_and_licenses(session, media_type):
117116
]
118117
)
119118
"""
120-
To ensure the sources in /stats/ endpoints are indexed in
121-
Openverse's catalog.
119+
To ensure the sources in /stats/ endpoints are truly
120+
indexed in Openverse's catalog.
122121
"""
123122
valid_sources = set()
124123
for source in raw_sources:
@@ -130,8 +129,8 @@ def get_all_sources_and_licenses(session, media_type):
130129
valid_sources.add(source)
131130
else:
132131
LOGGER.info(
133-
f"Skipping source {source}:"
134-
f" not available in /{media_type}/ endpoint"
132+
f"Skipping source {source}: "
133+
f"not available in /{media_type}/ endpoint"
135134
)
136135
LOGGER.info(f"Found {len(valid_sources)} sources for {media_type}")
137136
return valid_sources, set(licenses)
@@ -144,9 +143,8 @@ def get_all_sources_and_licenses(session, media_type):
144143

145144
def query_openverse(session):
146145
"""
147-
Fetch available sources given the
148-
media_type and use standard list
149-
of Openverse's standard licenses.
146+
Fetch available sources given the media_type and use
147+
Openverse's standard list of licenses.
150148
"""
151149
tally = {}
152150
for media_type in MEDIA_TYPES:
@@ -186,7 +184,9 @@ def query_openverse(session):
186184
{
187185
OPENVERSE_FIELDS[0].lower(): field[0], # SOURCE
188186
OPENVERSE_FIELDS[1].lower(): field[1], # MEDIA_TYPE
189-
OPENVERSE_FIELDS[2].lower(): field[2], # LICENSE
187+
OPENVERSE_FIELDS[2].lower(): (
188+
f"{'cc ' + field[2] if field[2] not in ['pdm', 'cc0'] else field[2]}" # noqa: E501
189+
), # LICENSE
190190
OPENVERSE_FIELDS[3].lower(): count, # MEDIA_COUNT
191191
}
192192
for field, count in tally.items()
@@ -214,7 +214,6 @@ def main():
214214
session = get_requests_session()
215215
LOGGER.info("Starting Openverse Fetch Script...")
216216
records = query_openverse(session)
217-
LOGGER.info(f"CHECKING: {records[0]}")
218217
write_data(args, records)
219218
LOGGER.info(f"Fetched {len(records)} unique Openverse records")
220219

0 commit comments

Comments
 (0)