Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/populace-dataset-repo-support.added.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support dataset-type Hugging Face repos in dataset materialization (retry with `repo_type="dataset"` before surfacing the original failure).
24 changes: 19 additions & 5 deletions src/policyengine/provenance/dataset_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,24 @@ def materialize_dataset_source(
)

reference = parse_hf_uri(dataset_source)
return download_huggingface_dataset(
reference.repo_id,
reference.path,
version=_select_version(reference.version, version),
)
try:
return download_huggingface_dataset(
reference.repo_id,
reference.path,
version=_select_version(reference.version, version),
)
except Exception:
# The core helper assumes a model-type repo; certified data
# releases may live in dataset-type repos (e.g.
# policyengine/populace-us). Retry with the dataset repo type.
# NOTE: if the retry also fails, the retry's error is what
# propagates; the original failure is kept only as its context.
from huggingface_hub import hf_hub_download

return hf_hub_download(
repo_id=reference.repo_id,
repo_type="dataset",
filename=reference.path,
revision=_select_version(reference.version, version),
)

return dataset_source