Skip to content

Commit 8cba605

Browse files
committed
Added an option to download files to a specified folder in download()
1 parent 8ab2d77 commit 8cba605

1 file changed

Lines changed: 48 additions & 17 deletions

File tree

coderdata/download/downloader.py

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,46 +11,71 @@ def download(
1111
exist_ok: bool=False
1212
):
1313
"""
14-
Download the most recent version of files from a Figshare dataset, filtered by a specific prefix or all files.
14+
Download the most recent version of files from a Figshare dataset,
15+
filtered by a specific prefix or all files.
1516
16-
This function queries the Figshare API to retrieve details of a dataset and then downloads files from it.
17-
Files can be filtered by a specified prefix such as hcmi, beataml, etc. If 'all', an empty string, or None is passed as the prefix,
18-
all files in the dataset are downloaded. The function identifies the most recent version of a file
19-
by selecting the one with the highest ID among duplicates with the same name.
17+
This function queries the Figshare API to retrieve details of a
18+
dataset and then downloads files from it. Files can be filtered by a
19+
specified prefix such as hcmi, beataml, etc. If 'all', an empty
20+
string, or None is passed as the prefix, all files in the dataset
21+
are downloaded. The function identifies the most recent version of a
22+
file by selecting the one with the highest ID among duplicates with
23+
the same name.
2024
2125
Parameters
2226
----------
2327
dataset_prefix : str, optional
24-
The prefix of the dataset to download (e.g., 'hcmi'). If 'all', an empty string, or None,
25-
all files in the dataset are downloaded. Default is None.
28+
The prefix of the dataset to download (e.g., 'hcmi'). If 'all',
29+
an empty string, or None, all files in the dataset are
30+
downloaded. Default is None.
2631
2732
Returns
2833
-------
2934
None
30-
The function downloads files to the local repository and does not return any value.
35+
The function downloads files to the local repository and does
36+
not return any value.
3137
"""
3238

39+
# Create Path object from `local_path`
40+
if type(local_path) != Path:
41+
local_path = Path(local_path)
42+
43+
if not local_path.exists():
44+
Path.mkdir(local_path)
3345
# Get the dataset details
3446
url = "https://api.figshare.com/v2/articles/25033697"
3547

3648
response = requests.get(url)
3749
if response.status_code != 200:
38-
raise Exception(f"Failed to get dataset details from Figshare: {response.text}")
50+
raise Exception(
51+
f"Failed to get dataset details from Figshare: {response.text}"
52+
)
3953

4054
data = response.json()
4155

56+
# making sure that we are case insensitive
57+
name = name.casefold()
58+
4259
# Filter files by the specified prefix
43-
if dataset_prefix and dataset_prefix.lower() != "all":
44-
filtered_files = [file for file in data['files'] if file['name'].startswith(dataset_prefix)]
60+
if name != "all":
61+
filtered_files = [
62+
file
63+
for file
64+
in data['files']
65+
if file['name'].startswith(name)
66+
]
4567
else:
4668
filtered_files = data['files']
4769

4870
# Group files by name and select the one with the highest ID
4971
unique_files = {}
5072
for file in filtered_files:
51-
file_name = file['name']
73+
file_name = local_path.joinpath(file['name'])
5274
file_id = file['id']
53-
if file_name not in unique_files or file_id > unique_files[file_name]['id']:
75+
if (
76+
file_name not in unique_files
77+
or file_id > unique_files[file_name]['id']
78+
):
5479
unique_files[file_name] = {'file_info': file, 'id': file_id}
5580

5681
for file_name, file_data in unique_files.items():
@@ -60,11 +85,17 @@ def download(
6085
# Download the file
6186
with requests.get(file_url, stream=True) as r:
6287
r.raise_for_status()
63-
with open(file_name, 'wb') as f:
64-
for chunk in r.iter_content(chunk_size=8192):
65-
f.write(chunk)
88+
if file_name.exists() and not exist_ok:
89+
os.warn(
90+
f"{file_name} already exists. Use argument 'exist_ok=True'"
91+
"to overwrite existing file."
92+
)
93+
else:
94+
with open(file_name, 'wb') as f:
95+
for chunk in r.iter_content(chunk_size=8192):
96+
f.write(chunk)
6697

67-
print(f"Downloaded {file_name} to local repository.")
98+
print(f"Downloaded '{file_url}' to '{file_name}'")
6899

69100
return
70101

0 commit comments

Comments
 (0)