@@ -11,46 +11,71 @@ def download(
1111 exist_ok : bool = False
1212 ):
1313 """
14- Download the most recent version of files from a Figshare dataset, filtered by a specific prefix or all files.
14+ Download the most recent version of files from a Figshare dataset,
15+ filtered by a specific prefix or all files.
1516
16- This function queries the Figshare API to retrieve details of a dataset and then downloads files from it.
17- Files can be filtered by a specified prefix such as hcmi, beataml, etc. If 'all', an empty string, or None is passed as the prefix,
18- all files in the dataset are downloaded. The function identifies the most recent version of a file
19- by selecting the one with the highest ID among duplicates with the same name.
17+ This function queries the Figshare API to retrieve details of a
18+ dataset and then downloads files from it. Files can be filtered by a
19+ specified prefix such as hcmi, beataml, etc. If 'all', an empty
20+ string, or None is passed as the prefix, all files in the dataset
21+ are downloaded. The function identifies the most recent version of a
22+ file by selecting the one with the highest ID among duplicates with
23+ the same name.
2024
2125 Parameters
2226 ----------
2327 dataset_prefix : str, optional
24- The prefix of the dataset to download (e.g., 'hcmi'). If 'all', an empty string, or None,
25- all files in the dataset are downloaded. Default is None.
28+ The prefix of the dataset to download (e.g., 'hcmi'). If 'all',
29+ an empty string, or None, all files in the dataset are
30+ downloaded. Default is None.
2631
2732 Returns
2833 -------
2934 None
30- The function downloads files to the local repository and does not return any value.
35+ The function downloads files to the local repository and does
36+ not return any value.
3137 """
3238
39+ # Create Path object from `local_path`
40+ if type (local_path ) != Path :
41+ local_path = Path (local_path )
42+
43+ if not local_path .exists ():
44+ Path .mkdir (local_path )
3345 # Get the dataset details
3446 url = "https://api.figshare.com/v2/articles/25033697"
3547
3648 response = requests .get (url )
3749 if response .status_code != 200 :
38- raise Exception (f"Failed to get dataset details from Figshare: { response .text } " )
50+ raise Exception (
51+ f"Failed to get dataset details from Figshare: { response .text } "
52+ )
3953
4054 data = response .json ()
4155
56+ # making sure that we are case insensitive
57+ name = name .casefold ()
58+
4259 # Filter files by the specified prefix
43- if dataset_prefix and dataset_prefix .lower () != "all" :
44- filtered_files = [file for file in data ['files' ] if file ['name' ].startswith (dataset_prefix )]
60+ if name != "all" :
61+ filtered_files = [
62+ file
63+ for file
64+ in data ['files' ]
65+ if file ['name' ].startswith (name )
66+ ]
4567 else :
4668 filtered_files = data ['files' ]
4769
4870 # Group files by name and select the one with the highest ID
4971 unique_files = {}
5072 for file in filtered_files :
51- file_name = file ['name' ]
73+ file_name = local_path . joinpath ( file ['name' ])
5274 file_id = file ['id' ]
53- if file_name not in unique_files or file_id > unique_files [file_name ]['id' ]:
75+ if (
76+ file_name not in unique_files
77+ or file_id > unique_files [file_name ]['id' ]
78+ ):
5479 unique_files [file_name ] = {'file_info' : file , 'id' : file_id }
5580
5681 for file_name , file_data in unique_files .items ():
@@ -60,11 +85,17 @@ def download(
6085 # Download the file
6186 with requests .get (file_url , stream = True ) as r :
6287 r .raise_for_status ()
63- with open (file_name , 'wb' ) as f :
64- for chunk in r .iter_content (chunk_size = 8192 ):
65- f .write (chunk )
88+ if file_name .exists () and not exist_ok :
89+ os .warn (
90+ f"{ file_name } already exists. Use argument 'exist_ok=True'"
91+ "to overwrite existing file."
92+ )
93+ else :
94+ with open (file_name , 'wb' ) as f :
95+ for chunk in r .iter_content (chunk_size = 8192 ):
96+ f .write (chunk )
6697
67- print (f"Downloaded { file_name } to local repository. " )
98+ print (f"Downloaded ' { file_url } ' to ' { file_name } ' " )
6899
69100 return
70101
0 commit comments