Skip to content

Commit 929644d

Browse files
authored
Merge pull request #406 from PNNL-CompBio/391-coderdata-package-needs-checksum-for-download-function
implemented md5sum check for downloaded files
2 parents 93a1d58 + 004fb7b commit 929644d

1 file changed

Lines changed: 31 additions & 13 deletions

File tree

coderdata/download/downloader.py

Lines changed: 31 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
# coderdata/download/downloader.py
22

33
from importlib import resources
4+
from hashlib import md5
45
from pathlib import Path
56
from os import PathLike
67
import os
@@ -87,21 +88,38 @@ def download(
8788
for file_name, file_data in unique_files.items():
8889
file_info = file_data['file_info']
8990
file_url = file_info['download_url']
90-
91+
file_md5sum = file_info['supplied_md5']
92+
retry_count = 10
9193
# Download the file
92-
with requests.get(file_url, stream=True) as r:
93-
r.raise_for_status()
94-
if file_name.exists() and not exist_ok:
94+
while retry_count > 0:
95+
with requests.get(file_url, stream=True) as r:
96+
r.raise_for_status()
97+
if file_name.exists() and not exist_ok:
98+
warnings.warn(
99+
f"{file_name} already exists. Use argument 'exist_ok=True'"
100+
"to overwrite existing file."
101+
)
102+
else:
103+
with open(file_name, 'wb') as f:
104+
for chunk in r.iter_content(chunk_size=8192):
105+
f.write(chunk)
106+
with open(file_name, 'rb') as f:
107+
check_md5sum = md5(f.read()).hexdigest()
108+
if file_md5sum == check_md5sum:
109+
break
110+
elif retry_count > 0:
95111
warnings.warn(
96-
f"{file_name} already exists. Use argument 'exist_ok=True'"
97-
"to overwrite existing file."
98-
)
99-
else:
100-
with open(file_name, 'wb') as f:
101-
for chunk in r.iter_content(chunk_size=8192):
102-
f.write(chunk)
103-
104-
print(f"Downloaded '{file_url}' to '{file_name}'")
112+
f"{file_name} could not be downloaded successfully. "
113+
f"(expected md5sum: {file_md5sum} - "
114+
f"calculated md5sum: {check_md5sum})... retrying..."
115+
)
116+
retry_count = retry_count - 1
117+
if retry_count == 0:
118+
warnings.warn(
119+
f"{file_name} could not be downloaded. Try again."
120+
)
121+
else:
122+
print(f"Downloaded '{file_url}' to '{file_name}'")
105123

106124
return
107125

0 commit comments

Comments
 (0)