|
1 | 1 | # coderdata/download/downloader.py |
2 | 2 |
|
3 | 3 | from importlib import resources |
| 4 | +from hashlib import md5 |
4 | 5 | from pathlib import Path |
5 | 6 | from os import PathLike |
6 | 7 | import os |
@@ -87,21 +88,38 @@ def download( |
87 | 88 | for file_name, file_data in unique_files.items(): |
88 | 89 | file_info = file_data['file_info'] |
89 | 90 | file_url = file_info['download_url'] |
90 | | - |
| 91 | + file_md5sum = file_info['supplied_md5'] |
| 92 | + retry_count = 10 |
91 | 93 | # Download the file |
92 | | - with requests.get(file_url, stream=True) as r: |
93 | | - r.raise_for_status() |
94 | | - if file_name.exists() and not exist_ok: |
| 94 | + while retry_count > 0: |
| 95 | + with requests.get(file_url, stream=True) as r: |
| 96 | + r.raise_for_status() |
| 97 | + if file_name.exists() and not exist_ok: |
| 98 | + warnings.warn( |
| 99 | + f"{file_name} already exists. Use argument 'exist_ok=True'" |
| 100 | + "to overwrite existing file." |
| 101 | + ) |
| 102 | + else: |
| 103 | + with open(file_name, 'wb') as f: |
| 104 | + for chunk in r.iter_content(chunk_size=8192): |
| 105 | + f.write(chunk) |
| 106 | + with open(file_name, 'rb') as f: |
| 107 | + check_md5sum = md5(f.read()).hexdigest() |
| 108 | + if file_md5sum == check_md5sum: |
| 109 | + break |
| 110 | + elif retry_count > 0: |
95 | 111 | warnings.warn( |
96 | | - f"{file_name} already exists. Use argument 'exist_ok=True'" |
97 | | - "to overwrite existing file." |
98 | | - ) |
99 | | - else: |
100 | | - with open(file_name, 'wb') as f: |
101 | | - for chunk in r.iter_content(chunk_size=8192): |
102 | | - f.write(chunk) |
103 | | - |
104 | | - print(f"Downloaded '{file_url}' to '{file_name}'") |
| 112 | + f"{file_name} could not be downloaded successfully. " |
| 113 | + f"(expected md5sum: {file_md5sum} - " |
| 114 | + f"calculated md5sum: {check_md5sum})... retrying..." |
| 115 | + ) |
| 116 | + retry_count = retry_count - 1 |
| 117 | + if retry_count == 0: |
| 118 | + warnings.warn( |
| 119 | + f"{file_name} could not be downloaded. Try again." |
| 120 | + ) |
| 121 | + else: |
| 122 | + print(f"Downloaded '{file_url}' to '{file_name}'") |
105 | 123 |
|
106 | 124 | return |
107 | 125 |
|
0 commit comments