Skip to content

Commit b48e80f

Browse files
authored
common : update download code (ggml-org#19573)
* common : remove legacy .json to .etag migration code Signed-off-by: Adrien Gallouët <angt@huggingface.co> * common : simplify common_download_file_single_online This commit also forces a redownload if the file exists but has no .etag file. Signed-off-by: Adrien Gallouët <angt@huggingface.co> --------- Signed-off-by: Adrien Gallouët <angt@huggingface.co>
1 parent 752584d commit b48e80f

1 file changed

Lines changed: 67 additions & 100 deletions

File tree

common/download.cpp

Lines changed: 67 additions & 100 deletions
Original file line number | Diff line number | Diff line change
@@ -114,44 +114,18 @@ static void write_etag(const std::string & path, const std::string & etag) {
114114
}
115115

116116
static std::string read_etag(const std::string & path) {
117-
std::string none;
118117
const std::string etag_path = path + ".etag";
119-
120-
if (std::filesystem::exists(etag_path)) {
121-
std::ifstream etag_in(etag_path);
122-
if (!etag_in) {
123-
LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str());
124-
return none;
125-
}
126-
std::string etag;
127-
std::getline(etag_in, etag);
128-
return etag;
118+
if (!std::filesystem::exists(etag_path)) {
119+
return {};
129120
}
130-
131-
// no etag file, but maybe there is an old .json
132-
// remove this code later
133-
const std::string metadata_path = path + ".json";
134-
135-
if (std::filesystem::exists(metadata_path)) {
136-
std::ifstream metadata_in(metadata_path);
137-
try {
138-
nlohmann::json metadata_json;
139-
metadata_in >> metadata_json;
140-
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
141-
metadata_json.dump().c_str());
142-
if (metadata_json.contains("etag") && metadata_json.at("etag").is_string()) {
143-
std::string etag = metadata_json.at("etag");
144-
write_etag(path, etag);
145-
if (!std::filesystem::remove(metadata_path)) {
146-
LOG_WRN("%s: failed to delete old .json metadata file: %s\n", __func__, metadata_path.c_str());
147-
}
148-
return etag;
149-
}
150-
} catch (const nlohmann::json::exception & e) {
151-
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
152-
}
121+
std::ifstream etag_in(etag_path);
122+
if (!etag_in) {
123+
LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str());
124+
return {};
153125
}
154-
return none;
126+
std::string etag;
127+
std::getline(etag_in, etag);
128+
return etag;
155129
}
156130

157131
static bool is_http_status_ok(int status) {
@@ -347,95 +321,88 @@ static int common_download_file_single_online(const std::string & url,
347321
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
348322
}
349323

350-
for (int i = 0; i < max_attempts; ++i) {
351-
auto head = cli.Head(parts.path);
352-
bool head_ok = head && head->status >= 200 && head->status < 300;
353-
if (!head_ok) {
354-
LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
355-
if (file_exists) {
356-
LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
357-
return 304; // 304 Not Modified - fake cached response
358-
}
359-
return head->status; // cannot use cached file, return raw status code
360-
// TODO: maybe retry only on certain codes
324+
auto head = cli.Head(parts.path);
325+
if (!head || head->status < 200 || head->status >= 300) {
326+
LOG_WRN("%s: HEAD failed, status: %d\n", __func__, head ? head->status : -1);
327+
if (file_exists) {
328+
LOG_INF("%s: using cached file (HEAD failed): %s\n", __func__, path.c_str());
329+
return 304; // 304 Not Modified - fake cached response
361330
}
331+
return head ? head->status : -1;
332+
}
362333

363-
std::string etag;
364-
if (head_ok && head->has_header("ETag")) {
365-
etag = head->get_header_value("ETag");
366-
}
334+
std::string etag;
335+
if (head->has_header("ETag")) {
336+
etag = head->get_header_value("ETag");
337+
}
367338

368-
size_t total_size = 0;
369-
if (head_ok && head->has_header("Content-Length")) {
370-
try {
371-
total_size = std::stoull(head->get_header_value("Content-Length"));
372-
} catch (const std::exception& e) {
373-
LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what());
374-
}
339+
size_t total_size = 0;
340+
if (head->has_header("Content-Length")) {
341+
try {
342+
total_size = std::stoull(head->get_header_value("Content-Length"));
343+
} catch (const std::exception& e) {
344+
LOG_WRN("%s: invalid Content-Length in HEAD response: %s\n", __func__, e.what());
375345
}
346+
}
376347

377-
bool supports_ranges = false;
378-
if (head_ok && head->has_header("Accept-Ranges")) {
379-
supports_ranges = head->get_header_value("Accept-Ranges") != "none";
380-
}
348+
bool supports_ranges = false;
349+
if (head->has_header("Accept-Ranges")) {
350+
supports_ranges = head->get_header_value("Accept-Ranges") != "none";
351+
}
381352

382-
bool should_download_from_scratch = false;
383-
if (!last_etag.empty() && !etag.empty() && last_etag != etag) {
384-
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__,
385-
last_etag.c_str(), etag.c_str());
386-
should_download_from_scratch = true;
353+
if (file_exists) {
354+
if (etag.empty()) {
355+
LOG_INF("%s: using cached file (no server etag): %s\n", __func__, path.c_str());
356+
return 304; // 304 Not Modified - fake cached response
387357
}
358+
if (!last_etag.empty() && last_etag == etag) {
359+
LOG_INF("%s: using cached file (same etag): %s\n", __func__, path.c_str());
360+
return 304; // 304 Not Modified - fake cached response
361+
}
362+
if (remove(path.c_str()) != 0) {
363+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
364+
return -1;
365+
}
366+
}
388367

389-
if (file_exists) {
390-
if (!should_download_from_scratch) {
391-
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
392-
return 304; // 304 Not Modified - fake cached response
393-
}
394-
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
395-
if (remove(path.c_str()) != 0) {
396-
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
397-
return -1;
398-
}
368+
const std::string path_temporary = path + ".downloadInProgress";
369+
int delay = retry_delay_seconds;
370+
371+
for (int i = 0; i < max_attempts; ++i) {
372+
if (i) {
373+
LOG_WRN("%s: retrying after %d seconds...\n", __func__, delay);
374+
std::this_thread::sleep_for(std::chrono::seconds(delay));
375+
delay *= retry_delay_seconds;
399376
}
400377

401-
const std::string path_temporary = path + ".downloadInProgress";
402378
size_t existing_size = 0;
403379

404380
if (std::filesystem::exists(path_temporary)) {
405-
if (supports_ranges && !should_download_from_scratch) {
381+
if (supports_ranges) {
406382
existing_size = std::filesystem::file_size(path_temporary);
407383
} else if (remove(path_temporary.c_str()) != 0) {
408384
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
409385
return -1;
410386
}
411387
}
412388

413-
// start the download
414-
LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
415-
__func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
416-
const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
417-
if (!was_pull_successful) {
418-
if (i + 1 < max_attempts) {
419-
const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
420-
LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
421-
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
422-
} else {
423-
LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
424-
}
425-
continue;
426-
}
389+
LOG_INF("%s: downloading from %s to %s (etag:%s)...\n",
390+
__func__, common_http_show_masked_url(parts).c_str(),
391+
path_temporary.c_str(), etag.c_str());
427392

428-
if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
429-
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
430-
return -1;
431-
}
432-
if (!etag.empty()) {
433-
write_etag(path, etag);
393+
if (common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size)) {
394+
if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
395+
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
396+
return -1;
397+
}
398+
if (!etag.empty()) {
399+
write_etag(path, etag);
400+
}
401+
return head->status;
434402
}
435-
436-
return head->status; // TODO: use actual GET status?
437403
}
438404

405+
LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
439406
return -1; // max attempts reached
440407
}
441408

0 commit comments

Comments
 (0)