
Commit 8c1e30d

refactor: split model file processing from tensor loading
1 parent 4870b6e · commit 8c1e30d

2 files changed: 73 additions & 24 deletions


src/model.cpp

Lines changed: 61 additions & 24 deletions
@@ -1318,16 +1318,11 @@ void ModelLoader::set_wtype_override(ggml_type wtype, std::string tensor_type_ru
     }
 }

-bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool enable_mmap) {
-    int64_t process_time_ms = 0;
-    std::atomic<int64_t> read_time_ms(0);
-    std::atomic<int64_t> memcpy_time_ms(0);
-    std::atomic<int64_t> copy_to_backend_time_ms(0);
-    std::atomic<int64_t> convert_time_ms(0);
-    std::atomic<uint64_t> bytes_processed(0);
+void ModelLoader::process_model_files(bool enable_mmap) {

-    int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores();
-    LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
+    if (model_files_processed) {
+        return;
+    }

     int64_t start_time = ggml_time_ms();

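A note on the new entry point above: process_model_files() is guarded by the model_files_processed flag, so load_tensors() (and any other caller) can invoke it unconditionally and only the first call does real work. A minimal sketch of that prepare-once pattern, with illustrative names that are not from this codebase:

#include <string>
#include <vector>

// Prepare-once guard: `Loader`, `prepare`, and `load` are hypothetical names.
struct Loader {
    bool prepared = false;
    std::vector<std::string> cache;

    void prepare() {
        if (prepared) {
            return;  // idempotent: repeated calls are cheap no-ops
        }
        cache.push_back("result of expensive per-file scan");
        prepared = true;
    }

    bool load() {
        prepare();  // safe to call unconditionally before every load
        return !cache.empty();
    }
};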
@@ -1339,22 +1334,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
         processed_tensor_storages.push_back(tensor_storage);
     }

-    process_time_ms = ggml_time_ms() - start_time;
-
-    bool success = true;
-    size_t total_tensors_processed = 0;
-    const size_t total_tensors_to_process = processed_tensor_storages.size();
-    const int64_t t_start = ggml_time_ms();
-    int last_n_threads = 1;
-
     for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
         std::string file_path = file_paths_[file_index];
-        LOG_DEBUG("loading tensors from %s", file_path.c_str());

-        std::vector<const TensorStorage*> file_tensors;
+        std::vector<TensorStorage> file_tensors;
         for (const auto& ts : processed_tensor_storages) {
             if (ts.file_index == file_index) {
-                file_tensors.push_back(&ts);
+                file_tensors.push_back(ts);
             }
         }
         if (file_tensors.empty()) {
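The change from std::vector<const TensorStorage*> to std::vector<TensorStorage> is what makes caching safe: file_tensors now outlives this pass, and pointers into processed_tensor_storages would dangle as soon as that vector reallocates or is rebuilt. A toy illustration of the hazard (not code from this repo):

#include <cstdio>
#include <vector>

int main() {
    std::vector<int> source = {1, 2, 3};
    std::vector<const int*> by_ptr;
    std::vector<int> by_value;
    for (const auto& v : source) {
        by_ptr.push_back(&v);   // dangles once `source` changes
        by_value.push_back(v);  // owns its own copy, stays valid
    }
    source.clear();
    source.shrink_to_fit();      // by_ptr now points at freed storage
    printf("%d\n", by_value[0]); // by_value is unaffected: prints 1
    return 0;
}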
@@ -1363,7 +1349,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread

         bool is_zip = false;
         for (auto const& ts : file_tensors) {
-            if (ts->index_in_zip >= 0) {
+            if (ts.index_in_zip >= 0) {
                 is_zip = true;
                 break;
             }
@@ -1378,6 +1364,58 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
             }
         }

+        ModelFileData fdata;
+        fdata.path = file_path;
+        fdata.mmapped = std::shared_ptr<MmapWrapper>(std::move(mmapped));
+        fdata.tensors = std::move(file_tensors);
+        fdata.is_zip = is_zip;
+
+        file_data.push_back(std::move(fdata));
+    }
+
+    model_files_processed = true;
+
+    int64_t end_time = ggml_time_ms();
+    int64_t process_time_ms = end_time - start_time;
+
+    LOG_INFO("model files processing completed in %.2fs", process_time_ms / 1000.f);
+}
+
+bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads_p, bool enable_mmap) {
+
+    process_model_files(enable_mmap);
+
+    std::atomic<int64_t> read_time_ms(0);
+    std::atomic<int64_t> memcpy_time_ms(0);
+    std::atomic<int64_t> copy_to_backend_time_ms(0);
+    std::atomic<int64_t> convert_time_ms(0);
+    std::atomic<uint64_t> bytes_processed(0);
+
+    int num_threads_to_use = n_threads_p > 0 ? n_threads_p : sd_get_num_physical_cores();
+    LOG_DEBUG("using %d threads for model loading", num_threads_to_use);
+
+    int64_t start_time = ggml_time_ms();
+
+    size_t total_tensors_to_process = 0;
+    for (const auto& fdata : file_data) {
+        total_tensors_to_process += fdata.tensors.size();
+    }
+
+    bool success = true;
+    size_t total_tensors_processed = 0;
+    const int64_t t_start = start_time;
+    int last_n_threads = 1;
+
+    for (auto & fdata : file_data) {
+        const std::string & file_path = fdata.path;
+        LOG_DEBUG("loading tensors from %s", file_path.c_str());
+
+        const std::vector<TensorStorage> & file_tensors = fdata.tensors;
+
+        bool is_zip = fdata.is_zip;
+
+        std::shared_ptr<MmapWrapper> mmapped = fdata.mmapped;
+
         int n_threads = is_zip ? 1 : std::min(num_threads_to_use, (int)file_tensors.size());
         if (n_threads < 1) {
             n_threads = 1;
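Taken together, this hunk gives load_tensors() a cached, file-oriented view of the model instead of re-scanning file_paths_ on every call. A hedged sketch of the resulting two-phase call pattern, assuming a ModelLoader `loader` already initialized from model files (the callback body is illustrative):

// Phase 1: scan files once; caches per-file tensor lists and mmaps in
// `file_data` and sets `model_files_processed`.
loader.process_model_files(/*enable_mmap=*/true);

// Phase 2: stream tensors to their destinations. load_tensors() calls
// process_model_files() itself, so phase 1 re-runs here as a no-op.
bool ok = loader.load_tensors(
    [](const TensorStorage& ts, ggml_tensor** dst) {
        *dst = nullptr;  // e.g. resolve the destination tensor for `ts`
        return true;     // returning false aborts loading
    },
    /*n_threads=*/0,  // 0 falls back to sd_get_num_physical_cores()
    /*use_mmap=*/true);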
@@ -1418,7 +1456,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                break;
            }

-            const TensorStorage& tensor_storage = *file_tensors[idx];
+            const TensorStorage& tensor_storage = file_tensors[idx];
            ggml_tensor* dst_tensor = nullptr;

            t0 = ggml_time_ms();
@@ -1578,9 +1616,8 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
     }

     int64_t end_time = ggml_time_ms();
-    LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
+    LOG_INFO("loading tensors completed, taking %.2fs (read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
             (end_time - start_time) / 1000.f,
-             process_time_ms / 1000.f,
             (read_time_ms.load() / (float)last_n_threads) / 1000.f,
             (memcpy_time_ms.load() / (float)last_n_threads) / 1000.f,
             (convert_time_ms.load() / (float)last_n_threads) / 1000.f,
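One subtlety behind the log line above: each worker thread accumulates its own elapsed read/memcpy/convert time into a shared atomic, so the totals are thread-seconds, and dividing by last_n_threads approximates wall-clock time. A self-contained illustration of that accounting (illustrative, not the real loader):

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
    std::atomic<int64_t> work_ms(0);
    const int n_threads = 4;
    std::vector<std::thread> pool;
    for (int i = 0; i < n_threads; i++) {
        pool.emplace_back([&] {
            work_ms += 50;  // pretend this worker spent 50 ms reading
        });
    }
    for (auto& t : pool) {
        t.join();
    }
    // 200 thread-ms spread over 4 threads ~= 50 ms of wall time
    printf("approx wall: %.2fs\n", (work_ms.load() / (float)n_threads) / 1000.f);
    return 0;
}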

src/model.h

Lines changed: 12 additions & 0 deletions
@@ -289,10 +289,21 @@ typedef std::function<bool(const TensorStorage&, ggml_tensor**)> on_new_tensor_c

 typedef OrderedMap<std::string, TensorStorage> String2TensorStorage;

+class MmapWrapper;
+
+struct ModelFileData {
+    std::string path;
+    std::vector<TensorStorage> tensors;
+    std::shared_ptr<MmapWrapper> mmapped;
+    bool is_zip;
+};
+
 class ModelLoader {
 protected:
     SDVersion version_ = VERSION_COUNT;
     std::vector<std::string> file_paths_;
+    std::vector<ModelFileData> file_data;
+    bool model_files_processed = false;
     String2TensorStorage tensor_storage_map;

     void add_tensor_storage(const TensorStorage& tensor_storage);
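ModelFileData holds the mapping through a std::shared_ptr<MmapWrapper> (forward-declared above), so the cached entry in file_data and any loader still reading from it share ownership, and the file is unmapped only when the last holder releases it. A toy demonstration with a hypothetical stand-in type:

#include <cstdio>
#include <memory>

struct MmapStub {  // stand-in for MmapWrapper, not the real class
    ~MmapStub() { printf("unmapped\n"); }
};

int main() {
    auto cached = std::make_shared<MmapStub>();  // held by file_data
    {
        std::shared_ptr<MmapStub> in_use = cached;   // a loader's copy
        printf("owners: %ld\n", in_use.use_count()); // prints 2
    }  // loader done; mapping still alive via `cached`
    cached.reset();  // last owner gone -> "unmapped" prints now
    return 0;
}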
@@ -322,6 +333,7 @@ class ModelLoader {
     std::map<ggml_type, uint32_t> get_vae_wtype_stat();
     String2TensorStorage& get_tensor_storage_map() { return tensor_storage_map; }
     void set_wtype_override(ggml_type wtype, std::string tensor_type_rules = "");
+    void process_model_files(bool enable_mmap = false);
     bool load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_threads = 0, bool use_mmap = false);
     bool load_tensors(std::map<std::string, ggml_tensor*>& tensors,
                       std::set<std::string> ignore_tensors = {},
