Skip to content

Commit bb1731d

Browse files
committed
feat: enable memory-mapped tensors for all compatible backends
1 parent 6e4f647 commit bb1731d

1 file changed

Lines changed: 45 additions & 14 deletions

File tree

src/stable-diffusion.cpp

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,37 @@ class StableDiffusionGGML {
408408
apply_lora_immediately = false;
409409
}
410410

411+
std::map<std::string, ggml_tensor*> mmap_able_tensors;
412+
bool enable_mmap_tensors = false;
413+
bool main_backend_mmap = false;
414+
if (sd_ctx_params->enable_mmap) {
415+
if (apply_lora_immediately) {
416+
LOG_DEBUG("cannot memory-map model weights: only supported with --lora-apply-mode at_runtime");
417+
} else {
418+
enable_mmap_tensors = true;
419+
if (offload_params_to_cpu) {
420+
main_backend_mmap = true;
421+
} else {
422+
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
423+
struct ggml_backend_dev_props props;
424+
ggml_backend_dev_get_props(dev, &props);
425+
main_backend_mmap = props.caps.buffer_from_host_ptr;
426+
}
427+
}
428+
}
429+
430+
auto get_param_tensors = [&](auto&& model, bool force_cpu = false, auto... extra) {
431+
std::map<std::string, ggml_tensor*> temp;
432+
model->get_param_tensors(temp, std::forward<decltype(extra)>(extra)...);
433+
bool do_mmap = enable_mmap_tensors && (main_backend_mmap || force_cpu);
434+
for (const auto& [key, tensor] : temp) {
435+
tensors[key] = tensor;
436+
if (do_mmap) {
437+
mmap_able_tensors[key] = tensor;
438+
}
439+
}
440+
};
441+
411442
if (sd_version_is_control(version)) {
412443
// Might need vae encode for control cond
413444
vae_decode_only = false;
@@ -514,7 +545,7 @@ class StableDiffusionGGML {
514545
clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend,
515546
offload_params_to_cpu,
516547
tensor_storage_map);
517-
clip_vision->get_param_tensors(tensors);
548+
get_param_tensors(clip_vision);
518549
}
519550
} else if (sd_version_is_qwen_image(version)) {
520551
bool enable_vision = false;
@@ -580,16 +611,16 @@ class StableDiffusionGGML {
580611
}
581612
}
582613

583-
cond_stage_model->get_param_tensors(tensors);
614+
get_param_tensors(cond_stage_model, clip_on_cpu);
584615

585-
diffusion_model->get_param_tensors(tensors);
616+
get_param_tensors(diffusion_model);
586617

587618
if (sd_version_is_unet_edit(version)) {
588619
vae_decode_only = false;
589620
}
590621

591622
if (high_noise_diffusion_model) {
592-
high_noise_diffusion_model->get_param_tensors(tensors);
623+
get_param_tensors(high_noise_diffusion_model);
593624
}
594625

595626
if (sd_ctx_params->keep_vae_on_cpu && !ggml_backend_is_cpu(backend)) {
@@ -652,6 +683,8 @@ class StableDiffusionGGML {
652683
}
653684
};
654685

686+
bool force_vae_cpu = sd_ctx_params->keep_vae_on_cpu;
687+
655688
if (version == VERSION_CHROMA_RADIANCE) {
656689
LOG_INFO("using FakeVAE");
657690
first_stage_model = std::make_shared<FakeVAE>(version,
@@ -660,15 +693,15 @@ class StableDiffusionGGML {
660693
} else if (use_tae && !tae_preview_only) {
661694
LOG_INFO("using TAE for encoding / decoding");
662695
first_stage_model = create_tae();
663-
first_stage_model->get_param_tensors(tensors, "tae");
696+
get_param_tensors(first_stage_model, force_vae_cpu, "tae");
664697
} else {
665698
LOG_INFO("using VAE for encoding / decoding");
666699
first_stage_model = create_vae();
667-
first_stage_model->get_param_tensors(tensors, "first_stage_model");
700+
get_param_tensors(first_stage_model, force_vae_cpu, "first_stage_model");
668701
if (use_tae && tae_preview_only) {
669702
LOG_INFO("using TAE for preview");
670703
preview_vae = create_tae();
671-
preview_vae->get_param_tensors(tensors, "tae");
704+
get_param_tensors(preview_vae, force_vae_cpu, "tae");
672705
}
673706
}
674707

@@ -733,7 +766,7 @@ class StableDiffusionGGML {
733766
}
734767
}
735768
if (use_pmid) {
736-
pmid_model->get_param_tensors(tensors, "pmid");
769+
get_param_tensors(pmid_model, false, "pmid");
737770
}
738771

739772
if (sd_ctx_params->flash_attn) {
@@ -810,13 +843,11 @@ class StableDiffusionGGML {
810843
ignore_tensors.insert("conditioner.embedders.3");
811844
}
812845

813-
if (sd_ctx_params->enable_mmap) {
814-
if (!(offload_params_to_cpu || ggml_backend_is_cpu(backend))) {
815-
LOG_DEBUG("cannot memory-map model weights: only supported with CPU or --offload-to-cpu");
816-
} else if (apply_lora_immediately) {
817-
LOG_DEBUG("cannot memory-map model weights: only supported with --lora-apply-mode at_runtime");
846+
if (enable_mmap_tensors) {
847+
if (mmap_able_tensors.empty()) {
848+
LOG_DEBUG("no tensors could be memory-mapped");
818849
} else {
819-
mmap_tensor_store = model_loader.mmap_tensors(tensors, ignore_tensors);
850+
mmap_tensor_store = model_loader.mmap_tensors(mmap_able_tensors, ignore_tensors);
820851
}
821852
}
822853

0 commit comments

Comments
 (0)