@@ -408,6 +408,37 @@ class StableDiffusionGGML {
408408 apply_lora_immediately = false ;
409409 }
410410
411+ std::map<std::string, ggml_tensor*> mmap_able_tensors;
412+ bool enable_mmap_tensors = false ;
413+ bool main_backend_mmap = false ;
414+ if (sd_ctx_params->enable_mmap ) {
415+ if (apply_lora_immediately) {
416+ LOG_DEBUG (" cannot memory-map model weights: only supported with --lora-apply-mode at_runtime" );
417+ } else {
418+ enable_mmap_tensors = true ;
419+ if (offload_params_to_cpu) {
420+ main_backend_mmap = true ;
421+ } else {
422+ ggml_backend_dev_t dev = ggml_backend_get_device (backend);
423+ struct ggml_backend_dev_props props;
424+ ggml_backend_dev_get_props (dev, &props);
425+ main_backend_mmap = props.caps .buffer_from_host_ptr ;
426+ }
427+ }
428+ }
429+
430+ auto get_param_tensors = [&](auto && model, bool force_cpu = false , auto ... extra) {
431+ std::map<std::string, ggml_tensor*> temp;
432+ model->get_param_tensors (temp, std::forward<decltype (extra)>(extra)...);
433+ bool do_mmap = enable_mmap_tensors && (main_backend_mmap || force_cpu);
434+ for (const auto & [key, tensor] : temp) {
435+ tensors[key] = tensor;
436+ if (do_mmap) {
437+ mmap_able_tensors[key] = tensor;
438+ }
439+ }
440+ };
441+
411442 if (sd_version_is_control (version)) {
412443 // Might need vae encode for control cond
413444 vae_decode_only = false ;
@@ -514,7 +545,7 @@ class StableDiffusionGGML {
514545 clip_vision = std::make_shared<FrozenCLIPVisionEmbedder>(backend,
515546 offload_params_to_cpu,
516547 tensor_storage_map);
517- clip_vision-> get_param_tensors (tensors );
548+ get_param_tensors (clip_vision );
518549 }
519550 } else if (sd_version_is_qwen_image (version)) {
520551 bool enable_vision = false ;
@@ -580,16 +611,16 @@ class StableDiffusionGGML {
580611 }
581612 }
582613
583- cond_stage_model-> get_param_tensors (tensors );
614+ get_param_tensors (cond_stage_model, clip_on_cpu );
584615
585- diffusion_model-> get_param_tensors (tensors );
616+ get_param_tensors (diffusion_model );
586617
587618 if (sd_version_is_unet_edit (version)) {
588619 vae_decode_only = false ;
589620 }
590621
591622 if (high_noise_diffusion_model) {
592- high_noise_diffusion_model-> get_param_tensors (tensors );
623+ get_param_tensors (high_noise_diffusion_model );
593624 }
594625
595626 if (sd_ctx_params->keep_vae_on_cpu && !ggml_backend_is_cpu (backend)) {
@@ -652,6 +683,8 @@ class StableDiffusionGGML {
652683 }
653684 };
654685
686+ bool force_vae_cpu = sd_ctx_params->keep_vae_on_cpu ;
687+
655688 if (version == VERSION_CHROMA_RADIANCE) {
656689 LOG_INFO (" using FakeVAE" );
657690 first_stage_model = std::make_shared<FakeVAE>(version,
@@ -660,15 +693,15 @@ class StableDiffusionGGML {
660693 } else if (use_tae && !tae_preview_only) {
661694 LOG_INFO (" using TAE for encoding / decoding" );
662695 first_stage_model = create_tae ();
663- first_stage_model-> get_param_tensors (tensors , " tae" );
696+ get_param_tensors (first_stage_model, force_vae_cpu , " tae" );
664697 } else {
665698 LOG_INFO (" using VAE for encoding / decoding" );
666699 first_stage_model = create_vae ();
667- first_stage_model-> get_param_tensors (tensors , " first_stage_model" );
700+ get_param_tensors (first_stage_model, force_vae_cpu , " first_stage_model" );
668701 if (use_tae && tae_preview_only) {
669702 LOG_INFO (" using TAE for preview" );
670703 preview_vae = create_tae ();
671- preview_vae-> get_param_tensors (tensors , " tae" );
704+ get_param_tensors (preview_vae, force_vae_cpu , " tae" );
672705 }
673706 }
674707
@@ -733,7 +766,7 @@ class StableDiffusionGGML {
733766 }
734767 }
735768 if (use_pmid) {
736- pmid_model-> get_param_tensors (tensors , " pmid" );
769+ get_param_tensors (pmid_model, false , " pmid" );
737770 }
738771
739772 if (sd_ctx_params->flash_attn ) {
@@ -810,13 +843,11 @@ class StableDiffusionGGML {
810843 ignore_tensors.insert (" conditioner.embedders.3" );
811844 }
812845
813- if (sd_ctx_params->enable_mmap ) {
814- if (!(offload_params_to_cpu || ggml_backend_is_cpu (backend))) {
815- LOG_DEBUG (" cannot memory-map model weights: only supported with CPU or --offload-to-cpu" );
816- } else if (apply_lora_immediately) {
817- LOG_DEBUG (" cannot memory-map model weights: only supported with --lora-apply-mode at_runtime" );
846+ if (enable_mmap_tensors) {
847+ if (mmap_able_tensors.empty ()) {
848+ LOG_DEBUG (" no tensors could be memory-mapped" );
818849 } else {
819- mmap_tensor_store = model_loader.mmap_tensors (tensors , ignore_tensors);
850+ mmap_tensor_store = model_loader.mmap_tensors (mmap_able_tensors , ignore_tensors);
820851 }
821852 }
822853
0 commit comments