leejet
diff --git a/src/auto_encoder_kl.hpp b/src/auto_encoder_kl.hpp
Lines changed: 930 additions & 0 deletions
diff --git a/src/ggml_extend.hpp b/src/ggml_extend.hpp
Lines changed: 25 additions & 10 deletions
diff --git a/src/model.cpp b/src/model.cpp
Lines changed: 4 additions & 2 deletions
diff --git a/src/name_conversion.cpp b/src/name_conversion.cpp
Lines changed: 5 additions & 1 deletion
@@ -377,6 +377,12 @@ __STATIC_INLINE__ void copy_ggml_tensor(struct ggml_tensor* dst, struct ggml_ten
     ggml_free(ctx);
 }
 
+__STATIC_INLINE__ ggml_tensor* ggml_ext_dup_and_cpy_tensor(ggml_context* ctx, ggml_tensor* src) {
+    ggml_tensor* const clone = ggml_dup_tensor(ctx, src);  // new tensor obtained through ctx from src
+    copy_ggml_tensor(clone, src);                          // fill the new tensor from src
+    return clone;                                          // hand the filled duplicate back to the caller
+}
+
 __STATIC_INLINE__ float sigmoid(float x) {
     return 1 / (1.0f + expf(-x));
 }
@@ -637,7 +643,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_tensor_concat(struct ggml_context
 }
 
 // convert values from [0, 1] to [-1, 1]
-__STATIC_INLINE__ void process_vae_input_tensor(struct ggml_tensor* src) {
+__STATIC_INLINE__ void scale_to_minus1_1(struct ggml_tensor* src) {
     int64_t nelements = ggml_nelements(src);
     float* data       = (float*)src->data;
     for (int i = 0; i < nelements; i++) {
@@ -647,7 +653,7 @@ __STATIC_INLINE__ void process_vae_input_tensor(struct ggml_tensor* src) {
 }
 
 // convert values from [-1, 1] to [0, 1]
-__STATIC_INLINE__ void process_vae_output_tensor(struct ggml_tensor* src) {
+__STATIC_INLINE__ void scale_to_0_1(struct ggml_tensor* src) {
     int64_t nelements = ggml_nelements(src);
     float* data       = (float*)src->data;
     for (int i = 0; i < nelements; i++) {
@@ -834,7 +840,8 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
                                             const float tile_overlap_factor,
                                             const bool circular_x,
                                             const bool circular_y,
-                                            on_tile_process on_processing) {
+                                            on_tile_process on_processing,
+                                            bool silent = false) {  // true: skip the debug logs and the start/end progress output guarded below
     output = ggml_set_f32(output, 0);
 
     int input_width   = (int)input->ne[0];
@@ -864,8 +871,10 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
     float tile_overlap_factor_y;
     sd_tiling_calc_tiles(num_tiles_y, tile_overlap_factor_y, small_height, p_tile_size_y, tile_overlap_factor, circular_y);
 
-    LOG_DEBUG("num tiles : %d, %d ", num_tiles_x, num_tiles_y);
-    LOG_DEBUG("optimal overlap : %f, %f (targeting %f)", tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);
+    if (!silent) {
+        LOG_DEBUG("num tiles : %d, %d ", num_tiles_x, num_tiles_y);
+        LOG_DEBUG("optimal overlap : %f, %f (targeting %f)", tile_overlap_factor_x, tile_overlap_factor_y, tile_overlap_factor);
+    }
 
     int tile_overlap_x     = (int32_t)(p_tile_size_x * tile_overlap_factor_x);
     int non_tile_overlap_x = p_tile_size_x - tile_overlap_x;
@@ -896,7 +905,9 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
     params.mem_buffer = nullptr;
     params.no_alloc   = false;
 
-    LOG_DEBUG("tile work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
+    if (!silent) {
+        LOG_DEBUG("tile work buffer size: %.2f MB", params.mem_size / 1024.f / 1024.f);
+    }
 
     // draft context
     struct ggml_context* tiles_ctx = ggml_init(params);
@@ -909,8 +920,10 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
     ggml_tensor* input_tile  = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, input_tile_size_x, input_tile_size_y, input->ne[2], input->ne[3]);
     ggml_tensor* output_tile = ggml_new_tensor_4d(tiles_ctx, GGML_TYPE_F32, output_tile_size_x, output_tile_size_y, output->ne[2], output->ne[3]);
     int num_tiles            = num_tiles_x * num_tiles_y;
-    LOG_DEBUG("processing %i tiles", num_tiles);
-    pretty_progress(0, num_tiles, 0.0f);
+    if (!silent) {
+        LOG_DEBUG("processing %i tiles", num_tiles);
+        pretty_progress(0, num_tiles, 0.0f);
+    }
     int tile_count = 1;
     bool last_y = false, last_x = false;
     float last_time = 0.0f;
@@ -960,8 +973,10 @@ __STATIC_INLINE__ void sd_tiling_non_square(ggml_tensor* input,
         }
         last_x = false;
     }
-    if (tile_count < num_tiles) {
-        pretty_progress(num_tiles, num_tiles, last_time);
+    if (!silent) {
+        if (tile_count < num_tiles) {
+            pretty_progress(num_tiles, num_tiles, last_time);
+        }
     }
     ggml_free(tiles_ctx);
 }
 
@@ -1104,10 +1104,12 @@ SDVersion ModelLoader::get_sd_version() {
             tensor_storage.name.find("unet.mid_block.resnets.1.") != std::string::npos) {
             has_middle_block_1 = true;
         }
-        if (tensor_storage.name.find("model.diffusion_model.output_blocks.3.1.transformer_blocks.1") != std::string::npos) {
+        if (tensor_storage.name.find("model.diffusion_model.output_blocks.3.1.transformer_blocks.1") != std::string::npos ||
+            tensor_storage.name.find("unet.up_blocks.1.attentions.0.transformer_blocks.1") != std::string::npos) {
             has_output_block_311 = true;
         }
-        if (tensor_storage.name.find("model.diffusion_model.output_blocks.7.1") != std::string::npos) {
+        if (tensor_storage.name.find("model.diffusion_model.output_blocks.7.1") != std::string::npos ||
+            tensor_storage.name.find("unet.up_blocks.2.attentions.1") != std::string::npos) {
             has_output_block_71 = true;
         }
         if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" ||
 
@@ -1120,7 +1120,11 @@ std::string convert_tensor_name(std::string name, SDVersion version) {
         for (const auto& prefix : first_stage_model_prefix_vec) {
             if (starts_with(name, prefix)) {
                 name = convert_first_stage_model_name(name.substr(prefix.size()), prefix);
-                name = prefix + name;
+                if (version == VERSION_SDXS) {
+                    name = "tae." + name;
+                } else {
+                    name = prefix + name;
+                }
                 break;
             }
         }
Original file line number	Diff line number	Diff line change
`@@ -1104,10 +1104,12 @@ SDVersion ModelLoader::get_sd_version() {`
`1104`	`1104`	`tensor_storage.name.find("unet.mid_block.resnets.1.") != std::string::npos) {`
`1105`	`1105`	`has_middle_block_1 = true;`
`1106`	`1106`	`}`
`1107`		`- if (tensor_storage.name.find("model.diffusion_model.output_blocks.3.1.transformer_blocks.1") != std::string::npos) {`
	`1107`	`+ if (tensor_storage.name.find("model.diffusion_model.output_blocks.3.1.transformer_blocks.1") != std::string::npos \|\|`
	`1108`	`+ tensor_storage.name.find("unet.up_blocks.1.attentions.0.transformer_blocks.1") != std::string::npos) {`
`1108`	`1109`	`has_output_block_311 = true;`
`1109`	`1110`	`}`
`1110`		`- if (tensor_storage.name.find("model.diffusion_model.output_blocks.7.1") != std::string::npos) {`
	`1111`	`+ if (tensor_storage.name.find("model.diffusion_model.output_blocks.7.1") != std::string::npos \|\|`
	`1112`	`+ tensor_storage.name.find("unet.up_blocks.2.attentions.1") != std::string::npos) {`
`1111`	`1113`	`has_output_block_71 = true;`
`1112`	`1114`	`}`
`1113`	`1115`	`if (tensor_storage.name == "cond_stage_model.transformer.text_model.embeddings.token_embedding.weight" \|\|`
Original file line number	Diff line number	Diff line change
`@@ -1120,7 +1120,11 @@ std::string convert_tensor_name(std::string name, SDVersion version) {`
`1120`	`1120`	`for (const auto& prefix : first_stage_model_prefix_vec) {`
`1121`	`1121`	`if (starts_with(name, prefix)) {`
`1122`	`1122`	`name = convert_first_stage_model_name(name.substr(prefix.size()), prefix);`
`1123`		`- name = prefix + name;`
	`1123`	`+ if (version == VERSION_SDXS) {`
	`1124`	`+ name = "tae." + name;`
	`1125`	`+ } else {`
	`1126`	`+ name = prefix + name;`
	`1127`	`+ }`
`1124`	`1128`	`break;`
`1125`	`1129`	`}`
`1126`	`1130`	`}`