feat: add er_sde sampler (#1403)

rmatif · web-flow · commit 1b4e9be64366 · 2026-04-17T01:32:16.000+08:00
diff --git a/README.md b/README.md
@@ -97,6 +97,7 @@ API and command-line option may change frequently.***
     - `DPM++ 2M`
     - [`DPM++ 2M v2`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457)
     - `DPM++ 2S a`
+    - `ER-SDE`
     - [`LCM`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/13952)
 - Cross-platform reproducibility
     - `--rng cuda`, default, consistent with the `stable-diffusion-webui GPU RNG`
diff --git a/examples/cli/README.md b/examples/cli/README.md
@@ -114,15 +114,15 @@ Generation Options:
                                            medium
   --skip-layer-start <float>               SLG enabling point (default: 0.01)
   --skip-layer-end <float>                 SLG disabling point (default: 0.2)
-  --eta <float>                            noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
+  --eta <float>                            noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
   --flow-shift <float>                     shift value for Flow models like SD3.x or WAN (default: auto)
   --high-noise-cfg-scale <float>           (high noise) unconditional guidance scale: (default: 7.0)
   --high-noise-img-cfg-scale <float>       (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
   --high-noise-guidance <float>            (high noise) distilled guidance scale for models with guidance input (default: 3.5)
   --high-noise-slg-scale <float>           (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
   --high-noise-skip-layer-start <float>    (high noise) SLG enabling point (default: 0.01)
   --high-noise-skip-layer-end <float>      (high noise) SLG disabling point (default: 0.2)
-  --high-noise-eta <float>                 (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
+  --high-noise-eta <float>                 (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
   --strength <float>                       strength for noising/unnoising (default: 0.75)
   --pm-style-strength <float>
   --control-strength <float>               strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
@@ -133,10 +133,10 @@ Generation Options:
   --disable-image-metadata                 do not embed generation metadata on image files
   -s, --seed                               RNG seed (default: 42, use random seed for < 0)
   --sampling-method                        sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
-                                           tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
+                                           tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
                                            otherwise)
   --high-noise-sampling-method             (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
-                                           ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
+                                           ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
                                            euler_a otherwise
   --scheduler                              denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
                                            kl_optimal, lcm, bong_tangent], default: discrete
diff --git a/examples/common/common.cpp b/examples/common/common.cpp
@@ -855,7 +855,7 @@ ArgOptions SDGenerationParams::get_options() {
          &sample_params.guidance.slg.layer_end},
         {"",
          "--eta",
-         "noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
+         "noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)",
          &sample_params.eta},
         {"",
          "--flow-shift",
@@ -887,7 +887,7 @@ ArgOptions SDGenerationParams::get_options() {
          &high_noise_sample_params.guidance.slg.layer_end},
         {"",
          "--high-noise-eta",
-         "(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
+         "(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)",
          &high_noise_sample_params.eta},
         {"",
          "--strength",
@@ -1185,12 +1185,12 @@ ArgOptions SDGenerationParams::get_options() {
          on_seed_arg},
         {"",
          "--sampling-method",
-         "sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s] "
+         "sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde] "
          "(default: euler for Flux/SD3/Wan, euler_a otherwise)",
          on_sample_method_arg},
         {"",
          "--high-noise-sampling-method",
-         "(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s]"
+         "(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde]"
          " default: euler for Flux/SD3/Wan, euler_a otherwise",
          on_high_noise_sample_method_arg},
         {"",
diff --git a/examples/server/README.md b/examples/server/README.md
@@ -219,15 +219,15 @@ Default Generation Options:
                                            medium
   --skip-layer-start <float>               SLG enabling point (default: 0.01)
   --skip-layer-end <float>                 SLG disabling point (default: 0.2)
-  --eta <float>                            noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
+  --eta <float>                            noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
   --flow-shift <float>                     shift value for Flow models like SD3.x or WAN (default: auto)
   --high-noise-cfg-scale <float>           (high noise) unconditional guidance scale: (default: 7.0)
   --high-noise-img-cfg-scale <float>       (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
   --high-noise-guidance <float>            (high noise) distilled guidance scale for models with guidance input (default: 3.5)
   --high-noise-slg-scale <float>           (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
   --high-noise-skip-layer-start <float>    (high noise) SLG enabling point (default: 0.01)
   --high-noise-skip-layer-end <float>      (high noise) SLG disabling point (default: 0.2)
-  --high-noise-eta <float>                 (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
+  --high-noise-eta <float>                 (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
   --strength <float>                       strength for noising/unnoising (default: 0.75)
   --pm-style-strength <float>
   --control-strength <float>               strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
@@ -238,10 +238,10 @@ Default Generation Options:
   --disable-image-metadata                 do not embed generation metadata on image files
   -s, --seed                               RNG seed (default: 42, use random seed for < 0)
   --sampling-method                        sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
-                                           tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
+                                           tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
                                            otherwise)
   --high-noise-sampling-method             (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
-                                           ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
+                                           ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
                                            euler_a otherwise
   --scheduler                              denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
                                            kl_optimal, lcm, bong_tangent], default: discrete
diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h
@@ -50,6 +50,7 @@ enum sample_method_t {
     TCD_SAMPLE_METHOD,
     RES_MULTISTEP_SAMPLE_METHOD,
     RES_2S_SAMPLE_METHOD,
+    ER_SDE_SAMPLE_METHOD,
     SAMPLE_METHOD_COUNT
 };
 
diff --git a/src/denoiser.hpp b/src/denoiser.hpp
@@ -1285,6 +1285,140 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
     return x;
 }
 
+// ER-SDE sampler: multistep solver (up to 3 stages) formulated in er_lambda = sigma / alpha.
+// `eta` is the noise multiplier of the stochastic term (0 disables noise); `is_flow_denoiser`
+// selects alpha = 1 - sigma instead of alpha = 1. Returns an empty tensor if `model` returns one.
+static sd::Tensor<float> sample_er_sde(denoise_cb_t model,
+                                       sd::Tensor<float> x,
+                                       std::vector<float> sigmas,
+                                       std::shared_ptr<RNG> rng,
+                                       bool is_flow_denoiser,
+                                       float eta) {
+    constexpr int max_stage              = 3;
+    constexpr int num_integration_points = 200;
+
+    // Clamp a flow sigma to [1e-6, 1 - 1e-4] so that alpha = 1 - sigma stays strictly positive.
+    auto er_sde_flow_sigma = [](float sigma) -> float {
+        return std::min(std::max(sigma, 1e-6f), 1.0f - 1e-4f);
+    };
+
+    auto sigma_to_er_sde_lambda = [&](float sigma, bool is_flow_denoiser) -> float {
+        if (is_flow_denoiser) {
+            sigma = er_sde_flow_sigma(sigma);
+            return sigma / std::max(1.0f - sigma, 1e-6f);
+        }
+        return std::max(sigma, 1e-6f);
+    };
+
+    auto sigma_to_er_sde_alpha = [&](float sigma, bool is_flow_denoiser) -> float {
+        if (is_flow_denoiser) {
+            sigma = er_sde_flow_sigma(sigma);
+            return 1.0f - sigma;
+        }
+        return 1.0f;
+    };
+
+    auto er_sde_noise_scaler = [](float x) -> float {
+        x = std::max(x, 0.0f);
+        return x * (std::exp(std::pow(x, 0.3f)) + 10.0f);
+    };
+    // Pull flow sigmas above 1 back into range; the last schedule entry is left untouched.
+    if (is_flow_denoiser) {
+        for (size_t i = 0; i + 1 < sigmas.size(); ++i) {
+            if (sigmas[i] > 1.0f) {
+                sigmas[i] = er_sde_flow_sigma(sigmas[i]);
+            }
+        }
+    }
+
+    std::vector<float> er_lambdas(sigmas.size(), 0.0f);
+    for (size_t i = 0; i < sigmas.size(); ++i) {
+        er_lambdas[i] = sigma_to_er_sde_lambda(sigmas[i], is_flow_denoiser);
+    }
+
+    sd::Tensor<float> old_denoised   = x;
+    sd::Tensor<float> old_denoised_d = x;
+    bool have_old_denoised           = false;
+    bool have_old_denoised_d         = false;
+
+    int steps = static_cast<int>(sigmas.size()) - 1;
+    for (int i = 0; i < steps; i++) {
+        sd::Tensor<float> denoised = model(x, sigmas[i], i + 1);
+        if (denoised.empty()) {
+            return {};
+        }
+
+        int stage_used = std::min(max_stage, i + 1);
+
+        if (sigmas[i + 1] == 0.0f) {
+            x = denoised;
+        } else {
+            float er_lambda_s = er_lambdas[i];
+            float er_lambda_t = er_lambdas[i + 1];
+            float alpha_s     = sigma_to_er_sde_alpha(sigmas[i], is_flow_denoiser);
+            float alpha_t     = sigma_to_er_sde_alpha(sigmas[i + 1], is_flow_denoiser);
+            float scaled_s    = er_sde_noise_scaler(er_lambda_s);
+            float scaled_t    = er_sde_noise_scaler(er_lambda_t);
+            float r_alpha     = alpha_s > 0.0f ? alpha_t / alpha_s : 0.0f;
+            float r           = scaled_s > 0.0f ? scaled_t / scaled_s : 0.0f;
+            // Stage 1: first-order update.
+            x = r_alpha * r * x + alpha_t * (1.0f - r) * denoised;
+            // Stages 2 and 3: finite-difference corrections built from earlier denoised outputs.
+            if (stage_used >= 2 && have_old_denoised) {
+                float dt               = er_lambda_t - er_lambda_s;
+                float lambda_step_size = -dt / static_cast<float>(num_integration_points);
+                float s                = 0.0f;
+                float s_u              = 0.0f;
+                // Riemann sums sampled from er_lambda_t towards er_lambda_s; s_u only feeds stage 3.
+                for (int p = 0; p < num_integration_points; ++p) {
+                    float lambda_pos = er_lambda_t + p * lambda_step_size;
+                    float scaled_pos = er_sde_noise_scaler(lambda_pos);
+                    if (scaled_pos <= 0.0f) {
+                        continue;
+                    }
+
+                    s += 1.0f / scaled_pos;
+                    if (stage_used >= 3 && have_old_denoised_d) {
+                        s_u += (lambda_pos - er_lambda_s) / scaled_pos;
+                    }
+                }
+
+                s *= lambda_step_size;
+
+                float denom_d = er_lambda_s - er_lambdas[i - 1];
+                if (std::fabs(denom_d) > 1e-12f) {
+                    float coeff_d                = alpha_t * (dt + s * scaled_t);
+                    sd::Tensor<float> denoised_d = (denoised - old_denoised) / denom_d;
+                    x += coeff_d * denoised_d;
+
+                    if (stage_used >= 3 && have_old_denoised_d) {
+                        float denom_u = (er_lambda_s - er_lambdas[i - 2]) * 0.5f;
+                        if (std::fabs(denom_u) > 1e-12f) {
+                            s_u *= lambda_step_size;
+                            float coeff_u                = alpha_t * (0.5f * dt * dt + s_u * scaled_t);
+                            sd::Tensor<float> denoised_u = (denoised_d - old_denoised_d) / denom_u;
+                            x += coeff_u * denoised_u;
+                        }
+                    }
+
+                    old_denoised_d      = denoised_d;
+                    have_old_denoised_d = true;
+                }
+            }
+            // Stochastic term: eta scales the injected noise instead of merely switching it on.
+            float noise_scale_sq = er_lambda_t * er_lambda_t - er_lambda_s * er_lambda_s * r * r;
+            if (eta > 0.0f && noise_scale_sq > 0.0f) {
+                float noise_scale = alpha_t * eta * std::sqrt(noise_scale_sq);
+                x += sd::Tensor<float>::randn_like(x, rng) * noise_scale;
+            }
+        }
+
+        old_denoised      = denoised;
+        have_old_denoised = true;
+    }
+    return x;
+}
+
 static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
                                               sd::Tensor<float> x,
                                               const std::vector<float>& sigmas,
@@ -1446,6 +1580,8 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
             return sample_res_multistep(model, std::move(x), sigmas, rng, eta);
         case RES_2S_SAMPLE_METHOD:
             return sample_res_2s(model, std::move(x), sigmas, rng, eta);
+        case ER_SDE_SAMPLE_METHOD:
+            return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
         case DDIM_TRAILING_SAMPLE_METHOD:
             return sample_ddim_trailing(model, std::move(x), sigmas, rng, eta);
         case TCD_SAMPLE_METHOD:
diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
@@ -71,6 +71,7 @@ const char* sampling_methods_str[] = {
     "TCD",
     "Res Multistep",
     "Res 2s",
+    "ER-SDE",
 };
 
 /*================================================== Helper Functions ================================================*/
@@ -1991,6 +1992,7 @@ const char* sample_method_to_str[] = {
     "tcd",
     "res_multistep",
     "res_2s",
+    "er_sde",
 };
 
 const char* sd_sample_method_name(enum sample_method_t sample_method) {
@@ -2473,6 +2475,7 @@ static float resolve_eta(sd_ctx_t* sd_ctx,
                 return 0.0f;
             case EULER_A_SAMPLE_METHOD:
             case DPMPP2S_A_SAMPLE_METHOD:
+            case ER_SDE_SAMPLE_METHOD:
                 return 1.0f;
             default:;
         }