Skip to content

Commit 1b4e9be

Browse files
authored
feat: add er_sde sampler (#1403)
1 parent d73b419 commit 1b4e9be

7 files changed

Lines changed: 153 additions & 12 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ API and command-line option may change frequently.***
9797
- `DPM++ 2M`
9898
- [`DPM++ 2M v2`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions/8457)
9999
- `DPM++ 2S a`
100+
- `ER-SDE`
100101
- [`LCM`](https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/13952)
101102
- Cross-platform reproducibility
102103
- `--rng cuda`, default, consistent with the `stable-diffusion-webui GPU RNG`

examples/cli/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,15 @@ Generation Options:
114114
medium
115115
--skip-layer-start <float> SLG enabling point (default: 0.01)
116116
--skip-layer-end <float> SLG disabling point (default: 0.2)
117-
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
117+
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
118118
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
119119
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
120120
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
121121
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
122122
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
123123
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
124124
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
125-
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
125+
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
126126
--strength <float> strength for noising/unnoising (default: 0.75)
127127
--pm-style-strength <float>
128128
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
@@ -133,10 +133,10 @@ Generation Options:
133133
--disable-image-metadata do not embed generation metadata on image files
134134
-s, --seed RNG seed (default: 42, use random seed for < 0)
135135
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
136-
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
136+
tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
137137
otherwise)
138138
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
139-
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
139+
ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
140140
euler_a otherwise
141141
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
142142
kl_optimal, lcm, bong_tangent], default: discrete

examples/common/common.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,7 @@ ArgOptions SDGenerationParams::get_options() {
855855
&sample_params.guidance.slg.layer_end},
856856
{"",
857857
"--eta",
858-
"noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
858+
"noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)",
859859
&sample_params.eta},
860860
{"",
861861
"--flow-shift",
@@ -887,7 +887,7 @@ ArgOptions SDGenerationParams::get_options() {
887887
&high_noise_sample_params.guidance.slg.layer_end},
888888
{"",
889889
"--high-noise-eta",
890-
"(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)",
890+
"(high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)",
891891
&high_noise_sample_params.eta},
892892
{"",
893893
"--strength",
@@ -1185,12 +1185,12 @@ ArgOptions SDGenerationParams::get_options() {
11851185
on_seed_arg},
11861186
{"",
11871187
"--sampling-method",
1188-
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s] "
1188+
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde] "
11891189
"(default: euler for Flux/SD3/Wan, euler_a otherwise)",
11901190
on_sample_method_arg},
11911191
{"",
11921192
"--high-noise-sampling-method",
1193-
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s]"
1193+
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s, er_sde]"
11941194
" default: euler for Flux/SD3/Wan, euler_a otherwise",
11951195
on_high_noise_sample_method_arg},
11961196
{"",

examples/server/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,15 +219,15 @@ Default Generation Options:
219219
medium
220220
--skip-layer-start <float> SLG enabling point (default: 0.01)
221221
--skip-layer-end <float> SLG disabling point (default: 0.2)
222-
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
222+
--eta <float> noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
223223
--flow-shift <float> shift value for Flow models like SD3.x or WAN (default: auto)
224224
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
225225
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
226226
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
227227
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
228228
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
229229
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
230-
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a and dpm++2s_a)
230+
--high-noise-eta <float> (high noise) noise multiplier (default: 0 for ddim_trailing, tcd, res_multistep and res_2s; 1 for euler_a, er_sde and dpm++2s_a)
231231
--strength <float> strength for noising/unnoising (default: 0.75)
232232
--pm-style-strength <float>
233233
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
@@ -238,10 +238,10 @@ Default Generation Options:
238238
--disable-image-metadata do not embed generation metadata on image files
239239
-s, --seed RNG seed (default: 42, use random seed for < 0)
240240
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
241-
tcd, res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a
241+
tcd, res_multistep, res_2s, er_sde] (default: euler for Flux/SD3/Wan, euler_a
242242
otherwise)
243243
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
244-
ddim_trailing, tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan,
244+
ddim_trailing, tcd, res_multistep, res_2s, er_sde] default: euler for Flux/SD3/Wan,
245245
euler_a otherwise
246246
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
247247
kl_optimal, lcm, bong_tangent], default: discrete

include/stable-diffusion.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ enum sample_method_t {
5050
TCD_SAMPLE_METHOD,
5151
RES_MULTISTEP_SAMPLE_METHOD,
5252
RES_2S_SAMPLE_METHOD,
53+
ER_SDE_SAMPLE_METHOD,
5354
SAMPLE_METHOD_COUNT
5455
};
5556

src/denoiser.hpp

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,6 +1285,140 @@ static sd::Tensor<float> sample_res_2s(denoise_cb_t model,
12851285
return x;
12861286
}
12871287

1288+
/**
 * ER-SDE sampler with up to three stages.
 *
 * Each step calls `model` once and then advances `x` from sigmas[i] to sigmas[i + 1]:
 *   - stage 1: first-order update using the current denoised prediction;
 *   - stage 2: correction from the finite difference of the last two denoised predictions;
 *   - stage 3: correction from the finite difference of the last two stage-2 differences.
 * The correction integrals are approximated by a fixed-size Riemann sum.
 *
 * @param model            denoising callback, invoked as model(x, sigma, step) with a 1-based step;
 *                         an empty result aborts sampling.
 * @param x                initial latent; updated in place and returned.
 * @param sigmas           noise schedule (taken by value because flow schedules are clamped locally).
 * @param rng              random source used for the stochastic term.
 * @param is_flow_denoiser selects the flow parameterisation (lambda = sigma / (1 - sigma),
 *                         alpha = 1 - sigma) instead of lambda = sigma, alpha = 1.
 * @param eta              noise multiplier; values <= 0 disable the stochastic term entirely.
 * @return the final sample, or an empty tensor if `model` returned an empty tensor.
 */
static sd::Tensor<float> sample_er_sde(denoise_cb_t model,
                                       sd::Tensor<float> x,
                                       std::vector<float> sigmas,
                                       std::shared_ptr<RNG> rng,
                                       bool is_flow_denoiser,
                                       float eta) {
    constexpr int max_stage              = 3;
    constexpr int num_integration_points = 200;
    const float s_noise                  = eta;

    // Keep a flow sigma inside [1e-6, 1 - 1e-4] so that (1 - sigma) stays away from zero.
    auto er_sde_flow_sigma = [](float sigma) -> float {
        sigma = std::max(sigma, 1e-6f);
        sigma = std::min(sigma, 1.0f - 1e-4f);
        return sigma;
    };

    // Maps a sigma to the lambda used by the solver.
    auto sigma_to_er_sde_lambda = [&](float sigma) -> float {
        if (is_flow_denoiser) {
            sigma = er_sde_flow_sigma(sigma);
            return sigma / std::max(1.0f - sigma, 1e-6f);
        }
        return std::max(sigma, 1e-6f);
    };

    // Maps a sigma to the signal scale alpha.
    auto sigma_to_er_sde_alpha = [&](float sigma) -> float {
        if (is_flow_denoiser) {
            return 1.0f - er_sde_flow_sigma(sigma);
        }
        return 1.0f;
    };

    // Noise scaling function evaluated on lambda; negative inputs are treated as zero.
    auto er_sde_noise_scaler = [](float v) -> float {
        v = std::max(v, 0.0f);
        return v * (std::exp(std::pow(v, 0.3f)) + 10.0f);
    };

    if (is_flow_denoiser) {
        // Only sigmas above 1 are clamped; the final sigma is left untouched so the
        // "sigmas[i + 1] == 0" terminal check below keeps working.
        for (size_t i = 0; i + 1 < sigmas.size(); ++i) {
            if (sigmas[i] > 1.0f) {
                sigmas[i] = er_sde_flow_sigma(sigmas[i]);
            }
        }
    }

    std::vector<float> er_lambdas(sigmas.size(), 0.0f);
    for (size_t i = 0; i < sigmas.size(); ++i) {
        er_lambdas[i] = sigma_to_er_sde_lambda(sigmas[i]);
    }

    // History for the multi-stage corrections; the flags say whether the slots hold real data.
    sd::Tensor<float> old_denoised   = x;
    sd::Tensor<float> old_denoised_d = x;
    bool have_old_denoised           = false;
    bool have_old_denoised_d         = false;

    const int steps = static_cast<int>(sigmas.size()) - 1;
    for (int i = 0; i < steps; i++) {
        sd::Tensor<float> denoised = model(x, sigmas[i], i + 1);
        if (denoised.empty()) {
            return {};
        }

        const int stage_used = std::min(max_stage, i + 1);

        if (sigmas[i + 1] == 0.0f) {
            // Final step: jump straight to the model prediction.
            x = denoised;
        } else {
            const float er_lambda_s = er_lambdas[i];
            const float er_lambda_t = er_lambdas[i + 1];
            const float alpha_s     = sigma_to_er_sde_alpha(sigmas[i]);
            const float alpha_t     = sigma_to_er_sde_alpha(sigmas[i + 1]);
            const float scaled_s    = er_sde_noise_scaler(er_lambda_s);
            const float scaled_t    = er_sde_noise_scaler(er_lambda_t);
            const float r_alpha     = alpha_s > 0.0f ? alpha_t / alpha_s : 0.0f;
            const float r           = scaled_s > 0.0f ? scaled_t / scaled_s : 0.0f;

            // Stage 1: first-order update.
            x = r_alpha * r * x + alpha_t * (1.0f - r) * denoised;

            if (stage_used >= 2 && have_old_denoised) {
                const bool use_stage3        = stage_used >= 3 && have_old_denoised_d;
                const float dt               = er_lambda_t - er_lambda_s;
                const float lambda_step_size = -dt / static_cast<float>(num_integration_points);
                float s                      = 0.0f;
                float s_u                    = 0.0f;

                // Riemann sums over [er_lambda_t, er_lambda_s) for the stage-2/3 integrals.
                for (int p = 0; p < num_integration_points; ++p) {
                    const float lambda_pos = er_lambda_t + p * lambda_step_size;
                    const float scaled_pos = er_sde_noise_scaler(lambda_pos);
                    if (scaled_pos <= 0.0f) {
                        continue;  // avoid dividing by zero
                    }

                    s += 1.0f / scaled_pos;
                    if (use_stage3) {
                        s_u += (lambda_pos - er_lambda_s) / scaled_pos;
                    }
                }

                s *= lambda_step_size;

                // i >= 1 here because have_old_denoised is only set after the first iteration.
                const float denom_d = er_lambda_s - er_lambdas[i - 1];
                if (std::fabs(denom_d) > 1e-12f) {
                    // Stage 2: first finite difference of the denoised predictions.
                    const float coeff_d          = alpha_t * (dt + s * scaled_t);
                    sd::Tensor<float> denoised_d = (denoised - old_denoised) / denom_d;
                    x += coeff_d * denoised_d;

                    if (use_stage3) {
                        // i >= 2 here because stage_used >= 3.
                        const float denom_u = (er_lambda_s - er_lambdas[i - 2]) * 0.5f;
                        if (std::fabs(denom_u) > 1e-12f) {
                            // Stage 3: second finite difference.
                            s_u *= lambda_step_size;
                            const float coeff_u          = alpha_t * (0.5f * dt * dt + s_u * scaled_t);
                            sd::Tensor<float> denoised_u = (denoised_d - old_denoised_d) / denom_u;
                            x += coeff_u * denoised_u;
                        }
                    }

                    old_denoised_d      = denoised_d;
                    have_old_denoised_d = true;
                }
            }

            // Stochastic term. s_noise scales the injected noise (it is the documented
            // "noise multiplier"), rather than merely switching it on or off.
            const float noise_scale_sq = er_lambda_t * er_lambda_t - er_lambda_s * er_lambda_s * r * r;
            if (s_noise > 0.0f && noise_scale_sq > 0.0f) {
                const float noise_scale = alpha_t * s_noise * std::sqrt(noise_scale_sq);
                x += sd::Tensor<float>::randn_like(x, rng) * noise_scale;
            }
        }

        old_denoised      = denoised;
        have_old_denoised = true;
    }
    return x;
}
1421+
12881422
static sd::Tensor<float> sample_ddim_trailing(denoise_cb_t model,
12891423
sd::Tensor<float> x,
12901424
const std::vector<float>& sigmas,
@@ -1446,6 +1580,8 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
14461580
return sample_res_multistep(model, std::move(x), sigmas, rng, eta);
14471581
case RES_2S_SAMPLE_METHOD:
14481582
return sample_res_2s(model, std::move(x), sigmas, rng, eta);
1583+
case ER_SDE_SAMPLE_METHOD:
1584+
return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
14491585
case DDIM_TRAILING_SAMPLE_METHOD:
14501586
return sample_ddim_trailing(model, std::move(x), sigmas, rng, eta);
14511587
case TCD_SAMPLE_METHOD:

src/stable-diffusion.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ const char* sampling_methods_str[] = {
7171
"TCD",
7272
"Res Multistep",
7373
"Res 2s",
74+
"ER-SDE",
7475
};
7576

7677
/*================================================== Helper Functions ================================================*/
@@ -1991,6 +1992,7 @@ const char* sample_method_to_str[] = {
19911992
"tcd",
19921993
"res_multistep",
19931994
"res_2s",
1995+
"er_sde",
19941996
};
19951997

19961998
const char* sd_sample_method_name(enum sample_method_t sample_method) {
@@ -2473,6 +2475,7 @@ static float resolve_eta(sd_ctx_t* sd_ctx,
24732475
return 0.0f;
24742476
case EULER_A_SAMPLE_METHOD:
24752477
case DPMPP2S_A_SAMPLE_METHOD:
2478+
case ER_SDE_SAMPLE_METHOD:
24762479
return 1.0f;
24772480
default:;
24782481
}

0 commit comments

Comments
 (0)