Skip to content

Commit 985fa1f

Browse files
committed
inline frequently used color utilities (up to 15% faster)
* moving color_blend, color_add, and color_fade to a separate file, to allow the compiler to inline the functions. * inlining slightly increases firmware size - original non-inline functions get used on 8266, or when WLEDMM_SAVE_FLASH is defined.
1 parent 0eec8e4 commit 985fa1f

4 files changed

Lines changed: 188 additions & 2 deletions

File tree

wled00/colorTools.hpp

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
#pragma once
2+
#if defined(ARDUINO_ARCH_ESP32) && defined(WLEDMM_FASTPATH) && !defined(WLEDMM_SAVE_FLASH)
3+
4+
#include "wled.h"
5+
/*
6+
* Color conversion & utility methods - moved here, so the compiler may inline these functions (up to 20% faster)
7+
*/
8+
9+
// WLEDMM make sure that color macros are always defined
10+
#if !defined(RGBW32)
11+
#define RGBW32(r,g,b,w) (uint32_t((byte(w) << 24) | (byte(r) << 16) | (byte(g) << 8) | (byte(b))))
12+
#endif
13+
#if !defined(W) && !defined(R)
14+
#define R(c) (byte((c) >> 16))
15+
#define G(c) (byte((c) >> 8))
16+
#define B(c) (byte(c))
17+
#define W(c) (byte((c) >> 24))
18+
#endif
19+
20+
#if !defined(FASTLED_VERSION) // pull in FastLED if we don't have it yet (we need the CRGB type)
21+
#define FASTLED_INTERNAL
22+
#include <FastLED.h>
23+
#endif
24+
25+
/*
26+
* color blend function (from colors.cpp)
27+
*/
28+
inline __attribute__((hot,const)) uint32_t color_blend(uint32_t color1, uint32_t color2, uint_fast16_t blend, bool b16=false) {
29+
if ((color1 == color2) || (blend == 0)) return color1; // WLEDMM
30+
const uint_fast16_t blendmax = b16 ? 0xFFFF : 0xFF;
31+
if(blend >= blendmax) return color2;
32+
const uint_fast8_t shift = b16 ? 16 : 8;
33+
34+
uint16_t w1 = W(color1); // WLEDMM 16bit to make sure the compiler uses 32bit (not 64bit) for the math
35+
uint16_t r1 = R(color1);
36+
uint16_t g1 = G(color1);
37+
uint16_t b1 = B(color1);
38+
39+
uint16_t w2 = W(color2);
40+
uint16_t r2 = R(color2);
41+
uint16_t g2 = G(color2);
42+
uint16_t b2 = B(color2);
43+
44+
if (b16 == false) {
45+
// WLEDMM based on fastled blend8() - better accuracy for 8bit
46+
uint8_t w3 = (w1+w2 == 0) ? 0 : (((w1 << 8)|w2) + (w2 * blend) - (w1*blend) ) >> 8;
47+
uint8_t r3 = (((r1 << 8)|r2) + (r2 * blend) - (r1*blend) ) >> 8;
48+
uint8_t g3 = (((g1 << 8)|g2) + (g2 * blend) - (g1*blend) ) >> 8;
49+
uint8_t b3 = (((b1 << 8)|b2) + (b2 * blend) - (b1*blend) ) >> 8;
50+
return RGBW32(r3, g3, b3, w3);
51+
} else {
52+
// old code has lots of "jumps" due to rounding errors
53+
const uint_fast16_t blend2 = blendmax - blend; // WLEDMM pre-calculate value
54+
uint32_t w3 = ((w2 * blend) + (w1 * blend2)) >> shift;
55+
uint32_t r3 = ((r2 * blend) + (r1 * blend2)) >> shift;
56+
uint32_t g3 = ((g2 * blend) + (g1 * blend2)) >> shift;
57+
uint32_t b3 = ((b2 * blend) + (b1 * blend2)) >> shift;
58+
return RGBW32(r3, g3, b3, w3);
59+
}
60+
}
61+
62+
/*
63+
* color add function that preserves ratio (from colors.cpp)
64+
* idea: https://github.com/Aircoookie/WLED/pull/2465 by https://github.com/Proto-molecule
65+
*/
66+
67+
inline __attribute__((hot,const)) uint32_t color_add(uint32_t c1, uint32_t c2, bool fast=false)
68+
{
69+
if (c2 == 0) return c1; // WLEDMM shortcut
70+
if (c1 == 0) return c2; // WLEDMM shortcut
71+
72+
if (fast) {
73+
uint8_t r = R(c1);
74+
uint8_t g = G(c1);
75+
uint8_t b = B(c1);
76+
uint8_t w = W(c1);
77+
r = qadd8(r, R(c2));
78+
g = qadd8(g, G(c2));
79+
b = qadd8(b, B(c2));
80+
w = qadd8(w, W(c2));
81+
return RGBW32(r,g,b,w);
82+
} else {
83+
uint32_t r = R(c1) + R(c2);
84+
uint32_t g = G(c1) + G(c2);
85+
uint32_t b = B(c1) + B(c2);
86+
uint32_t w = W(c1) + W(c2);
87+
uint32_t max = r;
88+
if (g > max) max = g;
89+
if (b > max) max = b;
90+
if (w > max) max = w;
91+
if (max < 256) return RGBW32(r, g, b, w);
92+
else return RGBW32(r * 255 / max, g * 255 / max, b * 255 / max, w * 255 / max);
93+
}
94+
}
95+
96+
/*
97+
* fades color toward black (from colors.cpp)
98+
* if using "video" method the resulting color will never become black unless it is already black
99+
*/
100+
101+
inline __attribute__((hot,const)) uint32_t color_fade(uint32_t c1, uint8_t amount, bool video=false)
102+
{
103+
if (amount == 255) return c1; // WLEDMM small optimization - plus it avoids over-fading in "video" mode
104+
if (amount == 0) return 0; // WLEDMM shortcut
105+
106+
uint32_t scaledcolor = 0; // color order is: W R G B from MSB to LSB
107+
uint16_t w = W(c1); // WLEDMM 16bit to make sure the compiler uses 32bit (not 64bit) for the math
108+
uint16_t r = R(c1);
109+
uint16_t g = G(c1);
110+
uint16_t b = B(c1);
111+
if (video) {
112+
uint16_t scale = amount; // 32bit for faster calculation
113+
// bugfix: doing "+1" after shifting is obviously wrong
114+
// optimization: ((r && scale) ? 1 : 0) can be simplified to "if (r > 0) +1" ; if we arive here, then scale != 0 and scale < 255
115+
if (w>0) scaledcolor |= (((w * scale) >> 8) +1) << 24; // WLEDMM small speedup when no white channel
116+
if (r>0) scaledcolor |= (((r * scale) >> 8) +1) << 16;
117+
if (g>0) scaledcolor |= (((g * scale) >> 8) +1) << 8;
118+
if (b>0) scaledcolor |= ((b * scale) >> 8) +1;
119+
return scaledcolor;
120+
}
121+
else {
122+
uint16_t scale = 1 + amount;
123+
if (w>0) scaledcolor |= ((w * scale) >> 8) << 24; // WLEDMM small speedup when no white channel
124+
scaledcolor |= ((r * scale) >> 8) << 16;
125+
scaledcolor |= (g * scale) & 0x0000FF00; // WLEDMM faster than right-left shift "" >>8 ) <<8"
126+
scaledcolor |= (b * scale) >> 8;
127+
return scaledcolor;
128+
}
129+
}
130+
131+
//scales the brightness with the briMultiplier factor (from led.cpp)
132+
extern uint_fast16_t briMultiplier; // defined in wled.h
133+
inline __attribute__((hot,const)) byte scaledBri(byte in) // WLEDMM added IRAM_ATTR_YN
134+
{
135+
if (briMultiplier == 100) return(in); // WLEDMM shortcut
136+
uint_fast16_t val = ((uint_fast16_t)in*(uint_fast16_t)briMultiplier)/100; // WLEDMM
137+
if (val > 255) val = 255;
138+
return (byte)val;
139+
}
140+
141+
//
142+
// overwrite FastLed colorFromPalette with an optimized version created by dedehai (https://github.com/Aircoookie/WLED/pull/4138)
143+
//
144+
// 1:1 replacement of fastled function optimized for ESP, slightly faster, more accurate and uses less flash (~ -200bytes)
145+
// WLEDMM: converted to inline
146+
#undef ColorFromPalette // overwrite any existing override
147+
inline __attribute__((hot)) CRGB ColorFromPaletteWLED(const CRGBPalette16& pal, unsigned index, uint8_t brightness=255, TBlendType blendType=LINEARBLEND)
148+
{
149+
if (blendType == LINEARBLEND_NOWRAP) {
150+
index = (index*240) >> 8; // Blend range is affected by lo4 blend of values, remap to avoid wrapping
151+
}
152+
unsigned hi4 = byte(index) >> 4;
153+
const CRGB* entry = (CRGB*)((uint8_t*)(&(pal[0])) + (hi4 * sizeof(CRGB)));
154+
unsigned red1 = entry->r;
155+
unsigned green1 = entry->g;
156+
unsigned blue1 = entry->b;
157+
if (blendType != NOBLEND) {
158+
if (hi4 == 15) entry = &(pal[0]);
159+
else ++entry;
160+
unsigned f2 = ((index & 0x0F) << 4) + 1; // +1 so we scale by 256 as a max value, then result can just be shifted by 8
161+
unsigned f1 = (257 - f2); // f2 is 1 minimum, so this is 256 max
162+
red1 = (red1 * f1 + (unsigned)entry->r * f2) >> 8;
163+
green1 = (green1 * f1 + (unsigned)entry->g * f2) >> 8;
164+
blue1 = (blue1 * f1 + (unsigned)entry->b * f2) >> 8;
165+
}
166+
if (brightness < 255) { // note: zero checking could be done to return black but that is hardly ever used so it is omitted
167+
uint32_t scale = brightness + 1; // adjust for rounding (bitshift)
168+
red1 = (red1 * scale) >> 8;
169+
green1 = (green1 * scale) >> 8;
170+
blue1 = (blue1 * scale) >> 8;
171+
}
172+
return RGBW32(red1,green1,blue1,0);
173+
}
174+
#define ColorFromPalette ColorFromPaletteWLED // override fastled function
175+
176+
#endif

wled00/colors.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "wled.h"
22

3+
#if !defined(ARDUINO_ARCH_ESP32) || !defined(WLEDMM_FASTPATH) || defined(WLEDMM_SAVE_FLASH) // WLEDMM: color utils moved into colorTools.hpp, so compiler can inline calls (up to 12% faster)
34
/*
45
* Color conversion & utility methods
56
*/
@@ -108,6 +109,7 @@ IRAM_ATTR_YN __attribute__((hot)) uint32_t color_fade(uint32_t c1, uint8_t amoun
108109
return scaledcolor;
109110
}
110111
}
112+
#endif
111113

112114
void setRandomColor(byte* rgb)
113115
{

wled00/fcn_declare.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,14 @@ bool getJsonValue(const JsonVariant& element, DestType& destination, const Defau
5050

5151

5252
//colors.cpp
53+
#if !defined(ARDUINO_ARCH_ESP32) || !defined(WLEDMM_FASTPATH) || defined(WLEDMM_SAVE_FLASH) // WLEDMM: color utils moved into colorTools.hpp, so the compiler may inline these functions (faster)
5354
uint32_t __attribute__((const)) color_blend(uint32_t,uint32_t,uint_fast16_t,bool b16=false); // WLEDMM: added attribute const
5455
uint32_t __attribute__((const)) color_add(uint32_t,uint32_t, bool fast=false); // WLEDMM: added attribute const
5556
uint32_t __attribute__((const)) color_fade(uint32_t c1, uint8_t amount, bool video=false);
57+
#else
58+
#include "colorTools.hpp"
59+
#endif
60+
5661
// Packs the four bytes rgbw[0..3] = {R, G, B, W} into one 0xWWRRGGBB color value.
inline uint32_t colorFromRgbw(byte* rgbw) {
  const byte red   = rgbw[0];
  const byte green = rgbw[1];
  const byte blue  = rgbw[2];
  const byte white = rgbw[3];
  return uint32_t((white << 24) | (red << 16) | (green << 8) | blue);
}
5762
void colorHStoRGB(uint16_t hue, byte sat, byte* rgb); //hue, sat to rgb
5863
void colorKtoRGB(uint16_t kelvin, byte* rgb);
@@ -179,7 +184,10 @@ void stateUpdated(byte callMode);
179184
void updateInterfaces(uint8_t callMode);
180185
void handleTransitions();
181186
void handleNightlight();
187+
188+
#if !defined(ARDUINO_ARCH_ESP32) || !defined(WLEDMM_FASTPATH) || defined(WLEDMM_SAVE_FLASH) // WLEDMM: color utils moved into colorTools.hpp, so compiler can inline calls (up to 12% faster)
182189
byte __attribute__((pure)) scaledBri(byte in); // WLEDMM: added attribute pure
190+
#endif
183191

184192
#ifdef WLED_ENABLE_LOXONE
185193
//lx_parser.cpp

wled00/led.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ void toggleOnOff()
6767
stateChanged = true;
6868
}
6969

70-
70+
#if !defined(ARDUINO_ARCH_ESP32) || !defined(WLEDMM_FASTPATH) || defined(WLEDMM_SAVE_FLASH) // WLEDMM color utils moved into colorTools.hpp for performance reasons
7171
//scales the brightness with the briMultiplier factor
7272
IRAM_ATTR_YN __attribute__((hot)) byte scaledBri(byte in) // WLEDMM added IRAM_ATTR_YN
7373
{
@@ -76,7 +76,7 @@ IRAM_ATTR_YN __attribute__((hot)) byte scaledBri(byte in) // WLEDMM added IRAM_
7676
if (val > 255) val = 255;
7777
return (byte)val;
7878
}
79-
79+
#endif
8080

8181
//applies global brightness
8282
void applyBri() {

0 commit comments

Comments
 (0)