Skip to content

Commit 40718ac

Browse files
authored
Merge pull request #364 from melloware/O363-CVE
Fix #363: CVE-2025-66021
2 parents 742cbc0 + 4149cf0 commit 40718ac

4 files changed

Lines changed: 322 additions & 3 deletions

File tree

owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java

Lines changed: 163 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,170 @@ public void closeDocument() {
9494

9595
public void text(String textChunk) {
9696
if (!skipText) {
97-
out.text(textChunk);
97+
// Check if we're inside a CDATA element (style/script) with allowTextIn
98+
// where tags are reclassified as UNESCAPED text and need to be validated
99+
// Note: Only style and script are CDATA elements; noscript/noembed/noframes are PCDATA
100+
boolean insideCdataElement = false;
101+
for (int i = openElementStack.size() - 1; i >= 0; i -= 2) {
102+
String adjustedName = openElementStack.get(i);
103+
if (adjustedName != null
104+
&& allowedTextContainers.contains(adjustedName)
105+
&& ("style".equals(adjustedName) || "script".equals(adjustedName))) {
106+
insideCdataElement = true;
107+
break;
108+
}
109+
}
110+
111+
// If inside a CDATA element (style/script) with allowTextIn, we need to filter out
112+
// HTML tags that aren't allowed because tags inside these blocks are reclassified
113+
// as UNESCAPED text by the lexer
114+
if (insideCdataElement && textChunk != null && textChunk.indexOf('<') >= 0) {
115+
// Strip out HTML tags that aren't in the allowed elements list
116+
String filtered = stripDisallowedTags(textChunk);
117+
out.text(filtered);
118+
} else {
119+
out.text(textChunk);
120+
}
121+
}
122+
}
123+
124+
/**
125+
* Strips out HTML tags that aren't in the allowed elements list from text content.
126+
* This is used when tags appear inside text containers (like style blocks) where
127+
* they're treated as text but should still be validated.
128+
*/
129+
private String stripDisallowedTags(String text) {
130+
if (text == null) {
131+
return text;
132+
}
133+
134+
StringBuilder result = new StringBuilder();
135+
int len = text.length();
136+
int i = 0;
137+
138+
while (i < len) {
139+
int tagStart = text.indexOf('<', i);
140+
if (tagStart < 0) {
141+
// No more tags, append the rest
142+
result.append(text.substring(i));
143+
break;
144+
}
145+
146+
// Append text before the tag
147+
if (tagStart > i) {
148+
result.append(text.substring(i, tagStart));
149+
}
150+
151+
// Find the end of the tag (either '>' or end of string)
152+
int tagEnd = text.indexOf('>', tagStart + 1);
153+
if (tagEnd < 0) {
154+
// Unclosed tag, skip it
155+
i = tagStart + 1;
156+
continue;
157+
}
158+
159+
// Extract the tag content (between < and >)
160+
String tagContent = text.substring(tagStart + 1, tagEnd);
161+
162+
// Only process if this looks like a valid HTML element tag
163+
// Valid tags start with a letter or / followed by a letter
164+
// Skip things like <, </>, <3, etc.
165+
// Also handle tags with leading whitespace like < script>
166+
boolean isValidTag = false;
167+
String tagName = null;
168+
169+
// Trim leading whitespace for tag name detection
170+
String trimmedTagContent = tagContent.trim();
171+
172+
if (trimmedTagContent.startsWith("/")) {
173+
// Closing tag - must have / followed by a letter
174+
if (trimmedTagContent.length() > 1) {
175+
char firstChar = trimmedTagContent.charAt(1);
176+
if (Character.isLetter(firstChar)) {
177+
isValidTag = true;
178+
tagName = trimmedTagContent.substring(1).trim().split("\\s")[0];
179+
tagName = HtmlLexer.canonicalElementName(tagName);
180+
}
181+
}
182+
} else {
183+
// Opening tag - must start with a letter (after trimming whitespace)
184+
if (trimmedTagContent.length() > 0) {
185+
char firstChar = trimmedTagContent.charAt(0);
186+
if (Character.isLetter(firstChar)) {
187+
isValidTag = true;
188+
tagName = trimmedTagContent.split("\\s")[0];
189+
tagName = HtmlLexer.canonicalElementName(tagName);
190+
}
191+
}
192+
}
193+
194+
if (!isValidTag) {
195+
// Not a valid HTML tag, just append it as-is
196+
result.append('<').append(tagContent).append('>');
197+
i = tagEnd + 1;
198+
continue;
199+
}
200+
201+
// Check if it's a closing tag
202+
if (tagContent.startsWith("/")) {
203+
// Only allow closing tags if the element is allowed
204+
if (elAndAttrPolicies.containsKey(tagName)) {
205+
result.append('<').append(tagContent).append('>');
206+
}
207+
// Otherwise skip the closing tag
208+
i = tagEnd + 1;
209+
} else {
210+
// Opening tag - only allow tags if the element is in the allowed list
211+
if (elAndAttrPolicies.containsKey(tagName)) {
212+
result.append('<').append(tagContent).append('>');
213+
i = tagEnd + 1;
214+
} else {
215+
// Skip disallowed tag and its content until matching closing tag
216+
i = tagEnd + 1;
217+
// Track nesting level to find the matching closing tag
218+
int nestingLevel = 1;
219+
while (i < len && nestingLevel > 0) {
220+
int nextTagStart = text.indexOf('<', i);
221+
if (nextTagStart < 0) {
222+
// No more tags, skip to end
223+
i = len;
224+
break;
225+
}
226+
int nextTagEnd = text.indexOf('>', nextTagStart + 1);
227+
if (nextTagEnd < 0) {
228+
// Unclosed tag, skip to end
229+
i = len;
230+
break;
231+
}
232+
String nextTagContent = text.substring(nextTagStart + 1, nextTagEnd);
233+
String trimmedNextTagContent = nextTagContent.trim();
234+
String nextTagName = trimmedNextTagContent.split("\\s")[0];
235+
if (trimmedNextTagContent.startsWith("/")) {
236+
// Closing tag
237+
nextTagName = nextTagName.substring(1);
238+
nextTagName = HtmlLexer.canonicalElementName(nextTagName);
239+
if (nextTagName.equals(tagName)) {
240+
nestingLevel--;
241+
if (nestingLevel == 0) {
242+
// Found matching closing tag, skip it and continue
243+
i = nextTagEnd + 1;
244+
break;
245+
}
246+
}
247+
} else {
248+
// Opening tag
249+
nextTagName = HtmlLexer.canonicalElementName(nextTagName);
250+
if (nextTagName.equals(tagName)) {
251+
nestingLevel++;
252+
}
253+
}
254+
i = nextTagEnd + 1;
255+
}
256+
}
257+
}
98258
}
259+
260+
return result.toString();
99261
}
100262

101263
public void openTag(String elementName, List<String> attrs) {

owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,19 @@ public class HtmlLexerTest extends TestCase {
4646
public final void testHtmlLexer() throws Exception {
4747
// Do the lexing.
4848
String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8);
49+
// Normalize line endings in input to handle Windows/Unix differences
50+
input = input.replace("\r\n", "\n").replace("\r", "\n");
4951
StringBuilder actual = new StringBuilder();
5052
lex(input, actual);
5153

5254
// Get the golden.
5355
String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8);
56+
// Normalize line endings to handle Windows/Unix differences
57+
golden = golden.replace("\r\n", "\n").replace("\r", "\n");
58+
String actualStr = actual.toString().replace("\r\n", "\n").replace("\r", "\n");
5459

5560
// Compare.
56-
assertEquals(golden, actual.toString());
61+
assertEquals(golden, actualStr);
5762
}
5863

5964
@Test

owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,4 +162,4 @@ public void run() {
162162
}
163163
}
164164

165-
}
165+
}

owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,158 @@ public static final void testStylingCornerCase() {
454454
assertEquals(want, sanitize(input));
455455
}
456456

457+
/**
458+
* These 6 tests cover regression scenarios for CVE-2025-66021, which relates to
459+
* improper sanitization of HTML content involving <style> and <noscript> tags.
460+
* The tests ensure that HTMLSanitizer:
461+
* - properly closes any opened elements,
462+
* - only allows allowed elements inside <style> blocks,
463+
* - prevents injection of forbidden HTML or scripts within style or noscript,
464+
* - does not allow unexpected element escape or context breaking.
465+
*/
466+
467+
/**
468+
* Test #1:
469+
* Verify that unallowed elements (<div>) injected inside <style> are removed,
470+
* and only allowed content (CSS and allowed elements) remain.
471+
*/
472+
@Test
473+
public static final void testCVE202566021_1() {
474+
// Arrange: Attempt to inject a <div> inside <style>. Only 'style' and 'noscript' are allowed.
475+
String actualPayload = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"evil\">XSS?</div></style></noscript>";
476+
String expectedPayload = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
477+
478+
HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
479+
PolicyFactory policy = htmlPolicyBuilder
480+
.allowElements("style", "noscript")
481+
.allowTextIn("style")
482+
.toFactory();
483+
484+
// Act
485+
String sanitized = policy.sanitize(actualPayload);
486+
487+
// Assert
488+
assertEquals(expectedPayload, sanitized);
489+
}
490+
491+
/**
492+
* Test #2:
493+
* Ensure that <script> tags (attempting script injection) are stripped out
494+
* even when they appear inside allowed <style> tags.
495+
*/
496+
@Test
497+
public static final void testCVE202566021_2() {
498+
// Arrange: Attempt to inject a <script> inside <style>. Only 'style' and 'noscript' are allowed.
499+
String actualPayload = "<noscript><style>/* user content */.x { font-size: 12px; }<script>alert('XSS Attack!')</script></style></noscript>";
500+
String expectedPayload = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
501+
502+
HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
503+
PolicyFactory policy = htmlPolicyBuilder
504+
.allowElements("style", "noscript")
505+
.allowTextIn("style")
506+
.toFactory();
507+
508+
// Act
509+
String sanitized = policy.sanitize(actualPayload);
510+
511+
// Assert
512+
assertEquals(expectedPayload, sanitized);
513+
}
514+
515+
/**
516+
* Test #3:
517+
* Ensure that, if <div> is allowed, then <div> injected inside <style>
518+
* is retained by the sanitizer (since it is now in the policy).
519+
*/
520+
@Test
521+
public static final void testCVE202566021_3() {
522+
// Arrange: <div> is now allowed, so it should survive sanitization inside <style>.
523+
String actualPayload = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";
524+
String expectedPayload = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";
525+
526+
HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
527+
PolicyFactory policy = htmlPolicyBuilder
528+
.allowElements("style", "noscript", "div")
529+
.allowTextIn("style")
530+
.toFactory();
531+
532+
// Act
533+
String sanitized = policy.sanitize(actualPayload);
534+
535+
// Assert
536+
assertEquals(expectedPayload, sanitized);
537+
}
538+
539+
/**
540+
* Test #4:
541+
* Confirm that an attempt to prematurely close <style> with </noscript>, then inject a script,
542+
* does not allow the injected script. Sanitizer closes elements properly and only emits allowed tags.
543+
*/
544+
@Test
545+
public static final void testCVE202566021_4() {
546+
// Arrange: Try to break out of <style> and <noscript>, then add a script. Only style/noscript/p allowed.
547+
String actualPayload = "<noscript><style></noscript><script>alert(1)</script>";
548+
String expectedPayload = "<noscript><style></noscript></style></noscript>";
549+
550+
HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
551+
PolicyFactory policy = htmlPolicyBuilder
552+
.allowElements("style", "noscript", "p")
553+
.allowTextIn("style")
554+
.toFactory();
555+
556+
// Act
557+
String sanitized = policy.sanitize(actualPayload);
558+
559+
// Assert
560+
assertEquals(expectedPayload, sanitized);
561+
}
562+
563+
/**
564+
* Test #5:
565+
* Like Test #4, but with <p> instead of <noscript>. Ensures sanitizer emits correctly closed tags
566+
* and strips the injected script tag completely.
567+
*/
568+
@Test
569+
public static final void testCVE202566021_5() {
570+
// Arrange: Try to break out of <style> through <p>, then add a script. Only style/noscript/p allowed.
571+
String actualPayload = "<p><style></p><script>alert(1)</script>";
572+
String expectedPayload = "<p><style></p></style></p>";
573+
574+
HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
575+
PolicyFactory policy = htmlPolicyBuilder
576+
.allowElements("style", "noscript", "p")
577+
.allowTextIn("style")
578+
.toFactory();
579+
580+
// Act
581+
String sanitized = policy.sanitize(actualPayload);
582+
583+
// Assert
584+
assertEquals(expectedPayload, sanitized);
585+
}
586+
587+
/**
588+
* Test that <script> tags with space < script> are sanitized correctly.
589+
*/
590+
@Test
591+
public static final void testCVE202566021_6() {
592+
// Arrange: Attempt to inject a <script> inside <style>. Only 'style' and 'noscript' elements are allowed.
593+
String actualPayload = "<noscript><style>/* user content */.x { font-size: 12px; }< script>alert('XSS Attack!')</script></style></noscript>";
594+
String expectedPayload = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
595+
596+
HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
597+
PolicyFactory policy = htmlPolicyBuilder
598+
.allowElements("style", "noscript")
599+
.allowTextIn("style")
600+
.toFactory();
601+
602+
// Act
603+
String sanitized = policy.sanitize(actualPayload);
604+
605+
// Assert
606+
assertEquals(expectedPayload, sanitized);
607+
}
608+
457609
private static String sanitize(@Nullable String html) {
458610
StringBuilder sb = new StringBuilder();
459611
HtmlStreamRenderer renderer = HtmlStreamRenderer.create(

0 commit comments

Comments
 (0)