Merge pull request #364 from melloware/O363-CVE

jmanico · web-flow · commit 40718acfa4c1 · 2025-12-30T18:23:00.000Z
Fix #363: CVE-2025-66021
diff --git a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java
@@ -94,8 +94,170 @@ public void closeDocument() {
 
   public void text(String textChunk) {
     if (!skipText) {
-      out.text(textChunk);
+      // Tags that appear inside an allowed style/script text container reach
+      // this method as plain text, so a chunk containing '<' in that context
+      // is filtered before it is written out. Every other chunk passes through.
+      boolean needsTagFiltering = false;
+      if (textChunk != null && textChunk.indexOf('<') >= 0) {
+        // Walk the open element stack from the top, two slots at a time,
+        // until a style or script entry that permits text is found.
+        int slot = openElementStack.size() - 1;
+        while (slot >= 0 && !needsTagFiltering) {
+          String openName = openElementStack.get(slot);
+          needsTagFiltering = openName != null
+              && ("style".equals(openName) || "script".equals(openName))
+              && allowedTextContainers.contains(openName);
+          slot -= 2;
+        }
+      }
+      out.text(needsTagFiltering ? stripDisallowedTags(textChunk) : textChunk);
+    }
+  }
+  
+  /**
+   * Removes tags for elements that are not covered by the policy from a chunk of text.
+   *
+   * <p>Called from {@link #text(String)} for text inside a style or script text
+   * container, where markup reaches the policy as text instead of as tag events.
+   * The chunk is scanned for {@code <...>} sequences and handled as follows:
+   * <ul>
+   *   <li>A {@code <} with no later {@code >} is dropped; scanning resumes after it.</li>
+   *   <li>A sequence whose trimmed content does not start with a letter, or with
+   *       {@code /} followed by a letter, is copied through unchanged.</li>
+   *   <li>An opening or closing tag whose canonical name is a key of
+   *       {@code elAndAttrPolicies} is copied through unchanged, attributes included.</li>
+   *   <li>A closing tag for any other element is dropped.</li>
+   *   <li>An opening tag for any other element is dropped together with everything
+   *       up to and including its matching closing tag (same-name nesting is
+   *       counted); without a matching closing tag the rest of the chunk is dropped.</li>
+   * </ul>
+   *
+   * @param text the text chunk to filter, may be {@code null}
+   * @return the filtered text, or {@code null} if {@code text} is {@code null}
+   */
+  private String stripDisallowedTags(String text) {
+    if (text == null) {
+      return text;
+    }
+    
+    StringBuilder result = new StringBuilder();
+    int len = text.length();
+    int i = 0;
+    
+    while (i < len) {
+      int tagStart = text.indexOf('<', i);
+      if (tagStart < 0) {
+        // No more tags, append the rest
+        result.append(text, i, len);
+        break;
+      }
+      
+      // Append text before the tag
+      if (tagStart > i) {
+        result.append(text, i, tagStart);
+      }
+      
+      // Find the end of the tag (either '>' or end of string)
+      int tagEnd = text.indexOf('>', tagStart + 1);
+      if (tagEnd < 0) {
+        // Unclosed tag: drop the '<' and keep scanning after it
+        i = tagStart + 1;
+        continue;
+      }
+      
+      // Extract the tag content (between < and >)
+      String tagContent = text.substring(tagStart + 1, tagEnd);
+      
+      // Only process if this looks like a valid HTML element tag
+      // Valid tags start with a letter or / followed by a letter
+      // Skip things like <, </>, <3, etc.
+      // Also handle tags with leading whitespace like < script>
+      boolean isValidTag = false;
+      String tagName = null;
+      
+      // Trim leading whitespace for tag name detection
+      String trimmedTagContent = tagContent.trim();
+      
+      // Classify the tag once, from the trimmed content, so that the name
+      // parsing here and the dispatch further down cannot disagree on
+      // input such as "< /script>".
+      boolean isClosingTag = trimmedTagContent.startsWith("/");
+      
+      if (isClosingTag) {
+        // Closing tag - must have / followed by a letter
+        if (trimmedTagContent.length() > 1) {
+          char firstChar = trimmedTagContent.charAt(1);
+          if (Character.isLetter(firstChar)) {
+            isValidTag = true;
+            tagName = trimmedTagContent.substring(1).trim().split("\\s")[0];
+            tagName = HtmlLexer.canonicalElementName(tagName);
+          }
+        }
+      } else {
+        // Opening tag - must start with a letter (after trimming whitespace)
+        if (trimmedTagContent.length() > 0) {
+          char firstChar = trimmedTagContent.charAt(0);
+          if (Character.isLetter(firstChar)) {
+            isValidTag = true;
+            tagName = trimmedTagContent.split("\\s")[0];
+            tagName = HtmlLexer.canonicalElementName(tagName);
+          }
+        }
+      }
+      
+      if (!isValidTag) {
+        // Not a valid HTML tag, just append it as-is
+        result.append('<').append(tagContent).append('>');
+        i = tagEnd + 1;
+        continue;
+      }
+      
+      // Check if it's a closing tag
+      if (isClosingTag) {
+        // Only allow closing tags if the element is allowed
+        if (elAndAttrPolicies.containsKey(tagName)) {
+          result.append('<').append(tagContent).append('>');
+        }
+        // Otherwise skip the closing tag
+        i = tagEnd + 1;
+      } else {
+        // Opening tag - only allow tags if the element is in the allowed list
+        if (elAndAttrPolicies.containsKey(tagName)) {
+          result.append('<').append(tagContent).append('>');
+          i = tagEnd + 1;
+        } else {
+          // Skip disallowed tag and its content until matching closing tag
+          i = tagEnd + 1;
+          // Track nesting level to find the matching closing tag
+          int nestingLevel = 1;
+          while (i < len && nestingLevel > 0) {
+            int nextTagStart = text.indexOf('<', i);
+            if (nextTagStart < 0) {
+              // No more tags, skip to end
+              i = len;
+              break;
+            }
+            int nextTagEnd = text.indexOf('>', nextTagStart + 1);
+            if (nextTagEnd < 0) {
+              // Unclosed tag, skip to end
+              i = len;
+              break;
+            }
+            String nextTagContent = text.substring(nextTagStart + 1, nextTagEnd);
+            String trimmedNextTagContent = nextTagContent.trim();
+            String nextTagName = trimmedNextTagContent.split("\\s")[0];
+            if (trimmedNextTagContent.startsWith("/")) {
+              // Closing tag
+              nextTagName = nextTagName.substring(1);
+              nextTagName = HtmlLexer.canonicalElementName(nextTagName);
+              if (nextTagName.equals(tagName)) {
+                nestingLevel--;
+                if (nestingLevel == 0) {
+                  // Found matching closing tag, skip it and continue
+                  i = nextTagEnd + 1;
+                  break;
+                }
+              }
+            } else {
+              // Opening tag
+              nextTagName = HtmlLexer.canonicalElementName(nextTagName);
+              if (nextTagName.equals(tagName)) {
+                nestingLevel++;
+              }
+            }
+            i = nextTagEnd + 1;
+          }
+        }
+      }
     }
+    
+    return result.toString();
   }
 
   public void openTag(String elementName, List<String> attrs) {
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
@@ -46,14 +46,19 @@ public class HtmlLexerTest extends TestCase {
   public final void testHtmlLexer() throws Exception {
     // Do the lexing.
     String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8);
+    // Convert CRLF and lone CR in the input to LF so lexing does not depend on the platform's line terminator
+    input = input.replace("\r\n", "\n").replace("\r", "\n");
     StringBuilder actual = new StringBuilder();
     lex(input, actual);
 
     // Get the golden.
     String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8);
+    // Apply the same CRLF/CR to LF conversion to the golden text and to the lexer output before comparing
+    golden = golden.replace("\r\n", "\n").replace("\r", "\n");
+    String actualStr = actual.toString().replace("\r\n", "\n").replace("\r", "\n");
 
     // Compare.
-    assertEquals(golden, actual.toString());
+    assertEquals(golden, actualStr);
   }
 
   @Test
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java
@@ -162,4 +162,4 @@ public void run() {
     }
   }
 
-}
+}
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java
@@ -454,6 +454,158 @@ public static final void testStylingCornerCase() {
     assertEquals(want, sanitize(input));
   }
 
+  /**
+   * The following tests cover regression scenarios for CVE-2025-66021, which relates to
+   * improper sanitization of HTML content involving <style> and <noscript> tags.
+   * The tests ensure that HTMLSanitizer:
+   *   - properly closes any opened elements,
+   *   - only allows allowed elements inside <style> blocks,
+   *   - prevents injection of forbidden HTML or scripts within style or noscript,
+   *   - does not allow unexpected element escape or context breaking.
+   */
+
+  /**
+   * Test #1:
+   * A {@code <div>} that the policy does not allow is injected inside {@code <style>}.
+   * The element and its content must be dropped while the CSS text is kept.
+   */
+  @Test
+  public static final void testCVE202566021_1() {
+    // Policy: only the style and noscript elements, with text permitted inside style.
+    PolicyFactory policy = new HtmlPolicyBuilder()
+        .allowElements("style", "noscript")
+        .allowTextIn("style")
+        .toFactory();
+
+    String input = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"evil\">XSS?</div></style></noscript>";
+    String want = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
+
+    assertEquals(want, policy.sanitize(input));
+  }
+
+  /**
+   * Test #2:
+   * A {@code <script>} element is injected inside an allowed {@code <style>} element.
+   * The script tag and its body must not appear in the sanitized output.
+   */
+  @Test
+  public static final void testCVE202566021_2() {
+    // Policy: only the style and noscript elements, with text permitted inside style.
+    PolicyFactory policy = new HtmlPolicyBuilder()
+        .allowElements("style", "noscript")
+        .allowTextIn("style")
+        .toFactory();
+
+    String input = "<noscript><style>/* user content */.x { font-size: 12px; }<script>alert('XSS Attack!')</script></style></noscript>";
+    String want = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
+
+    assertEquals(want, policy.sanitize(input));
+  }
+
+  /**
+   * Test #3:
+   * Counterpart to test #1: once {@code div} is part of the policy, a {@code <div>}
+   * placed inside {@code <style>} is expected to come through unchanged.
+   */
+  @Test
+  public static final void testCVE202566021_3() {
+    // Policy: style, noscript and div elements, with text permitted inside style.
+    PolicyFactory policy = new HtmlPolicyBuilder()
+        .allowElements("style", "noscript", "div")
+        .allowTextIn("style")
+        .toFactory();
+
+    String input = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";
+    String want = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";
+
+    assertEquals(want, policy.sanitize(input));
+  }
+
+  /**
+   * Test #4:
+   * The input leaves {@code <style>} unclosed, follows it with {@code </noscript>} and
+   * then a {@code <script>} element. The expected output contains no script and ends
+   * with closing tags for both style and noscript.
+   */
+  @Test
+  public static final void testCVE202566021_4() {
+    // Policy: style, noscript and p elements, with text permitted inside style.
+    PolicyFactory policy = new HtmlPolicyBuilder()
+        .allowElements("style", "noscript", "p")
+        .allowTextIn("style")
+        .toFactory();
+
+    String input = "<noscript><style></noscript><script>alert(1)</script>";
+    String want = "<noscript><style></noscript></style></noscript>";
+
+    assertEquals(want, policy.sanitize(input));
+  }
+
+  /**
+   * Test #5:
+   * Same shape as test #4 with {@code <p>} as the outer element instead of
+   * {@code <noscript>}. The expected output contains no script and ends with
+   * closing tags for both style and p.
+   */
+  @Test
+  public static final void testCVE202566021_5() {
+    // Policy: style, noscript and p elements, with text permitted inside style.
+    PolicyFactory policy = new HtmlPolicyBuilder()
+        .allowElements("style", "noscript", "p")
+        .allowTextIn("style")
+        .toFactory();
+
+    String input = "<p><style></p><script>alert(1)</script>";
+    String want = "<p><style></p></style></p>";
+
+    assertEquals(want, policy.sanitize(input));
+  }
+
+  /**
+   * Test #6:
+   * Variant of test #2 in which the opening tag is written with a space after the
+   * angle bracket ({@code < script>}). It must be removed just like {@code <script>}.
+   */
+  @Test
+  public static final void testCVE202566021_6() {
+    // Policy: only the style and noscript elements, with text permitted inside style.
+    PolicyFactory policy = new HtmlPolicyBuilder()
+        .allowElements("style", "noscript")
+        .allowTextIn("style")
+        .toFactory();
+
+    String input = "<noscript><style>/* user content */.x { font-size: 12px; }< script>alert('XSS Attack!')</script></style></noscript>";
+    String want = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
+
+    assertEquals(want, policy.sanitize(input));
+  }
+
   private static String sanitize(@Nullable String html) {
     StringBuilder sb = new StringBuilder();
     HtmlStreamRenderer renderer = HtmlStreamRenderer.create(

Original file line number	Diff line number	Diff line change
`@@ -162,4 +162,4 @@ public void run() {`
`162`	`162`	`}`
`163`	`163`	`}`
`164`	`164`
`165`		`-}`
	`165`	`+}`