Skip to content

Commit 2ca4284

Browse files
committed
Properly parse srcset attributes
Fixes issue #112 Previously, extra URL handling for `src`, `href` and related attributes was naively applied to `srcset`. This failed safe but mangled content. I consolidated (in a previous commit) the code that ensures that URL and `style` attributes have extra checks done: the global URL and style policies respectively. This commit adds special handling for srcset which uses the global URL policy to vet each URL policy and leaves the metadata portion alone. Commas in the middle of URLs in a `srcset` value will be escaped to `%2c`.
1 parent 6557bfb commit 2ca4284

5 files changed

Lines changed: 302 additions & 1 deletion

File tree

change_log.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
Most recent at top.
44
* Pending
5+
* Properly parse `srcset` attribute values to apply URL policy to
6+
each URL in turn.
57
* Update dependency on guava version to 27.1-jre to avoid causing clients
68
problems with CVE-2018-10237. Specify Maven property `guava.version`
79
to override.

src/main/java/org/owasp/html/HtmlPolicyBuilder.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,7 @@ AttributePolicy makeGuard(AttributeGuardIntermediates intermediates) {
610610
for (String urlAttributeName : new String[] {
611611
"action", "archive", "background", "cite", "classid", "codebase", "data",
612612
"dsync", "formaction", "href", "icon", "longdesc", "manifest", "poster",
613-
"profile", "src", "srcset", "usemap",
613+
"profile", "src", "usemap",
614614
}) {
615615
b.put(urlAttributeName, identityGuard);
616616
}
@@ -634,6 +634,14 @@ public String apply(String url) {
634634
});
635635
}
636636

637+
});
638+
b.put("srcset", new AttributeGuardMaker() {
639+
640+
@Override
641+
AttributePolicy makeGuard(AttributeGuardIntermediates intermediates) {
642+
return new SrcsetAttributePolicy(intermediates.urlAttributePolicy);
643+
}
644+
637645
});
638646
ATTRIBUTE_GUARDS = b.build();
639647
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// Copyright (c) 2019, Mike Samuel
2+
// All rights reserved.
3+
//
4+
// Redistribution and use in source and binary forms, with or without
5+
// modification, are permitted provided that the following conditions
6+
// are met:
7+
//
8+
// Redistributions of source code must retain the above copyright
9+
// notice, this list of conditions and the following disclaimer.
10+
// Redistributions in binary form must reproduce the above copyright
11+
// notice, this list of conditions and the following disclaimer in the
12+
// documentation and/or other materials provided with the distribution.
13+
// Neither the name of the OWASP nor the names of its contributors may
14+
// be used to endorse or promote products derived from this software
15+
// without specific prior written permission.
16+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19+
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20+
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21+
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22+
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23+
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24+
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25+
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
26+
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27+
// POSSIBILITY OF SUCH DAMAGE.
28+
29+
package org.owasp.html;
30+
31+
/**
32+
* Applies a URL policy to all URLs in a srcset attribute value.
33+
* <p>
34+
* https://html.spec.whatwg.org/multipage/images.html#srcset-attributes
35+
* explains srcset for images and other media content.
36+
* <p>
37+
* There is a pending draft to use it on <script> to allow loading
38+
* scripts compatible with different versions of JS.
39+
* <p>
40+
* The general form of srcset is
41+
* <pre>
42+
* srcset ::== space* srcplus ([,] space* srcplus)*
43+
* # Additionally, the URL may not start or end with a comma
44+
* srcplus ::== URL (space+ metadata)?
45+
* metadata ::== FLOAT [a-z]?
46+
* </pre>
47+
* <p>
48+
* This policy applies the given attribute policy to URLs and emits metadata
49+
* as given, but normalizing spaces.
50+
*/
51+
final class SrcsetAttributePolicy implements AttributePolicy {
52+
53+
private final AttributePolicy srcPolicy;
54+
55+
SrcsetAttributePolicy(AttributePolicy srcPolicy) {
56+
this.srcPolicy = srcPolicy;
57+
}
58+
59+
public String apply(String elementName, String attributeName, String value) {
60+
StringBuilder sb = new StringBuilder();
61+
62+
int i = 0, n = value.length();
63+
// Skip spaces.
64+
while (i < n && Strings.isHtmlSpace(value.charAt(i))) {
65+
++i;
66+
}
67+
68+
while (i < n) {
69+
// Find URL end.
70+
int urlStart = i;
71+
while (i < n && !Strings.isHtmlSpace(value.charAt(i))) {
72+
++i;
73+
}
74+
int urlEnd = i;
75+
// Find metadata end.
76+
while (i < n && Strings.isHtmlSpace(value.charAt(i))) {
77+
++i;
78+
}
79+
int metadataStart = i;
80+
if (urlEnd < i) { // Space required before metadata.
81+
int floatEnd = Strings.skipValidFloatingPointNumber(value, i);
82+
if (floatEnd >= 0) {
83+
i = floatEnd;
84+
if (i < n) {
85+
// Skip over width specifier 'w', or pixel density specifier 'x'.
86+
// We make this optional to support the <script srcset> proposal.
87+
int ch = value.charAt(i) | 32;
88+
if ('a' <= ch && ch <= 'z') {
89+
++i;
90+
}
91+
}
92+
}
93+
}
94+
int metadataEnd = i;
95+
96+
if (urlStart < urlEnd) {
97+
if (value.charAt(urlStart) == ',' || value.charAt(urlEnd - 1) == ',') {
98+
// These introduce lexical ambiguity and are called out in the spec.
99+
return null;
100+
}
101+
String okUrl = srcPolicy.apply(
102+
elementName, "src", value.substring(urlStart, urlEnd));
103+
if (okUrl != null && !okUrl.isEmpty()) {
104+
if (sb.length() != 0) {
105+
sb.append(", ");
106+
}
107+
sb.append(okUrl.replace(",", "%2c"));
108+
if (metadataStart < metadataEnd) {
109+
sb.append(' ');
110+
sb.append(value, metadataStart, metadataEnd);
111+
}
112+
}
113+
}
114+
// Skip space before comma
115+
while (i < n && Strings.isHtmlSpace(value.charAt(i))) {
116+
++i;
117+
}
118+
if (i == n || value.charAt(i) != ',') {
119+
break;
120+
}
121+
++i;
122+
while (i < n && Strings.isHtmlSpace(value.charAt(i))) {
123+
++i;
124+
}
125+
}
126+
127+
if (i < n // Unexpected trailing content.
128+
|| sb.length() == 0) { // No URLs found.
129+
return null;
130+
}
131+
return sb.toString();
132+
}
133+
134+
}

src/main/java/org/owasp/html/Strings.java

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,5 +181,86 @@ static String stripHtmlSpaces(String s) {
181181
return s.substring(i, n);
182182
}
183183

184+
/**
185+
* Parses a valid floating point number per the HTML5 spec.
186+
* https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#valid-floating-point-number
187+
*
188+
* @param start the start of the floating point number on s.
189+
* @return the end of the floating point number if valid or -1 if not.
190+
*/
191+
static int skipValidFloatingPointNumber(String value, int start) {
192+
// A string is a valid floating-point number if it consists of:
193+
int i = start;
194+
final int n = value.length();
195+
196+
if (i >= n) {
197+
return -1;
198+
}
199+
200+
// 1. Optionally, a U+002D HYPHEN-MINUS character (-).
201+
if (value.charAt(i) == '-') {
202+
++i;
203+
}
204+
// 2. One or both of the following, in the given order:
205+
boolean hasIntegerPart = false;
206+
// 1. A series of one or more ASCII digits.
207+
while (i < n) {
208+
char ch = value.charAt(i);
209+
if ('0' <= ch && ch <= '9') {
210+
++i;
211+
hasIntegerPart = true;
212+
} else {
213+
break;
214+
}
215+
}
216+
// 2. Both of the following, in the given order:
217+
// 1. A single U+002E FULL STOP character (.).
218+
// 2. A series of one or more ASCII digits.
219+
if (i < n && value.charAt(i) == '.') {
220+
++i;
221+
while (i < n) {
222+
char ch = value.charAt(i);
223+
if ('0' <= ch && ch <= '9') {
224+
++i;
225+
hasIntegerPart = true;
226+
} else {
227+
break;
228+
}
229+
}
230+
}
231+
if (!hasIntegerPart) {
232+
return -1;
233+
}
234+
// 3. Optionally:
235+
// 1. Either a U+0065 LATIN SMALL LETTER E character (e)
236+
// or a U+0045 LATIN CAPITAL LETTER E character (E).
237+
if (i < n && (value.charAt(i) | 32) == 'e') {
238+
++i;
239+
// 2. Optionally, a U+002D HYPHEN-MINUS character (-) or
240+
// U+002B PLUS SIGN character (+).
241+
if (i < n) {
242+
char ch = value.charAt(i);
243+
if (ch == '+' || ch == '-') {
244+
++i;
245+
}
246+
}
247+
// 3. A series of one or more ASCII digits.
248+
boolean hasExponent = false;
249+
while (i < n) {
250+
char ch = value.charAt(i);
251+
if ('0' <= ch && ch <= '9') {
252+
++i;
253+
hasExponent = true;
254+
} else {
255+
break;
256+
}
257+
}
258+
if (!hasExponent) {
259+
return -1;
260+
}
261+
}
262+
return i;
263+
}
264+
184265
private Strings() { /* uninstantiable */ }
185266
}

src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
import java.util.List;
3232
import java.util.Locale;
33+
import java.util.regex.Pattern;
3334

3435
import org.junit.Test;
3536

@@ -350,6 +351,81 @@ public String apply(String elementName, List<String> attrs) {
350351
+ "<IMAGE>"));
351352
}
352353

354+
@Test
355+
public static final void testImgSrcsetSyntax() {
356+
assertEquals(
357+
""
358+
+ "<img srcset=\"http://example.com/foo.png\" />\n"
359+
+ "<img srcset=\"http://example.com/foo.png 640w\" />\n"
360+
+ "<img srcset=\"http://example.com/foo.png 48x\" />\n"
361+
+ "<img srcset=\"http://example.com/foo.png .123x\" />\n"
362+
+ "<img srcset=\"http://example.com/foo.png .123e2x\" />\n"
363+
+ "<img srcset=\"http://example.com/foo.png 123.456x\" />\n"
364+
+ "<img srcset=\"/big.png 64w, /little.png\" />\n"
365+
+ "<img srcset=\"/big.png 64w, /little.png\" />\n"
366+
+ "<img srcset=\"/big.png 64w, /little.png\" />\n"
367+
+ "<img srcset=\"foo%2cbar.png\" />\n"
368+
+ "empty: \n"
369+
+ "only space: \n"
370+
+ "only comma: \n"
371+
+ "comma at end: <img srcset=\"foo.png\" />\n"
372+
+ "comma stuck to url: \n"
373+
+ "commas inside: <img srcset=\"foo.png%2c%2cbar.png\" />\n"
374+
+ "double commas 1: \n"
375+
+ "double commas 2: \n"
376+
+ "bad url: <img srcset=\"foo.png 1w\" />\n",
377+
378+
apply(
379+
new HtmlPolicyBuilder()
380+
.allowElements("img")
381+
.allowAttributes("srcset").onElements("img")
382+
.allowStandardUrlProtocols(),
383+
""
384+
+ "<img srcset=\"http://example.com/foo.png\" />\n"
385+
+ "<img srcset=\"http://example.com/foo.png 640w\" />\n"
386+
+ "<img srcset=\"http://example.com/foo.png 48x\" />\n"
387+
+ "<img srcset=\"http://example.com/foo.png .123x\" />\n"
388+
+ "<img srcset=\"http://example.com/foo.png .123e2x\" />\n"
389+
+ "<img srcset=\"http://example.com/foo.png 123.456x\" />\n"
390+
+ "<img srcset=\"/big.png 64w, /little.png\" />\n"
391+
+ "<img srcset=\" /big.png 64w , /little.png\" />\n"
392+
+ "<img srcset=\"\t\t/big.png 64w\r\n,/little.png\t\t\" />\n"
393+
+ "<img srcset=\"foo,bar.png\" />\n"
394+
+ "empty: <img srcset=\"\" />\n"
395+
+ "only space: <img srcset=\" \" />\n"
396+
+ "only comma: <img srcset=\",\" />\n"
397+
+ "comma at end: <img srcset=\"foo.png ,\" />\n" // ok
398+
+ "comma stuck to url: <img srcset=\"bar.png,\" />\n" // not ok
399+
+ "commas inside: <img srcset=\"foo.png,,bar.png\" />\n" // escaped
400+
+ "double commas 1: <img srcset=\"a ,, b\" />\n" // not ok
401+
+ "double commas 2: <img srcset=\"a , , b\" />\n" // not ok
402+
+ "bad url: <img srcset=\"foo.png 1w, javascript:evil()\" />\n"
403+
));
404+
}
405+
406+
@Test
407+
public static final void testUrlChecksLayer() {
408+
assertEquals(
409+
""
410+
+ "<img src=\"http://example.com/OK.png\" />\n"
411+
+ "\n"
412+
+ "<img srcset=\"http://example.com/bar.png#OK 1w\" />",
413+
414+
apply(
415+
new HtmlPolicyBuilder()
416+
.allowElements("img")
417+
.allowAttributes("src", "srcset")
418+
.matching(Pattern.compile(".*OK.*"))
419+
.onElements("img")
420+
.allowStandardUrlProtocols(),
421+
""
422+
+ "<img src=\"http://example.com/OK.png\" />\n"
423+
+ "<img src=\"http://example.com/\" />\n"
424+
+ "<img srcset=\"http://example.com/bar.png#OK 1w, javascript:alert%28%27OK%27%29\">"
425+
)
426+
);
427+
}
428+
353429
@Test
354430
public static final void testDuplicateAttributesDoNotReachElementPolicy() {
355431
final int[] idCount = new int[1];

0 commit comments

Comments
 (0)