|
| 1 | +// Copyright (c) 2019, Mike Samuel |
| 2 | +// All rights reserved. |
| 3 | +// |
| 4 | +// Redistribution and use in source and binary forms, with or without |
| 5 | +// modification, are permitted provided that the following conditions |
| 6 | +// are met: |
| 7 | +// |
| 8 | +// Redistributions of source code must retain the above copyright |
| 9 | +// notice, this list of conditions and the following disclaimer. |
| 10 | +// Redistributions in binary form must reproduce the above copyright |
| 11 | +// notice, this list of conditions and the following disclaimer in the |
| 12 | +// documentation and/or other materials provided with the distribution. |
| 13 | +// Neither the name of the OWASP nor the names of its contributors may |
| 14 | +// be used to endorse or promote products derived from this software |
| 15 | +// without specific prior written permission. |
| 16 | +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 | +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 | +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| 19 | +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| 20 | +// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 21 | +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| 22 | +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 23 | +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 24 | +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 25 | +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
| 26 | +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 27 | +// POSSIBILITY OF SUCH DAMAGE. |
| 28 | + |
| 29 | +package org.owasp.html; |
| 30 | + |
| 31 | +/** |
| 32 | + * Applies a URL policy to all URLs in a srcset attribute value. |
| 33 | + * <p> |
| 34 | + * https://html.spec.whatwg.org/multipage/images.html#srcset-attributes |
| 35 | + * explains srcset for images and other media content. |
| 36 | + * <p> |
| 37 | + * There is a pending draft to use it on <script> to allow loading |
| 38 | + * scripts compatible with different versions of JS. |
| 39 | + * <p> |
| 40 | + * The general form of srcset is |
| 41 | + * <pre> |
| 42 | + * srcset ::== space* srcplus ([,] space* srcplus)* |
| 43 | + * # Additionally, the URL may not start or end with a comma |
| 44 | + * srcplus ::== URL (space+ metadata)? |
| 45 | + * metadata ::== FLOAT [a-z]? |
| 46 | + * </pre> |
| 47 | + * <p> |
| 48 | + * This policy applies the given attribute policy to URLs and emits metadata |
| 49 | + * as given, but normalizing spaces. |
| 50 | + */ |
| 51 | +final class SrcsetAttributePolicy implements AttributePolicy { |
| 52 | + |
| 53 | + private final AttributePolicy srcPolicy; |
| 54 | + |
| 55 | + SrcsetAttributePolicy(AttributePolicy srcPolicy) { |
| 56 | + this.srcPolicy = srcPolicy; |
| 57 | + } |
| 58 | + |
| 59 | + public String apply(String elementName, String attributeName, String value) { |
| 60 | + StringBuilder sb = new StringBuilder(); |
| 61 | + |
| 62 | + int i = 0, n = value.length(); |
| 63 | + // Skip spaces. |
| 64 | + while (i < n && Strings.isHtmlSpace(value.charAt(i))) { |
| 65 | + ++i; |
| 66 | + } |
| 67 | + |
| 68 | + while (i < n) { |
| 69 | + // Find URL end. |
| 70 | + int urlStart = i; |
| 71 | + while (i < n && !Strings.isHtmlSpace(value.charAt(i))) { |
| 72 | + ++i; |
| 73 | + } |
| 74 | + int urlEnd = i; |
| 75 | + // Find metadata end. |
| 76 | + while (i < n && Strings.isHtmlSpace(value.charAt(i))) { |
| 77 | + ++i; |
| 78 | + } |
| 79 | + int metadataStart = i; |
| 80 | + if (urlEnd < i) { // Space required before metadata. |
| 81 | + int floatEnd = Strings.skipValidFloatingPointNumber(value, i); |
| 82 | + if (floatEnd >= 0) { |
| 83 | + i = floatEnd; |
| 84 | + if (i < n) { |
| 85 | + // Skip over width specifier 'w', or pixel density specifier 'x'. |
| 86 | + // We make this optional to support the <script srcset> proposal. |
| 87 | + int ch = value.charAt(i) | 32; |
| 88 | + if ('a' <= ch && ch <= 'z') { |
| 89 | + ++i; |
| 90 | + } |
| 91 | + } |
| 92 | + } |
| 93 | + } |
| 94 | + int metadataEnd = i; |
| 95 | + |
| 96 | + if (urlStart < urlEnd) { |
| 97 | + if (value.charAt(urlStart) == ',' || value.charAt(urlEnd - 1) == ',') { |
| 98 | + // These introduce lexical ambiguity and are called out in the spec. |
| 99 | + return null; |
| 100 | + } |
| 101 | + String okUrl = srcPolicy.apply( |
| 102 | + elementName, "src", value.substring(urlStart, urlEnd)); |
| 103 | + if (okUrl != null && !okUrl.isEmpty()) { |
| 104 | + if (sb.length() != 0) { |
| 105 | + sb.append(", "); |
| 106 | + } |
| 107 | + sb.append(okUrl.replace(",", "%2c")); |
| 108 | + if (metadataStart < metadataEnd) { |
| 109 | + sb.append(' '); |
| 110 | + sb.append(value, metadataStart, metadataEnd); |
| 111 | + } |
| 112 | + } |
| 113 | + } |
| 114 | + // Skip space before comma |
| 115 | + while (i < n && Strings.isHtmlSpace(value.charAt(i))) { |
| 116 | + ++i; |
| 117 | + } |
| 118 | + if (i == n || value.charAt(i) != ',') { |
| 119 | + break; |
| 120 | + } |
| 121 | + ++i; |
| 122 | + while (i < n && Strings.isHtmlSpace(value.charAt(i))) { |
| 123 | + ++i; |
| 124 | + } |
| 125 | + } |
| 126 | + |
| 127 | + if (i < n // Unexpected trailing content. |
| 128 | + || sb.length() == 0) { // No URLs found. |
| 129 | + return null; |
| 130 | + } |
| 131 | + return sb.toString(); |
| 132 | + } |
| 133 | + |
| 134 | +} |
0 commit comments