lib/goog/html/safeurl.js

1// Copyright 2013 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview The SafeUrl type and its builders.
17 *
18 * TODO(user): Link to document stating type contract.
19 */
20
21goog.provide('goog.html.SafeUrl');
22
23goog.require('goog.asserts');
24goog.require('goog.fs.url');
25goog.require('goog.i18n.bidi.Dir');
26goog.require('goog.i18n.bidi.DirectionalString');
27goog.require('goog.string.Const');
28goog.require('goog.string.TypedString');
29
30
31
/**
 * A string-like wrapper whose value is safe to use in URL context in DOM APIs
 * and HTML documents.
 *
 * The security type contract carried by a SafeUrl is that its string value
 * will not cause untrusted script execution when a browser evaluates it as a
 * hyperlink URL. Consequently, assigning the value to a URL-valued DOM
 * property, or interpolating it into an HTML template in URL context (e.g.,
 * inside a href attribute), will not result in a Cross-Site-Scripting
 * vulnerability.
 *
 * Limits of the contract:
 * <ul>
 * <li>As documented in {@code goog.html.SafeUrl.unwrap}, instances are not
 *     guaranteed to be safe to interpolate into HTML without appropriate
 *     escaping.
 * <li>Nothing is implied about the resource the URL refers to. In particular,
 *     SafeUrls are <b>not</b> safe to use where the referred-to resource is
 *     interpreted as trusted code, e.g., as the src of a script tag.
 * </ul>
 *
 * Instances must be obtained from the factory methods
 * ({@code goog.html.SafeUrl.fromConstant}, {@code goog.html.SafeUrl.sanitize},
 * etc.) and not by invoking the constructor. The constructor intentionally
 * takes no parameters and the type is immutable, so constructor invocation can
 * only yield a default instance corresponding to the empty string.
 *
 * @see goog.html.SafeUrl#fromConstant
 * @see goog.html.SafeUrl#from
 * @see goog.html.SafeUrl#sanitize
 * @constructor
 * @final
 * @struct
 * @implements {goog.i18n.bidi.DirectionalString}
 * @implements {goog.string.TypedString}
 */
goog.html.SafeUrl = function() {
  /**
   * The wrapped URL string. The deliberately ugly field name makes any
   * (non-compiled) code that reaches into it directly stand out.
   * @private {string}
   */
  this.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = '';

  /**
   * Per-instance copy of the type marker, consulted by the additional
   * run-time type check.
   * @see goog.html.SafeUrl#unwrap
   * @const
   * @private
   */
  this.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ =
      goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;
};
87
88
/**
 * The harmless placeholder URL that goog.html.SafeUrl.sanitize substitutes
 * for an unsafe URL.
 *
 * Background on the chosen value:
 * - about:invalid is registered in
 *   http://www.w3.org/TR/css3-values/#about-invalid.
 * - http://tools.ietf.org/html/rfc6694#section-2.2.1 permits about URLs to
 *   carry a fragment, and the fragment is not considered when determining
 *   whether an about URL is well-known.
 * - about:invalid seems preferable to a fixed data URL: browsers might choose
 *   not to report CSP violations on it, because legitimate CSS function calls
 *   to attr() can produce this URL.
 * - It is a standard URL whose semantics are exactly what is needed here:
 *   "The about:invalid URI references a non-existent document with a generic
 *   error condition. It can be used when a URI is necessary, but the default
 *   value shouldn't be resolveable as any type of document".
 *
 * @const {string}
 */
goog.html.SafeUrl.INNOCUOUS_STRING = 'about:invalid#zClosurez';
110
111
/**
 * Flag identifying SafeUrl instances as goog.string.TypedString
 * implementations; goog.html.SafeUrl.sanitize checks this property on its
 * input.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogStringTypedString = true;
117
118
/**
 * Returns this SafeUrl's value as a string.
 *
 * IMPORTANT: Wherever it is security relevant that an object really is a
 * {@code SafeUrl}, call {@code goog.html.SafeUrl.unwrap} rather than this
 * method. If in doubt, assume that it is security relevant. Note in
 * particular that goog.html functions which return a goog.html type do not
 * guarantee that the returned instance is of the right type. For example:
 *
 * <pre>
 * var fakeSafeHtml = new String('fake');
 * fakeSafeHtml.__proto__ = goog.html.SafeHtml.prototype;
 * var newSafeHtml = goog.html.SafeHtml.htmlEscape(fakeSafeHtml);
 * // newSafeHtml is just an alias for fakeSafeHtml, it's passed through by
 * // goog.html.SafeHtml.htmlEscape() as fakeSafeHtml instanceof
 * // goog.html.SafeHtml.
 * </pre>
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting URLs. The value of a SafeUrl MUST still be appropriately
 * escaped before it is embedded in an HTML document, and the required
 * escaping is context-sensitive (e.g., embedding a URL in a style property
 * within a style attribute needs different escaping than embedding it in a
 * href attribute).
 *
 * @see goog.html.SafeUrl#unwrap
 * @override
 */
goog.html.SafeUrl.prototype.getTypedStringValue = function() {
  var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
  return wrappedValue;
};
150
151
/**
 * Flag identifying SafeUrl instances as goog.i18n.bidi.DirectionalString
 * implementations.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogI18nBidiDirectionalString = true;
157
158
/**
 * Returns this URL's directionality, which is always {@code LTR}.
 * @override
 */
goog.html.SafeUrl.prototype.getDirection = function() {
  var direction = goog.i18n.bidi.Dir.LTR;
  return direction;
};
166
167
if (goog.DEBUG) {
  /**
   * Returns a debug string-representation of this value, of the form
   * {@code SafeUrl{<wrapped value>}}. Only defined when goog.DEBUG is set.
   *
   * This is not the way to read the wrapped string; use
   * {@code goog.html.SafeUrl.unwrap} for that.
   *
   * @see goog.html.SafeUrl#unwrap
   * @override
   */
  goog.html.SafeUrl.prototype.toString = function() {
    var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
    return 'SafeUrl{' + wrappedValue + '}';
  };
}
183
184
/**
 * Verifies at run time that the given object really is a SafeUrl and, if so,
 * returns the string it wraps.
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting URLs. The value of a SafeUrl MUST still be appropriately
 * escaped before it is embedded in an HTML document, and the required
 * escaping is context-sensitive (e.g., embedding a URL in a style property
 * within a style attribute needs different escaping than embedding it in a
 * href attribute).
 *
 * The returned value does not necessarily equal the string the SafeUrl was
 * built from, because goog.html.SafeUrl.sanitize percent-encodes many
 * characters.
 *
 * @param {!goog.html.SafeUrl} safeUrl The object to extract from.
 * @return {string} The SafeUrl object's contained string, unless the run-time
 *     type check fails. In that case, {@code unwrap} returns an innocuous
 *     string, or, if assertions are enabled, throws
 *     {@code goog.asserts.AssertionError}.
 */
goog.html.SafeUrl.unwrap = function(safeUrl) {
  // The extra run-time type check offers some protection against security
  // bugs in application code that disables compile-time type checks. All of
  // the following must hold:
  // 1. The object is an instance of the expected type.
  // 2. The object is not an instance of a subclass.
  // 3. The object carries the type marker for the expected type. "Faking" an
  //    object requires a reference to the marker, whose names are intended to
  //    stand out in code reviews.
  var isGenuineSafeUrl =
      safeUrl instanceof goog.html.SafeUrl &&
      safeUrl.constructor === goog.html.SafeUrl &&
      safeUrl.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ ===
          goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;

  if (!isGenuineSafeUrl) {
    goog.asserts.fail('expected object of type SafeUrl, got \'' +
                      safeUrl + '\'');
    // Reached only when the assertion above does not throw.
    return 'type_error:SafeUrl';
  }

  return safeUrl.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
};
228
229
/**
 * Builds a SafeUrl from a compile-time constant string.
 *
 * Such strings are inherently program-controlled, and are therefore trusted
 * without any validation.
 *
 * @param {!goog.string.Const} url A compile-time-constant string from which to
 *     create a SafeUrl.
 * @return {!goog.html.SafeUrl} A SafeUrl object initialized to {@code url}.
 */
goog.html.SafeUrl.fromConstant = function(url) {
  var constantValue = goog.string.Const.unwrap(url);
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      constantValue);
};
244
245
/**
 * Matches the MIME types of Blobs for which a SafeUrl may be created from
 * URL.createObjectURL(blob). Currently only a fixed list of image types
 * (bmp, gif, jpeg, jpg, png, tiff, webp) is accepted, case-insensitively.
 * @const
 * @private
 */
goog.html.SAFE_BLOB_TYPE_PATTERN_ =
    /^image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)$/i;
254
255
/**
 * Creates a SafeUrl wrapping a blob URL for the given {@code blob}. The blob
 * URL is obtained via {@code URL.createObjectURL}. When the MIME type of
 * {@code blob} is not one of the known safe image MIME types, no blob URL is
 * created and the SafeUrl wraps {@link #INNOCUOUS_STRING} instead.
 * @see http://www.w3.org/TR/FileAPI/#url
 * @param {!Blob} blob
 * @return {!goog.html.SafeUrl} The blob URL, or an innocuous string wrapped
 *     as a SafeUrl.
 */
goog.html.SafeUrl.fromBlob = function(blob) {
  if (!goog.html.SAFE_BLOB_TYPE_PATTERN_.test(blob.type)) {
    return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
        goog.html.SafeUrl.INNOCUOUS_STRING);
  }
  var blobUrl = goog.fs.url.createObjectUrl(blob);
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      blobUrl);
};
271
272
/**
 * Recognizes a commonly useful subset of the URLs that satisfy the SafeUrl
 * contract, i.e. URLs that will not cause script execution when used in URL
 * context within an HTML document.
 *
 * The comment from here on, and the regular expression itself, are copied
 * from Soy's EscapingConventions. The pattern matches a URL that has:
 * (1) Either a protocol in a whitelist (http, https, mailto or ftp).
 * (2) or no protocol. A protocol must be followed by a colon. The below
 *     allows that by allowing colons only after one of the characters [/?#].
 *     A colon after a hash (#) must be in the fragment.
 *     Otherwise, a colon after a (?) must be in a query.
 *     Otherwise, a colon after a single solidus (/) must be in a path.
 *     Otherwise, a colon after a double solidus (//) must be in the authority
 *     (before port).
 *
 * The pattern also rejects '&' (used in HTML entity declarations) before the
 * first of the characters in [/?#]. This rules out:
 * - HTML entities in the protocol name, which should never happen, e.g.
 *   "h&#116;tp" for "http";
 * - HTML entities in the first path part of a relative path, e.g.
 *   "foo&lt;bar/baz", which our existing escaping functions should not
 *   produce;
 * - and, more importantly, masking of a colon, e.g. "javascript&#58;...".
 *
 * @private
 * @const {!RegExp}
 */
goog.html.SAFE_URL_PATTERN_ =
    /^(?:(?:https?|mailto|ftp):|[^&:/?#]*(?:[/?#]|$))/i;
303
304
/**
 * Creates a SafeUrl object from {@code url}. A {@code url} that already is a
 * goog.html.SafeUrl is returned unchanged. Any other input is reduced to a
 * string (via {@code getTypedStringValue()} for a goog.string.TypedString,
 * via {@code String()} otherwise) and validated against a pattern of commonly
 * used safe URLs.
 *
 * {@code url} may be a URL with the http, https, mailto or ftp scheme,
 * or a relative URL (i.e., a URL without a scheme; specifically, a
 * scheme-relative, absolute-path-relative, or path-relative URL).
 *
 * A URL that passes validation is converted to UTF-8 and its non-whitelisted
 * characters are percent-encoded. Whitelisted characters are '%' and, from
 * RFC 3986, unreserved characters and reserved characters, with the exception
 * of '\'', '(' and ')'. This ensures that the SafeUrl contains only
 * ASCII-printable characters and reduces the chance of security bugs were it
 * to be interpolated into a specific context without the necessary escaping.
 *
 * If {@code url} fails validation or does not UTF-16 decode correctly
 * (JavaScript strings are UTF-16 encoded), this function returns a SafeUrl
 * object containing an innocuous string, goog.html.SafeUrl.INNOCUOUS_STRING.
 *
 * @see http://url.spec.whatwg.org/#concept-relative-url
 * @param {string|!goog.string.TypedString} url The URL to validate.
 * @return {!goog.html.SafeUrl} The validated URL, wrapped as a SafeUrl.
 */
goog.html.SafeUrl.sanitize = function(url) {
  if (url instanceof goog.html.SafeUrl) {
    return url;
  }

  // Reduce the input to a plain string before validating it.
  var candidate = url.implementsGoogStringTypedString ?
      url.getTypedStringValue() :
      String(url);

  // Only URLs matching the safe pattern are normalized; everything else is
  // replaced by the innocuous placeholder.
  var sanitized = goog.html.SAFE_URL_PATTERN_.test(candidate) ?
      goog.html.SafeUrl.normalize_(candidate) :
      goog.html.SafeUrl.INNOCUOUS_STRING;

  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      sanitized);
};
348
349
/**
 * Normalizes {@code url} to the percent-encoded form of its UTF-8 encoding,
 * using a whitelist of characters. Whitelisted characters are not
 * percent-encoded.
 *
 * The URL is first run through {@code encodeURI}; the result is then adjusted
 * using goog.html.SafeUrl.NORMALIZE_MATCHER_ and
 * goog.html.SafeUrl.NORMALIZE_REPLACER_MAP_.
 *
 * @param {string} url The URL to normalize.
 * @return {string} The normalized URL, or goog.html.SafeUrl.INNOCUOUS_STRING
 *     if {@code encodeURI} throws.
 * @private
 */
goog.html.SafeUrl.normalize_ = function(url) {
  var encoded;
  try {
    encoded = encodeURI(url);
  } catch (e) {
    // encodeURI throws if url contains invalid surrogate sequences.
    return goog.html.SafeUrl.INNOCUOUS_STRING;
  }

  // Swap each matched character or escape sequence for its mapped
  // replacement.
  var replaceToken = function(token) {
    return goog.html.SafeUrl.NORMALIZE_REPLACER_MAP_[token];
  };
  return encoded.replace(goog.html.SafeUrl.NORMALIZE_MATCHER_, replaceToken);
};
370
371
/**
 * Matches the characters and escape sequences that must be replaced in the
 * output of encodeURI. Specifically:
 *
 * - '\'', '(' and ')' are left unencoded by encodeURI. They belong to the
 *   reserved characters group in RFC 3986 but only appear in the obsolete
 *   mark production in Appendix D.2 of RFC 3986, so they can be encoded
 *   without changing semantics.
 * - '[' and ']' are encoded by encodeURI even though they are reserved
 *   characters which can be used to represent IPv6 addresses, so they need to
 *   be decoded again.
 * - '%' is encoded by encodeURI. However, encoding a '%' that is already part
 *   of a valid percent-encoded sequence changes the semantics of a URL, and
 *   hence such characters need to be preserved. Note that this may allow
 *   non-encoded '%' characters to remain in the URL (i.e., occurrences of '%'
 *   that are not part of a valid percent-encoded sequence, for example,
 *   'ab%xy').
 *
 * @const {!RegExp}
 * @private
 */
goog.html.SafeUrl.NORMALIZE_MATCHER_ = /[()']|%5B|%5D|%25/g;
393
394
/**
 * Replacements applied to the string generated by encodeURI, keyed by the
 * text matched by goog.html.SafeUrl.NORMALIZE_MATCHER_.
 * @const {!Object<string, string>}
 * @private
 */
goog.html.SafeUrl.NORMALIZE_REPLACER_MAP_ = {
  // Characters that encodeURI leaves alone: percent-encode them.
  '\'': '%27',
  '(': '%28',
  ')': '%29',
  // Escape sequences that encodeURI produces: turn them back into the
  // original character.
  '%5B': '[',
  '%5D': ']',
  '%25': '%'
};
408
409
/**
 * Type marker for the SafeUrl type. The constructor stores a reference to
 * this object on every instance, and goog.html.SafeUrl.unwrap compares that
 * reference by identity as part of its additional run-time type checking.
 * @const
 * @private
 */
goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ = {};
417
418
/**
 * Package-internal utility method to create SafeUrl instances. The given
 * string is stored as-is; no validation or normalization happens here.
 *
 * @param {string} url The string to initialize the SafeUrl object with.
 * @return {!goog.html.SafeUrl} The initialized SafeUrl object.
 * @package
 */
goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse = function(
    url) {
  var instance = new goog.html.SafeUrl();
  instance.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = url;
  return instance;
};