lib/goog/html/safeurl.js

1// Copyright 2013 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview The SafeUrl type and its builders.
17 *
18 * TODO(xtof): Link to document stating type contract.
19 */
20
21goog.provide('goog.html.SafeUrl');
22
23goog.require('goog.asserts');
24goog.require('goog.fs.url');
25goog.require('goog.i18n.bidi.Dir');
26goog.require('goog.i18n.bidi.DirectionalString');
27goog.require('goog.string.Const');
28goog.require('goog.string.TypedString');
29
30
31
/**
 * A string-like wrapper whose value is safe to use in URL context, both in
 * DOM APIs and in HTML documents.
 *
 * The security type contract carried by a SafeUrl is that its string value
 * will not cause untrusted script execution when a browser evaluates it as a
 * hyperlink URL. Consequently, instances can be assigned to URL-valued DOM
 * properties, or interpolated into an HTML template in URL context (e.g.,
 * inside a href attribute), without introducing a Cross-Site-Scripting
 * vulnerability.
 *
 * Limits of the contract:
 * <ul>
 * <li>As documented in {@code goog.html.SafeUrl.unwrap}, instances are not
 *     guaranteed to be safe to interpolate into HTML without appropriate
 *     escaping.
 * <li>Nothing is guaranteed about the resource the URL refers to. In
 *     particular, SafeUrls are <b>not</b> safe to use where the referred-to
 *     resource is interpreted as trusted code, e.g., as the src of a script
 *     tag.
 * </ul>
 *
 * Instances must be obtained from the factory methods (such as
 * {@code goog.html.SafeUrl.fromConstant} or
 * {@code goog.html.SafeUrl.sanitize}) rather than by calling the constructor.
 * The constructor deliberately takes no parameters and the type is immutable,
 * so constructor invocation can only yield a default instance wrapping the
 * empty string.
 *
 * @see goog.html.SafeUrl#fromConstant
 * @see goog.html.SafeUrl#sanitize
 * @see goog.html.SafeUrl#fromBlob
 * @see goog.html.SafeUrl#fromDataUrl
 * @constructor
 * @final
 * @struct
 * @implements {goog.i18n.bidi.DirectionalString}
 * @implements {goog.string.TypedString}
 */
goog.html.SafeUrl = function() {
  /**
   * The wrapped URL string; starts out empty. The name is intentionally
   * unwieldy so that any (non-compiled) code reaching into this field
   * directly is easy to spot.
   * @private {string}
   */
  this.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = '';

  /**
   * Per-instance copy of the type marker, compared by
   * {@code goog.html.SafeUrl.unwrap} as part of its run-time type check.
   * @see goog.html.SafeUrl#unwrap
   * @const
   * @private
   */
  this.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ =
      goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;
};
87
88
/**
 * The harmless URL that the SafeUrl builders wrap in place of an input they
 * reject (for instance, {@code goog.html.SafeUrl.sanitize} when passed an
 * unsafe URL).
 *
 * about:invalid is registered in
 * http://www.w3.org/TR/css3-values/#about-invalid, and
 * http://tools.ietf.org/html/rfc6694#section-2.2.1 allows an about URL to
 * carry a fragment, which is ignored when deciding whether the about URL is
 * well-known.
 *
 * about:invalid is preferred over a fixed data URL for two reasons. First,
 * browsers might choose not to report CSP violations on it, because
 * legitimate CSS function calls to attr() can produce this URL. Second, it is
 * a standard URL with exactly the required semantics:
 * "The about:invalid URI references a non-existent document with a generic
 * error condition. It can be used when a URI is necessary, but the default
 * value shouldn't be resolveable as any type of document".
 *
 * @const {string}
 */
goog.html.SafeUrl.INNOCUOUS_STRING = 'about:invalid#zClosurez';
110
111
/**
 * Flag marking instances as implementations of
 * {@code goog.string.TypedString}.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogStringTypedString = true;
117
118
/**
 * Returns this SafeUrl's value as a string.
 *
 * IMPORTANT: Wherever it matters for security that the object really is a
 * {@code SafeUrl}, call {@code goog.html.SafeUrl.unwrap} rather than this
 * method. When unsure, treat the situation as security relevant. Note in
 * particular that goog.html functions returning a goog.html type do not
 * guarantee that the returned instance has the right type. For example:
 *
 * <pre>
 * var fakeSafeHtml = new String('fake');
 * fakeSafeHtml.__proto__ = goog.html.SafeHtml.prototype;
 * var newSafeHtml = goog.html.SafeHtml.htmlEscape(fakeSafeHtml);
 * // newSafeHtml is just an alias for fakeSafeHtml, it's passed through by
 * // goog.html.SafeHtml.htmlEscape() as fakeSafeHtml instanceof
 * // goog.html.SafeHtml.
 * </pre>
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting URLs. The value of a SafeUrl MUST still be escaped
 * appropriately before being embedded in an HTML document, and the escaping
 * needed depends on the context (e.g., a URL embedded in a style property
 * within a style attribute needs different escaping than one embedded in a
 * href attribute).
 *
 * @see goog.html.SafeUrl#unwrap
 * @override
 */
goog.html.SafeUrl.prototype.getTypedStringValue = function() {
  var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
  return wrappedValue;
};
150
151
/**
 * Flag marking instances as implementations of
 * {@code goog.i18n.bidi.DirectionalString}.
 * @override
 * @const
 */
goog.html.SafeUrl.prototype.implementsGoogI18nBidiDirectionalString = true;
157
158
/**
 * Returns this URL's directionality, which is always {@code LTR}.
 * @override
 */
goog.html.SafeUrl.prototype.getDirection = function() {
  var direction = goog.i18n.bidi.Dir.LTR;
  return direction;
};
166
167
if (goog.DEBUG) {
  /**
   * Produces a string representation of this value intended for debugging;
   * this override is only installed when {@code goog.DEBUG} is set.
   *
   * The result is the wrapped value enclosed in {@code SafeUrl{...}}, not the
   * wrapped value itself. To get the actual string wrapped in a SafeUrl, use
   * {@code goog.html.SafeUrl.unwrap}.
   *
   * @see goog.html.SafeUrl#unwrap
   * @override
   */
  goog.html.SafeUrl.prototype.toString = function() {
    var wrappedValue = this.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
    return 'SafeUrl{' + wrappedValue + '}';
  };
}
183
184
/**
 * Verifies at run time that the given object really is a SafeUrl and, if so,
 * returns the string it wraps.
 *
 * IMPORTANT: The SafeUrl type contract only covers how browsers behave when
 * interpreting URLs. The value of a SafeUrl MUST still be escaped
 * appropriately before being embedded in an HTML document, and the escaping
 * needed depends on the context (e.g., a URL embedded in a style property
 * within a style attribute needs different escaping than one embedded in a
 * href attribute).
 *
 * @param {!goog.html.SafeUrl} safeUrl The object to extract from.
 * @return {string} The string contained in the SafeUrl object when the
 *     run-time type check passes. When it fails, {@code unwrap} returns the
 *     innocuous string {@code 'type_error:SafeUrl'}, or, if assertions are
 *     enabled, throws {@code goog.asserts.AssertionError}.
 */
goog.html.SafeUrl.unwrap = function(safeUrl) {
  // The run-time check below gives some extra protection against security
  // bugs caused by application code that disables type checks. All three
  // conditions must hold:
  // 1. The object is an instance of the expected type.
  // 2. The object is not an instance of a subclass.
  // 3. The object carries the type marker of the expected type. "Faking" an
  //    object therefore needs a reference to the type marker, whose names are
  //    meant to stand out in code reviews.
  var isGenuineSafeUrl = safeUrl instanceof goog.html.SafeUrl &&
      safeUrl.constructor === goog.html.SafeUrl &&
      safeUrl.SAFE_URL_TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ ===
          goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_;

  if (!isGenuineSafeUrl) {
    goog.asserts.fail('expected object of type SafeUrl, got \'' +
                      safeUrl + '\'');
    // Only reached when the failed assertion above did not throw.
    return 'type_error:SafeUrl';
  }

  return safeUrl.privateDoNotAccessOrElseSafeHtmlWrappedValue_;
};
224
225
/**
 * Builds a SafeUrl from a compile-time constant string.
 *
 * A compile-time constant string is inherently under the program's control
 * and is therefore trusted; its value is wrapped without validation.
 *
 * @param {!goog.string.Const} url A compile-time-constant string from which to
 *     create a SafeUrl.
 * @return {!goog.html.SafeUrl} A SafeUrl object initialized to {@code url}.
 */
goog.html.SafeUrl.fromConstant = function(url) {
  var constantValue = goog.string.Const.unwrap(url);
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      constantValue);
};
240
241
/**
 * Matches, case-insensitively and as a whole string, the MIME types of Blobs
 * or data: URIs for which a SafeUrl may be created (via
 * URL.createObjectURL(blob) or from the data: URI itself). Currently only a
 * fixed set of image and video types is accepted.
 * @const
 * @private
 */
goog.html.SAFE_MIME_TYPE_PATTERN_ =
    /^(?:image\/(?:bmp|gif|jpeg|jpg|png|tiff|webp)|video\/(?:mpeg|mp4|ogg|webm))$/i;
251
252
/**
 * Builds a SafeUrl that wraps a blob URL for the given {@code blob}.
 *
 * The blob URL is obtained through {@code goog.fs.url.createObjectUrl}, but
 * only when the MIME type of {@code blob} is one of the known safe image or
 * video MIME types. For any other MIME type no blob URL is created and the
 * SafeUrl wraps {@link #INNOCUOUS_STRING} instead.
 *
 * @see http://www.w3.org/TR/FileAPI/#url
 * @param {!Blob} blob
 * @return {!goog.html.SafeUrl} The blob URL, or an innocuous string wrapped
 *     as a SafeUrl.
 */
goog.html.SafeUrl.fromBlob = function(blob) {
  var url;
  if (goog.html.SAFE_MIME_TYPE_PATTERN_.test(blob.type)) {
    url = goog.fs.url.createObjectUrl(blob);
  } else {
    url = goog.html.SafeUrl.INNOCUOUS_STRING;
  }
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(url);
};
270
271
/**
 * Matches a complete base-64 encoded data: URL (case-insensitively). The
 * first capture group holds the MIME type, i.e. the text between
 * {@code data:} and {@code ;base64,}.
 * @const
 * @private
 */
goog.html.DATA_URL_PATTERN_ = /^data:([^;,]*);base64,[a-z0-9+\/]+=*$/i;
278
279
/**
 * Builds a SafeUrl that wraps a data: URL, provided the URL is a base64 data
 * URL whose MIME type is one of the known-safe image or video MIME types.
 *
 * @param {string} dataUrl A valid base64 data URL with one of the whitelisted
 *     image or video MIME types.
 * @return {!goog.html.SafeUrl} A matching safe URL, or {@link INNOCUOUS_STRING}
 *     wrapped as a SafeUrl if it does not pass.
 */
goog.html.SafeUrl.fromDataUrl = function(dataUrl) {
  // A small risk remains: a browser that does not know the MIME type could
  // sniff the content type and execute HTML contained in the data: URL. For
  // that to become XSS, the browser would additionally have to execute the
  // HTML in the origin of the page holding the link. Both happening together
  // seems unlikely, particularly in IE versions that are not really old.
  var match = dataUrl.match(goog.html.DATA_URL_PATTERN_);
  if (match && goog.html.SAFE_MIME_TYPE_PATTERN_.test(match[1])) {
    return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
        dataUrl);
  }
  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      goog.html.SafeUrl.INNOCUOUS_STRING);
};
300
301
/**
 * Recognizes a commonly useful subset of the URLs that satisfy the SafeUrl
 * contract.
 *
 * A URL matched by this regular expression will not cause script execution
 * when used in URL context within an HTML document. The regular expression
 * (copied, like the remainder of this comment, from Soy's
 * EscapingConventions) matches a URL that has:
 * (1) Either a protocol in a whitelist (http, https, mailto or ftp).
 * (2) or no protocol. A protocol must be followed by a colon, so the pattern
 *     rules a protocol out by only allowing a colon after one of the
 *     characters [/?#].
 *     A colon after a hash (#) must be in the fragment.
 *     Otherwise, a colon after a (?) must be in a query.
 *     Otherwise, a colon after a single solidus (/) must be in a path.
 *     Otherwise, a colon after a double solidus (//) must be in the authority
 *     (before port).
 *
 * The pattern also rejects &, which introduces HTML entity declarations, when
 * it occurs before one of the characters in [/?#]. This rules out HTML
 * entities in the protocol name, which should never happen, e.g. "h&#116;tp"
 * for "http". It likewise rules out HTML entities in the first path part of a
 * relative path, e.g. "foo&lt;bar/baz"; our existing escaping functions should
 * not produce that. More importantly, it prevents masking of a colon,
 * e.g. "javascript&#58;...".
 *
 * @private
 * @const {!RegExp}
 */
goog.html.SAFE_URL_PATTERN_ =
    /^(?:(?:https?|mailto|ftp):|[^&:/?#]*(?:[/?#]|$))/i;
332
333
/**
 * Builds a SafeUrl from {@code url}, validating it unless it already is a
 * goog.html.SafeUrl.
 *
 * The input is handled as follows:
 * <ol>
 * <li>A goog.html.SafeUrl is returned as is.
 * <li>Any other value flagged as a goog.string.TypedString contributes the
 *     result of its {@code getTypedStringValue()}; every other input is
 *     converted with {@code String(url)}.
 * <li>The resulting string is tested against a pattern of commonly used safe
 *     URLs. A string that matches is wrapped unchanged; no encoding or
 *     normalization is applied to it. A string that does not match is
 *     replaced by goog.html.SafeUrl.INNOCUOUS_STRING.
 * </ol>
 *
 * The pattern accepts a URL with the http, https, mailto or ftp scheme, or a
 * relative URL (i.e., a URL without a scheme; specifically, a
 * scheme-relative, absolute-path-relative, or path-relative URL).
 *
 * @see http://url.spec.whatwg.org/#concept-relative-url
 * @param {string|!goog.string.TypedString} url The URL to validate.
 * @return {!goog.html.SafeUrl} The validated URL, wrapped as a SafeUrl.
 */
goog.html.SafeUrl.sanitize = function(url) {
  if (url instanceof goog.html.SafeUrl) {
    return url;
  }

  var candidate = url.implementsGoogStringTypedString ?
      url.getTypedStringValue() : String(url);
  var sanitized = goog.html.SAFE_URL_PATTERN_.test(candidate) ?
      candidate : goog.html.SafeUrl.INNOCUOUS_STRING;

  return goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse(
      sanitized);
};
375
376
/**
 * The SafeUrl type marker. The constructor stores a reference to this object
 * on every instance, and {@code goog.html.SafeUrl.unwrap} compares that
 * reference by identity as part of its additional run-time type checking.
 * @const {!Object}
 * @private
 */
goog.html.SafeUrl.TYPE_MARKER_GOOG_HTML_SECURITY_PRIVATE_ = {};
384
385
/**
 * Package-internal factory for SafeUrl instances.
 *
 * The given string is stored in a fresh instance without any validation, so
 * callers are responsible for only passing values that satisfy the SafeUrl
 * contract.
 *
 * @param {string} url The string to initialize the SafeUrl object with.
 * @return {!goog.html.SafeUrl} The initialized SafeUrl object.
 * @package
 */
goog.html.SafeUrl.createSafeUrlSecurityPrivateDoNotAccessOrElse = function(
    url) {
  var instance = new goog.html.SafeUrl();
  instance.privateDoNotAccessOrElseSafeHtmlWrappedValue_ = url;
  return instance;
};