blob: 21ccb13e5b2980e0db8098f1c6f230ea136b0f69 [file] [log] [blame]
// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "url/scheme_host_port.h"
6
avic0c60312015-12-21 21:03:507#include <stdint.h>
mkwst28c7c112015-07-14 22:41:068#include <string.h>
9
Chris Fredrickson29f143f2025-01-17 17:42:2910#include <compare>
Tom Sepezb03f3f82021-12-09 23:43:3811#include <ostream>
David Benjaminbf75caf2023-09-29 04:39:5912#include <string_view>
jsbell938b0252015-11-30 23:17:4113#include <tuple>
14
Hans Wennborg0e223682020-04-27 21:51:2915#include "base/check_op.h"
Jan Wilken Dörrie721926d2020-12-09 23:57:4716#include "base/containers/contains.h"
Hans Wennborg0e223682020-04-27 21:51:2917#include "base/notreached.h"
tyoshino11a7c9fe2015-08-19 08:51:4618#include "base/numerics/safe_conversions.h"
mkwst28c7c112015-07-14 22:41:0619#include "base/strings/string_number_conversions.h"
Xiaochen Zhou4350e462023-08-21 15:06:1220#include "base/trace_event/memory_usage_estimator.h"
mkwst28c7c112015-07-14 22:41:0621#include "url/gurl.h"
csharrison048bee12016-10-04 00:08:2122#include "url/third_party/mozilla/url_parse.h"
mkwst28c7c112015-07-14 22:41:0623#include "url/url_canon.h"
24#include "url/url_canon_stdstring.h"
25#include "url/url_constants.h"
Hayato Itocddb08782024-03-13 01:02:2426#include "url/url_features.h"
mkwst28c7c112015-07-14 22:41:0627#include "url/url_util.h"
28
29namespace url {
30
tyoshino11a7c9fe2015-08-19 08:51:4631namespace {
mkwst28c7c112015-07-14 22:41:0632
Daniel Clarkdc916842024-10-08 18:28:1933bool IsCanonicalHost(std::string_view host, bool is_file_scheme) {
msramek9b7972dd2015-08-18 13:04:1934 std::string canon_host;
tyoshino11a7c9fe2015-08-19 08:51:4635
36 // Try to canonicalize the host (copy/pasted from net/base. :( ).
37 const Component raw_host_component(0,
38 base::checked_cast<int>(host.length()));
39 StdStringCanonOutput canon_host_output(&canon_host);
40 CanonHostInfo host_info;
Daniel Clarkdc916842024-10-08 18:28:1941 if (is_file_scheme) {
42 CanonicalizeFileHostVerbose(host.data(), raw_host_component,
43 canon_host_output, host_info);
44 } else {
45 CanonicalizeSpecialHostVerbose(host.data(), raw_host_component,
46 canon_host_output, host_info);
47 }
mkwst28c7c112015-07-14 22:41:0648
49 if (host_info.out_host.is_nonempty() &&
tyoshino11a7c9fe2015-08-19 08:51:4650 host_info.family != CanonHostInfo::BROKEN) {
mkwst28c7c112015-07-14 22:41:0651 // Success! Assert that there's no extra garbage.
52 canon_host_output.Complete();
53 DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length()));
54 } else {
55 // Empty host, or canonicalization failed.
56 canon_host.clear();
57 }
mkwst28c7c112015-07-14 22:41:0658
tyoshino11a7c9fe2015-08-19 08:51:4659 return host == canon_host;
mkwst28c7c112015-07-14 22:41:0660}
61
Lukasz Anforowicz6a6a05d2021-01-12 01:40:0762// Note: When changing IsValidInput, consider also updating
63// ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
64// behavior between these 2 layers, but we should avoid introducing new
65// differences).
Daniel Cheng3462e2a62024-09-13 01:23:2166bool IsValidInput(std::string_view scheme,
67 std::string_view host,
csharrisonedf893f2016-10-12 01:42:5668 uint16_t port,
69 SchemeHostPort::ConstructPolicy policy) {
Daniel Chengc2b752b2018-11-28 00:03:2970 // Empty schemes are never valid.
71 if (scheme.empty())
72 return false;
73
Lukasz Anforowicz6a6a05d2021-01-12 01:40:0774 // about:blank and other no-access schemes translate into an opaque origin.
75 // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
76 if (base::Contains(GetNoAccessSchemes(), scheme))
77 return false;
78
Nick Carter123ca192018-03-30 23:25:3679 SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
tyoshino11a7c9fe2015-08-19 08:51:4680 bool is_standard = GetStandardSchemeType(
81 scheme.data(),
82 Component(0, base::checked_cast<int>(scheme.length())),
83 &scheme_type);
Nick Carter8aa718ed2018-09-07 21:39:5184 if (!is_standard) {
Lukasz Anforowicz6a6a05d2021-01-12 01:40:0785 // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
86 // non-standard schemes are currently allowed to be tuple origins.
Nick Carter8aa718ed2018-09-07 21:39:5187 //
88 // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
89 // remove this local scheme exception.
Hayato Itocddb08782024-03-13 01:02:2490 if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
91 // If the flag is enabled, a host can be empty for non-special URLs.
92 // Therefore, we don't check a host nor port.
93 if (base::Contains(GetLocalSchemes(), scheme)) {
94 return true;
95 }
96 } else {
97 if (base::Contains(GetLocalSchemes(), scheme) && host.empty() &&
98 port == 0) {
99 return true;
100 }
101 }
Daniel Chengc2b752b2018-11-28 00:03:29102
103 // Otherwise, allow non-standard schemes only if the Android WebView
104 // workaround is enabled.
105 return AllowNonStandardSchemesForAndroidWebView();
Nick Carter8aa718ed2018-09-07 21:39:51106 }
mkwst28c7c112015-07-14 22:41:06107
tyoshino11a7c9fe2015-08-19 08:51:46108 switch (scheme_type) {
Nick Carterff69a102018-04-04 00:15:17109 case SCHEME_WITH_HOST_AND_PORT:
Nick Carter123ca192018-03-30 23:25:36110 case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
Lukasz Anforowiczc664e8b2020-04-13 20:08:55111 // A URL with |scheme| is required to have the host and port, so return an
112 // invalid instance if host is not given. Note that a valid port is
113 // always provided by SchemeHostPort(const GURL&) constructor (a missing
114 // port is replaced with a default port if needed by
115 // GURL::EffectiveIntPort()).
116 if (host.empty())
tyoshino11a7c9fe2015-08-19 08:51:46117 return false;
118
csharrisonedf893f2016-10-12 01:42:56119 // Don't do an expensive canonicalization if the host is already
120 // canonicalized.
121 DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
Daniel Clarkdc916842024-10-08 18:28:19122 IsCanonicalHost(host, scheme == url::kFileScheme));
csharrisonedf893f2016-10-12 01:42:56123 if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
Daniel Clarkdc916842024-10-08 18:28:19124 !IsCanonicalHost(host, scheme == url::kFileScheme)) {
tyoshino11a7c9fe2015-08-19 08:51:46125 return false;
csharrisonedf893f2016-10-12 01:42:56126 }
tyoshino11a7c9fe2015-08-19 08:51:46127
128 return true;
129
Nick Carter123ca192018-03-30 23:25:36130 case SCHEME_WITH_HOST:
tyoshino11a7c9fe2015-08-19 08:51:46131 if (port != 0) {
132 // Return an invalid object if a URL with the scheme never represents
133 // the port data but the given |port| is non-zero.
134 return false;
135 }
136
csharrisonedf893f2016-10-12 01:42:56137 // Don't do an expensive canonicalization if the host is already
138 // canonicalized.
139 DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
Daniel Clarkdc916842024-10-08 18:28:19140 IsCanonicalHost(host, scheme == url::kFileScheme));
csharrisonedf893f2016-10-12 01:42:56141 if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
Daniel Clarkdc916842024-10-08 18:28:19142 !IsCanonicalHost(host, scheme == url::kFileScheme)) {
tyoshino11a7c9fe2015-08-19 08:51:46143 return false;
csharrisonedf893f2016-10-12 01:42:56144 }
tyoshino11a7c9fe2015-08-19 08:51:46145
146 return true;
147
148 case SCHEME_WITHOUT_AUTHORITY:
149 return false;
150
151 default:
Peter Boström89c827082024-09-20 10:54:38152 NOTREACHED();
tyoshino11a7c9fe2015-08-19 08:51:46153 }
154}
155
156} // namespace
157
Victor Costan7c9d0b1482020-07-07 14:13:03158SchemeHostPort::SchemeHostPort() = default;
tyoshino11a7c9fe2015-08-19 08:51:46159
csharrisonf07ac3c2016-12-13 04:15:02160SchemeHostPort::SchemeHostPort(std::string scheme,
161 std::string host,
csharrisonedf893f2016-10-12 01:42:56162 uint16_t port,
Victor Costan7c9d0b1482020-07-07 14:13:03163 ConstructPolicy policy) {
Hayato Itocddb08782024-03-13 01:02:24164 if (ShouldDiscardHostAndPort(scheme)) {
165 host = "";
166 port = 0;
167 }
168
Nick Carter8aa718ed2018-09-07 21:39:51169 if (!IsValidInput(scheme, host, port, policy)) {
Nasko Oskov55119382020-01-17 18:22:18170 DCHECK(!IsValid());
mkwst28c7c112015-07-14 22:41:06171 return;
Nick Carter8aa718ed2018-09-07 21:39:51172 }
mkwst28c7c112015-07-14 22:41:06173
csharrisonf07ac3c2016-12-13 04:15:02174 scheme_ = std::move(scheme);
175 host_ = std::move(host);
tyoshino11a7c9fe2015-08-19 08:51:46176 port_ = port;
Nasko Oskov55119382020-01-17 18:22:18177 DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
178 << " Port: " << port;
tyoshino11a7c9fe2015-08-19 08:51:46179}
180
David Benjaminbf75caf2023-09-29 04:39:59181SchemeHostPort::SchemeHostPort(std::string_view scheme,
182 std::string_view host,
csharrisonedf893f2016-10-12 01:42:56183 uint16_t port)
Peter Kasting95e78e42021-04-29 23:37:51184 : SchemeHostPort(std::string(scheme),
185 std::string(host),
csharrisonedf893f2016-10-12 01:42:56186 port,
187 ConstructPolicy::CHECK_CANONICALIZATION) {}
188
Victor Costan7c9d0b1482020-07-07 14:13:03189SchemeHostPort::SchemeHostPort(const GURL& url) {
tyoshino11a7c9fe2015-08-19 08:51:46190 if (!url.is_valid())
191 return;
192
David Benjaminbf75caf2023-09-29 04:39:59193 std::string_view scheme = url.scheme_piece();
194 std::string_view host = url.host_piece();
tyoshino11a7c9fe2015-08-19 08:51:46195
196 // A valid GURL never returns PORT_INVALID.
197 int port = url.EffectiveIntPort();
Takeshi Yoshinoe0ec80392017-12-13 11:40:53198 if (port == PORT_UNSPECIFIED) {
tyoshino11a7c9fe2015-08-19 08:51:46199 port = 0;
Takeshi Yoshinoe0ec80392017-12-13 11:40:53200 } else {
201 DCHECK_GE(port, 0);
202 DCHECK_LE(port, 65535);
203 }
tyoshino11a7c9fe2015-08-19 08:51:46204
Hayato Itocddb08782024-03-13 01:02:24205 if (ShouldDiscardHostAndPort(scheme)) {
206 host = "";
207 port = 0;
208 }
209
csharrisonedf893f2016-10-12 01:42:56210 if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
tyoshino11a7c9fe2015-08-19 08:51:46211 return;
212
Jan Wilken Dörriec15b4bc2020-01-30 07:04:12213 scheme_ = std::string(scheme);
214 host_ = std::string(host);
tyoshino11a7c9fe2015-08-19 08:51:46215 port_ = port;
mkwst28c7c112015-07-14 22:41:06216}
217
Chris Watkins3e06be12017-11-29 01:40:54218SchemeHostPort::~SchemeHostPort() = default;
mkwst28c7c112015-07-14 22:41:06219
Nasko Oskov55119382020-01-17 18:22:18220bool SchemeHostPort::IsValid() const {
Nick Carter8aa718ed2018-09-07 21:39:51221 // It suffices to just check |scheme_| for emptiness; the other fields are
222 // never present without it.
223 DCHECK(!scheme_.empty() || host_.empty());
224 DCHECK(!scheme_.empty() || port_ == 0);
Nasko Oskov55119382020-01-17 18:22:18225 return !scheme_.empty();
mkwst28c7c112015-07-14 22:41:06226}
227
228std::string SchemeHostPort::Serialize() const {
csharrison048bee12016-10-04 00:08:21229 // Null checking for |parsed| in SerializeInternal is probably slower than
230 // just filling it in and discarding it here.
231 url::Parsed parsed;
232 return SerializeInternal(&parsed);
233}
234
235GURL SchemeHostPort::GetURL() const {
236 url::Parsed parsed;
237 std::string serialized = SerializeInternal(&parsed);
238
Nasko Oskov55119382020-01-17 18:22:18239 if (!IsValid())
csharrison6fbc9fa2016-10-08 03:31:50240 return GURL(std::move(serialized), parsed, false);
241
Charles Harrison60f0c532018-03-12 17:30:55242 // SchemeHostPort does not have enough information to determine if an empty
243 // host is valid or not for the given scheme. Force re-parsing.
244 DCHECK(!scheme_.empty());
245 if (host_.empty())
246 return GURL(serialized);
247
csharrison048bee12016-10-04 00:08:21248 // If the serialized string is passed to GURL for parsing, it will append an
Hayato Itocddb08782024-03-13 01:02:24249 // empty path "/" for standard URLs. Add that here. Note: per RFC 6454 we
250 // cannot do this for normal Origin serialization.
csharrison048bee12016-10-04 00:08:21251 DCHECK(!parsed.path.is_valid());
Hayato Itocddb08782024-03-13 01:02:24252 if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
253 // Append "/" only if the URL is standard. If the flag is enabled,
254 // non-special URLs can have an empty path and GURL doesn't append "/" to
255 // that.
256 if (IsStandardScheme(scheme_)) {
257 parsed.path = Component(serialized.length(), 1);
258 serialized.append("/");
259 }
260 } else {
261 parsed.path = Component(serialized.length(), 1);
262 serialized.append("/");
263 }
csharrison048bee12016-10-04 00:08:21264 return GURL(std::move(serialized), parsed, true);
265}
266
Xiaochen Zhou4350e462023-08-21 15:06:12267size_t SchemeHostPort::EstimateMemoryUsage() const {
268 return base::trace_event::EstimateMemoryUsage(scheme_) +
269 base::trace_event::EstimateMemoryUsage(host_);
270}
271
csharrison048bee12016-10-04 00:08:21272std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
mkwst28c7c112015-07-14 22:41:06273 std::string result;
Nasko Oskov55119382020-01-17 18:22:18274 if (!IsValid())
mkwst28c7c112015-07-14 22:41:06275 return result;
276
csharrison8b5c3702016-12-09 12:55:41277 // Reserve enough space for the "normal" case of scheme://host/.
278 result.reserve(scheme_.size() + host_.size() + 4);
279
csharrison6fbc9fa2016-10-08 03:31:50280 if (!scheme_.empty()) {
281 parsed->scheme = Component(0, scheme_.length());
282 result.append(scheme_);
283 }
csharrison048bee12016-10-04 00:08:21284
mkwst28c7c112015-07-14 22:41:06285 result.append(kStandardSchemeSeparator);
csharrison048bee12016-10-04 00:08:21286
csharrison6fbc9fa2016-10-08 03:31:50287 if (!host_.empty()) {
288 parsed->host = Component(result.length(), host_.length());
289 result.append(host_);
290 }
mkwst28c7c112015-07-14 22:41:06291
tyoshino11a7c9fe2015-08-19 08:51:46292 // Omit the port component if the port matches with the default port
293 // defined for the scheme, if any.
Scott Violeta02c94b142024-08-28 03:29:18294 int default_port = DefaultPortForScheme(scheme_);
tyoshino11a7c9fe2015-08-19 08:51:46295 if (default_port == PORT_UNSPECIFIED)
296 return result;
297 if (port_ != default_port) {
mkwst28c7c112015-07-14 22:41:06298 result.push_back(':');
Raul Tambreb3eaf052019-02-09 07:22:58299 std::string port(base::NumberToString(port_));
csharrison048bee12016-10-04 00:08:21300 parsed->port = Component(result.length(), port.length());
301 result.append(std::move(port));
mkwst28c7c112015-07-14 22:41:06302 }
303
304 return result;
305}
306
Tom Sepezb55f2f12024-10-02 21:54:33307bool SchemeHostPort::ShouldDiscardHostAndPort(std::string_view scheme) {
Hayato Itocddb08782024-03-13 01:02:24308 return IsAndroidWebViewHackEnabledScheme(scheme) &&
309 IsUsingStandardCompliantNonSpecialSchemeURLParsing();
310}
311
Nasko Oskov9277dfc2018-09-17 23:20:54312std::ostream& operator<<(std::ostream& out,
313 const SchemeHostPort& scheme_host_port) {
314 return out << scheme_host_port.Serialize();
315}
316
mkwst28c7c112015-07-14 22:41:06317} // namespace url