diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2014-06-22 00:22:19 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2014-06-22 00:22:19 +0000 |
commit | bb83f32dc3e0424d25fa4e55d8ff32b061320e41 (patch) | |
tree | e57c8b459aabfc0a32e459f76eb2f192aa5f097a /lib/uri/rfc3986_parser.rb | |
parent | 97d36e5bd0fd062fb1c1922d08170038aba61baa (diff) |
support RFC3986 [Feature #2542]
* lib/uri/common.rb (URI::REGEXP): move to lib/uri/rfc2396_parser.rb.
* lib/uri/common.rb (URI::Parser): ditto.
* lib/uri/common.rb (URI.split): use RFC3986_Parser.
* lib/uri/common.rb (URI.parse): ditto.
* lib/uri/common.rb (URI.join): ditto.
* lib/uri/common.rb (URI.extract): deprecated.
* lib/uri/common.rb (URI.regexp): ditto.
* lib/uri/rfc2396_parser.rb: added.
* lib/uri/rfc3986_parser.rb: added.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46491 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/uri/rfc3986_parser.rb')
-rw-r--r-- | lib/uri/rfc3986_parser.rb | 101 |
1 files changed, 101 insertions, 0 deletions
module URI
  # Parser for URIs and relative references following the RFC 3986 grammar.
  # (Contents of lib/uri/rfc3986_parser.rb.)
  class RFC3986_Parser # :nodoc:
    # Absolute URI as defined in RFC 3986.
    #
    # Deviation from the RFC grammar: reg-name must be non-empty (+ instead
    # of *) and the whole host group is optional, so an absent host is
    # captured as nil instead of as an empty string.
    #
    # In the IPv6address alternatives the optional leading h16 is written
    # (?:\h{1,4})? because \h{1,4}? would be a lazy quantifier that still
    # requires at least one hex digit.
    RFC3986_URI = /\A(?<URI>(?<scheme>[A-Za-z][+\-.0-9A-Za-z]*):(?<hier-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|(?:\h{1,4})?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-rootless>\g<segment-nz>(?:\/\g<segment>)*)|(?<path-empty>))(?:\?(?<query>(?:%\h\h|[!$&-.0-;=@-Z_a-z~]|[\/?])*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~]|[\/?])*))?)\z/

    # Relative reference as defined in RFC 3986 (no scheme).
    #
    # The IP-literal brackets enclose the whole IPv6address/IPvFuture
    # alternation, so both the opening "[" and the closing "]" are required
    # for either form.
    RFC3986_relative_ref = /\A(?<relative-ref>(?<relative-part>\/\/(?<authority>(?:(?<userinfo>(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*)@)?(?<host>(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|(?:\h{1,4})?::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:){,1}\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])+))?(?::(?<port>\d*))?)(?<path-abempty>(?:\/(?<segment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*))*)|(?<path-absolute>\/(?:(?<segment-nz>(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+)(?:\/\g<segment>)*)?)|(?<path-noscheme>(?<segment-nz-nc>(?:%\h\h|[!$&-.0-9;=@-Z_a-z~])+)(?:\/\g<segment>)*)|(?<path-empty>(?:%\h\h|[!$&-.0-;=@-Z_a-z~]){0}))(?:\?(?<query>(?:%\h\h|[!$&-.0-;=@-Z_a-z~]|[\/?])*))?(?:\#(?<fragment>(?:%\h\h|[!$&-.0-;=@-Z_a-z~]|[\/?])*))?)\z/

    # Splits +uri+ into a nine-element array:
    #
    #   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
    #
    # +registry+ is always nil. When an absolute URI has a rootless path
    # (e.g. "mailto:a@b.c?x=1") the path and, if present, "?" plus the query
    # are returned together in the +opaque+ slot, and +path+ and +query+ are
    # nil.
    #
    # Raises InvalidURIError when +uri+ matches neither RFC3986_URI nor
    # RFC3986_relative_ref.
    def split(uri) #:nodoc:
      if m = RFC3986_URI.match(uri)
        if m["path-rootless"] # opaque
          # MatchData#[] returns a fresh String, so appending is safe.
          opaque = m["path-rootless"]
          opaque << '?' << m["query"] if m["query"]
          [
            m["scheme"],
            nil, # userinfo
            nil, # host
            nil, # port
            nil, # registry
            nil, # path
            opaque,
            nil, # query
            m["fragment"],
          ]
        else # normal
          [
            m["scheme"],
            m["userinfo"],
            m["host"],
            m["port"],
            nil, # registry
            (m["path-abempty"] || m["path-absolute"] || m["path-empty"]),
            nil, # opaque
            m["query"],
            m["fragment"],
          ]
        end
      elsif m = RFC3986_relative_ref.match(uri)
        [
          nil, # scheme
          m["userinfo"],
          m["host"],
          m["port"],
          nil, # registry
          (m["path-abempty"] || m["path-absolute"] || m["path-noscheme"] || m["path-empty"]),
          nil, # opaque
          m["query"],
          m["fragment"],
        ]
      else
        raise InvalidURIError, "bad URI(is not URI?): #{uri}"
      end
    end

    # Splits +uri+ and builds a URI object from the components. The class
    # registered in URI.scheme_list under the upcased scheme is used when
    # there is one; otherwise Generic is used. This parser is passed as the
    # last constructor argument.
    def parse(uri) # :nodoc:
      scheme, userinfo, host, port,
        registry, path, opaque, query, fragment = self.split(uri)

      if scheme && URI.scheme_list.include?(scheme.upcase)
        URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port,
                                           registry, path, opaque, query,
                                           fragment, self)
      else
        Generic.new(scheme, userinfo, host, port,
                    registry, path, opaque, query,
                    fragment, self)
      end
    end

    # Converts the first element of +uris+ to a URI object and then folds
    # the whole list with #merge.
    def join(*uris) # :nodoc:
      uris[0] = convert_to_uri(uris[0])
      uris.inject :merge
    end

    @@to_s = Kernel.instance_method(:to_s)

    # Uses Kernel#to_s bound to this object as the inspect string.
    def inspect
      @@to_s.bind(self).call
    end

    # Returns a new Hash of anchored regexps, one per URI component.
    # The :OPAQUE and :PORT entries are nil.
    def regexp
      {
        SCHEME: /\A[A-Za-z][A-Za-z0-9+\-.]*\z/,
        USERINFO: /\A(?:%\h\h|[!$&-.0-;=A-Z_a-z~])*\z/,
        HOST: /\A(?:(?<IP-literal>\[(?:(?<IPv6address>(?:\h{1,4}:){6}(?<ls32>\h{1,4}:\h{1,4}|(?<IPv4address>(?<dec-octet>[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]|\d)\.\g<dec-octet>\.\g<dec-octet>\.\g<dec-octet>))|::(?:\h{1,4}:){5}\g<ls32>|\h{,4}::(?:\h{1,4}:){4}\g<ls32>|(?:(?:\h{1,4}:)?\h{1,4})?::(?:\h{1,4}:){3}\g<ls32>|(?:(?:\h{1,4}:){,2}\h{1,4})?::(?:\h{1,4}:){2}\g<ls32>|(?:(?:\h{1,4}:){,3}\h{1,4})?::\h{1,4}:\g<ls32>|(?:(?:\h{1,4}:){,4}\h{1,4})?::\g<ls32>|(?:(?:\h{1,4}:){,5}\h{1,4})?::\h{1,4}|(?:(?:\h{1,4}:){,6}\h{1,4})?::)|(?<IPvFuture>v\h+\.[!$&-.0-;=A-Z_a-z~]+))\])|\g<IPv4address>|(?<reg-name>(?:%\h\h|[!$&-.0-9;=A-Z_a-z~])*))\z/,
        ABS_PATH: /\A\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/,
        REL_PATH: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~])+(?:\/(?:%\h\h|[!$&-.0-;=@-Z_a-z~])*)*\z/,
        QUERY: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~]|[\/?])*\z/,
        FRAGMENT: /\A(?:%\h\h|[!$&-.0-;=@-Z_a-z~]|[\/?])*\z/,
        OPAQUE: nil,
        PORT: nil,
      }
    end

    private

    # Returns +uri+ unchanged when it is already a URI::Generic, parses it
    # when it can be converted to a String, and raises ArgumentError
    # otherwise.
    def convert_to_uri(uri)
      if uri.is_a?(URI::Generic)
        uri
      elsif uri = String.try_convert(uri)
        parse(uri)
      else
        raise ArgumentError,
          "bad argument (expected URI object or URI string)"
      end
    end

  end # class RFC3986_Parser
end # module URI