4 These regex are directly derived from the collected ABNF in RFC3986
5 (except for DIGIT, ALPHA and HEXDIG, defined by RFC2234).
7 They should be processed with re.VERBOSE.
9 Thanks Mark Nottingham for this code - https://gist.github.com/138549
11 from __future__ import unicode_literals
# Base character classes (DIGIT, ALPHA and HEXDIG come from RFC 2234).
# Every fragment below is written for re.VERBOSE, so the whitespace inside
# the patterns is insignificant.
DIGIT = r"[\x30-\x39]"

ALPHA = r"[\x41-\x5A\x61-\x7A]"

HEXDIG = r"[\x30-\x39A-Fa-f]"

#   pct-encoded   = "%" HEXDIG HEXDIG
# "%%" collapses to a single literal "%" during %-interpolation.
pct_encoded = r" %% %(HEXDIG)s %(HEXDIG)s" % {"HEXDIG": HEXDIG}

#   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
unreserved = r"(?: %(ALPHA)s | %(DIGIT)s | \- | \. | _ | ~ )" % {
    "ALPHA": ALPHA,
    "DIGIT": DIGIT,
}

#   gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
# "#" is escaped because it would otherwise start a comment in verbose mode.
gen_delims = r"(?: : | / | \? | \# | \[ | \] | @ )"

#   sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
#                 / "*" / "+" / "," / ";" / "="
sub_delims = r"""(?: ! | \$ | & | ' | \( | \) |
\* | \+ | , | ; | = )"""
36 # pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
37 pchar = r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s | : | @ )" % locals(
#   reserved      = gen-delims / sub-delims
reserved = r"(?: %(gen_delims)s | %(sub_delims)s )" % {
    "gen_delims": gen_delims,
    "sub_delims": sub_delims,
}

#   scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
# One leading letter, then any run of letters, digits, "+", "-" or ".".
scheme = r"%(ALPHA)s (?: %(ALPHA)s | %(DIGIT)s | \+ | \- | \. )*" % {
    "ALPHA": ALPHA,
    "DIGIT": DIGIT,
}
52 # dec-octet = DIGIT ; 0-9
53 # / %x31-39 DIGIT ; 10-99
54 # / "1" 2DIGIT ; 100-199
55 # / "2" %x30-34 DIGIT ; 200-249
56 # / "25" %x30-35 ; 250-255
57 dec_octet = r"""(?: %(DIGIT)s |
58 [\x31-\x39] %(DIGIT)s |
60 2 [\x30-\x34] %(DIGIT)s |
65 # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
66 IPv4address = r"%(dec_octet)s \. %(dec_octet)s \. %(dec_octet)s \. %(dec_octet)s" % locals(
# h16: one to four hexadecimal digits.
h16 = r"(?: %(HEXDIG)s ){1,4}" % {"HEXDIG": HEXDIG}

#   ls32          = ( h16 ":" h16 ) / IPv4address
ls32 = r"(?: (?: %(h16)s : %(h16)s ) | %(IPv4address)s )" % {
    "h16": h16,
    "IPv4address": IPv4address,
}
75 # IPv6address = 6( h16 ":" ) ls32
76 # / "::" 5( h16 ":" ) ls32
77 # / [ h16 ] "::" 4( h16 ":" ) ls32
78 # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
79 # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
80 # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
81 # / [ *4( h16 ":" ) h16 ] "::" ls32
82 # / [ *5( h16 ":" ) h16 ] "::" h16
83 # / [ *6( h16 ":" ) h16 ] "::"
84 IPv6address = r"""(?: (?: %(h16)s : ){6} %(ls32)s |
85 :: (?: %(h16)s : ){5} %(ls32)s |
86 %(h16)s :: (?: %(h16)s : ){4} %(ls32)s |
87 (?: %(h16)s : ) %(h16)s :: (?: %(h16)s : ){3} %(ls32)s |
88 (?: %(h16)s : ){2} %(h16)s :: (?: %(h16)s : ){2} %(ls32)s |
89 (?: %(h16)s : ){3} %(h16)s :: %(h16)s : %(ls32)s |
90 (?: %(h16)s : ){4} %(h16)s :: %(ls32)s |
91 (?: %(h16)s : ){5} %(h16)s :: %(h16)s |
92 (?: %(h16)s : ){6} %(h16)s ::
#   IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
IPvFuture = r"v %(HEXDIG)s+ \. (?: %(unreserved)s | %(sub_delims)s | : )+" % {
    "HEXDIG": HEXDIG,
    "unreserved": unreserved,
    "sub_delims": sub_delims,
}

#   IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
IP_literal = r"\[ (?: %(IPv6address)s | %(IPvFuture)s ) \]" % {
    "IPv6address": IPv6address,
    "IPvFuture": IPvFuture,
}

#   reg-name      = *( unreserved / pct-encoded / sub-delims )
# Zero repetitions are allowed, so this fragment also matches the empty string.
reg_name = r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s )*" % {
    "unreserved": unreserved,
    "pct_encoded": pct_encoded,
    "sub_delims": sub_delims,
}
#   userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
# The trailing "*" implements the ABNF "*( ... )" repetition.  Without it the
# group matches exactly one character, so `authority` would reject any
# userinfo longer than a single character (e.g. "user@host").
userinfo = r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s | : )*" % locals()
#   host          = IP-literal / IPv4address / reg-name
host = r"(?: %(IP_literal)s | %(IPv4address)s | %(reg_name)s )" % {
    "IP_literal": IP_literal,
    "IPv4address": IPv4address,
    "reg_name": reg_name,
}

# port: any number of decimal digits, including none.
port = r"(?: %(DIGIT)s )*" % {"DIGIT": DIGIT}

#   authority     = [ userinfo "@" ] host [ ":" port ]
authority = r"(?: %(userinfo)s @)? %(host)s (?: : %(port)s)?" % {
    "userinfo": userinfo,
    "host": host,
    "port": port,
}
# segment: zero or more pchar.
segment = r"%(pchar)s*" % {"pchar": pchar}

#   segment-nz    = 1*pchar
segment_nz = r"%(pchar)s+" % {"pchar": pchar}

#   segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
#                 ; non-zero-length segment without any colon ":"
segment_nz_nc = r"(?: %(unreserved)s | %(pct_encoded)s | %(sub_delims)s | @ )+" % {
    "unreserved": unreserved,
    "pct_encoded": pct_encoded,
    "sub_delims": sub_delims,
}

#   path-abempty  = *( "/" segment )
path_abempty = r"(?: / %(segment)s )*" % {"segment": segment}

#   path-absolute = "/" [ segment-nz *( "/" segment ) ]
path_absolute = r"/ (?: %(segment_nz)s (?: / %(segment)s )* )?" % {
    "segment_nz": segment_nz,
    "segment": segment,
}

#   path-noscheme = segment-nz-nc *( "/" segment )
path_noscheme = r"%(segment_nz_nc)s (?: / %(segment)s )*" % {
    "segment_nz_nc": segment_nz_nc,
    "segment": segment,
}

#   path-rootless = segment-nz *( "/" segment )
path_rootless = r"%(segment_nz)s (?: / %(segment)s )*" % {
    "segment_nz": segment_nz,
    "segment": segment,
}

#   path-empty    = 0<pchar>
path_empty = r""  # FIXME
145 # path = path-abempty ; begins with "/" or is empty
146 # / path-absolute ; begins with "/" but not "//"
147 # / path-noscheme ; begins with a non-colon segment
148 # / path-rootless ; begins with a segment
149 # / path-empty ; zero characters
150 path = r"""(?: %(path_abempty)s |
### Query and Fragment

#   query         = *( pchar / "/" / "?" )
query = r"(?: %(pchar)s | / | \? )*" % {"pchar": pchar}

#   fragment      = *( pchar / "/" / "?" )
# Same character set as `query`; kept as a separate name to mirror the ABNF.
fragment = r"(?: %(pchar)s | / | \? )*" % {"pchar": pchar}
168 # hier-part = "//" authority path-abempty
172 hier_part = r"""(?: (?: // %(authority)s %(path_abempty)s ) |
179 # relative-part = "//" authority path-abempty
183 relative_part = r"""(?: (?: // %(authority)s %(path_abempty)s ) |
190 # relative-ref = relative-part [ "?" query ] [ "#" fragment ]
191 relative_ref = r"%(relative_part)s (?: \? %(query)s)? (?: \# %(fragment)s)?" % locals(
194 # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
195 URI = r"^(?: %(scheme)s : %(hier_part)s (?: \? %(query)s )? (?: \# %(fragment)s )? )$" % locals(
#   URI-reference = URI / relative-ref
# The whole alternation is wrapped in "^ ... $" anchors.
URI_reference = r"^(?: %(URI)s | %(relative_ref)s )$" % {
    "URI": URI,
    "relative_ref": relative_ref,
}
201 # absolute-URI = scheme ":" hier-part [ "?" query ]
202 absolute_URI = r"^(?: %(scheme)s : %(hier_part)s (?: \? %(query)s )? )$" % locals(
207 return re.match(URI, uri, re.VERBOSE)
def is_uri_reference(uri):
    """Match *uri* against the ``URI_reference`` pattern.

    The pattern is compiled in verbose mode.  Returns the resulting match
    object, or ``None`` when *uri* does not match.
    """
    matcher = re.compile(URI_reference, re.VERBOSE)
    return matcher.match(uri)
def is_absolute_uri(uri):
    """Match *uri* against the ``absolute_URI`` pattern.

    The pattern is compiled in verbose mode.  Returns the resulting match
    object, or ``None`` when *uri* does not match.
    """
    matcher = re.compile(absolute_URI, re.VERBOSE)
    return matcher.match(uri)