# Source: github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/stdlib/urlparse.py
"""Parse (absolute and relative) URLs.

urlparse module is based upon the following RFC specifications.

RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
and L. Masinter, January 2005.

RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter
and L.Masinter, December 1999.

RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T.
Berners-Lee, R. Fielding, and L. Masinter, August 1998.

RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zwinski, July 1998.

RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
1995.

RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
McCahill, December 1994

RFC 3986 is considered the current standard and any future changes to
urlparse module should conform with it. The urlparse module is
currently not entirely compliant with this RFC due to defacto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The testcases in
test_urlparse.py provides a good indicator of parsing behavior.

"""

import re

import operator
_itemgetter = operator.itemgetter
_property = property
_tuple = tuple

__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]

# A classification of schemes ('' means apply by default)
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', '', 'sftp',
                 'svn', 'svn+ssh']
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
               'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', '', 'sftp', 'tel']

# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name.)
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero', '']

# Characters valid in scheme names
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')

# urlsplit() memoizes its results in _parse_cache; once the cache holds
# MAX_CACHE_SIZE entries it is emptied wholesale before the next insert.
MAX_CACHE_SIZE = 20
_parse_cache = {}


def clear_cache():
    """Clear the parse cache."""
    _parse_cache.clear()


class ResultMixin(object):
    """Shared methods for the parsed result objects.

    Every property is derived from ``self.netloc``, which the concrete
    result classes provide.
    """

    # @property
    def username(self):
        """User name from the netloc's userinfo part, or None if absent."""
        netloc = self.netloc
        if "@" in netloc:
            # rsplit: everything before the LAST '@' is userinfo.
            userinfo = netloc.rsplit("@", 1)[0]
            if ":" in userinfo:
                userinfo = userinfo.split(":", 1)[0]
            return userinfo
        return None
    username = property(username)

    # @property
    def password(self):
        """Password from the netloc's userinfo part, or None if absent."""
        netloc = self.netloc
        if "@" in netloc:
            userinfo = netloc.rsplit("@", 1)[0]
            if ":" in userinfo:
                return userinfo.split(":", 1)[1]
        return None
    password = property(password)

    # @property
    def hostname(self):
        """Lower-cased host name, without brackets or port; None if empty."""
        netloc = self.netloc.split('@')[-1]
        if '[' in netloc and ']' in netloc:
            # Bracketed (IPv6 literal) host: take the text between '[' and ']'.
            return netloc.split(']')[0][1:].lower()
        elif ':' in netloc:
            return netloc.split(':')[0].lower()
        elif netloc == '':
            return None
        else:
            return netloc.lower()
    hostname = property(hostname)

    # @property
    def port(self):
        """Port as an int if present and within 0..65535, otherwise None.

        A non-numeric port makes int() raise ValueError.
        """
        # Drop userinfo, then anything up to a closing ']' of an IPv6 literal.
        netloc = self.netloc.split('@')[-1].split(']')[-1]
        if ':' in netloc:
            port = netloc.split(':')[1]
            if port:
                port = int(port, 10)
                # verify legal port
                if (0 <= port <= 65535):
                    return port
        return None
    port = property(port)


# from collections import namedtuple
# The two tuple classes below are hand-expanded equivalents of what
# namedtuple() would generate.
class _SplitResult(tuple):
    'SplitResult(scheme, netloc, path, query, fragment)'

    __slots__ = ()

    _fields = ('scheme', 'netloc', 'path', 'query', 'fragment')

    def __new__(_cls, scheme, netloc, path, query, fragment):
        'Create new instance of SplitResult(scheme, netloc, path, query, fragment)'
        return _tuple.__new__(_cls, (scheme, netloc, path, query, fragment))

    # @classmethod
    def _make(cls, iterable, new=tuple.__new__, len=len):
        'Make a new SplitResult object from a sequence or iterable'
        result = new(cls, iterable)
        if len(result) != 5:
            raise TypeError('Expected 5 arguments, got %d' % len(result))
        return result
    _make = classmethod(_make)

    def __repr__(self):
        'Return a nicely formatted representation string'
        return 'SplitResult(scheme=%r, netloc=%r, path=%r, query=%r, fragment=%r)' % self

    def _asdict(self):
        'Return a new OrderedDict which maps field names to their values'
        # OrderedDict is not imported at module level; importing it here
        # fixes the NameError this method used to raise while keeping
        # module import independent of the collections module.
        from collections import OrderedDict
        return OrderedDict(zip(self._fields, self))

    def _replace(_self, **kwds):
        'Return a new SplitResult object replacing specified fields with new values'
        # kwds.pop(name, current) yields the override if given, else the
        # current value; whatever is left in kwds afterwards is unknown.
        result = _self._make(map(kwds.pop, ('scheme', 'netloc', 'path', 'query', 'fragment'), _self))
        if kwds:
            raise ValueError('Got unexpected field names: %r' % list(kwds))
        return result

    def __getnewargs__(self):
        'Return self as a plain tuple.  Used by copy and pickle.'
        return tuple(self)

    __dict__ = _property(_asdict)

    def __getstate__(self):
        'Exclude the OrderedDict from pickling'
        pass

    scheme = _property(_itemgetter(0), doc='Alias for field number 0')

    netloc = _property(_itemgetter(1), doc='Alias for field number 1')

    path = _property(_itemgetter(2), doc='Alias for field number 2')

    query = _property(_itemgetter(3), doc='Alias for field number 3')

    fragment = _property(_itemgetter(4), doc='Alias for field number 4')


# class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):
class SplitResult(_SplitResult, ResultMixin):
    """5-tuple result of urlsplit() with the ResultMixin accessors."""

    __slots__ = ()

    def geturl(self):
        """Reassemble this result into a URL string via urlunsplit()."""
        return urlunsplit(self)


class _ParseResult(tuple):
    'ParseResult(scheme, netloc, path, params, query, fragment)'

    __slots__ = ()

    _fields = ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')

    def __new__(_cls, scheme, netloc, path, params, query, fragment):
        'Create new instance of ParseResult(scheme, netloc, path, params, query, fragment)'
        return _tuple.__new__(_cls, (scheme, netloc, path, params, query, fragment))

    # @classmethod
    def _make(cls, iterable, new=tuple.__new__, len=len):
        'Make a new ParseResult object from a sequence or iterable'
        result = new(cls, iterable)
        if len(result) != 6:
            raise TypeError('Expected 6 arguments, got %d' % len(result))
        return result
    _make = classmethod(_make)

    def __repr__(self):
        'Return a nicely formatted representation string'
        return 'ParseResult(scheme=%r, netloc=%r, path=%r, params=%r, query=%r, fragment=%r)' % self

    def _asdict(self):
        'Return a new OrderedDict which maps field names to their values'
        # OrderedDict is not imported at module level; importing it here
        # fixes the NameError this method used to raise while keeping
        # module import independent of the collections module.
        from collections import OrderedDict
        return OrderedDict(zip(self._fields, self))

    def _replace(_self, **kwds):
        'Return a new ParseResult object replacing specified fields with new values'
        # kwds.pop(name, current) yields the override if given, else the
        # current value; whatever is left in kwds afterwards is unknown.
        result = _self._make(map(kwds.pop, ('scheme', 'netloc', 'path', 'params', 'query', 'fragment'), _self))
        if kwds:
            raise ValueError('Got unexpected field names: %r' % list(kwds))
        return result

    def __getnewargs__(self):
        'Return self as a plain tuple.  Used by copy and pickle.'
        return tuple(self)

    __dict__ = _property(_asdict)

    def __getstate__(self):
        'Exclude the OrderedDict from pickling'
        pass

    scheme = _property(_itemgetter(0), doc='Alias for field number 0')

    netloc = _property(_itemgetter(1), doc='Alias for field number 1')

    path = _property(_itemgetter(2), doc='Alias for field number 2')

    params = _property(_itemgetter(3), doc='Alias for field number 3')

    query = _property(_itemgetter(4), doc='Alias for field number 4')

    fragment = _property(_itemgetter(5), doc='Alias for field number 5')


# class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):
class ParseResult(_ParseResult, ResultMixin):
    """6-tuple result of urlparse() with the ResultMixin accessors."""

    __slots__ = ()

    def geturl(self):
        """Reassemble this result into a URL string via urlunparse()."""
        return urlunparse(self)


def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    split_result = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, url, query, fragment = split_result
    # Params are only split off for schemes listed in uses_params.
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    else:
        params = ''
    return ParseResult(scheme, netloc, url, params, query, fragment)


def _splitparams(url):
    """Split ';params' off the last path segment; return (path, params)."""
    if '/' in url:
        # Only a ';' at or after the last '/' introduces params.
        i = url.find(';', url.rfind('/'))
        if i < 0:
            return url, ''
    else:
        i = url.find(';')
    return url[:i], url[i+1:]


def _splitnetloc(url, start=0):
    """Return (netloc, rest): url[start:] cut at the first of '/', '?', '#'."""
    delim = len(url)   # position of end of domain part of url, default is end
    for c in '/?#':    # look for delimiters; the order is NOT important
        wdelim = url.find(c, start)        # find first of this delim
        if wdelim >= 0:                    # if found
            delim = min(delim, wdelim)     # use earliest delim position
    return url[start:delim], url[delim:]   # return (domain, rest)


def _split_remainder(url, allow_fragments):
    """Split a scheme-less URL remainder into (netloc, path, query, fragment).

    Raises ValueError("Invalid IPv6 URL") when the netloc contains exactly
    one of '[' and ']'.
    """
    netloc = query = fragment = ''
    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # Brackets must come in pairs: exactly one of them is an error.
        if ('[' in netloc) != (']' in netloc):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    return netloc, url, query, fragment


def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes.

    Results are memoized in the module-level parse cache.  Raises
    ValueError for a netloc with unbalanced '[' / ']'."""
    allow_fragments = bool(allow_fragments)
    # The argument types are part of the key so that equal-comparing values
    # of different types get separate cache entries.
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached is not None:
        return cached
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http':  # optimize the common case
            # NOTE: this fast path deliberately skips the port-number check
            # below, so 'http:80' parses with scheme 'http'.
            scheme, url = url[:i].lower(), url[i+1:]
        else:
            for c in url[:i]:
                if c not in scheme_chars:
                    break
            else:
                # make sure "url" is not actually a port number (in which
                # case "scheme" is really part of the path)
                rest = url[i+1:]
                if not rest or any(c not in '0123456789' for c in rest):
                    # not a port number
                    scheme, url = url[:i].lower(), rest
    netloc, url, query, fragment = _split_remainder(url, allow_fragments)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return v


def urlunparse(data):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent)."""
    scheme, netloc, url, params, query, fragment = data
    if params:
        url = "%s;%s" % (url, params)
    return urlunsplit((scheme, netloc, url, query, fragment))


def urlunsplit(data):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string. The data argument can be any five-item iterable.
    This may result in a slightly different, but equivalent URL, if the URL that
    was parsed originally had unnecessary delimiters (for example, a ? with an
    empty query; the RFC states that these are equivalent)."""
    scheme, netloc, url, query, fragment = data
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        # A non-empty path following a netloc must start with '/'.
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return url


def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return url
    if not url:
        return base
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
        urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
        urlparse(url, bscheme, allow_fragments)
    # A different scheme, or one that has no relative form: url stands alone.
    if scheme != bscheme or scheme not in uses_relative:
        return url
    if scheme in uses_netloc:
        if netloc:
            return urlunparse((scheme, netloc, path,
                               params, query, fragment))
        netloc = bnetloc
    if path[:1] == '/':
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    if not path and not params:
        # Empty relative path: inherit the base path/params (and the base
        # query too, unless the reference supplies its own).
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return urlunparse((scheme, netloc, path,
                           params, query, fragment))
    # Replace the last segment of the base path with the relative path.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Repeatedly collapse the first "<name>/.." pair until none remains.
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                    and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return urlunparse((scheme, netloc, '/'.join(segments),
                       params, query, fragment))


def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    if '#' in url:
        s, n, p, a, q, frag = urlparse(url)
        defrag = urlunparse((s, n, p, a, q, ''))
        return defrag, frag
    else:
        return url, ''


# _is_unicode() reports whether x is a `unicode` instance; where the
# `unicode` builtin does not exist it always answers 0.
try:
    unicode
except NameError:
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)

# unquote method for parse_qs and parse_qsl
# Cannot use directly from urllib as it would create a circular reference
# because urllib uses urlparse methods (urljoin).  If you update this function,
# update it also in urllib.  This code duplication does not exist in Python3.

_hexdig = '0123456789ABCDEFabcdef'
# Lookup table: every two-hex-digit string (any case mix) -> its character.
_hextochr = dict((a+b, chr(int(a+b, 16)))
                 for a in _hexdig for b in _hexdig)
_asciire = re.compile('([\x00-\x7f]+)')


def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Malformed escapes (a '%' not followed by two hex digits) are left
    unchanged.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        # Split into alternating non-ASCII / ASCII runs; only the ASCII
        # runs (odd indexes) can contain escapes.
        bits = _asciire.split(s)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(unquote(str(bits[i])).decode('latin1'))
            append(bits[i + 1])
        return ''.join(res)

    bits = s.split('%')
    # fastpath
    if len(bits) == 1:
        return s
    res = [bits[0]]
    append = res.append
    for item in bits[1:]:
        try:
            append(_hextochr[item[:2]])
            append(item[2:])
        except KeyError:
            # Not a valid escape: put the '%' back and keep the text as is.
            append('%')
            append(item)
    return ''.join(res)


def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        Returns a dict mapping each name to the list of its values, in
        order of appearance.
    """
    result = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
        if name in result:
            result[name].append(value)
        else:
            result[name] = [value]
    return result


def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were  not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    Returns a list, as G-d intended.
    """
    # Both '&' and ';' separate fields.
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                # Call form of raise: valid on both Python 2 and Python 3.
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            # '+' encodes a space; translate it before percent-decoding.
            name = unquote(nv[0].replace('+', ' '))
            value = unquote(nv[1].replace('+', ' '))
            r.append((name, value))

    return r