github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/stdlib/urlparse.py (about)

     1  """Parse (absolute and relative) URLs.
     2  
     3  urlparse module is based upon the following RFC specifications.
     4  
     5  RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
     6  and L.  Masinter, January 2005.
     7  
     8  RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter
     9  and L.Masinter, December 1999.
    10  
    11  RFC 2396:  "Uniform Resource Identifiers (URI)": Generic Syntax by T.
    12  Berners-Lee, R. Fielding, and L. Masinter, August 1998.
    13  
    14  RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zwinski, July 1998.
    15  
    16  RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
    17  1995.
    18  
    19  RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
    20  McCahill, December 1994
    21  
    22  RFC 3986 is considered the current standard and any future changes to
    23  urlparse module should conform with it.  The urlparse module is
    24  currently not entirely compliant with this RFC due to defacto
    25  scenarios for parsing, and for backward compatibility purposes, some
    26  parsing quirks from older RFCs are retained. The testcases in
    27  test_urlparse.py provides a good indicator of parsing behavior.
    28  
    29  """
    30  
    31  import re
    32  
    33  import operator
# Module-private aliases for operator.itemgetter and the property/tuple
# builtins.
_itemgetter = operator.itemgetter
_property = property
_tuple = tuple

# Names exported by ``from urlparse import *``.
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]
    40  
    41  # A classification of schemes ('' means apply by default)
    42  uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
    43                   'wais', 'file', 'https', 'shttp', 'mms',
    44                   'prospero', 'rtsp', 'rtspu', '', 'sftp',
    45                   'svn', 'svn+ssh']
    46  uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
    47                 'imap', 'wais', 'file', 'mms', 'https', 'shttp',
    48                 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
    49                 'svn', 'svn+ssh', 'sftp','nfs','git', 'git+ssh']
    50  uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
    51                 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
    52                 'mms', '', 'sftp', 'tel']
    53  
    54  # These are not actually used anymore, but should stay for backwards
    55  # compatibility.  (They are undocumented, but have a public-looking name.)
    56  non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
    57                      'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
    58  uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
    59                'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
    60  uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
    61                   'nntp', 'wais', 'https', 'shttp', 'snews',
    62                   'file', 'prospero', '']
    63  
    64  # Characters valid in scheme names
    65  scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
    66                  'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    67                  '0123456789'
    68                  '+-.')
    69  
    70  MAX_CACHE_SIZE = 20
    71  _parse_cache = {}
    72  
    73  
    74  def clear_cache():
    75      """Clear the parse cache."""
    76      _parse_cache.clear()
    77  
    78  
    79  class ResultMixin(object):
    80      """Shared methods for the parsed result objects."""
    81  
    82      # @property
    83      def username(self):
    84          netloc = self.netloc
    85          if "@" in netloc:
    86              userinfo = netloc.rsplit("@", 1)[0]
    87              if ":" in userinfo:
    88                  userinfo = userinfo.split(":", 1)[0]
    89              return userinfo
    90          return None
    91      username = property(username)
    92  
    93      # @property
    94      def password(self):
    95          netloc = self.netloc
    96          if "@" in netloc:
    97              userinfo = netloc.rsplit("@", 1)[0]
    98              if ":" in userinfo:
    99                  return userinfo.split(":", 1)[1]
   100          return None
   101      password = property(password)
   102  
   103      # @property
   104      def hostname(self):
   105          netloc = self.netloc.split('@')[-1]
   106          if '[' in netloc and ']' in netloc:
   107              return netloc.split(']')[0][1:].lower()
   108          elif ':' in netloc:
   109              return netloc.split(':')[0].lower()
   110          elif netloc == '':
   111              return None
   112          else:
   113              return netloc.lower()
   114      hostname = property(hostname)
   115  
   116      # @property
   117      def port(self):
   118          netloc = self.netloc.split('@')[-1].split(']')[-1]
   119          if ':' in netloc:
   120              port = netloc.split(':')[1]
   121              if port:
   122                  port = int(port, 10)
   123                  # verify legal port
   124                  if (0 <= port <= 65535):
   125                      return port
   126          return None
   127      port = property(port)
   128  
   129  # from collections import namedtuple
   130  class _SplitResult(tuple):
   131      'SplitResult(scheme, netloc, path, query, fragment)'
   132  
   133      __slots__ = ()
   134  
   135      _fields = ('scheme', 'netloc', 'path', 'query', 'fragment')
   136  
   137      def __new__(_cls, scheme, netloc, path, query, fragment):
   138          'Create new instance of SplitResult(scheme, netloc, path, query, fragment)'
   139          return _tuple.__new__(_cls, (scheme, netloc, path, query, fragment))
   140  
   141      # @classmethod
   142      def _make(cls, iterable, new=tuple.__new__, len=len):
   143          'Make a new SplitResult object from a sequence or iterable'
   144          result = new(cls, iterable)
   145          if len(result) != 5:
   146              raise TypeError('Expected 5 arguments, got %d' % len(result))
   147          return result
   148      _make = classmethod(_make)
   149  
   150      def __repr__(self):
   151          'Return a nicely formatted representation string'
   152          return 'SplitResult(scheme=%r, netloc=%r, path=%r, query=%r, fragment=%r)' % self
   153  
   154      def _asdict(self):
   155          'Return a new OrderedDict which maps field names to their values'
   156          return OrderedDict(zip(self._fields, self))
   157  
   158      def _replace(_self, **kwds):
   159          'Return a new SplitResult object replacing specified fields with new values'
   160          result = _self._make(map(kwds.pop, ('scheme', 'netloc', 'path', 'query', 'fragment'), _self))
   161          if kwds:
   162              raise ValueError('Got unexpected field names: %r' % kwds.keys())
   163          return result
   164  
   165      def __getnewargs__(self):
   166          'Return self as a plain tuple.  Used by copy and pickle.'
   167          return tuple(self)
   168  
   169      __dict__ = _property(_asdict)
   170  
   171      def __getstate__(self):
   172          'Exclude the OrderedDict from pickling'
   173          pass
   174  
   175      scheme = _property(_itemgetter(0), doc='Alias for field number 0')
   176  
   177      netloc = _property(_itemgetter(1), doc='Alias for field number 1')
   178  
   179      path = _property(_itemgetter(2), doc='Alias for field number 2')
   180  
   181      query = _property(_itemgetter(3), doc='Alias for field number 3')
   182  
   183      fragment = _property(_itemgetter(4), doc='Alias for field number 4')
   184  
   185  # class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):
   186  class SplitResult(_SplitResult, ResultMixin):
   187  
   188      __slots__ = ()
   189  
   190      def geturl(self):
   191          return urlunsplit(self)
   192  
   193  class _ParseResult(tuple):
   194      'ParseResult(scheme, netloc, path, params, query, fragment)'
   195  
   196      __slots__ = ()
   197  
   198      _fields = ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')
   199  
   200      def __new__(_cls, scheme, netloc, path, params, query, fragment):
   201          'Create new instance of ParseResult(scheme, netloc, path, params, query, fragment)'
   202          return _tuple.__new__(_cls, (scheme, netloc, path, params, query, fragment))
   203  
   204      # @classmethod
   205      def _make(cls, iterable, new=tuple.__new__, len=len):
   206          'Make a new ParseResult object from a sequence or iterable'
   207          result = new(cls, iterable)
   208          if len(result) != 6:
   209              raise TypeError('Expected 6 arguments, got %d' % len(result))
   210          return result
   211      _make = classmethod(_make)
   212  
   213      def __repr__(self):
   214          'Return a nicely formatted representation string'
   215          return 'ParseResult(scheme=%r, netloc=%r, path=%r, params=%r, query=%r, fragment=%r)' % self
   216  
   217      def _asdict(self):
   218          'Return a new OrderedDict which maps field names to their values'
   219          return OrderedDict(zip(self._fields, self))
   220  
   221      def _replace(_self, **kwds):
   222          'Return a new ParseResult object replacing specified fields with new values'
   223          result = _self._make(map(kwds.pop, ('scheme', 'netloc', 'path', 'params', 'query', 'fragment'), _self))
   224          if kwds:
   225              raise ValueError('Got unexpected field names: %r' % kwds.keys())
   226          return result
   227  
   228      def __getnewargs__(self):
   229          'Return self as a plain tuple.  Used by copy and pickle.'
   230          return tuple(self)
   231  
   232      __dict__ = _property(_asdict)
   233  
   234      def __getstate__(self):
   235          'Exclude the OrderedDict from pickling'
   236          pass
   237  
   238      scheme = _property(_itemgetter(0), doc='Alias for field number 0')
   239  
   240      netloc = _property(_itemgetter(1), doc='Alias for field number 1')
   241  
   242      path = _property(_itemgetter(2), doc='Alias for field number 2')
   243  
   244      params = _property(_itemgetter(3), doc='Alias for field number 3')
   245  
   246      query = _property(_itemgetter(4), doc='Alias for field number 4')
   247  
   248      fragment = _property(_itemgetter(5), doc='Alias for field number 5')
   249  
   250  # class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):
   251  class ParseResult(_ParseResult, ResultMixin):
   252  
   253      __slots__ = ()
   254  
   255      def geturl(self):
   256          return urlunparse(self)
   257  
   258  
   259  def urlparse(url, scheme='', allow_fragments=True):
   260      """Parse a URL into 6 components:
   261      <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
   262      Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
   263      Note that we don't break the components up in smaller bits
   264      (e.g. netloc is a single string) and we don't expand % escapes."""
   265      tuple = urlsplit(url, scheme, allow_fragments)
   266      scheme, netloc, url, query, fragment = tuple
   267      if scheme in uses_params and ';' in url:
   268          url, params = _splitparams(url)
   269      else:
   270          params = ''
   271      return ParseResult(scheme, netloc, url, params, query, fragment)
   272  
   273  def _splitparams(url):
   274      if '/'  in url:
   275          i = url.find(';', url.rfind('/'))
   276          if i < 0:
   277              return url, ''
   278      else:
   279          i = url.find(';')
   280      return url[:i], url[i+1:]
   281  
   282  def _splitnetloc(url, start=0):
   283      delim = len(url)   # position of end of domain part of url, default is end
   284      for c in '/?#':    # look for delimiters; the order is NOT important
   285          wdelim = url.find(c, start)        # find first of this delim
   286          if wdelim >= 0:                    # if found
   287              delim = min(delim, wdelim)     # use earliest delim position
   288      return url[start:delim], url[delim:]   # return (domain, rest)
   289  
   290  def urlsplit(url, scheme='', allow_fragments=True):
   291      """Parse a URL into 5 components:
   292      <scheme>://<netloc>/<path>?<query>#<fragment>
   293      Return a 5-tuple: (scheme, netloc, path, query, fragment).
   294      Note that we don't break the components up in smaller bits
   295      (e.g. netloc is a single string) and we don't expand % escapes."""
   296      allow_fragments = bool(allow_fragments)
   297      key = url, scheme, allow_fragments, type(url), type(scheme)
   298      cached = _parse_cache.get(key, None)
   299      if cached:
   300          return cached
   301      if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
   302          clear_cache()
   303      netloc = query = fragment = ''
   304      i = url.find(':')
   305      if i > 0:
   306          if url[:i] == 'http': # optimize the common case
   307              scheme = url[:i].lower()
   308              url = url[i+1:]
   309              if url[:2] == '//':
   310                  netloc, url = _splitnetloc(url, 2)
   311                  if (('[' in netloc and ']' not in netloc) or
   312                          (']' in netloc and '[' not in netloc)):
   313                      raise ValueError("Invalid IPv6 URL")
   314              if allow_fragments and '#' in url:
   315                  url, fragment = url.split('#', 1)
   316              if '?' in url:
   317                  url, query = url.split('?', 1)
   318              v = SplitResult(scheme, netloc, url, query, fragment)
   319              _parse_cache[key] = v
   320              return v
   321          for c in url[:i]:
   322              if c not in scheme_chars:
   323                  break
   324          else:
   325              # make sure "url" is not actually a port number (in which case
   326              # "scheme" is really part of the path)
   327              rest = url[i+1:]
   328              if not rest or any(c not in '0123456789' for c in rest):
   329                  # not a port number
   330                  scheme, url = url[:i].lower(), rest
   331  
   332      if url[:2] == '//':
   333          netloc, url = _splitnetloc(url, 2)
   334          if (('[' in netloc and ']' not in netloc) or
   335                  (']' in netloc and '[' not in netloc)):
   336              raise ValueError("Invalid IPv6 URL")
   337      if allow_fragments and '#' in url:
   338          url, fragment = url.split('#', 1)
   339      if '?' in url:
   340          url, query = url.split('?', 1)
   341      v = SplitResult(scheme, netloc, url, query, fragment)
   342      _parse_cache[key] = v
   343      return v
   344  
   345  def urlunparse(data):
   346      """Put a parsed URL back together again.  This may result in a
   347      slightly different, but equivalent URL, if the URL that was parsed
   348      originally had redundant delimiters, e.g. a ? with an empty query
   349      (the draft states that these are equivalent)."""
   350      scheme, netloc, url, params, query, fragment = data
   351      if params:
   352          url = "%s;%s" % (url, params)
   353      return urlunsplit((scheme, netloc, url, query, fragment))
   354  
   355  def urlunsplit(data):
   356      """Combine the elements of a tuple as returned by urlsplit() into a
   357      complete URL as a string. The data argument can be any five-item iterable.
   358      This may result in a slightly different, but equivalent URL, if the URL that
   359      was parsed originally had unnecessary delimiters (for example, a ? with an
   360      empty query; the RFC states that these are equivalent)."""
   361      scheme, netloc, url, query, fragment = data
   362      if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
   363          if url and url[:1] != '/': url = '/' + url
   364          url = '//' + (netloc or '') + url
   365      if scheme:
   366          url = scheme + ':' + url
   367      if query:
   368          url = url + '?' + query
   369      if fragment:
   370          url = url + '#' + fragment
   371      return url
   372  
   373  def urljoin(base, url, allow_fragments=True):
   374      """Join a base URL and a possibly relative URL to form an absolute
   375      interpretation of the latter."""
   376      if not base:
   377          return url
   378      if not url:
   379          return base
   380      bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
   381              urlparse(base, '', allow_fragments)
   382      scheme, netloc, path, params, query, fragment = \
   383              urlparse(url, bscheme, allow_fragments)
   384      if scheme != bscheme or scheme not in uses_relative:
   385          return url
   386      if scheme in uses_netloc:
   387          if netloc:
   388              return urlunparse((scheme, netloc, path,
   389                                 params, query, fragment))
   390          netloc = bnetloc
   391      if path[:1] == '/':
   392          return urlunparse((scheme, netloc, path,
   393                             params, query, fragment))
   394      if not path and not params:
   395          path = bpath
   396          params = bparams
   397          if not query:
   398              query = bquery
   399          return urlunparse((scheme, netloc, path,
   400                             params, query, fragment))
   401      segments = bpath.split('/')[:-1] + path.split('/')
   402      # XXX The stuff below is bogus in various ways...
   403      if segments[-1] == '.':
   404          segments[-1] = ''
   405      while '.' in segments:
   406          segments.remove('.')
   407      while 1:
   408          i = 1
   409          n = len(segments) - 1
   410          while i < n:
   411              if (segments[i] == '..'
   412                  and segments[i-1] not in ('', '..')):
   413                  del segments[i-1:i+1]
   414                  break
   415              i = i+1
   416          else:
   417              break
   418      if segments == ['', '..']:
   419          segments[-1] = ''
   420      elif len(segments) >= 2 and segments[-1] == '..':
   421          segments[-2:] = ['']
   422      return urlunparse((scheme, netloc, '/'.join(segments),
   423                         params, query, fragment))
   424  
   425  def urldefrag(url):
   426      """Removes any existing fragment from URL.
   427  
   428      Returns a tuple of the defragmented URL and the fragment.  If
   429      the URL contained no fragments, the second element is the
   430      empty string.
   431      """
   432      if '#' in url:
   433          s, n, p, a, q, frag = urlparse(url)
   434          defrag = urlunparse((s, n, p, a, q, ''))
   435          return defrag, frag
   436      else:
   437          return url, ''
   438  
   439  try:
   440      unicode
   441  except NameError:
   442      def _is_unicode(x):
   443          return 0
   444  else:
   445      def _is_unicode(x):
   446          return isinstance(x, unicode)
   447  
# unquote method for parse_qs and parse_qsl
# Cannot use directly from urllib as it would create a circular reference
# because urllib uses urlparse methods (urljoin).  If you update this function,
# update it also in urllib.  This code duplication does not exist in Python3.
   452  
   453  _hexdig = '0123456789ABCDEFabcdef'
   454  _hextochr = dict((a+b, chr(int(a+b,16)))
   455                   for a in _hexdig for b in _hexdig)
   456  _asciire = re.compile('([\x00-\x7f]+)')
   457  
   458  def unquote(s):
   459      """unquote('abc%20def') -> 'abc def'."""
   460      if _is_unicode(s):
   461          if '%' not in s:
   462              return s
   463          bits = _asciire.split(s)
   464          res = [bits[0]]
   465          append = res.append
   466          for i in range(1, len(bits), 2):
   467              append(unquote(str(bits[i])).decode('latin1'))
   468              append(bits[i + 1])
   469          return ''.join(res)
   470  
   471      bits = s.split('%')
   472      # fastpath
   473      if len(bits) == 1:
   474          return s
   475      res = [bits[0]]
   476      append = res.append
   477      for item in bits[1:]:
   478          try:
   479              append(_hextochr[item[:2]])
   480              append(item[2:])
   481          except KeyError:
   482              append('%')
   483              append(item)
   484      return ''.join(res)
   485  
   486  def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
   487      """Parse a query given as a string argument.
   488  
   489          Arguments:
   490  
   491          qs: percent-encoded query string to be parsed
   492  
   493          keep_blank_values: flag indicating whether blank values in
   494              percent-encoded queries should be treated as blank strings.
   495              A true value indicates that blanks should be retained as
   496              blank strings.  The default false value indicates that
   497              blank values are to be ignored and treated as if they were
   498              not included.
   499  
   500          strict_parsing: flag indicating what to do with parsing errors.
   501              If false (the default), errors are silently ignored.
   502              If true, errors raise a ValueError exception.
   503      """
   504      dict = {}
   505      for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
   506          if name in dict:
   507              dict[name].append(value)
   508          else:
   509              dict[name] = [value]
   510      return dict
   511  
   512  def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
   513      """Parse a query given as a string argument.
   514  
   515      Arguments:
   516  
   517      qs: percent-encoded query string to be parsed
   518  
   519      keep_blank_values: flag indicating whether blank values in
   520          percent-encoded queries should be treated as blank strings.  A
   521          true value indicates that blanks should be retained as blank
   522          strings.  The default false value indicates that blank values
   523          are to be ignored and treated as if they were  not included.
   524  
   525      strict_parsing: flag indicating what to do with parsing errors. If
   526          false (the default), errors are silently ignored. If true,
   527          errors raise a ValueError exception.
   528  
   529      Returns a list, as G-d intended.
   530      """
   531      pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
   532      r = []
   533      for name_value in pairs:
   534          if not name_value and not strict_parsing:
   535              continue
   536          nv = name_value.split('=', 1)
   537          if len(nv) != 2:
   538              if strict_parsing:
   539                  raise ValueError, "bad query field: %r" % (name_value,)
   540              # Handle case of a control-name with no equal sign
   541              if keep_blank_values:
   542                  nv.append('')
   543              else:
   544                  continue
   545          if len(nv[1]) or keep_blank_values:
   546              name = unquote(nv[0].replace('+', ' '))
   547              value = unquote(nv[1].replace('+', ' '))
   548              r.append((name, value))
   549  
   550      return r