github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/gubernator/third_party/defusedxml/lxml.py (about)

     1  # defusedxml
     2  #
     3  # Copyright (c) 2013 by Christian Heimes <christian@python.org>
     4  # Licensed to PSF under a Contributor Agreement.
     5  # See http://www.python.org/psf/license for licensing details.
     6  """Example code for lxml.etree protection
     7  
     8  The code has NO protection against decompression bombs.
     9  """
    10  from __future__ import print_function, absolute_import
    11  
    12  import threading
    13  from lxml import etree as _etree
    14  
    15  from .common import DTDForbidden, EntitiesForbidden, NotSupportedError
    16  
    17  LXML3 = _etree.LXML_VERSION[0] >= 3
    18  
    19  __origin__ = "lxml.etree"
    20  
    21  tostring = _etree.tostring
    22  
    23  
    24  class RestrictedElement(_etree.ElementBase):
    25      """A restricted Element class that filters out instances of some classes
    26      """
    27      __slots__ = ()
    28      # blacklist = (etree._Entity, etree._ProcessingInstruction, etree._Comment)
    29      blacklist = _etree._Entity
    30  
    31      def _filter(self, iterator):
    32          blacklist = self.blacklist
    33          for child in iterator:
    34              if isinstance(child, blacklist):
    35                  continue
    36              yield child
    37  
    38      def __iter__(self):
    39          iterator = super(RestrictedElement, self).__iter__()
    40          return self._filter(iterator)
    41  
    42      def iterchildren(self, tag=None, reversed=False):
    43          iterator = super(RestrictedElement, self).iterchildren(tag=tag, reversed=reversed)
    44          return self._filter(iterator)
    45  
    46      def iter(self, tag=None, *tags):
    47          iterator = super(RestrictedElement, self).iter(tag=tag, *tags)
    48          return self._filter(iterator)
    49  
    50      def iterdescendants(self, tag=None, *tags):
    51          iterator = super(RestrictedElement, self).iterdescendants(tag=tag, *tags)
    52          return self._filter(iterator)
    53  
    54      def itersiblings(self, tag=None, preceding=False):
    55          iterator = super(RestrictedElement, self).itersiblings(tag=tag, preceding=preceding)
    56          return self._filter(iterator)
    57  
    58      def getchildren(self):
    59          iterator = super(RestrictedElement, self).__iter__()
    60          return list(self._filter(iterator))
    61  
    62      def getiterator(self, tag=None):
    63          iterator = super(RestrictedElement, self).getiterator(tag)
    64          return self._filter(iterator)
    65  
    66  
    67  class GlobalParserTLS(threading.local):
    68      """Thread local context for custom parser instances
    69      """
    70      parser_config = {
    71          'resolve_entities': False,
    72          #'remove_comments': True,
    73          #'remove_pis': True,
    74      }
    75  
    76      element_class = RestrictedElement
    77  
    78      def createDefaultParser(self):
    79          parser = _etree.XMLParser(**self.parser_config)
    80          element_class = self.element_class
    81          if self.element_class is not None:
    82              lookup = _etree.ElementDefaultClassLookup(element=element_class)
    83              parser.set_element_class_lookup(lookup)
    84          return parser
    85  
    86      def setDefaultParser(self, parser):
    87          self._default_parser = parser
    88  
    89      def getDefaultParser(self):
    90          parser = getattr(self, "_default_parser", None)
    91          if parser is None:
    92              parser = self.createDefaultParser()
    93              self.setDefaultParser(parser)
    94          return parser
    95  
    96  
    97  _parser_tls = GlobalParserTLS()
    98  getDefaultParser = _parser_tls.getDefaultParser
    99  
   100  
   101  def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
   102      """Check docinfo of an element tree for DTD and entity declarations
   103  
   104      The check for entity declarations needs lxml 3 or newer. lxml 2.x does
   105      not support dtd.iterentities().
   106      """
   107      docinfo = elementtree.docinfo
   108      if docinfo.doctype:
   109          if forbid_dtd:
   110              raise DTDForbidden(docinfo.doctype,
   111                                 docinfo.system_url,
   112                                 docinfo.public_id)
   113          if forbid_entities and not LXML3:
   114              # lxml < 3 has no iterentities()
   115              raise NotSupportedError("Unable to check for entity declarations "
   116                                      "in lxml 2.x")
   117  
   118      if forbid_entities:
   119          for dtd in docinfo.internalDTD, docinfo.externalDTD:
   120              if dtd is None:
   121                  continue
   122              for entity in dtd.iterentities():
   123                  raise EntitiesForbidden(entity.name, entity.content, None,
   124                                          None, None, None)
   125  
   126  
   127  def parse(source, parser=None, base_url=None, forbid_dtd=False,
   128            forbid_entities=True):
   129      if parser is None:
   130          parser = getDefaultParser()
   131      elementtree = _etree.parse(source, parser, base_url=base_url)
   132      check_docinfo(elementtree, forbid_dtd, forbid_entities)
   133      return elementtree
   134  
   135  
   136  def fromstring(text, parser=None, base_url=None, forbid_dtd=False,
   137                 forbid_entities=True):
   138      if parser is None:
   139          parser = getDefaultParser()
   140      rootelement = _etree.fromstring(text, parser, base_url=base_url)
   141      elementtree = rootelement.getroottree()
   142      check_docinfo(elementtree, forbid_dtd, forbid_entities)
   143      return rootelement
   144  
   145  XML = fromstring
   146  
   147  
   148  def iterparse(*args, **kwargs):
   149      raise NotSupportedError("defused lxml.etree.iterparse not available")