# Source: github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/gubernator/third_party/defusedxml/lxml.py
#
# defusedxml
#
# Copyright (c) 2013 by Christian Heimes <christian@python.org>
# Licensed to PSF under a Contributor Agreement.
# See http://www.python.org/psf/license for licensing details.
"""Example code for lxml.etree protection

The code has NO protection against decompression bombs.
"""
from __future__ import print_function, absolute_import

import threading
from lxml import etree as _etree

from .common import DTDForbidden, EntitiesForbidden, NotSupportedError

# True when the installed lxml is major version 3 or newer; check_docinfo()
# uses it to decide whether entity declarations can be inspected.
LXML3 = _etree.LXML_VERSION[0] >= 3

__origin__ = "lxml.etree"

tostring = _etree.tostring


class RestrictedElement(_etree.ElementBase):
    """A restricted Element class that filters out instances of some classes

    Every iteration method overridden here delegates to the parent
    implementation and then drops the items that are instances of
    ``blacklist``.
    """
    __slots__ = ()
    # blacklist = (etree._Entity, etree._ProcessingInstruction, etree._Comment)
    blacklist = _etree._Entity

    def _filter(self, iterator):
        """Yield the items of *iterator* that are not blacklisted."""
        blacklist = self.blacklist
        for child in iterator:
            if isinstance(child, blacklist):
                continue
            yield child

    def __iter__(self):
        """Iterate over the children, skipping blacklisted nodes."""
        iterator = super(RestrictedElement, self).__iter__()
        return self._filter(iterator)

    def iterchildren(self, tag=None, reversed=False):
        """Filtered variant of the parent's ``iterchildren()``."""
        iterator = super(RestrictedElement, self).iterchildren(
            tag=tag, reversed=reversed)
        return self._filter(iterator)

    def iter(self, tag=None, *tags):
        """Filtered variant of the parent's ``iter()``."""
        # ``tag`` is forwarded positionally: passing it by keyword together
        # with ``*tags`` makes the first extra tag bind to ``tag`` as well,
        # which raises TypeError (multiple values for 'tag').
        iterator = super(RestrictedElement, self).iter(tag, *tags)
        return self._filter(iterator)

    def iterdescendants(self, tag=None, *tags):
        """Filtered variant of the parent's ``iterdescendants()``."""
        # Positional ``tag`` for the same reason as in iter().
        iterator = super(RestrictedElement, self).iterdescendants(tag, *tags)
        return self._filter(iterator)

    def itersiblings(self, tag=None, preceding=False):
        """Filtered variant of the parent's ``itersiblings()``."""
        iterator = super(RestrictedElement, self).itersiblings(
            tag=tag, preceding=preceding)
        return self._filter(iterator)

    def getchildren(self):
        """Return the non-blacklisted children as a list."""
        iterator = super(RestrictedElement, self).__iter__()
        return list(self._filter(iterator))

    def getiterator(self, tag=None):
        """Filtered variant of the parent's ``getiterator()``."""
        iterator = super(RestrictedElement, self).getiterator(tag)
        return self._filter(iterator)


class GlobalParserTLS(threading.local):
    """Thread local context for custom parser instances

    ``parser_config`` holds the keyword arguments passed to
    ``etree.XMLParser``; ``element_class`` is the element class installed on
    new parsers (set it to None to skip installing a class lookup).
    """
    parser_config = {
        'resolve_entities': False,
        # 'remove_comments': True,
        # 'remove_pis': True,
    }

    element_class = RestrictedElement

    def createDefaultParser(self):
        """Build a new XMLParser from ``parser_config``.

        When ``element_class`` is not None, a default class lookup using it
        as the element class is installed on the parser.
        """
        parser = _etree.XMLParser(**self.parser_config)
        element_class = self.element_class
        if element_class is not None:
            lookup = _etree.ElementDefaultClassLookup(element=element_class)
            parser.set_element_class_lookup(lookup)
        return parser

    def setDefaultParser(self, parser):
        """Store *parser* as the default parser of this context."""
        self._default_parser = parser

    def getDefaultParser(self):
        """Return the stored default parser, creating it on first use."""
        parser = getattr(self, "_default_parser", None)
        if parser is None:
            parser = self.createDefaultParser()
            self.setDefaultParser(parser)
        return parser


_parser_tls = GlobalParserTLS()
getDefaultParser = _parser_tls.getDefaultParser


def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
    """Check docinfo of an element tree for DTD and entity declarations

    The check for entity declarations needs lxml 3 or newer. lxml 2.x does
    not support dtd.iterentities().

    Raises DTDForbidden when the tree has a doctype and *forbid_dtd* is
    true, NotSupportedError when the tree has a doctype, *forbid_entities*
    is true and lxml is older than 3, and EntitiesForbidden for the first
    entity found in the internal or external DTD when *forbid_entities* is
    true.
    """
    docinfo = elementtree.docinfo
    if docinfo.doctype:
        if forbid_dtd:
            raise DTDForbidden(docinfo.doctype,
                               docinfo.system_url,
                               docinfo.public_id)
        if forbid_entities and not LXML3:
            # lxml < 3 has no iterentities()
            raise NotSupportedError("Unable to check for entity declarations "
                                    "in lxml 2.x")

    if forbid_entities:
        for dtd in docinfo.internalDTD, docinfo.externalDTD:
            if dtd is None:
                continue
            # Any declared entity is rejected, so the first one raises.
            for entity in dtd.iterentities():
                raise EntitiesForbidden(entity.name, entity.content, None,
                                        None, None, None)


def parse(source, parser=None, base_url=None, forbid_dtd=False,
          forbid_entities=True):
    """Parse *source* with etree.parse() and check the resulting docinfo.

    Uses the default parser of the thread local context when *parser* is
    None. Returns the element tree; check_docinfo() may raise instead.
    """
    if parser is None:
        parser = getDefaultParser()
    elementtree = _etree.parse(source, parser, base_url=base_url)
    check_docinfo(elementtree, forbid_dtd, forbid_entities)
    return elementtree


def fromstring(text, parser=None, base_url=None, forbid_dtd=False,
               forbid_entities=True):
    """Parse *text* with etree.fromstring() and check the tree's docinfo.

    Uses the default parser of the thread local context when *parser* is
    None. Returns the root element; check_docinfo() may raise instead.
    """
    if parser is None:
        parser = getDefaultParser()
    rootelement = _etree.fromstring(text, parser, base_url=base_url)
    elementtree = rootelement.getroottree()
    check_docinfo(elementtree, forbid_dtd, forbid_entities)
    return rootelement


XML = fromstring


def iterparse(*args, **kwargs):
    """Always raise NotSupportedError; no defused iterparse is provided."""
    raise NotSupportedError("defused lxml.etree.iterparse not available")