How to use the lxml.etree.XMLParser class in lxml

To help you get started, we’ve selected a few lxml examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github RUB-NDS / DTD-Attacks / code / python / lxml / testLxml.py View on Github external
def testURLInvocation_externalGeneralEntity_attribute_defaults(self):
    """Parse the SSRF sample with attribute_defaults=True and check the counter.

    The local counter server is reset and confirmed to read "0". Parsing the
    sample file is expected to raise XMLSyntaxError, after which the counter
    is read again and must still be "0".
    """
    def read_counter():
        # Strip any CRLF from the response body before comparing.
        response = requests.get(url_counter)
        return response.text.replace("\r\n", "")

    url_counter = "http://127.0.0.1:5000/getCounter"

    # Reset the server back to "0" and confirm the starting state.
    requests.get("http://127.0.0.1:5000/reset")
    self.assertEqual("0", read_counter())

    parser = XMLParser(attribute_defaults=True)
    with self.assertRaises(XMLSyntaxError):
        parse('../../xml_files_windows/ssrf/url_invocation_externalGeneralEntity.xml', parser)

    # Check if a request has been made: the counter must be unchanged.
    self.assertEqual("0", read_counter())
github RUB-NDS / DTD-Attacks / code / python / lxml / testLxml.py View on Github external
def testDOS_entitySize(self):
    """Parse the entity-size DoS sample with a default XMLParser.

    Asserts that the root element's text contains the substring "dos"
    exactly 3400000 times.
    """
    tree = parse('../../xml_files_windows/dos/dos_entitySize.xml', XMLParser())
    occurrences = tree.getroot().text.count("dos")
    self.assertEqual(3400000, occurrences)
github RUB-NDS / DTD-Attacks / code / python / lxml / testLxml.py View on Github external
def testParameterEntity_doctype_dtd_validation_no_network(self):
    """Parse the parameter-entity sample with DTD validation and network access on.

    The parser is built with dtd_validation=True and no_network=False; the
    root element's text is expected to be "it_works".
    """
    parser = XMLParser(dtd_validation=True, no_network=False)
    tree = parse('../../xml_files_windows/xxep/parameterEntity_doctype.xml', parser)
    root = tree.getroot()
    # assertEqual is the supported spelling; the assertEquals alias is
    # deprecated and no longer exists in Python 3.12+.
    self.assertEqual("it_works", root.text)
github Marduke / CalimeplPacz / plugins / kdb / __init__.py View on Github external
XPath = partial(etree.XPath, namespaces=self.NAMESPACES)
        entry = XPath('//x:table[@class="default_class_table"][1]/x:tbody/x:tr')

        query = self.create_query(title=title, authors=authors,
                identifiers=identifiers)
        if not query:
            self.log('Insufficient metadata to construct query')
            return

        br = self.browser
        try:
            self.log('download book page search %s'%query)
            raw = br.open(query, timeout=timeout).read().strip()
            try:
                parser = etree.XMLParser(recover=True)
                clean = clean_ascii_chars(raw)
                feed = fromstring(clean, parser=parser)
            except Exception as e:
                self.log.exception('Failed to parse xpath')
        except Exception as e:
            self.log.exception('Failed to make identify query: %r'%query)

        try:
            entries = entry(feed)
            self.log('Found %i matches'%len(entries))
            act_authors = []
            for act in authors:
                act_authors.append(act.split(" ")[-1])

            ident_found = False
            tmp_entries = []
github mynlp / ccg2lambda / jigg2transccg.py View on Github external
""")

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION)
    parser.add_argument("jigg_fname", help="Jigg XML filename with CCG trees.")
    args = parser.parse_args()
      
    if not os.path.exists(args.jigg_fname):
        print('File does not exist: {0}'.format(args.jigg_fname), file=sys.stderr)
        parser.print_help(file=sys.stderr)
        sys.exit(1)
    
    logging.basicConfig(level=logging.WARNING)

    parser = etree.XMLParser(remove_blank_text=True)
    jigg_doc = etree.parse(args.jigg_fname, parser)
    transccg_doc = jigg2transccg_doc(jigg_doc)
    result = etree.tostring(transccg_doc, encoding='utf-8',
        xml_declaration=True, pretty_print=True)
    print(result.decode('utf-8'))
github lubosz / radiotray / radiotraylib / AsxPlaylistDecoder.py View on Github external
def extractStream(self,  url):
    """Download the playlist at *url* and extract its stream references.

    The response body is parsed with a recovering XML parser and queried
    with the XPath ``//ref/@href``.

    Returns the list of matched href values, or None when nothing matched.
    """
    print("Downloading playlist...")

    req = urllib2.Request(url)
    f = urllib2.urlopen(req)
    try:
        playlist = f.read()
    finally:
        # Release the connection even when the read fails.
        f.close()

    print("Playlist downloaded")
    print("Decoding playlist...")

    # recover=True asks lxml to keep parsing through malformed markup.
    parser = etree.XMLParser(recover=True)
    root = etree.parse(StringIO(playlist), parser)
    result = root.xpath("//ref/@href")

    if result:
        return result
    return None
github opencord / xos / xos / xos / xml_util.py View on Github external
def parse_xml(self, xml):
        """
        parse rspec into etree
        """
        parser = etree.XMLParser(remove_blank_text=True)
        try:
            tree = etree.parse(xml, parser)
        except IOError:
            # 'rspec' file doesnt exist. 'rspec' is proably an xml string
            try:
                tree = etree.parse(StringIO(xml), parser)
            except Exception as e:
                raise Exception(str(e))
        root = tree.getroot()
        self.namespaces = dict(root.nsmap)
        # set namespaces map
        if "default" not in self.namespaces and None in self.namespaces:
            # If the 'None' exist, then it's pointing to the default namespace. This makes
            # it hard for us to write xpath queries for the default naemspace because lxml
            # wont understand a None prefix. We will just associate the default namespeace
            # with a key named 'default'.
github willu47 / git_diff_xlsx / parse_xml.py View on Github external
def get_shared_strings(shared_strings_file):
    '''
    Parses the specified xml file with an ns_clean parser and returns a
    list holding the result of process_shared_string_row for each item
    produced by iterating the document root
    '''
    ns_parser = etree.XMLParser(ns_clean=True)
    root = objectify.parse(shared_strings_file, ns_parser).getroot()
    # Snapshot the iteration up front, before any row is processed.
    return [process_shared_string_row(row) for row in list(root)]
github mvantellingen / python-zeep / src / zeep / loader.py View on Github external
:param content: The XML string
    :type content: str
    :param transport: The transport instance to load imported documents
    :type transport: zeep.transports.Transport
    :param base_url: The base url of the document, used to make relative
      lookups absolute.
    :type base_url: str
    :param settings: A zeep.settings.Settings object containing parse settings.
    :type settings: zeep.settings.Settings
    :returns: The document root
    :rtype: lxml.etree._Element

    """
    settings = settings or Settings()
    recover = not settings.strict
    parser = etree.XMLParser(
        remove_comments=True,
        resolve_entities=False,
        recover=recover,
        huge_tree=settings.xml_huge_tree,
    )
    parser.resolvers.add(ImportResolver(transport))
    try:
        return fromstring(
            content,
            parser=parser,
            base_url=base_url,
            forbid_dtd=settings.forbid_dtd,
            forbid_entities=settings.forbid_entities,
        )
    except etree.XMLSyntaxError as exc:
        raise XMLSyntaxError(
github flow-project / flow / flow / core / kernel / network / traci.py View on Github external
Returns
        -------
        net_data : dict 
            Key = name of the edge/junction
            Element = lanes, speed, length
        connection_data : dict < dict < list < (edge, pos) > > >
            Key = "prev" or "next", indicating coming from or to this
            edge/lane pair
                Key = name of the edge
                    Key = lane index
                    Element = list of edge/lane pairs preceding or following
                    the edge/lane pairs
        """
        # import the .net.xml file containing all edge/type data
        parser = etree.XMLParser(recover=True)
        net_path = os.path.join(self.cfg_path, self.netfn) \
            if net_params.template is None else self.netfn
        tree = ElementTree.parse(net_path, parser=parser)
        root = tree.getroot()

        # Collect information on the available types (if any are available).
        # This may be used when specifying some edge data.
        types_data = dict()

        for typ in root.findall('type'):
            type_id = typ.attrib['id']
            types_data[type_id] = dict()

            if 'speed' in typ.attrib:
                types_data[type_id]['speed'] = float(typ.attrib['speed'])
            else: