How to use the lxml.etree.parse function in lxml

To help you get started, we’ve selected a few lxml examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Bcfg2 / bcfg2 / src / lib / Server / Lint / Validate.py View on Github external
def validate(self, filename, schemafile, schema=None):
        """Validate a file against an lxml.etree.XMLSchema.

        filename   -- path of the XML file to check
        schemafile -- path of the schema; parsed only when ``schema`` is
                      None, and used in the error message
        schema     -- optional ready-made schema object

        Problems are reported through ``self.LintError``. Every failure
        path visible here returns False; the original docstring states
        that True is returned on success.
        """
        if schema is None:
            # if no schema object was provided, instantiate one
            try:
                schema = lxml.etree.XMLSchema(lxml.etree.parse(schemafile))
            except Exception:
                # Deliberately broad (unreadable file, malformed XML and
                # invalid schema are all reported the same way), but not
                # a bare ``except:`` so that KeyboardInterrupt and
                # SystemExit still propagate.
                self.LintError("schema-failed-to-parse",
                               "Failed to process schema %s" % schemafile)
                return False

        try:
            datafile = lxml.etree.parse(filename)
        except SyntaxError:
            # Run xmllint on the same file so that its diagnostic output
            # can be included in the lint message.
            lint = Popen(["xmllint", filename], stdout=PIPE, stderr=STDOUT)
            self.LintError("xml-failed-to-parse",
                           "%s fails to parse:\n%s" % (filename,
                                                       lint.communicate()[0]))
            lint.wait()
            return False
        except IOError:
            self.LintError("xml-failed-to-read",
                           "Failed to open file %s" % filename)
            return False

        if not schema.validate(datafile):
            # Start building an xmllint command line; --xinclude is only
            # added when self.files is None.
            cmd = ["xmllint"]
            if self.files is None:
                cmd.append("--xinclude")
github KurtJacobson / hazzy / hazzy / gui / hazzy_window.py View on Github external
def load_from_xml(self):
        """Apply the window geometry stored in the file at Paths.XML_FILE.

        Returns without doing anything when the file does not exist. When
        the file cannot be parsed, the FATAL-level entries of the parser's
        error log are logged and the method returns. Otherwise, for every
        ``window`` element, the size (``w``, ``h``) and position
        (``x``, ``y``) from ``self.get_propertys`` are passed to
        ``set_default_size`` and ``move``.
        """
        if not os.path.exists(Paths.XML_FILE):
            return

        try:
            tree = etree.parse(Paths.XML_FILE)
        except etree.XMLSyntaxError as err:
            fatal_entries = err.error_log.filter_from_level(
                etree.ErrorLevels.FATAL)
            log.error(fatal_entries)
            return

        # Windows (Might support multiple windows in future, so iterate)
        for window in tree.getroot().iter('window'):
            window_name = window.get('name')
            window_title = window.get('title')

            props = self.get_propertys(window)

            # Size is applied before the position values are converted,
            # so a bad position entry cannot prevent the resize.
            width, height = int(props['w']), int(props['h'])
            self.set_default_size(width, height)

            pos_x, pos_y = int(props['x']), int(props['y'])
            self.move(pos_x, pos_y)
github openmpf / openmpf / trunk / ansible / install / ansible / roles / upgrade / files / customPipelines / customPipelines.py View on Github external
def load_xml_from_file(xml_filepath):
    """
    Parse an XML file with a parser that drops blank text.

    :param xml_filepath: Absolute filepath to an XML file. Example: /opt/mpf/data/Actions.xml
    :return: The tree object returned by ``et.parse`` for that file.
    """
    # The parser is created with remove_blank_text=True and handed straight
    # to the parse call; the parsed tree is the function's result.
    return et.parse(source=xml_filepath,
                    parser=et.XMLParser(remove_blank_text=True))
github kieranjol / IFIscripts / as11fixity.py View on Github external
)
    if checkfile is True:
        print "CSV file already exists."
    for dirpath, dirnames, filenames in os.walk(starting_dir):
        for filename in [f for f in filenames if f.endswith(".mxf")]:
            full_path = os.path.join(dirpath, filename)
            file_no_path = os.path.basename(full_path)
            file_no_extension = os.path.splitext(os.path.basename(file_no_path))[0]
            xml_file = file_no_extension + '.xml'
            full_xml_path = os.path.join(dirpath, xml_file)
            checkfile = os.path.isfile(os.path.join(dirpath, xml_file))
            if checkfile == False:
                print 'No XML file exists.'
            print "Generating md5 for ", filename
            mxf_checksum = str(digest_with_progress(full_path, 1024))
            dpp_xml_parse = etree.parse(full_xml_path)
            dpp_xml_namespace = dpp_xml_parse.xpath('namespace-uri(.)')
            #parsed values
            series_title = dpp_xml_parse.findtext(
                '//ns:SeriesTitle',
                namespaces={'ns':dpp_xml_namespace}
            )
            prog_title = dpp_xml_parse.findtext(
                '//ns:ProgrammeTitle',
                namespaces={'ns':dpp_xml_namespace}
            )
            ep_num = dpp_xml_parse.findtext(
                '//ns:EpisodeTitleNumber',
                namespaces={'ns':dpp_xml_namespace}
            )
            checksum = dpp_xml_parse.findtext(
                '//ns:MediaChecksumValue',
github Bcfg2 / bcfg2 / tools / create-rpm-pkglist.py View on Github external
def main():
    """Print and/or rewrite the package list, driven by command-line options.

    The options come from ``parse_command_line_parameters()`` and the new
    package list from ``transformXML()``:

    * ``options.show``: print the ``name`` of every ``Package`` node in the
      existing file, then indent the new package list and write it to
      ``options.filename``.
    * ``options.pkgversion``: print ``name-version`` for every ``Package``
      node in the file.
    * neither option given (both are None): indent the new package list
      and write it to ``options.filename``.
    """
    options, args = parse_command_line_parameters()
    filename = options.filename
    packagelist = transformXML()

    # NOTE(review): the flags are compared against True explicitly rather
    # than by truthiness because the option definitions are not visible
    # here; switching to ``if options.show:`` could change which branch
    # runs for non-boolean values.
    if options.show == True:
        tree = etree.parse(filename)
        for node in tree.findall("//Package"):
            print(node.attrib["name"])
        indent(packagelist.getroot())
        packagelist.write(filename, encoding="utf-8")

    if options.pkgversion == True:
        tree = etree.parse(filename)
        for node in tree.findall("//Package"):
            print("%s-%s" % (node.attrib["name"], node.attrib["version"]))

    # FIXME : This should be changed to the standard way of optparser
    # FIXME : Make an option available to strip the version number of the pkg
    # None is a singleton, so test identity instead of equality.
    if options.pkgversion is None and options.show is None:
        indent(packagelist.getroot())
        packagelist.write(filename, encoding="utf-8")
github Arelle / Arelle / arelle / plugin / xbrlDB / XbrlSemanticJsonDB.py View on Github external
headers=headers)
        try:
            with self.conn.open(request, timeout=self.timeout) as fp:
                results = fp.read().decode('utf-8')
            try:
                results = json.loads(results)
            except ValueError:
                pass # leave results as string
        except HTTPError as err:
            results = err.fp.read().decode('utf-8')
        if TRACEJSONFILE:
            with io.open(TRACEJSONFILE, "a", encoding='utf-8') as fh:
                fh.write("\n\n>>> received: \n{0}".format(str(results)))
        if isinstance(results, str) and query is not None:
            parser = etree.HTMLParser()
            htmlDoc = etree.parse(io.StringIO(results), parser)
            body = htmlDoc.find("//body")
            if body is not None:
                error = "".join(text for text in body.itertext())
            else:
                error = results
            raise XJDBException("jsonDB:DatabaseError",
                                _("%(activity)s not successful: %(error)s"),
                                activity=activity, error=error) 
        return results
github mbakeranalecta / sam / samparser.py View on Github external
html_string = "".join(samParser.serialize('html')).encode('utf-8')
                    else:
                        xml_string = "".join(samParser.serialize('xml')).encode('utf-8')


                    if intermediatefile:
                        with open(intermediatefile, "wb") as intermediate:
                            intermediate.write(xml_string)

                    if args.xslt:
                        try:
                            transform = etree.XSLT(etree.parse(args.xslt))
                        except FileNotFoundError as e:
                            raise SAMParserError(e.strerror + ' ' + e.filename)

                        xml_input = etree.parse(open(intermediatefile, 'r', encoding="utf-8-sig"))
                        try:
                            transformed = transform(xml_input)
                        except etree.XSLTError as e:
                            raise SAMParserError("XSLT processor reported error: " + str(e))
                        finally:
                            if transform.error_log:
                                SAM_parser_warning("Messages from the XSLT transformation:")
                                for entry in transform.error_log:
                                    print('message from line %s, col %s: %s' % (
                                        entry.line, entry.column, entry.message), file=sys.stderr)
                                    print('domain: %s (%d)' % (entry.domain_name, entry.domain), file=sys.stderr)
                                    print('type: %s (%d)' % (entry.type_name, entry.type), file=sys.stderr)
                                    print('level: %s (%d)' % (entry.level_name, entry.level), file=sys.stderr)


                        if transform.error_log:
github clalancette / oz / oz / TDL.py View on Github external
def __init__(self, xmlstring, rootpw_required=False):
        """Parse a TDL document from a string and validate it.

        The string is parsed, XInclude directives are processed and the
        root element is stored in ``self.doc``. The document is then
        validated against the ``tdl.rng`` RelaxNG schema located next to
        this module, and must contain exactly one ``/template`` match.

        Raises oz.OzException.OzException when schema validation fails
        (the message lists every error-log entry) or when the number of
        template sections is not 1. ``rootpw_required`` is not used in
        the portion of the method shown here.
        """
        # open the XML document
        tree = lxml.etree.parse(StringIO(xmlstring))
        tree.xinclude()
        self.doc = tree.getroot()

        # then validate the schema
        rng_path = os.path.join(os.path.dirname(__file__), 'tdl.rng')
        relaxng = lxml.etree.RelaxNG(file=rng_path)
        if not relaxng.validate(self.doc):
            # One "\tline N: message\n" entry per schema error.
            details = "".join("\tline %s: %s\n" % (error.line, error.message)
                              for error in relaxng.error_log)
            raise oz.OzException.OzException(
                "\nXML schema validation failed:\n" + details)

        template = self.doc.xpath('/template')
        template_count = len(template)
        if template_count != 1:
            raise oz.OzException.OzException("Expected 1 template section in TDL, saw %d" % (template_count))
github theatlantic / django-xml / djxml / xmlmodels / fields.py View on Github external
def get_schematron_tree(self, model_instance):
        """Return the schematron tree for this field, building it on first use.

        A tree already stored in ``self._schematron_tree`` is returned
        as is. Otherwise the parser is ``self.parser`` or, when that is
        None, ``model_instance._meta.get_parser()``; the tree is then
        built from ``self.schematron_file`` if set, else from
        ``self.schematron_string`` if set, and stored on the field. When
        neither source is set the result stays None.
        """
        if self._schematron_tree is not None:
            return self._schematron_tree

        xml_parser = self.parser
        if xml_parser is None:
            xml_parser = model_instance._meta.get_parser()

        # The file source takes precedence over the inline string.
        if self.schematron_file is not None:
            self._schematron_tree = etree.parse(self.schematron_file,
                                                xml_parser)
        elif self.schematron_string is not None:
            self._schematron_tree = etree.XML(self.schematron_string,
                                              xml_parser)
        return self._schematron_tree
github braph / emuparadise / emu_browse.py View on Github external
def get_as_etree(url):
    """Fetch ``url`` and parse the response body as HTML.

    The page is retrieved with ``requests.get`` and its decoded text is
    parsed through ``etree.parse`` using an ``etree.HTMLParser``; the
    result of that parse call is returned.
    """
    html_text = requests.get(url).text
    return etree.parse(StringIO(html_text), etree.HTMLParser())