How to use the oletools.ooxml.XmlParser class in oletools

To help you get started, we’ve selected a few oletools examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github decalage2 / oletools / oletools / ooxml.py View on Github external
def test():
    """ Main function, called when running file as script

    Expects exactly one command line argument: the file to inspect. Prints
    the type detected by get_type, then every XML element yielded by
    XmlParser.iter_xml() whose depth is below 4, then the non-XML subfiles
    yielded by XmlParser.iter_non_xml() (output is cut off with '...' once
    the running index exceeds 100).

    Logging is enabled at the start and is always ended again, also when
    the argument check fails or when parsing raises an error.

    see module doc for more info

    :returns: 2 if the number of command line arguments is wrong, else 0
    """
    log_helper.enable_logging(False, 'debug')
    try:
        if len(sys.argv) != 2:
            print(u'To test this code, give me a single file as arg')
            return 2

        filename = sys.argv[1]

        # test get_type
        print('Detected type: ' + get_type(filename))

        # test complete parsing
        parser = XmlParser(filename)
        for subfile, elem, depth in parser.iter_xml():
            # only show the upper levels of each xml tree
            if depth < 4:
                print(u'{0} {1}{2}'.format(subfile, '  ' * depth,
                                           debug_str(elem)))
        non_xml = parser.iter_non_xml()
        for index, (subfile, content_type, _) in enumerate(non_xml):
            print(u'Non-XML subfile: {0} of type {1}'
                  .format(subfile, content_type or u'unknown'))
            # do not flood the console for archives with many subfiles
            if index > 100:
                print(u'...')
                break
    finally:
        # pair every enable_logging with end_logging, whatever happened above
        log_helper.end_logging()

    return 0
github decalage2 / oletools / oletools / oleobj.py View on Github external
# TODO: option to extract objects to files (false by default)
    print('-'*79)
    print('File: %r' % filename)
    index = 1

    # do not throw errors but remember them and try continue with other streams
    err_stream = False
    err_dumping = False
    did_dump = False

    xml_parser = None
    if is_zipfile(filename):
        log.info('file could be an OOXML file, looking for relationships with '
                 'external links')
        xml_parser = XmlParser(filename)
        for relationship, target in find_external_relationships(xml_parser):
            did_dump = True
            print("Found relationship '%s' with external link %s" % (relationship, target))

    # look for ole files inside file (e.g. unzip docx)
    # have to finish work on every ole stream inside iteration, since handles
    # are closed in find_ole
    for ole in find_ole(filename, data, xml_parser):
        if ole is None:    # no ole file found
            continue

        for path_parts in ole.listdir():
            stream_path = '/'.join(path_parts)
            log.debug('Checking stream %r', stream_path)
            if path_parts[-1] == '\x01Ole10Native':
                stream = None
github decalage2 / oletools / oletools / ooxml.py View on Github external
def get_type(filename):
    """ return one of the DOCTYPE_* constants or raise error """
    parser = XmlParser(filename)
    if parser.is_single_xml():
        match = None
        with open(filename, 'r') as handle:
            match = re.search(OFFICE_XML_PROGID_REGEX, handle.read(1024))
        if not match:
            return DOCTYPE_NONE
        prog_id = match.groups()[0]
        if prog_id == WORD_XML_PROG_ID:
            return DOCTYPE_WORD_XML
        if prog_id == EXCEL_XML_PROG_ID:
            return DOCTYPE_EXCEL_XML
        return DOCTYPE_NONE

    is_doc = False
    is_xls = False
    is_ppt = False
github decalage2 / oletools / oletools / oleobj.py View on Github external
try:
        if olefile.isOleFile(arg_for_ole):
            if is_ppt(arg_for_ole):
                log.info('is ppt file: ' + filename)
                for ole in find_ole_in_ppt(arg_for_ole):
                    yield ole
                    ole = None   # is closed in find_ole_in_ppt
            # in any case: check for embedded stuff in non-sectored streams
            log.info('is ole file: ' + filename)
            ole = olefile.OleFileIO(arg_for_ole)
            yield ole
        elif xml_parser is not None or is_zipfile(arg_for_zip):
            # keep compatibility with 3rd-party code that calls this function
            # directly without providing an XmlParser instance
            if xml_parser is None:
                xml_parser = XmlParser(arg_for_zip)
                # force iteration so XmlParser.iter_non_xml() returns data
                [x for x in xml_parser.iter_xml()]

            log.info('is zip file: ' + filename)
            # we looped through the XML files before, now we can
            # iterate the non-XML files looking for ole objects
            for subfile, _, file_handle in xml_parser.iter_non_xml():
                try:
                    head = file_handle.read(len(olefile.MAGIC))
                except RuntimeError:
                    log.error('zip is encrypted: ' + filename)
                    yield None
                    continue

                if head == olefile.MAGIC:
                    file_handle.seek(0)