How to use the oletools.ooxml function in oletools

To help you get started, we’ve selected a few oletools examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github decalage2 / oletools / tests / ooxml / test_basic.py View on Github external
def test_rough_doctype(self):
        """Checks all samples, expect either ole files or good ooxml output"""
        # map from extension to expected doctype
        ext2doc = dict(
            docx=ooxml.DOCTYPE_WORD, docm=ooxml.DOCTYPE_WORD,
            dotx=ooxml.DOCTYPE_WORD, dotm=ooxml.DOCTYPE_WORD,
            xml=(ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_WORD_XML),
            xlsx=ooxml.DOCTYPE_EXCEL, xlsm=ooxml.DOCTYPE_EXCEL,
            xlsb=ooxml.DOCTYPE_EXCEL, xlam=ooxml.DOCTYPE_EXCEL,
            xltx=ooxml.DOCTYPE_EXCEL, xltm=ooxml.DOCTYPE_EXCEL,
            pptx=ooxml.DOCTYPE_POWERPOINT, pptm=ooxml.DOCTYPE_POWERPOINT,
            ppsx=ooxml.DOCTYPE_POWERPOINT, ppsm=ooxml.DOCTYPE_POWERPOINT,
            potx=ooxml.DOCTYPE_POWERPOINT, potm=ooxml.DOCTYPE_POWERPOINT,
            ods=ooxml.DOCTYPE_NONE, odt=ooxml.DOCTYPE_NONE,
            odp=ooxml.DOCTYPE_NONE,
        )

        # files that are neither OLE nor xml:
        except_files = 'empty', 'text'
        except_extns = 'rtf', 'csv', 'zip'
github decalage2 / oletools / oletools / msodde.py View on Github external
def process_excel_xml(filepath):
    """ find dde links in xml files created with excel 2003 or excel 2007+

    TODO: did not manage to create dde-link in the 2007+-xml-format. Find out
          whether this is possible at all. If so, extend this function
    """
    dde_links = []
    parser = ooxml.XmlParser(filepath)
    for _, elem, _ in parser.iter_xml():
        tag = elem.tag.lower()
        if tag != 'cell' and not tag.endswith('}cell'):
            continue   # we are only interested in cells
        formula = None
        for key in elem.keys():
            if key.lower() == 'formula' or key.lower().endswith('}formula'):
                formula = elem.get(key)
                break
        if formula is None:
            continue
        logger.debug(u'found cell with formula {0}'.format(formula))
        match = re.match(XML_DDE_FORMAT, formula)
        if match:
            dde_links.append(u' '.join(match.groups()[:2]))
    return u'\n'.join(dde_links)
github decalage2 / oletools / oletools / msodde.py View on Github external
def process_xlsx(filepath):
    """ process an OOXML excel file (e.g. .xlsx or .xlsb or .xlsm) """
    dde_links = []
    parser = ooxml.XmlParser(filepath)
    for _, elem, _ in parser.iter_xml():
        tag = elem.tag.lower()
        if tag == 'ddelink' or tag.endswith('}ddelink'):
            # we have found a dde link. Try to get more info about it
            link_info = []
            if 'ddeService' in elem.attrib:
                link_info.append(elem.attrib['ddeService'])
            if 'ddeTopic' in elem.attrib:
                link_info.append(elem.attrib['ddeTopic'])
            dde_links.append(u' '.join(link_info))

    # binary parts, e.g. contained in .xlsb
    for subfile, content_type, handle in parser.iter_non_xml():
        try:
            logger.info('Parsing non-xml subfile {0} with content type {1}'
                        .format(subfile, content_type))
github malice-plugins / office / docs / office.py View on Github external
# Check for old office formats
        try:
            doctype = ooxml.get_type(__sessions__.current.file.path)
            OOXML_FILE = True
        except Exception as exc:
            OOXML_FILE = False

        # set defaults
        XLSX_FILE = False
        EXCEL_XML_FILE = False
        DOCX_FILE = False
        if OOXML_FILE is True:
            if doctype == ooxml.DOCTYPE_EXCEL:
                XLSX_FILE = True
            elif doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
                EXCEL_XML_FILE = True
            elif doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
                DOCX_FILE = True

        # Tests to check for valid Office structures.
        OLE_FILE = olefile.isOleFile(__sessions__.current.file.path)
        XML_FILE = zipfile.is_zipfile(__sessions__.current.file.path)
        if OLE_FILE:
            ole = olefile.OleFileIO(__sessions__.current.file.path)
        elif XML_FILE:
            zip_xml = zipfile.ZipFile(__sessions__.current.file.path, 'r')
        elif OLD_XML:
            pass
        elif MHT_FILE:
            pass
        elif DOCX_FILE:
github decalage2 / oletools / oletools / msodde.py View on Github external
return process_doc(ole)

    with open(filepath, 'rb') as file_handle:
        # TODO: here we should not assume this is a file on disk, filepath can be a file object
        if file_handle.read(4) == RTF_START:
            logger.debug('Process file as rtf')
            return process_rtf(file_handle, field_filter_mode)

    try:
        doctype = ooxml.get_type(filepath)
        logger.debug('Detected file type: {0}'.format(doctype))
    except Exception as exc:
        logger.debug('Exception trying to xml-parse file: {0}'.format(exc))
        doctype = None

    if doctype == ooxml.DOCTYPE_EXCEL:
        logger.debug('Process file as excel 2007+ (xlsx)')
        return process_xlsx(filepath)
    if doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
        logger.debug('Process file as xml from excel 2003/2007+')
        return process_excel_xml(filepath)
    if doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
        logger.debug('Process file as xml from word 2003/2007+')
        return process_docx(filepath)
    if doctype is None:
        logger.debug('Process file as csv')
        return process_csv(filepath)
    # could be docx; if not: this is the old default code path
    logger.debug('Process file as word 2007+ (docx)')
    return process_docx(filepath, field_filter_mode)
github decalage2 / oletools / oletools / msodde.py View on Github external
# TODO: here we should not assume this is a file on disk, filepath can be a file object
        if file_handle.read(4) == RTF_START:
            logger.debug('Process file as rtf')
            return process_rtf(file_handle, field_filter_mode)

    try:
        doctype = ooxml.get_type(filepath)
        logger.debug('Detected file type: {0}'.format(doctype))
    except Exception as exc:
        logger.debug('Exception trying to xml-parse file: {0}'.format(exc))
        doctype = None

    if doctype == ooxml.DOCTYPE_EXCEL:
        logger.debug('Process file as excel 2007+ (xlsx)')
        return process_xlsx(filepath)
    if doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
        logger.debug('Process file as xml from excel 2003/2007+')
        return process_excel_xml(filepath)
    if doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
        logger.debug('Process file as xml from word 2003/2007+')
        return process_docx(filepath)
    if doctype is None:
        logger.debug('Process file as csv')
        return process_csv(filepath)
    # could be docx; if not: this is the old default code path
    logger.debug('Process file as word 2007+ (docx)')
    return process_docx(filepath, field_filter_mode)
github decalage2 / oletools / oletools / msodde.py View on Github external
return process_rtf(file_handle, field_filter_mode)

    try:
        doctype = ooxml.get_type(filepath)
        logger.debug('Detected file type: {0}'.format(doctype))
    except Exception as exc:
        logger.debug('Exception trying to xml-parse file: {0}'.format(exc))
        doctype = None

    if doctype == ooxml.DOCTYPE_EXCEL:
        logger.debug('Process file as excel 2007+ (xlsx)')
        return process_xlsx(filepath)
    if doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
        logger.debug('Process file as xml from excel 2003/2007+')
        return process_excel_xml(filepath)
    if doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
        logger.debug('Process file as xml from word 2003/2007+')
        return process_docx(filepath)
    if doctype is None:
        logger.debug('Process file as csv')
        return process_csv(filepath)
    # could be docx; if not: this is the old default code path
    logger.debug('Process file as word 2007+ (docx)')
    return process_docx(filepath, field_filter_mode)
github viper-framework / viper / viper / modules / office.py View on Github external
return

        file_data = __sessions__.current.file.data
        if file_data.startswith(b'
github malice-plugins / office / docs / office.py View on Github external
return

        file_data = __sessions__.current.file.data
        if file_data.startswith(b'
github decalage2 / oletools / oletools / msodde.py View on Github external
return process_xls(filepath)
        if is_ppt(filepath):
            logger.debug('is ppt - cannot have DDE')
            return u''
        logger.debug('Process file as word 2003 (doc)')
        with olefile.OleFileIO(filepath, path_encoding=None) as ole:
            return process_doc(ole)

    with open(filepath, 'rb') as file_handle:
        # TODO: here we should not assume this is a file on disk, filepath can be a file object
        if file_handle.read(4) == RTF_START:
            logger.debug('Process file as rtf')
            return process_rtf(file_handle, field_filter_mode)

    try:
        doctype = ooxml.get_type(filepath)
        logger.debug('Detected file type: {0}'.format(doctype))
    except Exception as exc:
        logger.debug('Exception trying to xml-parse file: {0}'.format(exc))
        doctype = None

    if doctype == ooxml.DOCTYPE_EXCEL:
        logger.debug('Process file as excel 2007+ (xlsx)')
        return process_xlsx(filepath)
    if doctype in (ooxml.DOCTYPE_EXCEL_XML, ooxml.DOCTYPE_EXCEL_XML2003):
        logger.debug('Process file as xml from excel 2003/2007+')
        return process_excel_xml(filepath)
    if doctype in (ooxml.DOCTYPE_WORD_XML, ooxml.DOCTYPE_WORD_XML2003):
        logger.debug('Process file as xml from word 2003/2007+')
        return process_docx(filepath)
    if doctype is None:
        logger.debug('Process file as csv')