How to use the extractcode.extract.extract function in extractcode

To help you get started, we’ve selected a few extractcode examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nexB / scancode-toolkit / tests / extractcode / test_extract.py View on Github external
def test_extract_nested_arch_with_corrupted_compressed_should_extract_inner_archives_only_once(self):
        """
        Recursively extract a .tgz that contains a ".gz" member which is not
        actually compressed, then check that extraction reports no error and
        that only the expected files exist (the bogus ".gz" member is listed
        as a plain file, with no extraction directory of its own).
        """
        archive = self.get_test_loc(
            'extract/nested_not_compressed/nested_with_not_compressed_gz_file.tgz',
            copy=True
        )

        # Materialize all the events so that the extraction is fully done
        # before anything is checked.
        events = list(extract.extract(archive, recurse=True))
        check_no_error(events)

        expected_files = [
            'nested_with_not_compressed_gz_file.tgz',
            'nested_with_not_compressed_gz_file.tgz-extract/top/file',
            'nested_with_not_compressed_gz_file.tgz-extract/top/notcompressed.gz'
        ]
        check_files(archive, expected_files)
github nexB / scancode-toolkit / tests / extractcode / test_extract.py View on Github external
def test_extract_tar_with_broken_links(self):
        """
        Recursively extract the 'extract/broken_link' test directory and check
        both the resulting files and that none of the two extraction events
        carries a warning.
        """
        location = self.get_test_loc('extract/broken_link', copy=True)
        events = list(extract.extract(location, recurse=True))

        expected_files = (
            'broken-link.tar.bz2',
            'broken-link.tar.bz2-extract/openssl/test/Makefile',
        )
        check_files(location, expected_files)

        # Exactly two events are expected, each with an empty warnings list.
        warnings_by_event = []
        for event in events:
            warnings_by_event.append(event.warnings)
        assert [[], []] == warnings_by_event
github nexB / scancode-toolkit / tests / extractcode / test_extract.py View on Github external
def test_extract_directory_with_office_docs(self):
        test_dir = self.get_test_loc('extract/office_docs', copy=True)
        result = list(extract.extract(test_dir, kinds=(extractcode.docs,), recurse=True))
        expected = [
            'abc.docx',
            'abc.docx-extract/[Content_Types].xml',
            'abc.docx-extract/docProps/app.xml',
            'abc.docx-extract/docProps/core.xml',
            'abc.docx-extract/_rels/.rels',
            'abc.docx-extract/word/fontTable.xml',
            'abc.docx-extract/word/document.xml',
            'abc.docx-extract/word/settings.xml',
            'abc.docx-extract/word/numbering.xml',
            'abc.docx-extract/word/activeX/activeX1.xml',
            'abc.docx-extract/word/activeX/activeX2.xml',
            'abc.docx-extract/word/activeX/activeX3.xml',
            'abc.docx-extract/word/activeX/_rels/activeX1.xml.rels',
            'abc.docx-extract/word/activeX/_rels/activeX2.xml.rels',
            'abc.docx-extract/word/activeX/_rels/activeX3.xml.rels',
github nexB / scancode-toolkit / tests / extractcode / test_extract.py View on Github external
tarinfo.name = 'somefilename-%i.txt' % i
                tarinfo.uid = 123
                tarinfo.gid = 456
                tarinfo.uname = 'johndoe'
                tarinfo.gname = 'fake'
                tarinfo.type = tarfile.REGTYPE
                tarinfo.mode = 0 # this is the readonly part
                tarinfo.mtime = time.mktime(datetime.datetime.now().timetuple())
                file = io.StringIO()
                file.write(TEXT)
                file.seek(0)
                tarinfo.size = len(TEXT)
                tar.addfile(tarinfo, file)
            tar.close()
        """
        result = list(extract.extract(test_file, recurse=False))
        check_no_error(result)

        expected = (
            'somefilename-0.txt',
            'somefilename-1.txt',
        )
        test_dir = extractcode.get_extraction_path(test_file)
        check_files(test_dir, expected)
github nexB / scancode-toolkit / tests / extractcode / test_extract.py View on Github external
def test_extract_archive_non_nested(self):
        """
        Extract a tar.gz archive that has no nested archive, first without and
        then with recursion: each run must be error-free and yield the same
        files in the archive extraction directory.
        """
        archive = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
        expected_files = (
            'a/b/a.txt',
            'a/b/b.txt',
            'a/c/c.txt',
        )

        for recurse in (False, True):
            # The extract() result is handed over as-is (not wrapped in a
            # list) to check_no_error.
            events = extract.extract(archive, recurse=recurse)
            check_no_error(events)
            check_files(extractcode.get_extraction_path(archive), expected_files)
github nexB / scancode-toolkit / tests / extractcode / test_extract.py View on Github external
def test_extract_tar_gz_with_spaces_in_name(self):
        """
        Recursively extract a directory holding a tar.gz whose file name
        contains spaces and check that the extraction is error-free and that
        the extraction directory keeps these spaces.
        """
        location = self.get_test_loc('extract/space-tgz', copy=True)

        events = list(extract.extract(location, recurse=True))
        check_no_error(events)

        expected_files = (
            'with spaces in name.tar.gz',
            'with spaces in name.tar.gz-extract/a/b/a.txt',
            'with spaces in name.tar.gz-extract/a/b/b.txt',
            'with spaces in name.tar.gz-extract/a/c/c.txt',
        )
        check_files(location, expected_files)
github nexB / scancode-toolkit / tests / extractcode / test_extract.py View on Github external
def test_extract_zip_with_spaces_in_name(self):
        """
        Recursively extract a directory holding a zip whose file name contains
        spaces and check that the extraction is error-free and that the
        extraction directory keeps these spaces.
        """
        location = self.get_test_loc('extract/space-zip', copy=True)

        events = list(extract.extract(location, recurse=True))
        check_no_error(events)

        expected_files = (
            'with spaces in name.zip',
            'with spaces in name.zip-extract/empty_dirs_and_small_files/small_files/small_file.txt'
        )
        check_files(location, expected_files)
github nexB / scancode-toolkit / tests / extractcode / test_extract.py View on Github external
'nested_tars.tar.gz-extract/b/a/a.txt',
            'nested_tars.tar.gz-extract/b/b/.svn/all-wcprops',
            'nested_tars.tar.gz-extract/b/b/.svn/entries',
            'nested_tars.tar.gz-extract/b/b/.svn/format',
            'nested_tars.tar.gz-extract/b/b/.svn/text-base/a.txt.svn-base',
            'nested_tars.tar.gz-extract/b/b/a.txt',
            'nested_tars.tar.gz-extract/b/c/.svn/all-wcprops',
            'nested_tars.tar.gz-extract/b/c/.svn/entries',
            'nested_tars.tar.gz-extract/b/c/.svn/format',
            'nested_tars.tar.gz-extract/b/c/.svn/prop-base/a.tar.gz.svn-base',
            'nested_tars.tar.gz-extract/b/c/.svn/text-base/a.tar.gz.svn-base',
            'nested_tars.tar.gz-extract/b/c/.svn/text-base/a.txt.svn-base',
            'nested_tars.tar.gz-extract/b/c/a.tar.gz',
            'nested_tars.tar.gz-extract/b/c/a.txt'
        ]
        result1 = list(extract.extract(test_dir, recurse=False))
        check_no_error(result1)
        check_files(test_dir, expected)
github nexB / scancode-toolkit / tests / extractcode / test_extract.py View on Github external
# The setup is a tad complex because we want to have a relative dir
        # to the base dir where we run tests from, ie the scancode-toolkit/ dir
        # To use relative paths, we use our tmp dir at the root of the code tree
        from os.path import dirname, join, abspath
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        import tempfile
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = self.get_test_loc('extract/relative_path/basic.zip')
        import shutil
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX
        result = list(extract.extract(test_src_file))
        expected = ['c/a/a.txt', 'c/b/a.txt', 'c/c/a.txt']
        check_files(test_tgt_dir, expected)
        for r in result:
            assert [] == r.warnings
            assert [] == r.errors
github nexB / scancode-toolkit / src / scancode / api.py View on Github external
def extract_archives(location, recurse=True):
    """
    Extract the archive(s) and compressed files found at `location` and yield
    an ExtractEvent for each step of the extraction.

    When `recurse` is True, archives nested in other archives are extracted
    recursively too.

    Each archive or compressed file is extracted in a directory created next
    to it (i.e. in the same directory) and named after it with an "-extract"
    suffix.

    Note: the returned value is a lazy iterable and NOT a sequence: nothing
    is imported or extracted until it is iterated.
    """
    # Function-level imports: extractcode is only loaded on first iteration.
    from extractcode.extract import extract
    from extractcode import default_kinds

    events = extract(location, kinds=default_kinds, recurse=recurse)
    for event in events:
        yield event