How to use the extractcode.archive module in extractcode

To help you get started, we’ve selected a few extractcode examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nexB / scancode-toolkit / tests / extractcode / test_archive.py View on Github external
def test_get_extractors(self):
        test_data = [
            ('archive/zip/basic.zip', [archive.extract_zip]),
            ('archive/rar/basic.rar', [archive.extract_rar]),
            ('archive/deb/adduser_3.112ubuntu1_all.deb', [archive.extract_ar]),
            ('archive/cpio/elfinfo-1.0-1.fc9.src.cpio', [archive.extract_cpio]),
            ('archive/rpm/elfinfo-1.0-1.fc9.src.rpm', [archive.extract_rpm, archive.extract_cpio]),
            ('archive/gzip/file_4.26-1.diff.gz', [archive.uncompress_gzip]),
            ('archive/ar/liby.a', [archive.extract_ar]),
            ('archive/bz2/single_file_not_tarred.bz2', [archive.uncompress_bzip2]),
            ('archive/tar/tarred.tar', [archive.extract_tar]),
            ('archive/tbz/tarred_bzipped.bz', [archive.uncompress_bzip2]),
            ('archive/tbz/tarred_bzipped.tar.bz2', [archive.extract_tar]),
            ('archive/tbz/tarred_bzipped.tbz', [archive.extract_tar]),
            ('archive/tgz/tarred_gzipped.gz', [archive.uncompress_gzip]),
            ('archive/tgz/tarred_gzipped.tar.gz', [archive.extract_tar]),
            ('archive/tgz/tarred_gzipped.tgz', [archive.extract_tar]),
            ('archive/7z/z.7z', [archive.extract_7z]),
            ('archive/Z/tr2tex.Z', [archive.extract_Z, ]),
github nexB / scancode-toolkit / tests / extractcode / test_archive.py View on Github external
def test_get_extractors(self):
    """
    Check that archive.get_extractors() returns the expected list of
    extractor functions for each sample archive file.
    """
    # Each entry pairs a test file path with the extractors expected for it.
    expectations = [
        ('archive/zip/basic.zip', [archive.extract_zip]),
        ('archive/rar/basic.rar', [archive.extract_rar]),
        ('archive/deb/adduser_3.112ubuntu1_all.deb', [archive.extract_ar]),
        ('archive/cpio/elfinfo-1.0-1.fc9.src.cpio', [archive.extract_cpio]),
        (
            'archive/rpm/elfinfo-1.0-1.fc9.src.rpm',
            [archive.extract_rpm, archive.extract_cpio],
        ),
        ('archive/gzip/file_4.26-1.diff.gz', [archive.uncompress_gzip]),
        ('archive/ar/liby.a', [archive.extract_ar]),
        (
            'archive/bz2/single_file_not_tarred.bz2',
            [archive.uncompress_bzip2],
        ),
        ('archive/tar/tarred.tar', [archive.extract_tar]),
        ('archive/tbz/tarred_bzipped.bz', [archive.uncompress_bzip2]),
        ('archive/tbz/tarred_bzipped.tar.bz2', [archive.extract_tar]),
        ('archive/tbz/tarred_bzipped.tbz', [archive.extract_tar]),
        ('archive/tgz/tarred_gzipped.gz', [archive.uncompress_gzip]),
        ('archive/tgz/tarred_gzipped.tar.gz', [archive.extract_tar]),
        ('archive/tgz/tarred_gzipped.tgz', [archive.extract_tar]),
        ('archive/7z/z.7z', [archive.extract_7z]),
        ('archive/Z/tr2tex.Z', [archive.extract_Z]),
        (
            'archive/Z/tkWWW-0.11.tar.Z',
            [archive.extract_Z, archive.extract_tar],
        ),
        ('archive/xar/xar-1.4.xar', [archive.extract_xarpkg]),
    ]

    for relative_path, expected_extractors in expectations:
        location = self.get_test_loc(relative_path)
        actual_extractors = archive.get_extractors(location)
        assert expected_extractors == actual_extractors
github nexB / scancode-toolkit / tests / extractcode / test_archive.py View on Github external
def test_get_extractor_cbz(self):
    """
    Check that archive.get_extractor() selects the zip extractor for the
    sample .cbz file.
    """
    cbz_location = self.get_test_loc('archive/cbz/t.cbz')
    extractor = archive.get_extractor(cbz_location)
    assert archive.extract_zip == extractor
github nexB / scancode-toolkit / tests / extractcode / test_archive.py View on Github external
def test_uncompress_bzip2_with_trailing_data(self):
    """
    Check that uncompressing the sample bzip2 file with trailing data
    creates the expected '-extract' path in the target directory.
    """
    source_location = self.get_test_loc(
        'archive/bz2/single_file_trailing_data.bz2')
    target_dir = self.get_temp_dir()

    archive.uncompress_bzip2(source_location, target_dir)

    extracted_path = os.path.join(
        target_dir, 'single_file_trailing_data.bz2-extract')
    assert os.path.exists(extracted_path)
github nexB / scancode-toolkit / src / extractcode / extract.py View on Github external
if not recurse:
            if TRACE:
                drs = set(dirs)
            for d in dirs[:]:
                if extractcode.is_extraction_path(d):
                    dirs.remove(d)
            if TRACE:
                logger.debug('extract:walk: not recurse: removed dirs:' + repr(drs.symmetric_difference(set(dirs))))
        for f in files:
            loc = join(top, f)
            if not recurse and extractcode.is_extraction_path(loc):
                if TRACE:
                    logger.debug('extract:walk not recurse: skipped  file: %(loc)r' % locals())
                continue

            if not archive.should_extract(loc, kinds):
                if TRACE:
                    logger.debug('extract:walk: skipped file: not should_extract: %(loc)r' % locals())
                continue

            target = join(abspath(top), extractcode.get_extraction_path(loc))
            if TRACE:
                logger.debug('extract:target: %(target)r' % locals())
            for xevent in extract_file(loc, target, kinds):
                if TRACE:
                    logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
                yield xevent

            if recurse:
                if TRACE:
                    logger.debug('extract:walk: recursing on target: %(target)r' % locals())
                for xevent in extract(target, kinds, recurse):
github nexB / scancode-toolkit / src / typecode / contenttype.py View on Github external
def is_archive(self):
        """
        Return True if the file is some kind of archive or compressed file.
        """
        if self._is_archive is not None:
            return self._is_archive
        self._is_archive = False

        from extractcode import archive

        ft = self.filetype_file.lower()
        can_extract = bool(archive.can_extract(self.location))
        if on_linux and py2:
            docx_ext = b'x'
        else:
            docx_ext = u'x'

        if self.is_text:
            self._is_archive = False

        elif self.filetype_file.lower().startswith('gem image data'):
            self._is_archive = False

        elif (self.is_compressed
            or 'archive' in ft
            or can_extract
            or self.is_package
            or self.is_filesystem