How to use the internetarchive.get_item function in internetarchive

To help you get started, we’ve selected a few internetarchive examples based on popular ways the library is used in public projects.

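At its simplest, internetarchive.get_item() takes an item identifier and returns an Item object whose metadata and files you can inspect before uploading or downloading. A minimal sketch (the 'nasa' identifier is just a well-known public item, used here for illustration):

import internetarchive

item = internetarchive.get_item('nasa')
print(item.exists)                 # True if the identifier is already taken
print(item.metadata.get('title'))  # item metadata is exposed as a dict
print(len(item.files))             # files in the item, as a list of dicts

The examples below show how real projects build on this handle: checking item.exists before creating a new item, passing S3 credentials, and calling upload() and download() on the result.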

github gdamdam / iagitup / iagitup / iagitup.py
# here we set the ia identifier
    itemname = '%s-%s_-_%s' % ('github.com', repo_name, pushed_date)
    title = itemname

    # initialize the main metadata
    meta = dict(mediatype=mediatype, creator=uploader_name, collection=collection, title=title, year=year, date=date,
                subject=subject, uploaded_with=uploader, originalurl=originalurl, pushed_date=raw_pushed_date, description=description)

    # override default metadata with any supplemental metadata provided.
    if custom_meta is not None:
        meta.update(custom_meta)

    try:
        # upload the item to the Internet Archive
        print(("Creating item on Internet Archive: %s") % meta['title'])
        item = internetarchive.get_item(itemname)
        # checking if the item already exists:
        if not item.exists:
            print(("Uploading file to the internet archive: %s") % bundle_file)
            item.upload(bundle_file, metadata=meta, retries=9001, request_kwargs=dict(timeout=9001), delete=False)
            # upload the repository owner's avatar as the item cover image
            print("Uploading avatar...")
            item.upload('{}/cover.jpg'.format(gh_repo_folder), retries=9001, request_kwargs=dict(timeout=9001), delete=True)
        else:
            print("\nSTOP: The same repository seems already archived.")
            print(("---->>  Archived repository URL: \n \thttps://archive.org/details/%s") % itemname)
            print("---->>  Archived git bundle file: \n \thttps://archive.org/download/{0}/{1}.bundle \n\n".format(itemname,bundle_filename))
            shutil.rmtree(gh_repo_folder)
            exit(0)

    except Exception as e:
        print(str(e))
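
Note how iagitup treats get_item() as a get-or-create handle: the Item object is returned whether or not the identifier is taken on archive.org, and item.exists distinguishes the two cases. The oversized retries and timeout values (9001) are presumably chosen to ride out slow responses from the Archive's S3 endpoint when uploading large git bundles.
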
github WikiTeam / wikiteam / uploader.py
wiki = wiki.lower()
        prefix = dumpgenerator.domain2prefix(config={'api': wiki})

        wikiname = prefix.split('-')[0]
        dumps = []
        for dirname, dirnames, filenames in os.walk('.'):
            if dirname == '.':
                for f in filenames:
                    if f.startswith('%s-' % (wikiname)) and (f.endswith('-wikidump.7z') or f.endswith('-history.xml.7z')):
                        dumps.append(f)
                break

        c = 0
        for dump in dumps:
            wikidate = dump.split('-')[1]
            item = get_item('wiki-' + wikiname)
            if dump in uploadeddumps:
                if config['prune-directories']:
                    rmline = 'rm -rf %s-%s-wikidump/' % (wikiname, wikidate)
                    # With -f the deletion might have happened before and we won't know
                    if not os.system(rmline):
                        print('DELETED %s-%s-wikidump/' % (wikiname, wikidate))
                if config['prune-wikidump'] and dump.endswith('wikidump.7z'):
                    # Simplistic quick&dirty check for the presence of this file in the item
                    stdout, stderr = subprocess.Popen(["md5sum", dump], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
                    dumphash = re.sub(r' +.+\n?', '', stdout.decode('utf-8'))

                    if dumphash in [f['md5'] for f in item.files]:
                        log(wiki, dump, 'verified')
                        rmline = 'rm -rf %s' % dump
                        if not os.system(rmline):
                            print('DELETED ' + dump)
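
The md5sum subprocess call above is Unix-specific. A portable alternative (a sketch, not from the WikiTeam source, reusing dump, item, wiki and log from the snippet above) computes the hash with hashlib and compares it against the md5 fields the library exposes on item.files:

import hashlib

def file_md5(path, chunk_size=1 << 20):
    # Hash the file in chunks so large dumps are not read into memory at once.
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()

if file_md5(dump) in {f.get('md5') for f in item.files}:
    log(wiki, dump, 'verified')
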
github pastpages / pastpages.org / archive / models.py
saved_crop = self.save_crop()
            files.append(saved_crop)
        internetarchive.upload(
            self.ia_id,
            files,
            metadata=self.ia_metadata,
            access_key=settings.IA_ACCESS_KEY_ID,
            secret_key=settings.IA_SECRET_ACCESS_KEY,
            checksum=False,
            verbose=True
        )
        if self.has_image:
            os.remove(saved_image)
        if self.has_crop:
            os.remove(saved_crop)
        return internetarchive.get_item(self.ia_id)
github pastpages / pastpages.org / archive / models.py
def get_ia_item(self):
        logger.debug("Getting IA item for {}".format(self.ia_id))
        config = dict(s3=dict(access=settings.IA_ACCESS_KEY_ID, secret=settings.IA_SECRET_ACCESS_KEY))
        return internetarchive.get_item(self.ia_id, config=config)
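
Passing a config dict on every call works, but the library can also build a session once and reuse it across items. A minimal sketch (the identifier and keys are placeholders):

import internetarchive

s = internetarchive.get_session(config={'s3': {'access': 'YOUR_ACCESS_KEY',
                                               'secret': 'YOUR_SECRET_KEY'}})
item = s.get_item('my-identifier')
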
github rohit-dua / BUB / bot / worker.py
def get_valid_identifier(self, primary=True):
        """Iterate over identifiers suffixed by _, until found."""
        if self.ia_identifier:
            ia_key = self.ia_identifier
        else:
            ia_key = "%s_%s_%s" %('bub', self.library, self.Id)
        item = ia.get_item(ia_key)
        if item.exists == False and primary == True:
            return item
        for index in range(2,10):
            item = ia.get_item("%s_%s" %(ia_key, index))
            if item.identifier == self.ia_identifier:
                continue
            if item.exists == False:
                return item
        item = ia.get_item(urandom(16).encode("hex"))
        return item
github ekansa / open-context-py / opencontext_py / apps / ocitems / mediafiles / internetarchive.py
if isinstance(json_ld, dict):
            # cache the remote file locally to upload it
            item_id = self.id_prefix + '-' + json_ld['slug']
            if not isinstance(cache_dir, str):
                cache_dir = self.cache_file_dir
            dir_file = self.bin_file_obj.join_dir_filename(file_name,
                                                           cache_dir)
            if not os.path.exists(dir_file):
                print('Cannot find the cached file: ' + dir_file + '!')
            else:
                sleep(self.delay_before_request)
                print('Ready to upload: ' + file_name)
                # start an internet archive session
                s = self.start_ia_session()
                # get or make an item
                item = get_item(item_id,
                                archive_session=s,
                                debug=True)
                # now upload file
                try:
                    # sometimes the connect fails with an uncaught exception, so
                    # catch it here.
                    r = item.upload_file(dir_file,
                                         key=file_name,
                                         metadata=metadata)
                    # set the uri for the media item just uploaded
                    if r.status_code == requests.codes.ok or self.save_db:
                        ia_file_uri = self.make_ia_image_uri(item_id, file_name)
                except Exception:
                    print('Upload failure for: ' + file_name + ' uuid: ' + man_obj.uuid)
                    ia_file_uri = None
        return ia_file_uri
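
Unlike item.upload(), which accepts a list of files, item.upload_file() sends a single file and lets the key argument set its remote name; it returns a requests-style response, which is why the code checks r.status_code above.
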
github bibanon / tubeup / tubeup / TubeUp.py
os.remove(description_file_path)

        # Delete empty annotations.xml file so it isn't uploaded
        annotations_file_path = videobasename + '.annotations.xml'
        if (os.path.exists(annotations_file_path) and
            (('annotations' in vid_meta and
             vid_meta['annotations'] in {'', EMPTY_ANNOTATION_FILE}) or
                check_is_file_empty(annotations_file_path))):
            os.remove(annotations_file_path)

        # Upload all files with videobase name: e.g. video.mp4,
        # video.info.json, video.srt, etc.
        files_to_upload = glob.glob(videobasename + '*')

        # Upload the item to the Internet Archive
        item = internetarchive.get_item(itemname)

        if custom_meta:
            metadata.update(custom_meta)

        # Parse internetarchive configuration file.
        parsed_ia_s3_config = parse_config_file(self.ia_config_path)[1]['s3']
        s3_access_key = parsed_ia_s3_config['access']
        s3_secret_key = parsed_ia_s3_config['secret']

        if None in {s3_access_key, s3_secret_key}:
            msg = ('`internetarchive` configuration file is not configured'
                   ' properly.')

            self.logger.error(msg)
            if self.verbose:
                print(msg)
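
Rather than passing keys explicitly, tubeup reads them back out of the same configuration file that the ia configure command writes; the s3 section of that file holds the access and secret keys used for uploads.
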
github ArchiveTeam / terroroftinytown / terroroftinytown / release / iaupload.py
def upload(self):
        item = internetarchive.get_item(self.identifier)
        metadata = dict(
            title=self.title,
            collection=self.collection,
            mediatype='software',
            subject=self.subject,
            description=self.description,
        )

        logger.info('Begin upload %s %s.', self.identifier, self.filenames)

        item.upload(self.filenames, metadata=metadata,
                    verify=True, verbose=True,
                    access_key=self.access_key, secret_key=self.secret_key,
                    retries=10)

        logger.info('Done upload.')
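
This is the most compact end-to-end pattern in the set: get_item(), a plain metadata dict, and a single item.upload() call that passes the S3 keys, retry count, and verification flag explicitly instead of relying on a configuration file.
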
github rohit-dua / BUB / bot / worker.py
    @ia_online(logger = log)
    def upload_to_IA(self, library, Id): 
        """Upload book to IA with appropriate metadata."""
        if self.ia_identifier is None:
            item = self.get_valid_identifier()
            self.ia_identifier = item.identifier
        else:
            item = ia.get_item(self.ia_identifier)
        metadata = dict(
            mediatype="text",
            creator=self.author,
            title=re.sub(r"""[!#\n\r|^\\\"~()\[\]:\-/]""", '', self.title)[:330],
            publisher=self.publisher,
            description=re.sub(r"""[!#\n\r|^\\\"~()\[\]:\-/]""", '', self.description),
            source=self.infoLink,
            language=self.language,
            year=self.year,
            date=self.publishedDate,
            subject="bub_upload",
            licenseurl="http://creativecommons.org/publicdomain/mark/1.0/" if self.publicDomain else "",
            scanner=self.scanner,
            sponsor=self.sponser,
            uploader="bub")
        metadata['google-id'] = self.Id if self.library == 'gb' else ""
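
The snippet ends with the metadata dict assembled; it would typically be attached at upload time, or applied to an already-existing item with modify_metadata(). A brief sketch, not from the BUB source, reusing item and metadata from the snippet above ('book.pdf' is a placeholder):

item.upload('book.pdf', metadata=metadata)       # attach metadata when uploading
item.modify_metadata({'subject': 'bub_upload'})  # or patch an existing item
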
github jhu-lcsr / costar_plan / costar_hyper / costar_block_stacking_internet_archive_download.py
def main(args, root='root'):
    item = internetarchive.get_item('johns_hopkins_costar_dataset')

    path = os.path.expanduser(args['path'])

    dryrun = args['dryrun']

    r = item.download(
            destdir=path,  # The directory to download files to
            ignore_existing=True,  # Skip files that already exist locally
            checksum=True,  # Skip files based on checksum
            verbose=True,  # Print progress to stdout
            retries=100,  # The number of times to retry failed requests
            # Set to true to print headers to stdout, and exit without downloading
            dryrun=dryrun)

    print(r)
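
The same dataset fetch can also be driven from the command line with the ia tool that ships with the library, e.g. ia download johns_hopkins_costar_dataset.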