Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
https://archive.org/details/greekGovernmentgazette''')
required = parser.add_argument_group('required arguments')
optional = parser.add_argument_group('optional arguments')
# arguments
required.add_argument('-d', help='Input directory', required=True)
optional.add_argument('--w', help='Number of workers', type=int, default=1)
args = argparser.parse_args()
# pdfs listed recursively
pdfs = list_files(args.d, '.pdf', recursive=True)
# frozenset for faster lookup
# returns uploaded files
uploaded = frozenset([x['identifier'] for x in search_items(
'collection:greekgovernmentgazette')])
# pool for multiprocessing
pool = multiprocessing.Pool(args.w)
pool.map(ia_upload, pdfs)
def search(search_term):
dictset = []
for item in search_items(search_term).iter_as_items():
files = [file_meta for file_meta in item.files
if file_meta["format"] == "VBR MP3"]
if (len(files) > 0):
# Make Collection
item_datetime = None
if 'date' in item.metadata:
item_date = parse(item.metadata['date'])
item_datetime = datetime.combine(item_date,
datetime.min.time())
c = Collection()
c.id = item.identifier
c.source = "{}{}".format(DETAILS_URL,
item.identifier)
c.description = None
import os
import time
import sys
import internetarchive as ia
from internetarchive.session import ArchiveSession
from internetarchive import get_item
from internetarchive import download
# Mirror the media files of every item in an archive.org collection into a
# local folder, skipping items that already have an entry in that folder.
ident = 'podcasts'          # identifier of the collection to search
destifolder = 'iapodcasts'  # local destination folder passed to download()

search = ia.search_items('collection:%s' % ident)

# Names already present in the destination folder. An item is skipped when
# an entry named after its identifier is found here. A set keeps the
# per-item membership test constant-time, and a missing folder (first run)
# is treated as "nothing downloaded yet" instead of raising.
if os.path.isdir(destifolder):
    current = set(os.listdir(destifolder))
else:
    current = set()

num = 0  # stays 0 when the search yields no results
for num, result in enumerate(search, start=1):  # every item in the collection
    itemid = result['identifier']
    print('Downloading: #' + str(num) + '\t' + itemid)
    if itemid in current:
        continue
    try:
        download(itemid, destdir=destifolder, retries=5,
                 glob_pattern=['*.ogg', '*.mp3', '*.wav', '*.flv'])
        print('\t\t Download success.')
    except Exception as e:
        # Best-effort mirroring: report the failure and move on to the next
        # item. The original format string used "()" instead of "{}", which
        # printed the item id after "=" and dropped the exception entirely.
        print("Error Occurred downloading {} = {}".format(itemid, e))
        # NOTE(review): the original indentation was lost, so this is assumed
        # to belong to the error path - confirm. The sleep is disabled, so
        # the message below is printed without any actual pause.
        print('Pausing for 20 minutes')
        #time.sleep(1200)
def search(search_term):
dictset = []
for item in search_items(search_term).iter_as_items():
files = [file_meta for file_meta in item.files
if file_meta["format"] == "VBR MP3"]
if (len(files) > 0):
# Make Collection
item_datetime = None
if 'date' in item.metadata:
item_date = parse(item.metadata['date'])
item_datetime = datetime.combine(item_date, datetime.min.time())
c = Collection()
c.id = item.identifier
c.source = "{}{}".format(DETAILS_URL,
item.identifier)
c.description = None # item.metadata['description']