Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_push():
    """
    Read WARC, manipulate content to ensure uniqueness, push to IPFS
      WARC should result in two CDXJ entries with three space-delimited
      fields each: surt URI, datetime, JSON
      JSON should contain AT LEAST locator, mime_type, and status fields
    """
    newWARCPath = ipwbTest.createUniqueWARC()
    # use ipwb indexer to push
    cdxjList = indexer.indexFileAt(newWARCPath, quiet=True)
    cdxj = '\n'.join(cdxjList)

    # Lines starting with '!' are treated as metadata and skipped. Blank
    # lines are skipped too, and startswith() is used instead of indexing,
    # so an empty line cannot raise IndexError.
    firstNonMetadataEntry = next(
        (line for line in cdxj.split('\n')
         if line and not line.startswith('!')),
        '')
    assert checkCDXJFields(firstNonMetadataEntry)

    # maxsplit=2 keeps the third field (the JSON) whole even when it
    # contains spaces of its own.
    firstEntryLastField = firstNonMetadataEntry.split(' ', 2)[2]
    assert checkIPWBJSONFieldPresesence(firstEntryLastField)
def test_warc_ipwbIndexerBrokenWARCRecord():
    """Index the broken sample WARC and expect exactly one CDXJ entry."""
    # Pass the components separately to os.path.join: concatenating them
    # first would yield '/../samples/...' (rooted at the filesystem root)
    # whenever os.path.dirname(__file__) is the empty string.
    pathOfBrokenWARC = os.path.join(
        os.path.dirname(__file__), '..', 'samples', 'warcs', 'broken.warc')
    cdxjList = indexer.indexFileAt(pathOfBrokenWARC, quiet=True)
    cdxj = '\n'.join(cdxjList)
    assert ipwbTest.countCDXJEntries(cdxj) == 1
def test_cdxj_warc_responseRecordCount():
    """Index a freshly generated unique WARC and expect two CDXJ entries."""
    uniqueWARC = ipwbTest.createUniqueWARC()
    # Running the ipwb indexer is what pushes the content.
    indexLines = indexer.indexFileAt(uniqueWARC, quiet=True)
    entryCount = ipwbTest.countCDXJEntries('\n'.join(indexLines))
    assert entryCount == 2
def startReplay(warcFilename):
    """Start replay in a child process and index a sample WARC for it.

    A randomly named, empty ``.cdxj`` placeholder is created in the system
    temp directory and handed to ``replay.start`` in a new ``Process``,
    which is stored in the module-level global ``p``. The named sample
    WARC is then indexed and the resulting CDXJ written over the
    placeholder.

    :param warcFilename: file name of a WARC under ``../samples/warcs/``
        relative to this file's directory.
    """
    global p
    # Join the components separately: concatenating them first would yield
    # '/../samples/...' (rooted at the filesystem root) whenever
    # os.path.dirname(__file__) is the empty string.
    pathOfWARC = os.path.join(
        os.path.dirname(__file__), '..', 'samples', 'warcs', warcFilename)

    # 12 distinct picks from uppercase letters plus the digits repeated
    # six times (the repetition applies to the digits only).
    randomName = ''.join(random.sample(
        string.ascii_uppercase + string.digits * 6, 12)) + '.cdxj'
    tempFilePath = os.path.join(tempfile.gettempdir(), randomName)

    # Create placeholder file for replay
    with open(tempFilePath, 'a'):
        pass

    p = Process(target=replay.start, args=[tempFilePath])
    p.start()
    # NOTE(review): presumably gives replay time to come up before the
    # index is written -- confirm the 5 second wait is still needed.
    sleep(5)

    cdxjList = indexer.indexFileAt(pathOfWARC, quiet=True)
    cdxj = '\n'.join(cdxjList)
    with open(tempFilePath, 'w') as f:
        f.write(cdxj)
def checkArgs_index(args):
    """Translate parsed arguments into an ``indexer.indexFileAt`` call.

    Exits via ``sys.exit()`` when ``ipwbUtil.isDaemonAlive()`` is falsy.
    Otherwise the ``e`` flag selects an empty-string encryption key and
    the ``c`` flag selects compression level 6; both are ``None`` when
    their flag is unset.

    :param args: object exposing ``e``, ``c``, ``warcPath``,
        ``compressFirst``, ``outfile`` and ``debug`` attributes.
    """
    daemonRunning = ipwbUtil.isDaemonAlive()
    if not daemonRunning:
        sys.exit()

    encryptionKey = '' if args.e else None
    level = 6 if args.c else None  # Magic 6, TA-DA!

    indexer.indexFileAt(
        args.warcPath,
        encryptionKey,
        level,
        args.compressFirst,
        outfile=args.outfile,
        debug=args.debug)
# If the user does not select a file, the browser also
# submits an empty part without a filename.
if file.filename == '':
flash('No selected file')
return resp
if file and allowed_file(file.filename):
filename = secure_filename(file.filename)
warcPath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
file.save(warcPath)
# TODO: Check if semaphore lock exists, log it if so, wait for the lock
# to be released, and create a new lock
print('Indexing file from uploaded WARC at {0} to {1}'.format(
warcPath, app.cdxjFilePath))
indexer.indexFileAt(warcPath, outfile=app.cdxjFilePath)
print('Index updated at {0}'.format(app.cdxjFilePath))
app.cdxjFileContents = getIndexFileContents(app.cdxjFilePath)
# TODO: Release semaphore lock
resp.location = request.referrer
return resp