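# Tasklet that POSTs newly visible record IDs to a configured callback URL.
# Depends on the "requests" library plus the Invenio helpers get_redis,
# write_message and the CFG_WEBCOLL_POST_REQUEST_URL setting.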
def bst_webcoll_postprocess(recids=[]):
    """Parse recids to POST to remote server to alert that records are visible."""
    if isinstance(recids, str):
        recids = recids.split(",")
    cache = get_redis()
    cached_ids = cache.get("webcoll_pending_recids") or []
    if cached_ids and not cached_ids == "[]":
        if isinstance(cached_ids, str):
            # the cached value is stored as the string form of a list of recids
            cached_ids = eval(cached_ids)
        recids += cached_ids
    if not CFG_WEBCOLL_POST_REQUEST_URL:
        write_message("CFG_WEBCOLL_POST_REQUEST_URL is not set.")
        return
    if recids and len(recids) > 0 and not recids == "[]":
        write_message("Going to POST callback to {0}: {1} (total: {2})".format(
            CFG_WEBCOLL_POST_REQUEST_URL,
            recids[:10],
            len(recids))
        )
        # remember the pending recids so the POST can be retried later
        cache.set("webcoll_pending_recids", recids)
        session = requests.Session()
        try:
            adapter = requests.adapters.HTTPAdapter(max_retries=3)
            session.mount(CFG_WEBCOLL_POST_REQUEST_URL, adapter)
            response = session.post(CFG_WEBCOLL_POST_REQUEST_URL,
                                    data={'recids': recids})
        except Exception as err:
            # assumed minimal handler: log the failure; the recids stay cached
            # under "webcoll_pending_recids" so a later run can retry the POST
            write_message("POST request failed: {0}".format(err))
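
# CONSYN harvest reporting: upload each generated file via FTP (or list the
# files as ready for upload) and mail a summary of the run.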
        try:
            submit_records_via_ftp(filepath)
            filename = filepath.split('/')[-1]
            body.append("\t%s (%s records)" % (filename, batch_size))
        except:
            _errors_detected.append(Exception(
                "Failed to upload %s to FTP server" % filepath)
            )
            write_message("Failed to upload %s to FTP server" % filepath)
else:
    body += ['\tFiles ready for upload:']
    for filename in files_to_upload:
        body.append("\t%s (%s records)" % (filename, batch_size))

if files_to_upload:
    body = '\n'.join(body)
    write_message(subject)
    write_message(body)
    if submit:
        if submit_records_via_mail(subject, body, CFG_CONSYNHARVEST_EMAIL):
            write_message("Mail sent to %r" % (CFG_CONSYNHARVEST_EMAIL,))
        else:
            write_message("ERROR: Cannot send mail.")
else:
    write_message("No new files!")
"""
if not dates:
    dates = (get_lastupdated(rank_method_code), '')
if dates[0] is None:
    dates = ("0000-00-00 00:00:00", '')
query = """SELECT b.id FROM bibrec AS b WHERE b.modification_date >= %s"""
if dates[1]:
    query += " AND b.modification_date <= %s"
query += " ORDER BY b.id ASC"
if dates[1]:
    res = run_sql(query, (dates[0], dates[1]))
else:
    res = run_sql(query, (dates[0],))
alist = create_range_list([row[0] for row in res])
if not alist:
    write_message("No new records added since last time method was run")
return alist
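
# Reference extraction: extract references for a record, honouring the
# no-overwrite option, and open an RT ticket when requested.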
if _arxiv:
    overwrite = True
else:
    overwrite = not task_get_option('no-overwrite')
try:
    record = extract_references_from_record(recid)
    msg = "Extracted references for %s" % recid
    safe_to_extract = True
    if overwrite:
        write_message("%s (overwrite)" % msg)
    else:
        write_message(msg)
        if not check_record_for_refextract(recid):
            write_message('Record not safe for re-extraction, skipping')
            safe_to_extract = False
    if safe_to_extract:
        records.append(record)
        # Create a RT ticket if necessary
        if task_get_option('new') or task_get_option('create-ticket'):
            create_ticket(recid, bibcatalog_system)
except FullTextNotAvailable:
    write_message("No full text available for %s" % recid)
def _get_values_from_marc_tag(tag, recids):
    '''Finds the value for a specific tag'''
    digits = tag[0:2]
    try:
        intdigits = int(digits)
        if intdigits < 0 or intdigits > 99:
            raise ValueError
    except ValueError:
        # invalid tag value asked for
        write_message('You have asked for an invalid tag value '
                      '[tag=%s; value=%s].' % (tag, digits), verbose=5)
        return []
    bx = "bib%sx" % digits
    bibx = "bibrec_bib%sx" % digits
    max_recid = get_max_recid()
    if len(recids) == 1:
        to_append = '= %s'
        query_params = [recids.tolist()[0]]
    elif len(recids) < max_recid / 3:
        # if we have fewer than one third of the records, use IN
        # (this really depends on how large the repository is)
        to_append = 'IN %s'
        query_params = [tuple(recids)]
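
# Author identifier normalisation: map external ID tags (BAI, INSPIRE ID,
# ORCID, KAKEN, internal UID) into a per-person dict and flag profiles that
# carry the same identifier type twice.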
        tag = 'BAI'
    elif tag == 'extid:INSPIREID':
        tag = 'INSPIRE'
    elif tag == 'extid:ORCID':
        tag = 'ORCID'
    elif tag == 'extid:KAKEN':
        tag = 'KAKEN'
    elif tag == 'uid':
        tag = 'UID'
    else:
        continue
    data = data.strip()
    if personid not in ret:
        ret[personid] = {'personid': personid}
    if tag in ret[personid]:
        write_message("ERROR: http://old.inspirehep.net/author/profile/{personid} has invalid IDs".format(personid=personid), stream=sys.stderr)
        continue
    ret[personid][tag] = data.upper()
    if tag == 'BAI':
        ret[personid]['ORIGINAL_BAI'] = data
return ret.values()
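
# Harvest conversion results: record a CONVERSION_ERROR with the traceback when
# a file fails to convert, write the converted MARCXML on success, and mark
# uninteresting doctypes as DOCTYPE_WRONG.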
            _errors_detected.append(e)
            error_trace = traceback.format_exc()
            # Some error happened, let's gracefully quit
            results[full_xml_filepath] = (StatusCodes.CONVERSION_ERROR,
                                          error_trace)
            write_message('Error converting:'
                          ' \n {0}'.format(error_trace))
            continue
        with open(new_full_xml_filepath, "w") as marcfile:
            marcfile.write(converted_xml)
        results[full_xml_filepath] = (StatusCodes.OK,
                                      new_full_xml_filepath)
    else:
        results[full_xml_filepath] = (StatusCodes.DOCTYPE_WRONG,
                                      doctype)
        write_message("Doctype not interesting: {0}".format(doctype))
return results
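
# bibupload FFT handling: attach new formats to a bibdoc; in 'replace_or_insert'
# mode rename existing documents and apply PURGE/DELETE/EXPUNGE/FIX-ALL actions
# to the matching bibdoc.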
write_message("('%s', '%s', '%s') not inserted because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr)
raise StandardError
for (url, format, description, comment, flags, timestamp) in urls:
assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, timestamp, pretend=pretend))
elif mode == 'replace_or_insert': # to be thought as correct_or_insert
for bibdoc in bibrecdocs.list_bibdocs():
if bibdoc.get_docname() == docname:
if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'):
if newname != docname:
try:
if not pretend:
bibdoc.change_name(newname)
## Let's refresh the list of bibdocs.
bibrecdocs.build_bibdoc_list()
except StandardError, e:
write_message(e, stream=sys.stderr)
raise
found_bibdoc = False
for bibdoc in bibrecdocs.list_bibdocs():
if bibdoc.get_docname() == newname:
found_bibdoc = True
if doctype == 'PURGE':
if not pretend:
bibdoc.purge()
elif doctype == 'DELETE':
if not pretend:
bibdoc.delete()
elif doctype == 'EXPUNGE':
if not pretend:
bibdoc.expunge()
elif doctype == 'FIX-ALL':
if not pretend:
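
# Word-similarity ranking, Phases 3-4: approximate each term's importance Gi
# from the entropy of its term frequencies across documents, then accumulate
# each record's normalisation value Nj from Gi * (1 + log tf).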
        term_docs = deserialize_via_marshal(hitlist)
        if term_docs.has_key("Gi"):
            Gi[t] = term_docs["Gi"][1]
        elif len(term_docs) == 1:
            Gi[t] = 1
        else:
            Fi = 0
            Gi[t] = 1
            for (j, tf) in term_docs.iteritems():
                Fi += tf[0]
            for (j, tf) in term_docs.iteritems():
                if tf[0] != Fi:
                    Gi[t] = Gi[t] + ((float(tf[0]) / Fi) *
                                     math.log(float(tf[0]) / Fi) /
                                     math.log(2)) / math.log(N)
    write_message("Phase 3: ......processed %s/%s terms" %
                  ((i + 5000 > len(terms) and len(terms) or (i + 5000)),
                   len(terms)))
    i += 5000
write_message("Phase 3: Finished getting approximate importance of all affected terms")
write_message("Phase 4: Calculating normalization value for all affected records and updating %sR" % table[:-1])
records = Nj.keys()
i = 0
while i < len(records):
    # Calculating the normalization value for each document, and adding
    # the Gi value to each term in each document.
    docs_terms = get_from_reverse_index(records, i, (i + 5000), table)
    for (j, termlist) in docs_terms:
        doc_terms = deserialize_via_marshal(termlist)
        try:
            for (t, tf) in doc_terms.iteritems():
                if Gi.has_key(t):
                    Nj[j] = Nj.get(j, 0) + math.pow(Gi[t] * (1 + math.log(tf[0])), 2)
                    Git = int(math.floor(Gi[t] * 100))
                    if Git >= 0:
                        Git += 1
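
# Date-range validation: both dates must match 'yyyy-mm-dd:yyyy-mm-dd' and the
# first must precede the second; otherwise None is returned.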
                twodates = None
                return twodates
            else:
                write_message("Dates have invalid format, not "
                              "'yyyy-mm-dd:yyyy-mm-dd'")
                twodates = None
                return twodates
        ## final check.. date1 must be smaller than date2
        date1 = str(twodates[0]) + " 01:00:00"
        date2 = str(twodates[1]) + " 01:00:00"
        if compare_timestamps_with_tolerance(date1, date2) != -1:
            write_message("First date must be before second date.")
            twodates = None
            return twodates
    else:
        write_message("Dates have invalid format, not "
                      "'yyyy-mm-dd:yyyy-mm-dd'")
        twodates = None
else:
    twodates = None
return twodates