Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is an excerpt from the middle of a larger routine. The
# original indentation was lost in extraction and the enclosing definition is
# not visible, so the exact nesting of the statements below must be confirmed
# against the real file.
# Records ({'_id': <source url>, 'real': <resolved url>}) collected for this batch.
links_to_save = []
# ISO timestamp reused by the progress messages printed below.
t = datetime.now().isoformat()
print(" + [%s] %s urls to resolve" % (t, len(urls_to_clear)))
try:
# Iterate over the results yielded by multithreaded_resolve for the pending URLs;
# each result is read through .url, .stack and .error below.
for res in multithreaded_resolve(
urls_to_clear,
threads=min(50, batch_size),
throttle=0.2,
max_redirects=20,
insecure=True,
timeout=Timeout(connect=10, read=30),
follow_meta_refresh=True
):
source = res.url
# Last entry of the redirection stack; its .url is used as the resolved URL.
last = res.stack[-1]
# Any error that is not a RedirectError (or a subclass of it) is reported on
# stderr and the URL is skipped.
# NOTE(review): the `type(...) !=` test is subsumed by the issubclass test;
# this looks equivalent to `not isinstance(res.error, RedirectError)`.
if res.error and type(res.error) != RedirectError and not issubclass(type(res.error), RedirectError):
print("ERROR on resolving %s: %s (last url: %s)" % (source, res.error, last.url), file=sys.stderr)
continue
if verbose:
print(" ", last.status, "(%s)" % last.type, ":", source, "->", last.url, file=sys.stderr)
# Only sources shorter than 1024 characters are queued for storage
# (presumably to respect a size limit on the stored `_id` -- TODO confirm).
if len(source) < 1024:
links_to_save.append({'_id': source, 'real': last.url})
# NOTE(review): with indentation lost it is unclear whether the next statements
# are nested under the length check above -- confirm against the original file.
alreadydone[source] = last.url
# `done` is incremented when the resolved URL differs from the source.
if source != last.url:
done += 1
except Exception as e:
# Short message on stdout, full message (including the URL list) on stderr,
# then `skip` is advanced by one batch size.
print("CRASHED with %s (%s) while resolving batch, skipping it for now..." % (e, type(e)))
print("CRASHED with %s (%s) while resolving %s" % (e, type(e), urls_to_clear), file=sys.stderr)
skip += batch_size
print(" + [%s] STORING %s REDIRECTIONS IN MONGO" % (t, len(links_to_save)))
# NOTE(review): the body of this condition is cut off in this excerpt.
if links_to_save:
# NOTE(review): second excerpt of what appears to be the same routine; the
# indentation was lost in extraction and both the statement that opens this
# `try:` context and the end of the snippet are outside this view.
try:
# Counter incremented below when a resolved URL differs from its source.
done = 0
# Unique links taken from the `todo` entries whose "proper_links" is missing or empty.
batch_urls = list(set([l for t in todo if not t.get("proper_links", []) for l in t.get('links', [])]))
# Mapping _id -> real for the documents linkscoll.find returns for these URLs.
alreadydone = {l["_id"]: l["real"] for l in linkscoll.find({"_id": {"$in": batch_urls}})}
# URLs that still need to be resolved.
urls_to_clear = []
for u in batch_urls:
if u in alreadydone:
continue
# Twitter status URLs are not resolved: they are recorded as themselves with
# any "?s=19" substring removed.
if u.startswith("https://twitter.com/") and "/status/" in u:
alreadydone[u] = u.replace("?s=19", "")
continue
urls_to_clear.append(u)
# Records ({'_id': <source url>, 'real': <resolved url>}) collected for this batch.
links_to_save = []
# ISO timestamp used in the progress message below.
t = datetime.now().isoformat()
print(" + [%s] %s urls to resolve" % (t, len(urls_to_clear)))
try:
# Iterate over the results yielded by multithreaded_resolve for the pending URLs;
# each result is read through .url, .stack and .error below.
for res in multithreaded_resolve(
urls_to_clear,
threads=min(50, batch_size),
throttle=0.2,
max_redirects=20,
insecure=True,
timeout=Timeout(connect=10, read=30),
follow_meta_refresh=True
):
source = res.url
# Last entry of the redirection stack; its .url is used as the resolved URL.
last = res.stack[-1]
# Any error that is not a RedirectError (or a subclass of it) is reported on
# stderr and the URL is skipped.
# NOTE(review): the `type(...) !=` test is subsumed by the issubclass test;
# this looks equivalent to `not isinstance(res.error, RedirectError)`.
if res.error and type(res.error) != RedirectError and not issubclass(type(res.error), RedirectError):
print("ERROR on resolving %s: %s (last url: %s)" % (source, res.error, last.url), file=sys.stderr)
continue
if verbose:
print(" ", last.status, "(%s)" % last.type, ":", source, "->", last.url, file=sys.stderr)
# NOTE(review): the body of this length check is cut off in this excerpt.
if len(source) < 1024:
# NOTE(review): body excerpt of a routine whose header is outside this view;
# the original indentation was lost in extraction. `data`, `models`, `x_shape`,
# `dim_l` and the *_type / *_args names all come from that unseen context.
# Call data.add_noise for the key 'y' with the requested distribution and size dim_l.
data.add_noise('y', dist=noise_type, size=dim_l, **noise_parameters)
# Obtain the encoder/decoder objects and their updated keyword arguments from
# the update_*_args helpers.
Encoder, encoder_args = update_encoder_args(x_shape, model_type=encoder_type,
encoder_args=encoder_args)
Decoder, decoder_args = update_decoder_args(x_shape, model_type=decoder_type,
decoder_args=decoder_args)
# Invoke the build helpers with `models`; the two discriminators are given the
# keys 'mine_discriminator' and 'noise_discriminator'.
build_mine_discriminator(models, x_shape, dim_l, Encoder, key='mine_discriminator',
**encoder_args)
build_noise_discriminator(models, dim_l, key='noise_discriminator')
build_encoder(models, x_shape, dim_l, Encoder, **encoder_args)
# NOTE(review): dim_l is passed twice here -- confirm against the signature of
# build_extra_networks that this is intended.
build_extra_networks(models, x_shape, dim_l, dim_l, Decoder, **decoder_args)
# Training routines keyed by name (the routine objects are defined elsewhere).
TRAIN_ROUTINES = {
    'mine_discriminator': mine_discriminator_routine,
    'noise_discriminator': noise_discriminator_routine,
    'encoder': encoder_routine,
    'nets': network_routine,
}
# Default configuration: train/test batch sizes and the `duplicate` data setting.
DEFAULT_CONFIG = {
    'data': {
        'batch_size': {'train': 64, 'test': 640},
        'duplicate': 2,
    },
}