How to use minet - 3 common examples

To help you get started, we’ve selected a few minet examples based on popular ways the library is used in public projects.


medialab / gazouilloire / bin / complete_links_resolving_v2.py (view on GitHub)
        # Collect source -> resolved url pairs to store in Mongo for this batch
        links_to_save = []
        t = datetime.now().isoformat()
        print("  + [%s] %s urls to resolve" % (t, len(urls_to_clear)))
        try:
            # Resolve redirection chains for the whole batch concurrently with minet
            for res in multithreaded_resolve(
              urls_to_clear,
              threads=min(50, batch_size),
              throttle=0.2,
              max_redirects=20,
              insecure=True,
              timeout=Timeout(connect=10, read=30),
              follow_meta_refresh=True
            ):
                source = res.url
                last = res.stack[-1]
                # Redirection errors are tolerated; anything else is reported and the url skipped
                if res.error and not isinstance(res.error, RedirectError):
                    print("ERROR on resolving %s: %s (last url: %s)" % (source, res.error, last.url), file=sys.stderr)
                    continue
                if verbose:
                    print("          ", last.status, "(%s)" % last.type, ":", source, "->", last.url, file=sys.stderr)
                # Only store reasonably short source urls (the url becomes the MongoDB _id)
                if len(source) < 1024:
                    links_to_save.append({'_id': source, 'real': last.url})
                alreadydone[source] = last.url
                if source != last.url:
                    done += 1
        except Exception as e:
            print("CRASHED with %s (%s) while resolving batch, skipping it for now..." % (e, type(e)))
            print("CRASHED with %s (%s) while resolving %s" % (e, type(e), urls_to_clear), file=sys.stderr)
            skip += batch_size
            print("  + [%s] STORING %s REDIRECTIONS IN MONGO" % (t, len(links_to_save)))
            if links_to_save:
                try:
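
The excerpt above is cut off by the snippet viewer, so here is a minimal, self-contained sketch of the core pattern it illustrates: handing a list of urls to minet's multithreaded_resolve and keeping the last hop of each redirection stack. The resolver arguments mirror the call shown above, but the imports, the sample urls and the thread count are assumptions on our part; this part of minet's API has moved between releases, so verify against your installed version.

import sys

from urllib3 import Timeout

# Assumed imports: in the minet versions used by gazouilloire,
# multithreaded_resolve is exposed at the package root and RedirectError
# lives in minet.exceptions; adjust to your installed release.
from minet import multithreaded_resolve
from minet.exceptions import RedirectError

# Hypothetical input urls
urls = [
    "https://bit.ly/example-short-link",
    "https://t.co/another-short-link",
]

resolved = {}

for res in multithreaded_resolve(
    urls,
    threads=10,
    throttle=0.2,
    max_redirects=20,
    timeout=Timeout(connect=10, read=30),
    follow_meta_refresh=True,
):
    last = res.stack[-1]  # final hop of the redirection chain
    if res.error and not isinstance(res.error, RedirectError):
        # Hard failure: report it and leave the original url unresolved
        print("could not resolve %s: %s" % (res.url, res.error), file=sys.stderr)
        continue
    resolved[res.url] = last.url

print(resolved)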
medialab / gazouilloire / bin / complete_links_resolving_v2.py (view on GitHub)
        done = 0
        # Gather every link from documents that still lack resolved ("proper") links
        batch_urls = list(set([l for t in todo if not t.get("proper_links", []) for l in t.get('links', [])]))
        # Reuse resolutions already stored in the links collection
        alreadydone = {l["_id"]: l["real"] for l in linkscoll.find({"_id": {"$in": batch_urls}})}
        urls_to_clear = []
        for u in batch_urls:
            if u in alreadydone:
                continue
            # Twitter status urls don't need resolving, just strip the share suffix
            if u.startswith("https://twitter.com/") and "/status/" in u:
                alreadydone[u] = u.replace("?s=19", "")
                continue
            urls_to_clear.append(u)
        links_to_save = []
        t = datetime.now().isoformat()
        print("  + [%s] %s urls to resolve" % (t, len(urls_to_clear)))
        try:
            for res in multithreaded_resolve(
              urls_to_clear,
              threads=min(50, batch_size),
              throttle=0.2,
              max_redirects=20,
              insecure=True,
              timeout=Timeout(connect=10, read=30),
              follow_meta_refresh=True
            ):
                source = res.url
                last = res.stack[-1]
                # Redirection errors are tolerated; anything else is reported and the url skipped
                if res.error and not isinstance(res.error, RedirectError):
                    print("ERROR on resolving %s: %s (last url: %s)" % (source, res.error, last.url), file=sys.stderr)
                    continue
                if verbose:
                    print("          ", last.status, "(%s)" % last.type, ":", source, "->", last.url, file=sys.stderr)
                if len(source) < 1024:
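
This second excerpt is also truncated and is tied to gazouilloire's MongoDB collections. Stripped of the database, the bookkeeping it performs before calling the resolver looks roughly like the sketch below; the helper name, the plain-dict cache and the sample urls are ours, not the project's.

def split_batch(batch_urls, cache):
    """Split a batch into urls that still need resolving and known results.

    Simplified sketch: `cache` is a plain dict standing in for gazouilloire's
    MongoDB links collection.
    """
    already_done = {}
    to_resolve = []
    for url in batch_urls:
        if url in cache:
            # Already resolved on a previous run: reuse the stored target
            already_done[url] = cache[url]
        elif url.startswith("https://twitter.com/") and "/status/" in url:
            # Twitter status urls don't redirect; just drop the share suffix
            already_done[url] = url.replace("?s=19", "")
        else:
            to_resolve.append(url)
    return to_resolve, already_done


urls_to_clear, alreadydone = split_batch(
    ["https://twitter.com/user/status/1?s=19", "https://bit.ly/example"],  # hypothetical urls
    cache={},
)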
rdevon / cortex / arch / shelf / adversarial_clustering.py (view on GitHub)
    data.add_noise('y', dist=noise_type, size=dim_l, **noise_parameters)

    Encoder, encoder_args = update_encoder_args(x_shape, model_type=encoder_type,
                                                encoder_args=encoder_args)
    Decoder, decoder_args = update_decoder_args(x_shape, model_type=decoder_type,
                                                decoder_args=decoder_args)
    build_mine_discriminator(models, x_shape, dim_l, Encoder, key='mine_discriminator',
                             **encoder_args)
    build_noise_discriminator(models, dim_l, key='noise_discriminator')
    build_encoder(models, x_shape, dim_l, Encoder, **encoder_args)
    build_extra_networks(models, x_shape, dim_l, dim_l, Decoder, **decoder_args)


TRAIN_ROUTINES = dict(mine_discriminator=mine_discriminator_routine,
                      noise_discriminator=noise_discriminator_routine,
                      encoder=encoder_routine, nets=network_routine)

DEFAULT_CONFIG = dict(data=dict(batch_size=dict(train=64, test=640), duplicate=2))

minet

A webmining CLI tool & library for Python.

License: GPL-3.0
Latest version published 2 months ago
Package Health Score: 78 / 100
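
minet's url resolution is also exposed through the resolve subcommand of its CLI, along the lines of: minet resolve url urls.csv > resolved.csv, where url is the name of the CSV column holding the links. The exact arguments and output flags differ between minet releases, so treat this invocation as an illustration and check minet resolve --help for the version you have installed.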