How to use the wasabi.msg.warn function in wasabi

To help you get started, we've selected a few wasabi examples based on popular ways wasabi.msg.warn is used in public projects.

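All of the snippets below go through wasabi's default printer, imported as msg. As a quick orientation, here is a minimal sketch of the msg.warn calls you'll see; the message strings are illustrative:

from wasabi import msg

# Plain warning: prints the message with a warning prefix.
msg.warn("Output directory is not empty")

# Title plus longer explanation, rendered as a heading and an indented block.
msg.warn(
    "Incompatible arguments",
    "The -f and -c arguments are deprecated and not compatible with -j.",
)

# exits=1 prints the warning, then terminates the process via sys.exit(1).
msg.warn("No annotations collected", exits=1)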

From explosion/spaCy, spacy/cli/train.py (view on GitHub):
                    util.set_env_log(verbose)

                    progress = _get_progress(
                        i,
                        losses,
                        scorer.scores,
                        output_stats,
                        beam_width=beam_width if has_beam_widths else None,
                        cpu_wps=cpu_wps,
                        gpu_wps=gpu_wps,
                    )
                    if i == 0 and "textcat" in pipeline:
                        textcats_per_cat = scorer.scores.get("textcats_per_cat", {})
                        for cat, cat_score in textcats_per_cat.items():
                            if cat_score.get("roc_auc_score", 0) < 0:
                                msg.warn(
                                    "Textcat ROC AUC score is undefined due to "
                                    "only one value in label '{}'.".format(cat)
                                )
                    msg.row(progress, **row_settings)
                # Early stopping
                if n_early_stopping is not None:
                    current_score = _score_for_model(meta)
                    if current_score < best_score:
                        iter_since_best += 1
                    else:
                        iter_since_best = 0
                        best_score = current_score
                    if iter_since_best >= n_early_stopping:
                        msg.text(
                            "Early stopping, best iteration "
                            "is: {}".format(i - iter_since_best)
                        )
                        break

From explosion/sense2vec, sense2vec/prodigy_recipes.py (view on GitHub):
    def eval_dataset(set_id):
        DB = connect()
        data = DB.get_dataset(set_id)
        accepted = [eg for eg in data if eg["answer"] == "accept" and eg.get("accept")]
        rejected = [eg for eg in data if eg["answer"] == "reject"]
        ignored = [eg for eg in data if eg["answer"] == "ignore"]
        if not accepted and not rejected:
            msg.warn("No annotations collected", exits=1)
        total_count = 0
        agree_count = 0
        for eg in accepted:
            total_count += len(eg.get("options", []))
            agree_count += len(eg.get("accept", []))
        msg.info(f"Evaluating data from '{set_id}'")
        msg.text(f"You rejected {len(rejected)} and ignored {len(ignored)} pair(s)")
        pc = agree_count / total_count
        text = f"You agreed {agree_count} / {total_count} times ({pc:.0%})"
        if pc > 0.5:
            msg.good(text)
        else:
            msg.fail(text)
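One caveat in this recipe: total_count stays 0 when every collected answer was a reject, so the division would raise ZeroDivisionError. A defensive variant of that step (our adjustment, not part of the recipe):

pc = agree_count / total_count if total_count else 0.0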

From explosion/sense2vec, scripts/06_precompute_cache.py (view on GitHub):
    fancy_index = []
    for i, n in enumerate(a.shape):
        if i == axis:
            fancy_index.append(indices)
        else:
            ind_shape = (1,) * i + (-1,) + (1,) * (ndim - i - 1)
            fancy_index.append(cupy.arange(n).reshape(ind_shape))

    return a[fancy_index]


if __name__ == "__main__":
    try:
        plac.call(main)
    except KeyboardInterrupt:
        msg.warn("Cancelled.")

From explosion/sense2vec, sense2vec/prodigy_recipes.py (view on GitHub):
    def eval_dataset(set_id):
        """Output summary about user agreement with the model."""
        DB = connect()
        data = DB.get_dataset(set_id)
        accepted = [eg for eg in data if eg["answer"] == "accept" and eg.get("accept")]
        rejected = [eg for eg in data if eg["answer"] == "reject"]
        if not accepted and not rejected:
            msg.warn("No annotations collected", exits=1)
        high_conf = 0.8
        agree_count = 0
        disagree_high_conf = len([e for e in rejected if e["confidence"] > high_conf])
        for eg in accepted:
            choice = eg["accept"][0]
            score_choice = [o["score"] for o in eg["options"] if o["id"] == choice][0]
            score_other = [o["score"] for o in eg["options"] if o["id"] != choice][0]
            if score_choice > score_other:
                agree_count += 1
            elif eg["confidence"] > high_conf:
                disagree_high_conf += 1
        pc = agree_count / (len(accepted) + len(rejected))
        text = f"You agreed {agree_count} / {len(data)} times ({pc:.0%})"
        msg.info(f"Evaluating data from '{set_id}'")
        if pc > 0.5:
            msg.good(text)

From explosion/spaCy, spacy/cli/init_model.py (view on GitHub):
    vectors_name=None,
    model_name=None,
):
    """
    Create a new model from raw data, like word frequencies, Brown clusters
    and word vectors. If vectors are provided in Word2Vec format, they can
    be either a .txt or zipped as a .zip or .tar.gz.
    """
    if jsonl_loc is not None:
        if freqs_loc is not None or clusters_loc is not None:
            settings = ["-j"]
            if freqs_loc:
                settings.append("-f")
            if clusters_loc:
                settings.append("-c")
            msg.warn(
                "Incompatible arguments",
                "The -f and -c arguments are deprecated, and not compatible "
                "with the -j argument, which should specify the same "
                "information. Either merge the frequencies and clusters data "
                "into the JSONL-formatted file (recommended), or use only the "
                "-f and -c files, without the other lexical attributes.",
            )
        jsonl_loc = ensure_path(jsonl_loc)
        lex_attrs = srsly.read_jsonl(jsonl_loc)
    else:
        clusters_loc = ensure_path(clusters_loc)
        freqs_loc = ensure_path(freqs_loc)
        if freqs_loc is not None and not freqs_loc.exists():
            msg.fail("Can't find words frequencies file", freqs_loc, exits=1)
        lex_attrs = read_attrs_from_deprecated(freqs_loc, clusters_loc)
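The -j branch replaces the separate frequencies and clusters files with a single JSONL file of lexical attributes, one JSON object per line. A hypothetical input and the srsly call that reads it (the file name and values are made up):

import srsly

# lex_attrs.jsonl might contain lines like:
#   {"orth": "the", "prob": -3.5, "cluster": "110"}
#   {"orth": "of", "prob": -4.2, "cluster": "101"}
for attrs in srsly.read_jsonl("lex_attrs.jsonl"):  # lazy generator of dicts
    print(attrs["orth"], attrs.get("prob"))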

From explosion/sense2vec, scripts/04_fasttext_train_vectors.py (view on GitHub):
    https://github.com/facebookresearch/fastText

    Note that this script will call into fastText and expects you to pass in the
    built fasttext binary. The command will also be printed if you want to run
    it separately.
    """

    output_path = Path(out_dir)
    if not output_path.exists():
        output_path.mkdir(parents=True)
        msg.good(f"Created output directory {out_dir}")

    if fasttext_filepath:
        msg.info("Loading fastText model vectors from .bin file")
        if in_dir:
            msg.warn(f"Warning: Providing a fastText filepath overrides fastText vector training")
        fasttext_filepath = Path(fasttext_filepath)
        if not fasttext_filepath.exists() or not fasttext_filepath.is_file() or not (fasttext_filepath.suffix == '.bin'):
            msg.fail("Error: fasttext_filepath expects a fastText model .bin file", exits=1)
        fasttext_model = fasttext.load_model(str(fasttext_filepath))
        msg.good("Successfully loaded fastText model")
    elif in_dir:
        msg.info("Training fastText model vectors")
        input_path = Path(in_dir)
        # Check that the input directory exists and is a directory
        if not input_path.exists() or not input_path.is_dir():
            msg.fail("Not a valid input directory", in_dir, exits=1)
        tmp_path = input_path / "s2v_input.tmp"
        input_files = [p for p in input_path.iterdir() if p.suffix == ".s2v"]
        if not input_files:
            msg.fail("Input directory contains no .s2v files", in_dir, exits=1)
        # fastText expects only one input file and only reads from disk and not
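The checks on fasttext_filepath and in_dir above follow a single pattern: validate the path with pathlib, then report through wasabi and exit. A condensed, hypothetical helper in the same spirit (the function is ours, not part of the script):

from pathlib import Path
from wasabi import msg

def require_file(path_str, suffix):
    # Fail fast with a readable message instead of a bare traceback.
    path = Path(path_str)
    if not path.is_file() or path.suffix != suffix:
        msg.fail(f"Expected an existing {suffix} file", path_str, exits=1)
    return path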

From explosion/spaCy, spacy/cli/train.py (view on GitHub):
    # Make sure all files and paths exist if they are needed
    train_path = util.ensure_path(train_path)
    dev_path = util.ensure_path(dev_path)
    meta_path = util.ensure_path(meta_path)
    output_path = util.ensure_path(output_path)
    if raw_text is not None:
        raw_text = list(srsly.read_jsonl(raw_text))
    if not train_path or not train_path.exists():
        msg.fail("Training data not found", train_path, exits=1)
    if not dev_path or not dev_path.exists():
        msg.fail("Development data not found", dev_path, exits=1)
    if meta_path is not None and not meta_path.exists():
        msg.fail("Can't find model meta.json", meta_path, exits=1)
    meta = srsly.read_json(meta_path) if meta_path else {}
    if output_path.exists() and [p for p in output_path.iterdir() if p.is_dir()]:
        msg.warn(
            "Output directory is not empty",
            "This can lead to unintended side effects when saving the model. "
            "Please use an empty directory or a different path instead. If "
            "the specified output path doesn't exist, the directory will be "
            "created for you.",
        )
    if not output_path.exists():
        output_path.mkdir()

    # Take dropout and batch size as generators of values -- dropout
    # starts high and decays sharply, to force the optimizer to explore.
    # Batch size starts at 1 and grows, so that we make updates quickly
    # at the beginning of training.
    dropout_rates = util.decaying(
        util.env_opt("dropout_from", 0.2),
        util.env_opt("dropout_to", 0.2),
        util.env_opt("dropout_decay", 0.0),
    )

From explosion/projects, textcat-docs-issues/scripts_spacy.py (view on GitHub):
    nlp = spacy.load(model)
    data, _ = format_data(srsly.read_jsonl(eval_path))
    sc = nlp.evaluate(data)
    result = [("F-Score", f"{sc.textcat_score:.3f}")]
    msg.table(result)


if __name__ == "__main__":
    opts = {"train": train_model, "evaluate": evaluate_model}
    cmd = sys.argv.pop(1)
    if cmd not in opts:
        msg.fail(f"Unknown command: {cmd}", f"Available: {', '.join(opts)}", exits=1)
    try:
        plac.call(opts[cmd])
    except KeyboardInterrupt:
        msg.warn("Stopped.", exits=1)

From explosion/sense2vec, sense2vec/prodigy_recipes.py (view on GitHub):
    def eval_dataset(set_id):
        DB = connect()
        data = DB.get_dataset(set_id)
        accepted = [eg for eg in data if eg["answer"] == "accept" and eg.get("accept")]
        rejected = [eg for eg in data if eg["answer"] == "reject"]
        ignored = [eg for eg in data if eg["answer"] == "ignore"]
        if not accepted and not rejected:
            msg.warn("No annotations collected", exits=1)
        counts = Counter()
        for eg in accepted:
            for model_id in eg["accept"]:
                counts[model_id] += 1
        preference, _ = counts.most_common(1)[0]
        ratio = f"{counts[preference]} / {sum(counts.values()) - counts[preference]}"
        msg.info(f"Evaluating data from '{set_id}'")
        msg.text(f"You rejected {len(rejected)} and ignored {len(ignored)} pair(s)")
        if counts["A"] == counts["B"]:
            msg.warn(f"No preference ({ratio})")
        else:
            pc = counts[preference] / sum(counts.values())
            msg.good(f"You preferred vectors {preference} with {ratio} ({pc:.0%})")
            msg.text(mapping[preference])
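The Counter-based preference tally above reduces to a few lines; here it is as a standalone sketch with made-up counts:

from collections import Counter

counts = Counter({"A": 14, "B": 6})
preference, n_pref = counts.most_common(1)[0]
total = sum(counts.values())
print(f"Preferred vectors {preference}: {n_pref} / {total - n_pref} ({n_pref / total:.0%})")
# Preferred vectors A: 14 / 6 (70%)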