How to use the wasabi.Printer class in wasabi

To help you get started, we’ve selected a few wasabi.Printer examples based on popular ways it is used in public projects.

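All of the examples below follow the same basic pattern: construct a wasabi.Printer and call its methods to print styled status messages. A minimal sketch of that pattern (assuming wasabi is installed; the message strings and metric values are placeholders):

from wasabi import Printer

msg = Printer()
msg.info("Starting training")    # plain informational message
msg.good("Model saved")          # green success message
msg.warn("Config file missing")  # yellow warning
msg.fail("Download failed")      # red error message
msg.divider("Results")           # section divider with a title
msg.table([("accuracy", 0.91), ("f1", 0.88)], header=("metric", "score"), divider=True)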

github abhinavkashyap / sciwing / sciwing / utils / common.py View on Github
# imports needed by this excerpt
import zipfile
from sys import stdout

from wasabi import Printer


def extract_zip(filename: str, destination_dir: str):
    """ Extracts a zipped file

    Parameters
    ----------
    filename : str
        The zipped filename
    destination_dir : str
        The directory where the unzipped contents will be placed

    """
    msg_printer = Printer()
    try:
        with msg_printer.loading(f"Unzipping file {filename} to {destination_dir}"):
            stdout.flush()
            with zipfile.ZipFile(filename, "r") as z:
                z.extractall(destination_dir)

        msg_printer.good(f"Finished extraction {filename} to {destination_dir}")
    except zipfile.BadZipFile:
        msg_printer.fail(f"Couldnot extract {filename} to {destination_dir}")

github abhinavkashyap / sciwing / sciwing / metrics / token_cls_accuracy.py View on Github
def __init__(
        self,
        datasets_manager: DatasetsManager = None,
        predicted_tags_namespace_prefix="predicted_tags",
    ):
        super(TokenClassificationAccuracy, self).__init__(
            datasets_manager=datasets_manager
        )
        self.datasets_manager = datasets_manager
        self.label_namespaces = datasets_manager.label_namespaces
        self.predicted_tags_namespace_prefix = predicted_tags_namespace_prefix
        self.msg_printer = wasabi.Printer()
        self.classification_metrics_utils = ClassificationMetricsUtils()

        # a mapping between namespace and tp_counters for every class
        self.tp_counter: Dict[str, Dict[str, Any]] = defaultdict(dict)
        self.fp_counter: Dict[str, Dict[str, Any]] = defaultdict(dict)
        self.fn_counter: Dict[str, Dict[str, Any]] = defaultdict(dict)
        self.tn_counter: Dict[str, Dict[str, Any]] = defaultdict(dict)

github abhinavkashyap / sciwing / sciwing / infer / seq_label_inference / seq_label_inference.py View on Github
model: nn.Module,
        model_filepath: str,
        datasets_manager: DatasetsManager,
        device: Optional[Union[str, torch.device]] = torch.device("cpu"),
        predicted_tags_namespace_prefix: str = "predicted_tags",
    ):
        super(SequenceLabellingInference, self).__init__(
            model=model,
            model_filepath=model_filepath,
            datasets_manager=datasets_manager,
            device=device,
        )

        self.predicted_tags_namespace_prefix = predicted_tags_namespace_prefix
        self.labels_namespaces = self.datasets_manager.label_namespaces
        self.msg_printer = wasabi.Printer()
        self.metrics_calculator = TokenClassificationAccuracy(
            datasets_manager=datasets_manager
        )

        # The key is the namespace of different labels
        # The value is a dictionary of label->idx
        self.label2idx_mapping: Dict[str, Dict[str, Any]] = {}
        self.idx2label_mapping: Dict[str, Dict[str, Any]] = {}
        for namespace in self.labels_namespaces:
            self.label2idx_mapping[
                namespace
            ] = self.datasets_manager.get_label_idx_mapping(label_namespace=namespace)
            self.idx2label_mapping[
                namespace
            ] = self.datasets_manager.get_idx_label_mapping(label_namespace=namespace)

github abhinavkashyap / sciwing / sciwing / vocab / char_emb_loader.py View on Github
def __init__(
        self,
        token2idx: Dict,
        embedding_type: Union[str, None] = None,
        embedding_dimension: Union[str, None] = None,
    ):
        self.token2idx = token2idx
        self.embedding_type = embedding_type
        self.embedding_dimension = embedding_dimension
        self.msg_printer = wasabi.Printer()
        self.vocab_embedding = self.load_embedding()

github abhinavkashyap / sciwing / sciwing / metrics / conll_2003_metrics.py View on Github
def __init__(
        self,
        datasets_manager: DatasetsManager,
        predicted_tags_namespace_prefix="predicted_tags",
        words_namespace: str = "tokens",
    ):
        super(ConLL2003Metrics, self).__init__(datasets_manager=datasets_manager)
        self.datasets_manager = datasets_manager
        self.label_namespaces = datasets_manager.label_namespaces
        self.words_namespace = words_namespace
        self.namespace_to_vocab = self.datasets_manager.namespace_to_vocab
        self.predicted_tags_namespace_prefix = predicted_tags_namespace_prefix
        self.msg_printer = wasabi.Printer()
        self.acc_counter: Dict[str, List[float]] = defaultdict(list)
        self.precision_counter: Dict[str, List[float]] = defaultdict(list)
        self.recall_counter: Dict[str, List[float]] = defaultdict(list)
        self.fmeasure_counter: Dict[str, List[float]] = defaultdict(list)

github abhinavkashyap / sciwing / sciwing / datasets / classification / generic_sect_dataset.py View on Github
"""
        self.classname2idx = self.get_classname2idx()
        self.idx2classname = {
            idx: classname for classname, idx in self.classname2idx.items()
        }
        self.filename = filename
        self.train_size = train_size
        self.test_size = test_size
        self.validation_size = validation_size
        self.dataset_type = dataset_type
        self.debug = debug
        self.debug_dataset_proportion = debug_dataset_proportion
        self.max_instance_length = max_instance_length
        self.lines, self.labels = self.get_lines_labels(filename=self.filename)

        self.msg_printer = wasabi.Printer()

github abhinavkashyap / sciwing / examples / conll_2003 / conll_seq_crf_elmo.py View on Github
)
    parser.add_argument(
        "--sample_proportion", help="Sample proportion of the dataset", type=float
    )

    parser.add_argument(
        "--num_layers", help="Number of layers in rnn2seq encoder", type=int
    )

    parser.add_argument(
        "--add_projection_layer",
        help="Add projection layer in rnn2seq encoder",
        action="store_true",
    )
    args = parser.parse_args()
    msg_printer = wasabi.Printer()

    data_dir = pathlib.Path(DATA_DIR)
    train_filename = data_dir.joinpath("conll_bioul.train")
    dev_filename = data_dir.joinpath("conll_bioul.dev")
    test_filename = data_dir.joinpath("conll_bioul.test")

    instance_preprocessing = InstancePreprocessing()

    data_manager = CoNLLDatasetManager(
        train_filename=train_filename,
        dev_filename=dev_filename,
        test_filename=test_filename,
        column_names=["POS", "DEP", "NER"],
        train_only="ner",
        namespace_vocab_options={
            "tokens": {"preprocessing_pipeline": [instance_preprocessing.lowercase]}

github abhinavkashyap / sciwing / sciwing / utils / science_ie_data_utils.py View on Github
utilities. For more information on the dataset you can refer to https://scienceie.github.io/

        Parameters
        ----------
        folderpath : pathlib.Path
            The path where the ScienceIEDataset is stored
        ignore_warnings : bool
            If True, all the warnings generated by this class for inconsistencies in the
            data are ignored

        """
        self.folderpath = folderpath
        self.ignore_warning = ignore_warnings
        self.entity_types = ["Process", "Material", "Task"]
        self.file_ids = self.get_file_ids()
        self.msg_printer = wasabi.Printer()
        self.nlp = spacy.load("en_core_web_sm")
        self._conll_col_sep = " "

github explosion / spaCy / spacy / analysis.py View on Github
def print_summary(nlp, pretty=True, no_print=False):
    """Print a formatted summary for the current nlp object's pipeline. Shows
    a table with the pipeline components and what they assign and require, as
    well as any problems if available.

    nlp (Language): The nlp object.
    pretty (bool): Pretty-print the results (color etc).
    no_print (bool): Don't print anything, just return the data.
    RETURNS (dict): A dict with "overview" and "problems".
    """
    msg = Printer(pretty=pretty, no_print=no_print)
    overview = []
    problems = {}
    for i, (name, pipe) in enumerate(nlp.pipeline):
        requires = getattr(pipe, "requires", [])
        assigns = getattr(pipe, "assigns", [])
        retok = getattr(pipe, "retokenizes", False)
        overview.append((i, name, requires, assigns, retok))
        problems[name] = analyze_pipes(nlp.pipeline, name, pipe, i, warn=False)
    msg.divider("Pipeline Overview")
    header = ("#", "Component", "Requires", "Assigns", "Retokenizes")
    msg.table(overview, header=header, divider=True, multiline=True)
    n_problems = sum(len(p) for p in problems.values())
    if any(p for p in problems.values()):
        msg.divider("Problems ({})".format(n_problems))
        for name, problem in problems.items():
            if problem:

github explosion / spaCy / spacy / cli / converters / conll_ner2json.py View on Github
Sample format:

    -DOCSTART- -X- O O

    I O
    like O
    London B-GPE
    and O
    New B-GPE
    York I-GPE
    City I-GPE
    . O

    """
    msg = Printer(no_print=no_print)
    doc_delimiter = "-DOCSTART- -X- O O"
    # check for existing delimiters, which should be preserved
    if "\n\n" in input_data and seg_sents:
        msg.warn(
            "Sentence boundaries found, automatic sentence segmentation with "
            "`-s` disabled."
        )
        seg_sents = False
    if doc_delimiter in input_data and n_sents:
        msg.warn(
            "Document delimiters found, automatic document segmentation with "
            "`-n` disabled."
        )
        n_sents = 0
    # do document segmentation with existing sentences
    if "\n\n" in input_data and doc_delimiter not in input_data and n_sents: