How to use the nemo.utils.exp_logging.get_logger function in NeMo

To help you get started, we've selected a few NeMo examples based on popular ways this function is used in public projects.

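All of the examples below follow the same pattern: import get_logger once at module scope, call it once, and reuse the returned logger everywhere in the module. A minimal sketch of that pattern (the message string is illustrative; the snippets below all pass an empty string as the logger name):

from nemo.utils.exp_logging import get_logger

logger = get_logger('')  # every example on this page requests the empty-name logger
logger.info('example message')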

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/utils/callbacks/ner.py
# Copyright (c) 2019 NVIDIA Corporation
__all__ = ['eval_iter_callback', 'eval_epochs_done_callback']

from nemo.utils.exp_logging import get_logger


logger = get_logger('')


def eval_iter_callback(tensors, global_vars, eval_data_layer, tag_ids):
    if "correct_tags" not in global_vars.keys():
        global_vars["correct_tags"] = 0
    if "token_count" not in global_vars.keys():
        global_vars["token_count"] = 0
    if "correct_chunks" not in global_vars.keys():
        global_vars["correct_chunks"] = 0
    if "predicted_chunks" not in global_vars.keys():
        global_vars["predicted_chunks"] = 0
    if "total_chunks" not in global_vars.keys():
        global_vars["total_chunks"] = 0
    if "lines" not in global_vars.keys():
        global_vars["lines"] = []

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/utils/callbacks/joint_intent_slot.py
__all__ = ['eval_iter_callback', 'eval_epochs_done_callback']

import os
import random
import time

import matplotlib
matplotlib.use("TkAgg")  # select the backend before pyplot is imported
from matplotlib import pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

from nemo.utils.exp_logging import get_logger


logger = get_logger('')


def tensor2list(tensor):
    return tensor.detach().cpu().tolist()
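
tensor2list detaches a tensor from the autograd graph, moves it to the CPU, and converts it to (possibly nested) Python lists. A quick usage example:

import torch

t = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
print(tensor2list(t))  # [[1.0, 2.0], [3.0, 4.0]]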


def eval_iter_callback(tensors,
                       global_vars,
                       eval_data_layer):
    if "all_intent_preds" not in global_vars.keys():
        global_vars["all_intent_preds"] = []
    if "all_intent_labels" not in global_vars.keys():
        global_vars["all_intent_labels"] = []
    if "all_slot_preds" not in global_vars.keys():
        global_vars["all_slot_preds"] = []
    if "all_slot_labels" not in global_vars.keys():

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/data/datasets/punctuation_capitalization.py
"""

import itertools
import os
import pickle
import random

import numpy as np
from torch.utils.data import Dataset

from nemo.utils.exp_logging import get_logger

from . import utils


logger = get_logger('')


def get_features(queries,
                 max_seq_length,
                 tokenizer,
                 punct_label_ids=None,
                 capit_label_ids=None,
                 pad_label='O',
                 punct_labels_lines=None,
                 capit_labels_lines=None,
                 ignore_extra_tokens=False,
                 ignore_start_end=False):
    """
    Args:
        queries (list of str): text sequences
        max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP]

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/utils/callbacks/glue.py
"""
Utility functions for GLUE tasks.
Parts of this code were adapted from the HuggingFace library at
https://github.com/huggingface/transformers
"""
__all__ = ['eval_iter_callback', 'eval_epochs_done_callback']

import os
import random

import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import matthews_corrcoef, f1_score

from nemo.utils.exp_logging import get_logger

logger = get_logger('')


def eval_iter_callback(tensors, global_vars):
    if "all_preds" not in global_vars.keys():
        global_vars["all_preds"] = []
    if "all_labels" not in global_vars.keys():
        global_vars["all_labels"] = []

    logits_lists = []
    preds_lists = []
    labels_lists = []

    for kv, v in tensors.items():
        # for GLUE classification tasks
        if 'logits' in kv:
            for v_tensor in v:

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/data/datasets/token_classification.py
"""

import itertools
import os
import pickle
import random

import numpy as np
from torch.utils.data import Dataset

from nemo.utils.exp_logging import get_logger

from . import utils


logger = get_logger('')


def get_features(queries,
                 max_seq_length,
                 tokenizer,
                 pad_label='O',
                 raw_labels=None,
                 unique_labels=None,
                 ignore_extra_tokens=False,
                 ignore_start_end=False):
    """
    Args:
        queries (list of str): text sequences
        max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP]
        tokenizer (Tokenizer): such as NemoBertTokenizer
        pad_label (str): pad value used for labels.

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/data/datasets/utils.py
import subprocess
import sys
from collections import Counter

import numpy as np
from sentencepiece import SentencePieceTrainer as SPT
from tqdm import tqdm

from nemo.utils.exp_logging import get_logger

from ...utils.nlp_utils import (get_vocab,
                                write_vocab,
                                write_vocab_in_order,
                                label2idx)


logger = get_logger('')
LOGGING_TMP = '{} dataset has already been processed and stored at {}'


def get_stats(lengths):
    lengths = np.asarray(lengths)
    logger.info(f'Min: {np.min(lengths)} | '
                f'Max: {np.max(lengths)} | '
                f'Mean: {np.mean(lengths)} | '
                f'Median: {np.median(lengths)}')
    logger.info(f'75 percentile: {np.percentile(lengths, 75)}')
    logger.info(f'99 percentile: {np.percentile(lengths, 99)}')
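
For example, calling get_stats on a small list of lengths logs output along these lines:

get_stats([3, 7, 7, 12, 41])
# Min: 3 | Max: 41 | Mean: 14.0 | Median: 7.0
# 75 percentile: 12.0
# 99 percentile: 39.84 (approximately)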


def get_label_stats(labels, outfile='stats.tsv'):
    labels = Counter(labels)
    total = sum(labels.values())

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/utils/callbacks/punctuation_capitalization.py
# Copyright (c) 2019 NVIDIA Corporation
__all__ = ['eval_iter_callback', 'eval_epochs_done_callback']

import random

import numpy as np
from sklearn.metrics import classification_report

from nemo_nlp.data.datasets.utils import list2str, tensor2list
from nemo_nlp.utils.nlp_utils import plot_confusion_matrix
from nemo.utils.exp_logging import get_logger


logger = get_logger('')


def eval_iter_callback(tensors, global_vars):
    if "punct_all_preds" not in global_vars.keys():
        global_vars["punct_all_preds"] = []
    if "punct_all_labels" not in global_vars.keys():
        global_vars["punct_all_labels"] = []
    if "capit_all_preds" not in global_vars.keys():
        global_vars["capit_all_preds"] = []
    if "capit_all_labels" not in global_vars.keys():
        global_vars["capit_all_labels"] = []
    if "all_subtokens_mask" not in global_vars.keys():
        global_vars["all_subtokens_mask"] = []

    all_subtokens_mask = []
    punct_all_logits, punct_all_labels = [], []

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/data/datasets/joint_intent_slot.py
"""
Some parts of this code were adapted from the HuggingFace library at
https://github.com/huggingface/pytorch-pretrained-BERT
"""

import itertools
import random

import numpy as np
from torch.utils.data import Dataset

from nemo.utils.exp_logging import get_logger

from . import utils


logger = get_logger('')


def get_features(queries,
                 max_seq_length,
                 tokenizer,
                 pad_label=128,
                 raw_slots=None,
                 ignore_extra_tokens=False,
                 ignore_start_end=False):
    all_subtokens = []
    all_loss_mask = []
    all_subtokens_mask = []
    all_segment_ids = []
    all_input_ids = []
    all_input_mask = []
    sent_lengths = []

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/data/datasets/punctuation.py
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import os
import pickle

import numpy as np
from torch.utils.data import Dataset

from nemo.utils.exp_logging import get_logger

logger = get_logger('')


class BertPunctuationDataset(Dataset):
    def __init__(self, input_file, max_seq_length, tokenizer):
        # Cache features and tag_ids
        data_dir = os.path.dirname(input_file)
        filename = os.path.basename(input_file)[:-4]  # strip the 4-char extension, e.g. ".txt"
        features_pkl = os.path.join(data_dir, filename + "_features.pkl")
        tag_ids_pkl = os.path.join(data_dir, filename + "_tag_ids.pkl")

        if os.path.exists(features_pkl) and os.path.exists(tag_ids_pkl):
            # If input_file was already processed, load from pickle files
            with open(features_pkl, 'rb') as f:
                self.features = pickle.load(f)
            with open(tag_ids_pkl, 'rb') as f:
                self.tag_ids = pickle.load(f)
            logger.info(f'features restored from {features_pkl}')
            logger.info(f'tag_ids restored from {tag_ids_pkl}')
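
The constructor above shows only the cache-hit branch. On a cache miss, the complementary path would compute the features and pickle them for the next run; the sketch below is a hedged illustration, with get_features and its return shape standing in for whatever feature-extraction helper the full file actually uses:

        else:
            # Cache miss: compute features once, then persist both pickles
            # so later runs take the fast path above. get_features and its
            # (features, tag_ids) return value are assumptions made for
            # illustration, not the actual NeMo implementation.
            self.features, self.tag_ids = get_features(
                input_file, max_seq_length, tokenizer)
            with open(features_pkl, 'wb') as f:
                pickle.dump(self.features, f)
            with open(tag_ids_pkl, 'wb') as f:
                pickle.dump(self.tag_ids, f)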

From NVIDIA/NeMo: collections/nemo_nlp/nemo_nlp/data/utils.py
import os
import pickle

import numpy as np

from nemo.utils.exp_logging import get_logger

logger = get_logger('')


def dataset_to_ids(dataset, tokenizer, cache_ids=False, add_bos_eos=True):
    """
    Reads dataset from file line by line, tokenizes each line with tokenizer,
    and returns list of lists which corresponds to ids of tokenized strings.

    Args:
        dataset: path to dataset
        tokenizer: tokenizer to convert text into ids
        cache_ids: if True, ids are saved to disk as pickle file
            with similar name (e.g., data.txt --> data.txt.pkl)
        add_bos_eos: bool, whether to add <s> and </s> symbols (e.g., for NMT)
    Returns:
        ids: list of ids which correspond to tokenized strings of the dataset
    """