How to use the medcat.utils.loggers.basic_logger function in medcat

To help you get started, we've selected a few medcat examples based on popular ways basic_logger is used in public projects.
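
Across the examples below the pattern is the same: call basic_logger once at module level with a short name, then use the returned object like a standard logging.Logger. A minimal sketch (the logger name and messages here are illustrative, not from the MedCAT source):

import logging
from medcat.utils.loggers import basic_logger

# Create a module-level logger; in the examples below the returned object
# is used like a standard logging.Logger (log.info, etc.)
log = basic_logger("my_module")

log.info("Pipeline initialised")

# Assuming a standard logging.Logger is returned, verbosity can be
# adjusted the usual way
log.setLevel(logging.DEBUG)
log.debug("Detailed diagnostic output")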


Example from CogStack/MedCAT: medcat/cat.py
import os
import json
import pandas
import spacy
from time import sleep
from functools import partial
from multiprocessing import Process, Manager, Queue, Pool, Array
from medcat.cdb import CDB
from medcat.spacy_cat import SpacyCat
from medcat.preprocessing.tokenizers import spacy_split_all
from medcat.utils.spelling import CustomSpellChecker
from medcat.utils.spacy_pipe import SpacyPipe
from medcat.preprocessing.cleaners import spacy_tag_punct
from medcat.utils.helpers import get_all_from_name, tkn_inds_from_doc
from medcat.utils.loggers import basic_logger

log = basic_logger("CAT")

# Check scispacy models
from medcat.utils.helpers import check_scispacy
check_scispacy()

class CAT(object):
    """ Annotate a dataset
    """
    SEPARATOR = ""
    NESTED_ENTITIES = os.getenv("NESTED_ENTITIES", 'false').lower() == 'true'
    KEEP_PUNCT = os.getenv("KEEP_PUNCT", ":|.").split("|")

    def __init__(self, cdb, vocab=None, skip_stopwords=True, meta_cats=[]):
        self.cdb = cdb
        self.vocab = vocab
        # Build the required spacy pipeline
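
Note that NESTED_ENTITIES and KEEP_PUNCT above are read from the environment when the class body executes, i.e. when medcat.cat is first imported. A sketch of overriding them (the values are illustrative):

import os

# Set these before medcat.cat is first imported, since the class
# attributes are evaluated at import time (illustrative values)
os.environ["NESTED_ENTITIES"] = "true"
os.environ["KEEP_PUNCT"] = ":|.|,"

from medcat.cat import CAT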
Example from CogStack/MedCAT: medcat/cat.py
from functools import partial
from multiprocessing import Process, Manager, Queue, Pool, Array
from medcat.cdb import CDB
from medcat.spacy_cat import SpacyCat
from medcat.preprocessing.tokenizers import spacy_split_all
from medcat.utils.spelling import CustomSpellChecker
from medcat.utils.spacy_pipe import SpacyPipe
from medcat.preprocessing.cleaners import spacy_tag_punct
from medcat.utils.helpers import get_all_from_name, tkn_inds_from_doc
from medcat.utils.loggers import basic_logger
from medcat.utils.data_utils import make_mc_train_test
import time
import sys, traceback
from tqdm.autonotebook import tqdm

log = basic_logger("CAT")

class CAT(object):
    r'''
    The main MedCAT class used to annotate documents. It is built on top of spaCy
    and works as a spaCy pipeline: it creates an instance of a spaCy pipeline that
    can be used as a spaCy nlp model.

    Args:
        cdb (medcat.cdb.CDB):
            The concept database that will be used for NER+L
        vocab (medcat.utils.vocab.Vocab, optional):
            Vocabulary used for vector embeddings and spelling. Default: None
        skip_stopwords (bool):
            If True, stopwords will be ignored and not detected in the pipeline.
            Default: True
        meta_cats (list of medcat.meta_cat.MetaCAT, optional):
Example from CogStack/MedCAT: medcat/cdb.py
""" Representation class for CDB data
"""
import pickle
import numpy as np
from scipy.sparse import dok_matrix
#from gensim.matutils import unitvec
from medcat.utils.matutils import unitvec, sigmoid
from medcat.utils.attr_dict import AttrDict
from medcat.utils.loggers import basic_logger
import os
import pandas as pd

log = basic_logger("cdb")


class CDB(object):
    """ Holds all the CDB data required for annotation
    """
    MAX_COO_DICT_SIZE = int(os.getenv('MAX_COO_DICT_SIZE', 10000000))
    MIN_COO_COUNT = int(os.getenv('MIN_COO_COUNT', 100))

    def __init__(self):
        self.index2cui = [] # A list containing all CUIs
        self.cui2index = {} # Map from a CUI to its index in the index2cui list
        self.name2cui = {} # Maps a normalized concept name to a CUI
        self.name2cnt = {} # Maps a normalized concept name to a count
        self.name_isunique = {} # Should this name be skipped
        self.name2original_name = {} # Maps a normalized name to its original form
        self.name2ntkns = {} # Number of tokens in this name
        self.name_isupper = {} # Was this name all upper case in the CDB
        self.cui2desc = {} # Map between a CUI and its CDB description
Example from CogStack/MedCAT: medcat/utils/ml_utils.py
from sklearn.model_selection import train_test_split
import numpy as np
from medcat.utils.models import LSTM as MODEL
from sklearn.metrics import classification_report, f1_score, confusion_matrix, precision_score, recall_score
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

from medcat.utils.loggers import basic_logger
log = basic_logger("utils")


def get_batch(ind, batch_size, x, y, cpos, device):
    # Get the start/end index for this batch
    start = ind * batch_size
    end = (ind+1) * batch_size

    # Get the batch
    x_batch = x[start:end]
    y_batch = y[start:end]
    c_batch = cpos[start:end]

    # Return and move the batches to the right device
    return x_batch.to(device), y_batch.to(device), c_batch.to(device)
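
As a quick usage sketch of get_batch, with hypothetical tensors (the shapes and values are illustrative):

import torch

# Hypothetical data: 10 examples, sequence length 5
x = torch.randn(10, 5)
y = torch.randint(0, 2, (10,))
cpos = torch.zeros(10, dtype=torch.long)

# ind=1 with batch_size=4 selects rows 4..7 and moves them to the device
x_b, y_b, c_b = get_batch(ind=1, batch_size=4, x=x, y=y,
                          cpos=cpos, device=torch.device("cpu"))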
Example from CogStack/MedCAT: medcat/utils/make_vocab.py
from medcat.utils.vocab import Vocab
import numpy as np
import pandas
from medcat.preprocessing.tokenizers import spacy_split_all
from medcat.preprocessing.cleaners import spacy_tag_punct, clean_name, clean_def
from medcat.utils.spacy_pipe import SpacyPipe
from functools import partial
from medcat.utils.spelling import CustomSpellChecker
from gensim.models import Word2Vec
from medcat.preprocessing.iterators import SimpleIter
from medcat.utils.loggers import basic_logger

log = basic_logger("CAT")

class MakeVocab(object):
    def __init__(self, cdb, vocab=None, word_tokenizer=None):
        self.cdb = cdb

        self.w2v = None
        if vocab is not None:
            self.vocab = vocab
        else:
            self.vocab = Vocab()

        # Build the required spacy pipeline
        self.nlp = SpacyPipe(spacy_split_all, disable=['ner', 'parser', 'vectors', 'textcat'])

        # Get the tokenizer
        if word_tokenizer is not None:
Example from CogStack/MedCAT: medcat/spacy_cat.py
from spacy.tokens import Span
import numpy as np
import operator
from medcat.utils.loggers import basic_logger
from medcat.utils.matutils import unitvec
from medcat.utils.ml_utils import load_hf_tokenizer, build_vocab_from_hf
from spacy.lang.en.stop_words import STOP_WORDS
import os
log = basic_logger("spacycat")

# If TYPE is 'umls', include rules that are only valid for the full UMLS version
if os.getenv('TYPE', 'other').lower() == 'umls':
    log.info("Using cat_ann for annotations")
    from medcat.cat_ann import CatAnn
else:
    log.info("Using basic_cat_ann for annotations")
    from medcat.basic_cat_ann import CatAnn



class SpacyCat(object):
    """ A Spacy pipe module, can be easily added into a spacey pipline

    cdb:  the CDB object (medcat.cdb.CDB) representing the concepts
    vocab:  the Vocab object (medcat.utils.vocab.Vocab) with vector representations