How to use the cltk.utils.cltk_logger.logger function in cltk

To help you get started, we’ve selected a few cltk examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github cltk / cltk / cltk / tag / lapos.py View on Github external
def tag_sentence(self, sentence):
        """Tag using Lapos model.

        TODO: Figure out how to pre-load model (loading is really slow). Or force users to bulk-convert files or strings.
        """
        fp_lapos = os.path.expanduser('~/cltk_data/multilingual/software/lapos')
        fp_model = os.path.expanduser('~/cltk_data/{0}/model/{1}_models_cltk/taggers/pos'.format(self.language, self.language))  # rel from Lapos dir
        try:
            lapos_command = 'cd {0} && echo "{1}" | ./lapos -t -m {2}'.format(fp_lapos, sentence, fp_model)
            p_out = subprocess.check_output(lapos_command,
                                            shell=True,
                                            stderr=subprocess.STDOUT,
                                            universal_newlines=True)
        except subprocess.CalledProcessError as cp_err:
            logger.error('Lapos call failed. Check installation.')
            logger.error(sentence)
            print(cp_err)
            raise

        # Parse output from Lapos
        # TODO: Make this cleaner/faster
        output_list = p_out.split('\n')
        output_list_filtered = [l for l in output_list if not l.startswith('loading the models')]
        output_list_filtered = [l for l in output_list_filtered if not l == 'done']
        output_list_filtered = [l for l in output_list_filtered if l]

        for line in output_list_filtered:
            word_tags = line.split(' ')
            tagged_sentence = []
            for word_tag in word_tags:
                word, tag = word_tag.split('/')
                word_tag_tuple = (word, tag)
github cltk / cltk / cltk / semantics / latin / lookup.py View on Github external
def load_replacement_patterns(self):
        """Load and return the ``DICTIONARY`` object of the chosen dictionary.

        The file ``<self.dictionary>.py`` is looked up under
        ``<cltk data dir>/<language>/model/<language>_models_cltk/semantics``
        and executed as a standalone module.

        :return: The ``DICTIONARY`` attribute defined by the executed file.
        """
        module_file = self.dictionary + '.py'
        models_dir = self.language + '_models_cltk'
        semantics_path = os.path.expanduser(
            os.path.join(get_cltk_data_dir(),
                         self.language,
                         'model',
                         models_dir,
                         'semantics',
                         module_file))
        logger.info('Loading lemmata or synonyms. This may take a minute.')
        # Run the file by path into a fresh module object instead of going
        # through the regular import system.
        source_loader = importlib.machinery.SourceFileLoader(module_file,
                                                             semantics_path)
        dictionary_module = types.ModuleType(source_loader.name)
        source_loader.exec_module(dictionary_module)
        return dictionary_module.DICTIONARY
github cltk / cltk / cltk / corpus / greek / tlgu.py View on Github external
full_args = ['v', 'w', 'x', 'y', 'z']
            [tlgu_options.append(x) for x in full_args]  # pylint: disable=W0106
        if break_lines:
            tlgu_options.append('N')
        if divide_works:
            tlgu_options.append('W')
        if latin:
            tlgu_options.append('r')
        # setup extra args
        if extra_args is None:
            extra_args = []
        else:
            try:
                extra_args = list(extra_args)
            except Exception as exc:
                logger.error("Argument 'extra_args' must be a list: %s.", exc)
                raise
        tlgu_options = tlgu_options + extra_args
        # assemble all tlgu flags
        tlgu_options = list(set(tlgu_options))
        if tlgu_options:
            tlgu_flags = '-' + ' -'.join(tlgu_options)
        else:
            tlgu_flags = ''
        # make tlgu call
        tlgu_call = 'tlgu {0} {1} {2}'.format(tlgu_flags,
                                              input_path,
                                              output_path)
        logger.info(tlgu_call)
        try:
            p_out = subprocess.call(tlgu_call, shell=True)
            if p_out == 1:
github cltk / cltk / cltk / phonology / greek / transcription.py View on Github external
"""

from cltk.utils.cltk_logger import logger

from nltk.tokenize import wordpunct_tokenize

import re
import unicodedata

try:
    # James Tauber's greek_accentuation package
    from greek_accentuation import characters as chars
except ImportError as import_error:
    message = 'Missing "greek_accentuation" package. Install with ' \
              '`pip install greek-accentuation`.'
    logger.error(message)
    logger.error(import_error)
    raise

__author__ = ['Jack Duff ']
__license__ = 'MIT License. See LICENSE.'


# Dictionaries of phonological reconstructions for use in transcribing.
# Probert, Philomen. 2010. Phonology, in E. Bakker, A Companion to the \
# Ancient Greek Language.
# (Entries which are commented out are realized through diacritic analysis.)

GREEK = {
    'Attic': {
        'Probert': {
            'correspondence': {
github cltk / cltk / cltk / phonology / greek / transcription.py View on Github external
from cltk.utils.cltk_logger import logger

from nltk.tokenize import wordpunct_tokenize

import re
import unicodedata

try:
    # James Tauber's greek_accentuation package
    from greek_accentuation import characters as chars
except ImportError as import_error:
    message = 'Missing "greek_accentuation" package. Install with ' \
              '`pip install greek-accentuation`.'
    logger.error(message)
    logger.error(import_error)
    raise

__author__ = ['Jack Duff ']
__license__ = 'MIT License. See LICENSE.'


# Dictionaries of phonological reconstructions for use in transcribing.
# Probert, Philomen. 2010. Phonology, in E. Bakker, A Companion to the \
# Ancient Greek Language.
# (Entries which are commented out are realized through diacritic analysis.)

GREEK = {
    'Attic': {
        'Probert': {
            'correspondence': {
                'α': 'ɑ',
github cltk / cltk / cltk / corpus / utils / importer.py View on Github external
def list_corpora(self):
        """Show corpora available for the CLTK to download.

        :return: The ``name`` of every corpus listed for ``self.language``
            in ``LANGUAGE_CORPORA``.
        :rtype: list
        :raises KeyError: If ``LANGUAGE_CORPORA`` has no entry for
            ``self.language``.
        :raises NameError: If ``LANGUAGE_CORPORA`` is not defined.
        """
        try:
            corpora = LANGUAGE_CORPORA[self.language]
        except (KeyError, NameError) as lookup_error:
            # Pass one logging argument per ``%s`` placeholder (a single
            # tuple would break message formatting), then re-raise: carrying
            # on would only fail further down with ``corpora`` unbound.
            logger.error('Corpus not available for language %s: %s',
                         self.language, lookup_error)
            raise

        return [corpus['name'] for corpus in corpora]
github cltk / cltk / cltk / phonology / utils.py View on Github external
def __init__(self, place=None, manner=None, voiced=None, ipar=None, geminate=None):
        """Validate the given features and store them on the instance.

        :param place: A ``Place`` instance, or ``None``.
        :param manner: A ``Manner`` instance, or ``None``.
        :param voiced: A ``bool``, or ``None``.
        :param ipar: Stored as given, without validation.
        :param geminate: A ``bool``, or ``None``.
        :raises ValueError: If ``place`` is not a ``Place`` or ``manner`` is
            not a ``Manner`` (and not ``None``).
        :raises TypeError: If ``voiced`` or ``geminate`` is neither a
            ``bool`` nor ``None``.
        """
        if not (place is None or isinstance(place, Place)):
            logger.error("Incorrect argument")
            # Reject the value like the other parameters do; only logging
            # here would leave the instance without a ``place`` attribute.
            raise ValueError("place must be a Place instance or None")
        self.place = place

        if not (manner is None or isinstance(manner, Manner)):
            logger.error("Incorrect argument")
            raise ValueError("manner must be a Manner instance or None")
        self.manner = manner

        if not (voiced is None or isinstance(voiced, bool)):
            logger.error("Incorrect argument")
            raise TypeError("voiced must be a bool or None")
        self.voiced = voiced

        if not (geminate is None or isinstance(geminate, bool)):
            logger.error("Incorrect argument")
            raise TypeError("geminate must be a bool or None")
        self.geminate = geminate

        self.ipar = ipar
github cltk / cltk / cltk / corpus / utils / importer.py View on Github external
def _copy_dir_recursive(src_rel, dst_rel):
        """Copy the contents of ``src_rel`` into a new location ``dst_rel``.

        Both paths are user-expanded (``~``) first. The copy is attempted as
        a directory tree; when that fails with ``ENOTDIR`` the source is
        copied with ``shutil.copy`` instead. Any other ``OSError`` is
        re-raised. Source: http://stackoverflow.com/a/1994840
        TODO: Move this to file_operations.py module.
        :type src_rel: str
        :param src_rel: Directory to be copied.
        :type dst_rel: str
        :param dst_rel: Directory to be created with contents of ``src_rel``;
            it must not exist yet.
        """
        source_path = os.path.expanduser(src_rel)
        target_path = os.path.expanduser(dst_rel)
        try:
            shutil.copytree(source_path, target_path)
            logger.info('Files copied from %s to %s', source_path, target_path)
        except OSError as copy_error:
            if copy_error.errno != errno.ENOTDIR:
                raise
            # ENOTDIR: presumably the source is a single file, so fall back
            # to a plain file copy.
            shutil.copy(source_path, target_path)
            logger.info('Files copied from %s to %s', source_path, target_path)
github cltk / cltk / cltk / corpus / utils / importer.py View on Github external
TODO: write check or try if `cltk_data` dir is not present
        """
        if self.testing:
            distributed_corpora_fp = os.path.normpath(get_cltk_data_dir() + '/test_distributed_corpora.yaml')
        else:
            distributed_corpora_fp = os.path.normpath(get_cltk_data_dir() + '/distributed_corpora.yaml')

        try:
            with open(distributed_corpora_fp) as file_open:
                corpora_dict = yaml.safe_load(file_open)
        except FileNotFoundError:
            logger.info('`~/cltk_data/distributed_corpora.yaml` file not found.')
            return []
        except yaml.parser.ParserError as parse_err:
            logger.debug('Yaml parsing error: %s' % parse_err)
            return []

        user_defined_corpora = []
        for corpus_name in corpora_dict:
            about = corpora_dict[corpus_name]

            if about['language'].lower() == self.language:
                user_defined_corpus = dict()
                # user_defined_corpus['git_remote'] = about['git_remote']
                user_defined_corpus['origin'] = about['origin']
                user_defined_corpus['type'] = about['type']
                user_defined_corpus['name'] = corpus_name
                user_defined_corpora.append(user_defined_corpus)

        return user_defined_corpora
github cltk / cltk / cltk / phonology / utils.py View on Github external
def __init__(self, place=None, manner=None, voiced=None, ipar=None, geminate=None):
        """Validate the given features and store them on the instance.

        :param place: A ``Place`` instance, or ``None``.
        :param manner: A ``Manner`` instance, or ``None``.
        :param voiced: A ``bool``, or ``None``.
        :param ipar: Stored as given, without validation.
        :param geminate: A ``bool``, or ``None``.
        :raises ValueError: If ``place`` is not a ``Place`` or ``manner`` is
            not a ``Manner`` (and not ``None``).
        :raises TypeError: If ``voiced`` or ``geminate`` is neither a
            ``bool`` nor ``None``.
        """
        if not (place is None or isinstance(place, Place)):
            logger.error("Incorrect argument")
            # Reject the value like the other parameters do; only logging
            # here would leave the instance without a ``place`` attribute.
            raise ValueError("place must be a Place instance or None")
        self.place = place

        if not (manner is None or isinstance(manner, Manner)):
            logger.error("Incorrect argument")
            raise ValueError("manner must be a Manner instance or None")
        self.manner = manner

        if not (voiced is None or isinstance(voiced, bool)):
            logger.error("Incorrect argument")
            raise TypeError("voiced must be a bool or None")
        self.voiced = voiced

        if not (geminate is None or isinstance(geminate, bool)):
            logger.error("Incorrect argument")
            raise TypeError("geminate must be a bool or None")
        self.geminate = geminate

        self.ipar = ipar