How to use the nltk.data.find function in nltk

To help you get started, we’ve selected a few nltk.data.find examples, based on popular ways it is used in public projects.
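All of the examples below follow the same basic pattern: call nltk.data.find() to check whether a resource is already available on nltk.data.path, and fall back to nltk.download() when it raises LookupError. A minimal sketch of that pattern (the stopwords corpus is just an illustrative choice):

import nltk

# find() looks the resource up on nltk.data.path and raises LookupError if it is missing.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    # Fetch the package into the default NLTK data directory (typically ~/nltk_data).
    nltk.download('stopwords')

from nltk.corpus import stopwords
print(stopwords.words('english')[:5])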


github NVIDIA-AI-IOT / Electron / slackbot / slackbot / lib / python2.7 / site-packages / nltk / chunk / named_entity.py
def build_model(fmt='binary'):
    print('Loading training data...')
    train_paths = [find('corpora/ace_data/ace.dev'),
                   find('corpora/ace_data/ace.heldout'),
                   find('corpora/ace_data/bbn.dev'),
                   find('corpora/ace_data/muc.dev')]
    train_trees = load_ace_data(train_paths, fmt)
    train_data = [postag_tree(t) for t in train_trees]
    print('Training...')
    cp = NEChunkParser(train_data)
    del train_data

    print('Loading eval data...')
    eval_paths = [find('corpora/ace_data/ace.eval')]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]

    print('Evaluating...')
    chunkscore = ChunkScore()
    for i, correct in enumerate(eval_data):
        guess = cp.parse(correct.leaves())
github Hate-Speech-Detection-Project / hate_detector / __main__.py
def init_nltk():
    if not os.path.exists('nltk'):
        os.makedirs('nltk')
    nltk.data.path.append(os.getcwd() + '/nltk')
    dependencies = ['corpora/stopwords']
    for package in dependencies:
        try:
            nltk.data.find(package)
        except LookupError:
            nltk.download(package, os.getcwd() + '/nltk')
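
One detail worth noting in the snippet above: nltk.data.find() takes a resource path such as 'corpora/stopwords', while nltk.download() expects a bare package id such as 'stopwords', so the two names usually have to be tracked separately. A hedged sketch of the same idea, downloading into a project-local directory (the resource/package pairs are illustrative):

import os
import nltk

# Map each find() resource path to the download() package id that provides it.
DEPENDENCIES = {
    'corpora/stopwords': 'stopwords',
    'tokenizers/punkt': 'punkt',
}

download_dir = os.path.join(os.getcwd(), 'nltk')
os.makedirs(download_dir, exist_ok=True)
if download_dir not in nltk.data.path:
    nltk.data.path.append(download_dir)

for resource_path, package_id in DEPENDENCIES.items():
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package_id, download_dir=download_dir)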
github nltk / nltk / nltk / chunk / named_entity.py
def build_model(fmt='binary'):
    print('Loading training data...')
    train_paths = [
        find('corpora/ace_data/ace.dev'),
        find('corpora/ace_data/ace.heldout'),
        find('corpora/ace_data/bbn.dev'),
        find('corpora/ace_data/muc.dev'),
    ]
    train_trees = load_ace_data(train_paths, fmt)
    train_data = [postag_tree(t) for t in train_trees]
    print('Training...')
    cp = NEChunkParser(train_data)
    del train_data

    print('Loading eval data...')
    eval_paths = [find('corpora/ace_data/ace.eval')]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]

    print('Evaluating...')
github PaddlePaddle / Paddle / python / paddle / dataset / sentiment.py
    try:
        # Download movie_reviews.zip into the Paddle data directory.
        paddle.dataset.common.download(
            URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip')
        path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora')
        filename = os.path.join(path, 'movie_reviews.zip')
        zip_file = zipfile.ZipFile(filename)
        zip_file.extractall(path)
        zip_file.close()
        # make sure that nltk can find the data
        if paddle.dataset.common.DATA_HOME not in nltk.data.path:
            nltk.data.path.append(paddle.dataset.common.DATA_HOME)
        movie_reviews.categories()
    except LookupError:
        print("Downloading movie_reviews data set, please wait.....")
        nltk.download(
            'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME)
        print("Download data set success.....")
        print("Path is " + nltk.data.find('corpora/movie_reviews').path)
github SmartDataAnalytics / horus-ner / src / core / feature_extraction / features.py
        if load_topic_modeling == 1:
            self.logger.info(':: loading TM')
            self.text_tm = TopicModeling(self.config)
        else:
            self.text_tm = None
        self.logger.info(':: database connecting ...')
        self.conn = sqlite3.connect(self.config.database_db)

        if bool(int(self.config.models_force_download)) is True:
            self.logger.info(':: downloading NLTK data...')
            try:
                nltk.data.find('averaged_perceptron_tagger.zip')
            except LookupError:
                nltk.download('averaged_perceptron_tagger')
            try:
                nltk.data.find('punkt.zip')
            except LookupError:
                nltk.download('punkt')
            try:
                nltk.data.find('maxent_ne_chunker.zip')
            except LookupError:
                nltk.download('maxent_ne_chunker')
            try:
                nltk.data.find('universal_tagset.zip')
            except LookupError:
                nltk.download('universal_tagset')
            try:
                nltk.data.find('words.zip')
            except LookupError:
                nltk.download('words')
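
The lookups above pass bare zip names such as 'punkt.zip' to nltk.data.find(). Because the downloader installs packages under subdirectories (tokenizers/, taggers/, chunkers/, corpora/), a bare name like that will usually keep raising LookupError even after the data is installed, so nltk.download() runs again and typically just reports the package as up to date. The subdirectory-qualified form short-circuits properly once the data is present; a sketch (the resource/package pairs are illustrative):

import nltk

CHECKS = [
    ('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger'),
    ('tokenizers/punkt', 'punkt'),
    ('chunkers/maxent_ne_chunker', 'maxent_ne_chunker'),
    ('taggers/universal_tagset', 'universal_tagset'),
    ('corpora/words', 'words'),
]

for resource, package in CHECKS:
    try:
        nltk.data.find(resource)  # succeeds once the package is installed
    except LookupError:
        nltk.download(package)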
github nltk / nltk / nltk / corpus / util.py
# Find the corpus root directory.
        zip_name = re.sub(r'(([^/]+)(/.*)?)', r'\2.zip/\1/', self.__name)
        if TRY_ZIPFILE_FIRST:
            try:
                root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))
            except LookupError as e:
                try:
                    root = nltk.data.find('{}/{}'.format(self.subdir, self.__name))
                except LookupError:
                    raise e
        else:
            try:
                root = nltk.data.find('{}/{}'.format(self.subdir, self.__name))
            except LookupError as e:
                try:
                    root = nltk.data.find('{}/{}'.format(self.subdir, zip_name))
                except LookupError:
                    raise e

        # Load the corpus.
        corpus = self.__reader_cls(root, *self.__args, **self.__kwargs)

        # This is where the magic happens!  Transform ourselves into
        # the corpus by modifying our own __dict__ and __class__ to
        # match that of the corpus.

        args, kwargs = self.__args, self.__kwargs
        name, reader_cls = self.__name, self.__reader_cls

        self.__dict__ = corpus.__dict__
        self.__class__ = corpus.__class__
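
This LazyCorpusLoader snippet shows how NLTK itself resolves a corpus root: it builds a zip-style resource name such as 'stopwords.zip/stopwords/' and asks nltk.data.find() for either the zipped or the unpacked form, whichever exists. The same zip-path syntax can be used directly; a sketch, assuming the stopwords package has been downloaded so stopwords.zip sits under corpora/:

import nltk

# Route the lookup through the zip archive kept by the downloader.
root = nltk.data.find('corpora/stopwords.zip/stopwords/')
print(root)  # a ZipFilePathPointer into stopwords.zip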
github SmartDataAnalytics / horus-ner / scripts / download_models.py
import nltk
try:
    nltk.data.find('averaged_perceptron_tagger.zip')
except LookupError:
    nltk.download('averaged_perceptron_tagger')
try:
    nltk.data.find('punkt.zip')
except LookupError:
    nltk.download('punkt')
try:
    nltk.data.find('maxent_ne_chunker.zip')
except LookupError:
    nltk.download('maxent_ne_chunker')
try:
    nltk.data.find('universal_tagset.zip')
except LookupError:
    nltk.download('universal_tagset')
try:
    nltk.data.find('words.zip')
except LookupError:
    nltk.download('words')
try:
    nltk.data.find('stopwords.zip')
except LookupError:
    nltk.download('stopwords')
github sloria / TextBlob / textchunk / named_entity.py
def build_model(fmt='binary'):
    print('Loading training data...')
    train_paths = [find('corpora/ace_data/ace.dev'),
                   find('corpora/ace_data/ace.heldout'),
                   find('corpora/ace_data/bbn.dev'),
                   find('corpora/ace_data/muc.dev')]
    train_trees = load_ace_data(train_paths, fmt)
    train_data = [postag_tree(t) for t in train_trees]
    print('Training...')
    cp = NEChunkParser(train_data)
    del train_data

    print('Loading eval data...')
    eval_paths = [find('corpora/ace_data/ace.eval')]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]

    print('Evaluating...')
    chunkscore = ChunkScore()
    for i, correct in enumerate(eval_data):
        guess = cp.parse(correct.leaves())
        chunkscore.score(correct, guess)
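
The build_model() variants above use find() only to resolve paths to files inside the ACE corpus before loading them; the pointer that comes back can also be opened directly as a stream. The ACE files referenced above generally have to be obtained separately, so the sketch below substitutes a file from the stopwords package, purely for illustration:

import nltk

try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# find() can point at a single file; .open() returns a readable stream.
pointer = nltk.data.find('corpora/stopwords/english')
stream = pointer.open(encoding='utf-8')
print(stream.read().split()[:5])
stream.close()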