How to use the wordfreq.available_languages function in wordfreq

To help you get started, we’ve selected a few wordfreq examples that show popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github LuminosoInsight / wordfreq / tests / test_general.py View on Github external
def test_languages():
    """Check the language coverage reported for each named wordlist."""
    # With no argument, the default 'best' wordlist is consulted; it should
    # report every supported language.
    best_langs = available_languages()
    assert len(best_langs) >= 34

    # 'small' reports the same number of languages, but the result is not
    # identical to the one for 'best'.
    small_langs = available_languages('small')
    assert len(small_langs) == len(best_langs)
    assert small_langs != best_langs

    # 'combined' must give the same answer as 'small'.
    legacy_langs = available_languages('combined')
    assert legacy_langs == small_langs

    # 'large' is available for fewer languages than 'best'.
    large_langs = available_languages('large')
    assert len(large_langs) >= 14
    assert len(best_langs) > len(large_langs)

    # The digit '2' should have a nonzero frequency in every language.
    for code in best_langs:
        freq = word_frequency('2', code)
        assert freq > 0
github LuminosoInsight / wordfreq / tests / test_general.py View on Github external
# NOTE(review): this excerpt starts inside a function body; the enclosing
# `def` line is not part of the visible snippet.
# Make sure we get all the languages when looking for the default
    # 'best' wordlist
    avail = available_languages()
    assert len(avail) >= 34

    # 'small' covers the same languages, but with some different lists
    avail_small = available_languages('small')
    assert len(avail_small) == len(avail)
    assert avail_small != avail

    # 'combined' is the same as 'small'
    avail_old_name = available_languages('combined')
    assert avail_old_name == avail_small

    # 'large' covers fewer languages
    avail_large = available_languages('large')
    assert len(avail_large) >= 14
    assert len(avail) > len(avail_large)

    # Look up the digit '2' in the main word list for each language
    for lang in avail:
        assert word_frequency('2', lang) > 0

        # Make up a weirdly verbose language code and make sure
        # we still get it: the code is upper-cased and given extra
        # '-001-x-fake-extension' subtags before being looked up again.
        new_lang_code = '%s-001-x-fake-extension' % lang.upper()
        assert word_frequency('2', new_lang_code) > 0
github LuminosoInsight / wordfreq / tests / test_general.py View on Github external
def test_languages():
    """Verify wordlist language coverage and tolerant language-code lookup."""
    # The default lookup uses the 'best' wordlist and should report all
    # of the languages.
    default_list = available_languages()
    assert len(default_list) >= 34

    # The 'small' list has the same number of languages, yet is not equal
    # to the default result.
    small_list = available_languages('small')
    assert len(small_list) == len(default_list)
    assert small_list != default_list

    # Asking for 'combined' must be equivalent to asking for 'small'.
    combined_list = available_languages('combined')
    assert combined_list == small_list

    # The 'large' list covers fewer languages than the default one.
    large_list = available_languages('large')
    assert len(large_list) >= 14
    assert len(default_list) > len(large_list)

    for lang_code in default_list:
        # The digit '2' must have a positive frequency in each language.
        assert word_frequency('2', lang_code) > 0

        # An upper-cased code padded with extra subtags must still yield
        # a positive frequency.
        verbose_code = '{}-001-x-fake-extension'.format(lang_code.upper())
        assert word_frequency('2', verbose_code) > 0
github LuminosoInsight / wordfreq / tests / test_general.py View on Github external
def test_languages():
    """Compare the language sets reported for the named wordlists."""
    # No argument means the default 'best' wordlist, which should report
    # all of the languages.
    langs_best = available_languages()
    assert 34 <= len(langs_best)

    # 'small' reports equally many languages, but not an equal result.
    langs_small = available_languages('small')
    assert len(langs_best) == len(langs_small)
    assert langs_best != langs_small

    # 'combined' and 'small' must be interchangeable.
    langs_combined = available_languages('combined')
    assert langs_small == langs_combined

    # 'large' is offered for strictly fewer languages than 'best'.
    langs_large = available_languages('large')
    assert 14 <= len(langs_large)
    assert len(langs_large) < len(langs_best)
github LuminosoInsight / wordfreq / scripts / top_n.py View on Github external
"""
A quick script to output the top N words (1000 for now) in each language.
You can send the output to a file and diff it to see changes between wordfreq
versions.
"""
import wordfreq


# Number of top-ranked words to print for each language.
N = 1000

if __name__ == '__main__':
    # Iterate the language codes in sorted order so the output order is
    # deterministic and can be diffed between runs.
    for lang in sorted(wordfreq.available_languages()):
        # Use N instead of repeating the literal, so changing the constant
        # above actually changes how many words are printed.
        for word in wordfreq.top_n_list(lang, N):
            # One tab-separated "language<TAB>word" record per line.
            print('{}\t{}'.format(lang, word))
github oborchers / Fast_Sentence_Embeddings / fse / models / base_s2v.py View on Github external
def _check_language_settings(self, lang_freq:str):
        """ Validate that a language is supported by the wordfreq package

        On success the language is stored on the instance as ``lang_freq``
        and an informational message is logged.

        Parameters
        ----------
        lang_freq : str
            The language used to induce the frequencies into the wv.vocab object.

        Raises
        ------
        ValueError
            If ``lang_freq`` is not among wordfreq's 'best' wordlist languages.

        """
        # Guard clause: reject unsupported languages before touching any state.
        if lang_freq not in available_languages(wordlist='best'):
            raise ValueError(f"Language {lang_freq} is not available in wordfreq")

        self.lang_freq = str(lang_freq)
        logger.info("no frequency mode: using wordfreq for estimation "
                    f"of frequency for language: {self.lang_freq}")