How to use the wordfreq.wordfreq function in wordfreq

To help you get started, we’ve selected a few wordfreq examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ipython / ipyparallel / examples / daVinci Word Count / pwordfreq.py View on Github external
def pwordfreq(view, fnames):
    """Count word frequencies over several files using parallel engines.

    view - An IPython DirectView
    fnames - The filenames containing the split data.

    Returns a dict mapping every word seen in any per-engine result to the
    sum of its counts across all results.
    """
    assert len(fnames) == len(view.targets)
    # Distribute the filenames under the remote name 'fname', then run
    # wordfreq on each engine against its own 'fname'.
    view.scatter('fname', fnames, flatten=True)
    async_result = view.apply(wordfreq, ipp.Reference('fname'))
    per_engine = async_result.get()

    # Gather the full vocabulary first so every word starts at zero.
    vocabulary = set()
    for partial in per_engine:
        vocabulary.update(partial.keys())
    freqs = dict.fromkeys(vocabulary, 0)

    # Fold each engine's counts into the combined totals.
    for partial in per_engine:
        for token, occurrences in partial.items():
            freqs[token] += occurrences
    return freqs
github ipython / ipython / docs / examples / kernel / pwordfreq.py View on Github external
for word, count in f.iteritems():
            freqs[word] += count
    return freqs

if __name__ == '__main__':
    # Create a MultiEngineClient
    from IPython.kernel import client
    ipc = client.MultiEngineClient()
    
    # Run the wordfreq script on the engines.
    ipc.run('wordfreq.py')

    # Run the serial version
    print "Serial word frequency count:"
    text = open('davinci.txt').read()
    freqs = wordfreq(text)
    print_wordfreq(freqs, 10)
    
    # The parallel version
    print "\nParallel word frequency count:"
    files = ['davinci%i.txt' % i for i in range(4)]
    ipc.scatter('textfile', files)
    ipc.execute('text = open(textfile[0]).read()')
    pfreqs = pwordfreq(ipc,'text')
    print_wordfreq(freqs)
github ipython / ipython / docs / examples / kernel / pwordfreq_skel.py View on Github external
rc - An IPython RemoteController
    text - The name of a string on the engines to do the freq count on.
    """

if __name__ == '__main__':
    # Demo driver: run the word count once serially and once through the
    # engines, printing the most frequent words for each run.

    # Create a MultiEngineClient
    from IPython.kernel import client
    ipc = client.MultiEngineClient()
    
    # Run the wordfreq script on the engines.
    ipc.run('wordfreq.py')

    # Run the serial version
    print "Serial word frequency count:"
    text = open('davinci.txt').read()
    freqs = wordfreq(text)
    print_wordfreq(freqs, 10)
    
    # The parallel version
    print "\nParallel word frequency count:"
    # Scatter the four chunk filenames, then have each engine load the first
    # filename it received into a remote variable named 'text'.
    files = ['davinci%i.txt' % i for i in range(4)]
    ipc.scatter('textfile', files)
    ipc.execute('text = open(textfile[0]).read()')
    pfreqs = pwordfreq(ipc,'text')
    # NOTE(review): this prints the serial `freqs` again; `pfreqs` is never
    # used. Presumably `pfreqs` was intended once pwordfreq is implemented --
    # confirm before changing.
    print_wordfreq(freqs)
github ipython / ipyparallel / examples / daVinci Word Count / pwordfreq.py View on Github external
view = rc[:]
    view.apply_sync(os.chdir, os.getcwd())

    if not os.path.exists('davinci.txt'):
        # download from project gutenberg
        print("Downloading Da Vinci's notebooks from Project Gutenberg")
        r = requests.get(davinci_url)
        with io.open('davinci.txt', 'w', encoding='utf8') as f:
            f.write(r.text)
        
    # Run the serial version
    print("Serial word frequency count:")
    text = io.open('davinci.txt', encoding='latin1').read()
    tic = time.time()
    freqs = wordfreq(text)
    toc = time.time()
    print_wordfreq(freqs, 10)
    print("Took %.3f s to calculate"%(toc-tic))
    
    
    # The parallel version
    print("\nParallel word frequency count:")
    # split the davinci.txt into one file per engine:
    lines = text.splitlines()
    nlines = len(lines)
    n = len(rc)
    block = nlines//n
    for i in range(n):
        # Engine i gets lines [i*block, (i+1)*block). The last engine also
        # takes the leftover lines when nlines is not a multiple of n, so
        # no text is dropped by the integer division above.
        start = i * block
        stop = nlines if i == n - 1 else start + block
        chunk = lines[start:stop]
        with io.open('davinci%i.txt'%i, 'w', encoding='utf8') as f:
            f.write('\n'.join(chunk))