How to use the wordfreq.print_wordfreq function in wordfreq

To help you get started, we’ve selected a few wordfreq examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ipython / ipython / docs / examples / kernel / pwordfreq.py View on Github external
# Run the wordfreq script on the engines.
    # Driver excerpt: count word frequencies serially, then through the
    # engines via pwordfreq, printing each result with print_wordfreq.
    ipc.run('wordfreq.py')

    # Run the serial version
    # The parenthesised single-argument print behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print("Serial word frequency count:")
    text = open('davinci.txt').read()
    freqs = wordfreq(text)
    print_wordfreq(freqs, 10)

    # The parallel version
    print("\nParallel word frequency count:")
    files = ['davinci%i.txt' % i for i in range(4)]
    # presumably hands the file names out to the engines under the name
    # 'textfile' -- confirm against the MultiEngineClient API
    ipc.scatter('textfile', files)
    ipc.execute('text = open(textfile[0]).read()')
    pfreqs = pwordfreq(ipc, 'text')
    # Fix: print the parallel result. The original printed `freqs` (the
    # serial counts) again, leaving `pfreqs` unused.
    print_wordfreq(pfreqs)
github ipython / ipyparallel / examples / daVinci Word Count / pwordfreq.py View on Github external
view.apply_sync(os.chdir, os.getcwd())

    # Fetch the text if it is not already on disk, time the serial word
    # count, then split the text into one file per engine.
    if not os.path.exists('davinci.txt'):
        # download from project gutenberg
        print("Downloading Da Vinci's notebooks from Project Gutenberg")
        r = requests.get(davinci_url)
        with io.open('davinci.txt', 'w', encoding='utf8') as f:
            f.write(r.text)

    # Run the serial version
    print("Serial word frequency count:")
    # NOTE(review): the download branch above writes the file as utf8 but it
    # is read back as latin1 here -- confirm this mismatch is intentional.
    # Read inside a `with` so the handle is closed deterministically.
    with io.open('davinci.txt', encoding='latin1') as f:
        text = f.read()
    tic = time.time()
    freqs = wordfreq(text)
    toc = time.time()
    print_wordfreq(freqs, 10)
    print("Took %.3f s to calculate"%(toc-tic))


    # The parallel version
    print("\nParallel word frequency count:")
    # split the davinci.txt into one file per engine:
    lines = text.splitlines()
    nlines = len(lines)
    n = len(rc)
    block = nlines//n
    for i in range(n):
        # Fix: the slice end used to be i*(block+1), i.e. i*block + i, so
        # chunk i held only i lines (chunk 0 was empty). Each chunk now
        # spans `block` lines, and the last one also takes the
        # nlines % n remainder so no line is dropped.
        start = i * block
        stop = nlines if i == n - 1 else start + block
        chunk = lines[start:stop]
        with io.open('davinci%i.txt'%i, 'w', encoding='utf8') as f:
            f.write('\n'.join(chunk))
    
    try: #python2
github ipython / ipyparallel / examples / daVinci Word Count / pwordfreq.py View on Github external
n = len(rc)
    # Split the text into one file per engine, run the parallel word count
    # on those files, print the result and timing, then delete the files.
    block = nlines//n
    for i in range(n):
        # Fix: the slice end used to be i*(block+1), i.e. i*block + i, so
        # chunk i held only i lines (chunk 0 was empty). Each chunk now
        # spans `block` lines, and the last one also takes the
        # nlines % n remainder so no line is dropped.
        start = i * block
        stop = nlines if i == n - 1 else start + block
        chunk = lines[start:stop]
        with io.open('davinci%i.txt'%i, 'w', encoding='utf8') as f:
            f.write('\n'.join(chunk))

    try: #python2
        cwd = os.path.abspath(os.getcwdu())
    except AttributeError: #python3
        cwd = os.path.abspath(os.getcwd())
    # Absolute paths, one per split file written above.
    fnames = [ os.path.join(cwd, 'davinci%i.txt'%i) for i in range(n)]
    tic = time.time()
    pfreqs = pwordfreq(view,fnames)
    toc = time.time()
    # Fix: print the parallel result. The original printed `freqs` (the
    # serial counts) again, leaving `pfreqs` unused.
    print_wordfreq(pfreqs)
    print("Took %.3f s to calculate on %i engines"%(toc-tic, len(view.targets)))
    # cleanup split files
    # Fix: on Python 3 `map` is lazy, so the bare map(os.remove, fnames)
    # never removed anything; an explicit loop works on both versions.
    for fname in fnames:
        os.remove(fname)
github ipython / ipython / docs / examples / kernel / pwordfreq.py View on Github external
freqs[word] += count
    return freqs

if __name__ == '__main__':
    # Script entry point: count word frequencies in davinci.txt serially,
    # then through the engines via pwordfreq, printing each result.

    # Create a MultiEngineClient
    from IPython.kernel import client
    ipc = client.MultiEngineClient()

    # Run the wordfreq script on the engines.
    ipc.run('wordfreq.py')

    # Run the serial version
    # The parenthesised single-argument print behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print("Serial word frequency count:")
    text = open('davinci.txt').read()
    freqs = wordfreq(text)
    print_wordfreq(freqs, 10)

    # The parallel version
    print("\nParallel word frequency count:")
    files = ['davinci%i.txt' % i for i in range(4)]
    # presumably hands the file names out to the engines under the name
    # 'textfile' -- confirm against the MultiEngineClient API
    ipc.scatter('textfile', files)
    ipc.execute('text = open(textfile[0]).read()')
    pfreqs = pwordfreq(ipc, 'text')
    # Fix: print the parallel result. The original printed `freqs` (the
    # serial counts) again, leaving `pfreqs` unused.
    print_wordfreq(pfreqs)
github ipython / ipython / docs / examples / kernel / pwordfreq_skel.py View on Github external
text - The name of a string on the engines to do the freq count on.
    """

if __name__ == '__main__':
    # Script entry point for the skeleton version: runs the serial word
    # count, then calls pwordfreq through a MultiEngineClient.

    # Create a MultiEngineClient
    from IPython.kernel import client
    ipc = client.MultiEngineClient()
    
    # Run the wordfreq script on the engines.
    ipc.run('wordfreq.py')

    # Run the serial version
    print "Serial word frequency count:"
    text = open('davinci.txt').read()
    freqs = wordfreq(text)
    print_wordfreq(freqs, 10)
    
    # The parallel version
    print "\nParallel word frequency count:"
    # Names of the four split files, davinci0.txt .. davinci3.txt.
    files = ['davinci%i.txt' % i for i in range(4)]
    # presumably hands the file names out to the engines under the name
    # 'textfile' -- confirm against the MultiEngineClient API
    ipc.scatter('textfile', files)
    ipc.execute('text = open(textfile[0]).read()')
    pfreqs = pwordfreq(ipc,'text')
    # NOTE(review): pfreqs is assigned above but this call prints freqs (the
    # serial result) -- confirm whether pfreqs was intended once pwordfreq
    # is implemented.
    print_wordfreq(freqs)
github ipython / ipython / docs / examples / kernel / pwordfreq_skel.py View on Github external
# Run the wordfreq script on the engines.
    # Driver excerpt from the skeleton version: runs the serial word count,
    # then calls pwordfreq through the client `ipc`.
    ipc.run('wordfreq.py')

    # Run the serial version
    print "Serial word frequency count:"
    text = open('davinci.txt').read()
    freqs = wordfreq(text)
    print_wordfreq(freqs, 10)
    
    # The parallel version
    print "\nParallel word frequency count:"
    # Names of the four split files, davinci0.txt .. davinci3.txt.
    files = ['davinci%i.txt' % i for i in range(4)]
    # presumably hands the file names out to the engines under the name
    # 'textfile' -- confirm against the MultiEngineClient API
    ipc.scatter('textfile', files)
    ipc.execute('text = open(textfile[0]).read()')
    pfreqs = pwordfreq(ipc,'text')
    # NOTE(review): pfreqs is assigned above but this call prints freqs (the
    # serial result) -- confirm whether pfreqs was intended once pwordfreq
    # is implemented.
    print_wordfreq(freqs)