How to use the multiprocessing.dummy.Pool function in multiprocessing

To help you get started, we’ve selected a few multiprocessing examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
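Most of the examples below follow the same pattern: multiprocessing.dummy.Pool is a drop-in, thread-backed counterpart to multiprocessing.Pool, so it keeps the familiar map / imap_unordered / close / join API while running the work in threads, which suits I/O-bound jobs such as network requests and subprocess calls. Here is a minimal, self-contained sketch of that pattern; the fetch_length helper and the URL list are illustrative placeholders, not code from any of the projects below.

from multiprocessing.dummy import Pool  # thread-backed pool with the multiprocessing.Pool API
import urllib.request

def fetch_length(url):
    # A typical I/O-bound task: download a page and report its size in bytes.
    with urllib.request.urlopen(url) as response:
        return url, len(response.read())

urls = [
    "https://www.python.org/",
    "https://docs.python.org/3/library/multiprocessing.html",
]

with Pool(4) as pool:  # four worker threads
    for url, size in pool.imap_unordered(fetch_length, urls):
        print(url, size, "bytes")  # results arrive as each download finishes, not in input order

If the pool is created without a with block, call pool.close() followed by pool.join() so the workers finish cleanly, as several of the examples below do.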

github mozilla / DeepSpeech / bin / import_cv2.py (View on GitHub)
                # Excluding samples that failed on label validation
                counter['invalid_label'] += 1
            elif int(frames/SAMPLE_RATE*1000/10/2) < len(str(label)):
                # Excluding samples that are too short to fit the transcript
                counter['too_short'] += 1
            elif frames/SAMPLE_RATE > MAX_SECS:
                # Excluding very long samples to keep a reasonable batch-size
                counter['too_long'] += 1
            else:
                # This one is good - keep it for the target CSV
                rows.append((os.path.split(wav_filename)[-1], file_size, label))
            counter['all'] += 1
            counter['total_time'] += frames

    print("Importing mp3 files...")
    pool = Pool(cpu_count())
    bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
    for i, _ in enumerate(pool.imap_unordered(one_sample, samples), start=1):
        bar.update(i)
    bar.update(num_samples)
    pool.close()
    pool.join()

    with open(output_csv, 'w', encoding='utf-8') as output_csv_file:
        print('Writing CSV file for DeepSpeech.py as: ', output_csv)
        writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
        writer.writeheader()
        bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR)
        for filename, file_size, transcript in bar(rows):
            if space_after_every_character:
                writer.writerow({'wav_filename': filename, 'wav_filesize': file_size, 'transcript': ' '.join(transcript)})
            else:
                writer.writerow({'wav_filename': filename, 'wav_filesize': file_size, 'transcript': transcript})
github linnabrown / run_dbcan / Hotpep / parallel_group_many_proteins_many_patterns_noDNA.py (View on GitHub)
		self.freq = None
		self.group = None
		self.subp = None
		self.accession = None
		self.neighbour_seqs = None
		self.ec = None #added by Le Feb 19, 2020
		
	
def callCustom(args):
	return call(args, shell=True)
	
print ("Assigning proteins to groups")
args_array = []
var1 = 1
varlist = " ".join(str(x) for x in variables)
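# Launch one copy of the grouping script per worker thread; callCustom runs each command line in a shell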
pool = ThreadPool(threads)
while var1 <= threads:
	args_array.append(("bact_group_many_proteins_many_patterns.py "+ str(var1) + " " + varlist))
	var1 += 1
pool.map(callCustom, args_array)

print("Collecting Results")

pep_list_array = []
try:
	f = open(peptide_dir_name+"/large_fams.txt", 'r')
except:
	f = open(peptide_dir_name+"/fam_list.txt", 'r')
for line in f:
	pep_list_array.append(line.rstrip())
f.close()
pep_list_hash = {}
github weizhixiaoyi / DouBan-Spider / book / book_crawl.py (View on GitHub)
                        if book_id_list:
                            self.book_spider_log.info(
                                'Fetching book IDs for tag ' + str(tag) + ' at offset ' + str(start) + ' failed, retry ' + str(i) + ' succeeded')
                            break
                        else:
                            self.book_spider_log.info(
                                'Fetching book IDs for tag ' + str(tag) + ' at offset ' + str(start) + ' failed, retry ' + str(i) + ' failed')
                        time.sleep(10)
                    if not book_id_list:
                        start += 20
                        continue
                elif not book_id_list:
                    break

                # Fetch book info with multiple threads
                movie_pool = ThreadPool(12)
                movie_pool.map(self.get_book_info, book_id_list)
                movie_pool.close()
                movie_pool.join()

                # Fetch author info with multiple threads
                person_id_list = []
                while self.redis_con.llen('author_queue'):
                    # Pop author IDs off the queue
                    person_id_list.append(str(self.redis_con.rpop('author_queue').decode('utf-8')))
                author_poll = ThreadPool(12)
                author_poll.map(self.get_person_info, person_id_list)
                author_poll.close()
                author_poll.join()

                # Move on to the next batch
                start += 20
github KatharaFramework / Kathara / src / Resources / manager / docker / DockerMachine.py (View on GitHub)
    def undeploy(self, lab_hash, selected_machines=None):
        machines = self.get_machines_by_filters(lab_hash=lab_hash)

        cpus = cpu_count()
        machines_pool = Pool(cpus)

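        # Process the machines chunk by chunk; each chunk is undeployed in parallel by the pool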
        items = [machines] if len(machines) < cpus else \
                              utils.list_chunks(machines, cpus)

        for chunk in items:
            machines_pool.map(func=partial(self._undeploy_machine, selected_machines, True), iterable=chunk)
github pratik008 / HealthCare_Twitter_Analysis / Resources / n-grams / send_tweets_to_mongodb.py (View on GitHub)
            for f in os.listdir(path+'/'+g):
                process_disease_file(path,g,db.tweets,f)
    
        except:
            continue


###################################################################################

if __name__ == '__main__':
    
    
    path=sys.argv[1]
    
    #Multiprocessing
    pool = ThreadPool(6)  # Adjust the number of worker threads to the number of cores on your machine
    
    #Database
    client = MongoClient()
    db = client['HealthCare_Twitter_Analysis']
    #collection = db.tweets
    
    #Use test database for debugging
    #db = client['test']
    

    #Navigate directory structure
    for g in os.listdir(path):
        try:
            files=os.listdir(path+'/'+g)
            partial_process_disease_file=partial(process_disease_file,path,g,db.tweets)
            pool.map(partial_process_disease_file,files)
github apache / incubator-sdap-nexus / analysis / webservice / algorithms / TimeSeriesSolr.py (View on GitHub)
            calculator = TimeSeriesCalculator()
            for dayinseconds in daysinrange:
                result = calculator.calc_average_on_day(min_lat, max_lat, min_lon, max_lon, ds, dayinseconds)
                results.append(result)
        else:
            # Create a task to calc average difference for each day
            manager = Manager()
            work_queue = manager.Queue()
            done_queue = manager.Queue()
            for dayinseconds in daysinrange:
                work_queue.put(
                    ('calc_average_on_day', min_lat, max_lat, min_lon, max_lon, ds, dayinseconds))
            [work_queue.put(SENTINEL) for _ in xrange(0, maxprocesses)]

            # Start new processes to handle the work
            pool = Pool(maxprocesses)
            [pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in xrange(0, maxprocesses)]
            pool.close()

            # Collect the results as [(day (in ms), average difference for that day)]
            for i in xrange(0, len(daysinrange)):
                result = done_queue.get()
                try:
                    error_str = result['error']
                    self.log.error(error_str)
                    raise NexusProcessingException(reason="Error calculating average by day.")
                except KeyError:
                    pass

                results.append(result)

            pool.terminate()
github d-schmidt / hearthscan-bot / scrape.py (View on GitHub)
                        card['hpwn'] = hpid
                    except Exception as e:
                        urlName = getHearthHeadId(name)
                        url = 'https://www.hearthstonetopdecks.com/cards/{}/'.format(urlName)
                        _, cardHTD = parseHTD(url, session)
                        card['cdn'] = cardHTD['cdn']
                        card['hpwn'] = 12288

                    card['head'] = getHearthHeadId(name)
                    currentSet[card['name']] = card
                    print('.', end='')

                saveCardsAsJson(filename, currentSet)
                resultCards.update(currentSet)

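    # Scrape each card set in parallel with a pool of four workers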
    with Pool(4) as p:
        p.map(doSet, sets)

    return resultCards
github chainspace / chainspace-prototype / chainspacemeasurements / chainspacemeasurements / instances.py (View on GitHub)
    def ssh_exec(self, command):
        self._log("Executing command on all nodes: {}".format(command))
        args = [(self._single_ssh_exec, instance, command) for instance in self._get_running_instances()]
        pool = Pool(ChainspaceNetwork.threads)
        result = pool.map(_multi_args_wrapper, args)
        pool.close()
        pool.join()
        self._log("Executed command on all nodes: {}".format(command))

        return result
github Josue87 / BoomER / modules / linux / gathering / suid_sgid_root.py (View on GitHub)
    def recursive(self, my_dir):
        self.print_info("Recursive mode")
        files_suid = []
        files_sgid = []
        files_list = []
        for cpwd, dirs, files in walk(my_dir):
            if cpwd.endswith("/"):
                cwd = cpwd
            else:
                cwd = cpwd + "/"
            for f in files:
                files_list.append(cwd + f)
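        # Check every collected path for the SUID/SGID bits in parallel with a pool of 8 workers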
        pool = Pool(8)
        results = pool.map(self.is_suid_sgid, files_list)
        pool.close()
        pool.join()
        for result in results:
            if result[0]:
                files_suid.append(result[0])
            if result[1]:
                files_sgid.append(result[1])

        return [files_suid, files_sgid]
github src-d / ml / ast2vec / cloning.py (View on GitHub)
    def clone_repos(self, inputs, output, ignore):
        with Pool(self._num_threads) as pool:
            pool.map(partial(self.process_repo, ignore=ignore, target_dir=output),
                     self.generate_repo_urls(inputs))