How to use the funcy.chunks function in funcy

To help you get started, we’ve selected a few funcy.chunks examples, based on popular ways the function is used in public projects.
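
Before the project examples, here is a minimal sketch of the basic behavior (assuming a standard funcy install): chunks(n, seq) lazily splits a sequence or iterable into pieces of at most n items, with the final piece possibly shorter.

from funcy import chunks

# Sequences (lists, strings, etc.) are sliced, so each chunk is a sub-sequence.
print(list(chunks(3, [1, 2, 3, 4, 5, 6, 7])))
# -> [[1, 2, 3], [4, 5, 6], [7]]

# chunks() is lazy and also accepts plain iterators; there each chunk is a list.
for batch in chunks(2, iter('abcde')):
    print(batch)
# -> ['a', 'b'], then ['c', 'd'], then ['e']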

github AdaCore / libadalang / utils / run_nameres.py
    if not automated:
        print("Loading old results ..")
        prev_results = load_or_create("results_file", lambda: None)
        if no_resolution:
            results = prev_results
            embed()
            return

    results = Results()
    files = []
    for dir in dirs:
        dir_files = sorted(glob('{}/*.ad?'.format(dir)))
        if pattern:
            dir_files = [f for f in dir_files if re.findall(pattern, f)]
        dir_files = chunks(
            chunk_size,
            map(os.path.basename, dir_files)
        )
        files += [(dir, fs) for fs in dir_files]

    project = os.path.abspath(project)

    raw_results = pmap(
        lambda (dir, f): FileResult.nameres_files(
            dir, f, project=project, extra_args=extra_args
        ),
        files, nb_threads=j
    )

    total_nb_files = sum(len(fs[1]) for fs in files)
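
Note that this excerpt targets Python 2: the tuple-unpacking lambda (lambda (dir, f): ...) no longer parses in Python 3, and pmap appears to come from the project's own utilities rather than from funcy. A rough Python 3 sketch of the same pattern (chunk the file names, then process each chunk in parallel), where process_chunk, run, the thread pool, and the default sizes are all illustrative stand-ins rather than part of the original code:

import os
from concurrent.futures import ThreadPoolExecutor
from glob import glob
from funcy import chunks

def process_chunk(dir, file_names):
    # Stand-in for the real per-chunk work, e.g. FileResult.nameres_files(...).
    return (dir, len(file_names))

def run(dirs, chunk_size=10, workers=4):
    # Pair each directory with chunks of its file basenames, as in the excerpt above.
    jobs = []
    for dir in dirs:
        dir_files = sorted(glob('{}/*.ad?'.format(dir)))
        for fs in chunks(chunk_size, map(os.path.basename, dir_files)):
            jobs.append((dir, fs))

    # Python 3 dropped tuple-unpacking parameters, so unpack inside the callable.
    with ThreadPoolExecutor(max_workers=workers) as executor:
        return list(executor.map(lambda job: process_chunk(*job), jobs))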

github uploadcare / intercom-rank / migrations / versions / ef6edca55d1d_.py
def upgrade():
    ### commands auto generated by Alembic - please adjust! ###
    table = op.create_table('free_email_providers',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('domain', sa.Unicode(length=255), nullable=False),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_free_email_providers_domain'), 'free_email_providers', ['domain'], unique=False)
    ### end Alembic commands ###

    with open(os.path.join(PROJECT_ROOT, 'free.emails'), 'r') as f:
        FREE_EMAILS_SET = set(r.strip() for r in f.readlines())

    # Fill the data
    for domains in chunks(1000, FREE_EMAILS_SET):
        op.bulk_insert(table, [{'domain': d} for d in domains])
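
chunks() accepts any iterable, including the set built above; when the input is not a sliceable sequence, each chunk comes back as a plain list, so batches of 1000 domains can be handed to op.bulk_insert one at a time. A standalone sketch of the same batching idea, with insert_rows and the tiny batch size standing in for the real Alembic call:

from funcy import chunks

def insert_rows(rows):
    # Stand-in for a real bulk insert such as op.bulk_insert(table, rows).
    print('inserting {} rows'.format(len(rows)))

domains = {'a.example', 'b.example', 'c.example', 'd.example', 'e.example'}
for batch in chunks(2, domains):
    insert_rows([{'domain': d} for d in batch])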

github uploadcare / intercom-rank / app / intercom / service.py
timeout=TIMEOUT)
            # TODO: re-raise custom exception for 429 HTTP error
            # for further handling (e.g. retry celery task)
            response.raise_for_status()
            result = response.json()

            try:
                status_url = result['links']['self']
                logger.debug('Bulk update status: %s', status_url)
            except KeyError:
                logger.error('Weird response from Intercom: %r', result)

            return result

        with self.get_executor() as executor:
            for _ in executor.map(request, chunks(CHUNK_SIZE, users_data)):
                pass

github idrdex / star-django / legacy / management / commands / fill_probes.py
def mget(keys):
    return lcat(redis_client.mget(chunk) for chunk in chunks(10000, keys))
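
This one-liner pairs chunks with funcy's lcat: the keys are fetched in batches of 10000 so no single MGET grows unbounded, and lcat concatenates the per-batch result lists back into one flat list. A sketch of the same batch-then-flatten idiom, with fetch_batch standing in for the Redis call:

from funcy import chunks, lcat

def fetch_batch(keys):
    # Stand-in for a real batched lookup such as redis_client.mget(keys).
    return ['value-for-' + key for key in keys]

keys = ['k1', 'k2', 'k3', 'k4', 'k5']
values = lcat(fetch_batch(batch) for batch in chunks(2, keys))
print(values)   # -> ['value-for-k1', 'value-for-k2', ..., 'value-for-k5']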

github kensho-technologies / bubs / bubs / tokenizer.py
def word_tokenize(self, text):
        """Get list of string tokens from input string.

        Args:
            text: input string for tokenization
        Yields:
            token: str, non-whitespace tokens
        """
        for token in split_possessive_markers(split_contractions(_html_tokenize(text))):
            if self._max_characters_per_token is not None:
                for token_chunk in funcy.chunks(self._max_characters_per_token, token):
                    yield token_chunk
            else:
                yield token
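
Because Python strings are sequences, chunks() slices them into substrings, which is what the tokenizer above relies on to cap token length. A quick sketch (the word and the limit of 4 are just for illustration):

from funcy import chunks

max_characters_per_token = 4
print(list(chunks(max_characters_per_token, 'antidisestablishment')))
# -> ['anti', 'dise', 'stab', 'lish', 'ment']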

github kensho-technologies / bubs / bubs / helpers.py
def input_batch_generator(self, tokenized_sentences, batch_size):
        """Yield inputs to ContextualizedEmbedding in batches with minimal padding for prediction.

        Group sentences into batches in the order they're provided. Character-level padding is
        determined by longest sentence in the batch. Yield one batch at a time.

        Args:
            tokenized_sentences: list of lists of str, each str a token
            batch_size: int, number of sentences per batch generated

        Returns:
            Yields inputs to ContextualizedEmbedding one sentence batch at a time
        """
        while True:
            for chunk in chunks(batch_size, range(len(tokenized_sentences))):
                selected_sentences = [tokenized_sentences[index] for index in chunk]
                model_inputs = self.prepare_inputs_from_pretokenized(selected_sentences)
                yield model_inputs

github harrystech / arthur-redshift-etl / python / scripts / re_run_partial_pipeline.py
"""
    Return dicts describing the current status of the pipelines.
    """
    extract_fields = jmespath.compile(
        """
        pipelineDescriptionList[].{
            pipelineId: pipelineId,
            name: name,
            pipelineState: fields[?key == '@pipelineState'].stringValue|[0],
            healthStatus: fields[?key == '@healthStatus'].stringValue|[0],
            latestRunTime: fields[?key == '@latestRunTime'].stringValue|[0]
        }
        """
    )
    chunk_size = 25  # Per AWS documentation, need to go in pages of 25 pipelines
    for ids_chunk in funcy.chunks(chunk_size, pipeline_ids):
        response = client.describe_pipelines(pipelineIds=ids_chunk)
        values = extract_fields.search(response)
        for value in values:
            yield value

github harrystech / arthur-redshift-etl / python / etl / pipeline.py
"""
    client = boto3.client('datapipeline')
    paginator = client.get_paginator('list_pipelines')
    response_iterator = paginator.paginate()
    all_pipeline_ids = response_iterator.search("pipelineIdList[].id")
    if selection:
        selected_pipeline_ids = [pipeline_id
                                 for pipeline_id in all_pipeline_ids
                                 for glob in selection
                                 if fnmatch.fnmatch(pipeline_id, glob)]
    else:
        selected_pipeline_ids = list(all_pipeline_ids)

    dw_pipelines = []
    chunk_size = 25  # Per AWS documentation, need to go in pages of 25 pipelines
    for ids_chunk in funcy.chunks(chunk_size, selected_pipeline_ids):
        resp = client.describe_pipelines(pipelineIds=ids_chunk)
        for description in resp['pipelineDescriptionList']:
            for tag in description['tags']:
                if tag['key'] == 'user:project' and tag['value'] == 'data-warehouse':
                    dw_pipelines.append(DataPipeline(description))
    return sorted(dw_pipelines, key=attrgetter("name"))
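
Both Redshift ETL snippets apply the same pattern: describe_pipelines only accepts a limited batch of pipeline ids per call (25, per the comments), so the ids are chunked and one request is issued per chunk. A generic sketch of that pattern; call_in_pages, fake_describe, and the page-size default are illustrative names, not part of the project:

from funcy import chunks

def call_in_pages(client_call, ids, page_size=25):
    # Issue one request per chunk so every call stays within the API's limit.
    for ids_chunk in chunks(page_size, ids):
        yield from client_call(ids_chunk)

def fake_describe(ids):
    # Fake "API" that just echoes the ids it was given.
    return [{'id': i} for i in ids]

print(len(list(call_in_pages(fake_describe, range(60)))))   # -> 60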