How to use the cytoolz.groupby function in cytoolz

To help you get started, we’ve selected a few cytoolz.groupby examples, based on popular ways it is used in public projects.

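Before diving into the examples, here is what cytoolz.groupby does in isolation: it takes a key (a callable, or a non-callable such as a dict key or tuple index, which is applied as a getter) and a sequence, and returns a dict mapping each key value to the list of items that produced it. A minimal sketch:

import cytoolz

# callable key: group words by their first letter
words = ["apple", "avocado", "banana", "cherry"]
cytoolz.groupby(lambda w: w[0], words)
# {'a': ['apple', 'avocado'], 'b': ['banana'], 'c': ['cherry']}

# non-callable key: group dicts by a field
rows = [{"file": "a.png", "id": 1}, {"file": "a.png", "id": 2}, {"file": "b.png", "id": 3}]
cytoolz.groupby("file", rows)
# {'a.png': [{'file': 'a.png', 'id': 1}, {'file': 'a.png', 'id': 2}],
#  'b.png': [{'file': 'b.png', 'id': 3}]}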

github higlass / higlass-python / higlass / server.py
def tiles():
    tids_requested = set(request.args.getlist("d"))

    if not tids_requested:
        return jsonify({"error": "No tiles requested"}), 400

    # bucket the requested tile ids by the tileset uuid that prefixes them
    extract_uuid = lambda tid: tid.split(".")[0]
    uuids_to_tids = toolz.groupby(extract_uuid, tids_requested)

    tiles = []
    for uuid, tids in uuids_to_tids.items():
        ts = next((ts for ts in _list_tilesets() if ts.uuid == uuid), None)
        if ts is not None:  # skip uuids with no matching tileset
            tiles.extend(ts.tiles(tids))
    data = {tid: tval for tid, tval in tiles}
    return jsonify(data)
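The groupby call above does the routing: each tile id starts with its tileset's uuid followed by a dot, so splitting on the first dot buckets every requested id under its tileset, and each tileset is then queried once per batch. The pattern standalone, with made-up ids:

import cytoolz as toolz

tids = ["abc123.0.0.0", "abc123.0.0.1", "def456.1.2"]
toolz.groupby(lambda tid: tid.split(".")[0], tids)
# {'abc123': ['abc123.0.0.0', 'abc123.0.0.1'], 'def456': ['def456.1.2']}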
github steemit / jussi / jussi / cache / cache_group.py
# common case: all TTLs equal, e.g. a batch of get_block requests
if set(ttls) == BATCH_IRREVERSIBLE_TTL_SET:
    ttls = [irreversible_ttl(resp, last_irreversible_block_num) for resp in responses]
    triplets = filter(lambda p: p[0] != TTL.NO_CACHE, zip(ttls, requests, responses))
else:
    new_ttls = []
    for i, ttl in enumerate(ttls):
        if ttl == TTL.DEFAULT_EXPIRE_IF_IRREVERSIBLE:
            ttl = irreversible_ttl(responses[i], last_irreversible_block_num)
        new_ttls.append(ttl)
    # zip the recomputed TTLs, not the originals
    triplets = filter(lambda p: p[0] != TTL.NO_CACHE, zip(new_ttls, requests, responses))

futures = []
# pylint: disable=no-member
for ttl, grouped_triplets in cytoolz.groupby(itemgetter(0), triplets).items():
    if isinstance(ttl, TTL):
        ttl = ttl.value
    pairs = {jsonrpc_cache_key(req): resp for _, req, resp in grouped_triplets}
    self._memory_cache.set_manys(pairs, expire_time=ttl)
    futures.append(self.set_many(pairs, expire_time=ttl))
if futures:
    await asyncio.gather(*futures, return_exceptions=True)
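Here groupby keys the (ttl, request, response) triplets by their first element via itemgetter(0), so every response that shares a TTL is written to the cache in one batch with a single expire time. The pattern in isolation, with hypothetical triplets:

from operator import itemgetter
import cytoolz

triplets = [(30, "req-1", "resp-1"), (30, "req-2", "resp-2"), (60, "req-3", "resp-3")]
for ttl, group in cytoolz.groupby(itemgetter(0), triplets).items():
    print(ttl, [req for _, req, _ in group])
# 30 ['req-1', 'req-2']
# 60 ['req-3']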
github CasiaFan / Dataset_to_VOC_converter / anno_coco2voc.py
def parse_instance(content, outdir):
    categories = {d['id']: d['name'] for d in content['categories']}
    # merge images and annotations: 'id' in images vs 'image_id' in annotations
    merged_info_list = list(map(cytoolz.merge,
                                cytoolz.join('id', content['images'],
                                             'image_id', content['annotations'])))
    # convert category id to name
    for instance in merged_info_list:
        instance['category_id'] = categories[instance['category_id']]
    # group by filename to pool all bboxes in the same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        anno_tree = instance2xml_base(groups[0])
        # if one file has multiple different objects, save it in each category sub-directory
        filenames = []
        for group in groups:
            filenames.append(os.path.join(outdir, re.sub(" ", "_", group['category_id']),
                                          os.path.splitext(name)[0] + ".xml"))
            anno_tree.append(instance2xml_bbox(group, bbox_type='xyxy'))
        for filename in filenames:
            etree.ElementTree(anno_tree).write(filename, pretty_print=True)
        print("Formatting instance xml file {} done!".format(name))
github CasiaFan / Dataset_to_VOC_converter / anno_coco2voc.py
def parse_keypoints(content, outdir):
    keypoints = dict(zip(range(1, len(content['categories'][0]['keypoints']) + 1),
                         content['categories'][0]['keypoints']))
    # merge images and annotations: 'id' in images vs 'image_id' in annotations
    # (materialize the map, or the loop below would exhaust it before groupby runs)
    merged_info_list = list(map(cytoolz.merge,
                                cytoolz.join('id', content['images'],
                                             'image_id', content['annotations'])))
    # convert category name to person
    for keypoint in merged_info_list:
        keypoint['category_id'] = "person"
    # group by filename to pool all bboxes and keypoints in the same file
    for name, groups in cytoolz.groupby('file_name', merged_info_list).items():
        filename = os.path.join(outdir, os.path.splitext(name)[0] + ".xml")
        anno_tree = keypoints2xml_base(groups[0])
        for group in groups:
            anno_tree = keypoints2xml_object(group, anno_tree, keypoints, bbox_type="xyxy")
        doc = etree.ElementTree(anno_tree)
        doc.write(filename, pretty_print=True)  # lxml writes bytes; pass the path, not a text-mode handle
        print("Formatting keypoints xml file {} done!".format(name))
github dask / dask / pbag / core.py
def extend_chunk(self, seq):
    self._open_files()
    grouper = self.grouper
    npart = self.npartitions
    groups = groupby(grouper, seq)

    # Unify groups whose keys hash to the same partition
    groups2 = dict()
    for k, v in groups.items():
        key = hash(k) % npart
        if key not in groups2:
            groups2[key] = []
        groups2[key].extend(v)

    # Store to disk
    for k, group in groups2.items():
        if group:
            self.dump(group, self.files[k])
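groupby does the logical grouping here, and the second pass merges distinct keys that hash into the same bucket, so each logical key always lands in exactly one partition file. A minimal sketch of the two-level bucketing (note that string hashes are only stable within one process unless PYTHONHASHSEED is fixed):

from cytoolz import groupby

npartitions = 2
groups = groupby(lambda w: w[0], ["apple", "avocado", "banana", "cherry"])

partitions = {}
for k, v in groups.items():
    # distinct keys may share a partition; extend() keeps their items together
    partitions.setdefault(hash(k) % npartitions, []).extend(v)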
github dask / dask / dask / array / core.py
>>> broadcast_dimensions(argpairs, numblocks)
    {'i': 2, 'j': 3}

    Works in other contexts too

    >>> argpairs = [('x', 'ij'), ('y', 'ij')]
    >>> d = {'x': ('Hello', 1), 'y': (1, (2, 3))}
    >>> broadcast_dimensions(argpairs, d)
    {'i': 'Hello', 'j': (2, 3)}
    """
    # List like [('i', 2), ('j', 1), ('i', 1), ('j', 2)]
    argpairs2 = [(a, ind) for a, ind in argpairs if ind is not None]
L = concat([zip(inds, dims) for (_, inds), (_, dims)
                in join(first, argpairs2, first, numblocks.items())])

    g = groupby(0, L)
    g = dict((k, set([d for i, d in v])) for k, v in g.items())

    g2 = dict((k, v - set(sentinels) if len(v) > 1 else v) for k, v in g.items())

    if consolidate:
        return valmap(consolidate, g2)

    if g2 and not set(map(len, g2.values())) == set([1]):
        raise ValueError("Shapes do not align %s" % g)

    return valmap(first, g2)
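The groupby(0, L) call uses the non-callable key form: the index 0 acts as a getter, grouping the (index, dimension) pairs by index letter before the sets of dimensions are compared. In isolation:

from cytoolz import groupby, valmap

pairs = [("i", 2), ("j", 3), ("i", 2), ("j", 1)]
g = groupby(0, pairs)
# {'i': [('i', 2), ('i', 2)], 'j': [('j', 3), ('j', 1)]}
valmap(lambda v: {d for _, d in v}, g)
# {'i': {2}, 'j': {1, 3}}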
github steemit / jussi / jussi / handlers.py
try:
    for url, pool in pools.items():
        data = {
            'url': url,
            'queue': pool._queue.qsize(),  # call qsize(); the bound method is not JSON-serializable
            'in_use': len([ch._in_use for ch in pool._holders if ch._in_use is not None]),
            'ws_read_q_sizes': [ch._con.messages.qsize() for ch in pool._holders if ch._con]
        }
        ws_pools.append(data)
except Exception as e:
    logger.error('error adding ws pool info', e=e)

async_data = dict()
try:
    tasks = asyncio.tasks.Task.all_tasks()
    # bucket tasks by state, then collapse each bucket to its size
    grouped_tasks = cytoolz.groupby(lambda t: t._state, tasks)
    for k, v in grouped_tasks.items():
        grouped_tasks[k] = len(v)
    async_data = {
        'tasks.count': len(tasks),
        'tasks': grouped_tasks
    }
except Exception as e:
    logger.error('error adding cache info', e=e)
data = {
    'source_commit': http_request.app.config.args.source_commit,
    'docker_tag': http_request.app.config.args.docker_tag,
    'jussi_num': http_request.app.config.last_irreversible_block_num,
    'asyncio': async_data,
    'cache': cache_data,
    'server': server_data,
    'ws_pools': ws_pools
}
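The group-then-count loop above can also be collapsed with cytoolz.valmap, which maps a function over a dict's values. A sketch with hypothetical task states standing in for t._state:

import cytoolz

states = ["PENDING", "FINISHED", "PENDING", "CANCELLED"]
cytoolz.valmap(len, cytoolz.groupby(cytoolz.identity, states))
# {'PENDING': 2, 'FINISHED': 1, 'CANCELLED': 1}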