How to use the toolz.first function in toolz

To help you get started, we’ve selected a few toolz.first examples, based on popular ways the function is used in public projects.

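For context before the project examples, here is a minimal sketch of the function itself: toolz.first(seq) returns the first element of any iterable, roughly next(iter(seq)), so it also works on lazy iterators without consuming more than one element.

from toolz import first

first([1, 2, 3])               # 1
first('abc')                   # 'a'
first({'x': 1}.items())        # ('x', 1)
first(i * i for i in range(10**6))  # 0 -- only the first value is computed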

github rapidsai / dask-cuml / dask_cuml / neighbors / nearest_neighbors.py View on Github external
        parts = list(map(delayed, data_parts))
        parts = client.compute(parts)  # Start computation in the background
        yield wait(parts)
        for part in parts:
            if part.status == 'error':
                yield part  # trigger error locally

        # A dict in the form of { part_key: part }
        key_to_part_dict = dict([(str(part.key), part) for part in parts])

        who_has = yield client.who_has(parts)

        worker_parts = {}
        for key, workers in who_has.items():
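            # who_has maps each key to the workers holding it; first() picks one of them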
            worker = parse_host_port(first(workers))
            if worker not in worker_parts:
                worker_parts[worker] = []
            worker_parts[worker].append(key_to_part_dict[key])

        """
        Create IP Handles on each worker hosting input data
        """
        # Format of input_devarrays = ([(X, y)..], dev)
        input_devarrays = [(worker, client.submit(input_to_device_arrays, part,
                                                  {"k": k}, workers=[worker]))
                           for worker, part in worker_parts.items()]

        yield wait(input_devarrays)

        """
        Gather IPC handles for each worker and call _fit() on each worker
github rapidsai / dask-cuml / dask_cuml / linear_regression.py View on Github external
        parts = list(map(delayed, zip(data_parts, label_parts)))
        parts = client.compute(parts)  # Start computation in the background
        yield wait(parts)

        for part in parts:
            if part.status == 'error':
                yield part  # trigger error locally

        # A dict in the form of { part_key: part }
        key_to_part_dict = dict([(str(part.key), part) for part in parts])

        who_has = yield client.who_has(parts)

        worker_parts = {}
        for key, workers in who_has.items():
            worker = parse_host_port(first(workers))
            if worker not in worker_parts:
                worker_parts[worker] = []
            worker_parts[worker].append(key_to_part_dict[key])

        """
        Create IP Handles on each worker hosting input data 
        """

        # Format of input_devarrays = ([(X, y)..], dev)
        input_devarrays = [(worker, client.submit(inputs_to_device_arrays, part, workers=[worker]))
                    for worker, part in worker_parts.items()]

        yield wait(input_devarrays)

        """
        Gather IPC handles for each worker and call _fit() on each worker containing data.
github ajylee / call_map / call_map / jedi_dump.py View on Github external
def get_module_node(effective_sys_path: List[Path], module_name: str) -> Tuple[Optional[Node], Optional[Exception]]:
    from .errors import ModuleResolutionError

    import_script = create_import_script(effective_sys_path, module_name)
    definitions = import_script.goto_definitions()

    if definitions:
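        # take the first definition jedi found for the module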
        mod = tz.first(definitions)

        if tuple(map(int, jedi.__version__.split('.'))) >= (0,10,1):
            # duck punch to avoid mod._name.api_type error, which uses parent_context.
            mod._name.parent_context = mod._name.get_root_context()

        if mod.module_path:
            JediCodeElementNode.usage_resolution_modules |= frozenset((mod._name.get_root_context(),))

        node = JediCodeElementNode.from_definition(
            role='definition',
            call_pos=(mod.module_path, (1,0), (None,None)),
            definition=mod)

        err = None
    else:
        node = None
github abilian / abilian-sbe / abilian / sbe / apps / documents / models.py View on Github external
    def get_object_by_path(self, path: str) -> Union["Document", "Folder", None]:
        assert path.startswith("/")
        assert "//" not in path

        if path == "/":
            return self

        path_segments = path[1:].split("/")
        obj = self
        try:
            for name in path_segments[:]:
                obj = first(x for x in obj.children if x.title == name)
            return obj
        except IndexError:
            return None
github bcbio / bcbio-nextgen / bcbio / structural / cnvkit.py View on Github external
"""
    from scipy.cluster.vq import kmeans, vq
    all_sizes = []
    for c in ref.file_contigs(ref_file):
        all_sizes.append(float(c.size))
    all_sizes.sort()
    if len(all_sizes) > 5:
        # separate out smaller chromosomes and haplotypes with kmeans
        centroids, _ = kmeans(np.array(all_sizes), 2)
        idx, _ = vq(np.array(all_sizes), centroids)
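        # partitionby groups consecutive (cluster, size) pairs; tz.first keeps the
        # first run, i.e. the cluster of smaller contig sizes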
        little_sizes = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx, all_sizes)))
        little_sizes = [x[1] for x in little_sizes]
        # create one more cluster with the smaller, removing the haplotypes
        centroids2, _ = kmeans(np.array(little_sizes), 2)
        idx2, _ = vq(np.array(little_sizes), centroids2)
        little_sizes2 = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx2, little_sizes)))
        little_sizes2 = [x[1] for x in little_sizes2]
        # get any chromosomes not in haplotype/random bin
        thresh = max(little_sizes2)
    else:
        thresh = 0
    larger_chroms = []
    for c in ref.file_contigs(ref_file):
        if c.size > thresh:
            larger_chroms.append(c.name)
    return larger_chroms
github vals / umis / umis / umis.py View on Github external
def detect_fastq_annotations(fastq_file):
    """
    detects annotations present in a FASTQ file by examining the first read
    """
    annotations = set()
    queryread = tz.first(read_fastq(fastq_file))
    for k, v in BARCODEINFO.items():
        if v.readprefix in queryread:
            annotations.add(k)
    return annotations
github dask / dask-ml / dask_ml / model_selection / _incremental.py View on Github external
    def _adapt(self, info):
        # First, have an adaptive algorithm
        if self.n_initial_parameters == "grid":
            start = len(ParameterGrid(self.parameters))
        else:
            start = self.n_initial_parameters

        def inverse(time):
            """ Decrease target number of models inversely with time """
            return int(start / (1 + time) ** self.decay_rate)

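        # info maps model id -> list of score records; first() grabs one entry to read the current time step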
        example = toolz.first(info.values())
        time_step = example[-1]["partial_fit_calls"]

        current_time_step = time_step + 1
        next_time_step = current_time_step

        if inverse(current_time_step) == 0:
            # we'll never get out of here
            next_time_step = 1

        while inverse(current_time_step) == inverse(next_time_step) and (
            self.decay_rate
            and not self.patience
            or next_time_step - current_time_step < self.scores_per_fit
        ):
            next_time_step += 1
github blaze / blaze / blaze / compute / chunks.py View on Github external
def discover(c):
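    # discover the datashape of the first chunk, then prepend a variable-length dimension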
    ds = discover(first(c))
    assert isdimension(ds[0])
    return var * ds.subshape[0]
github blaze / blaze / blaze / expr / table.py View on Github external
    def dtype(self):
        ds = self.schema[-1]
        if isinstance(ds, Record):
            if len(ds.fields) > 1:
                raise TypeError("`.dtype` not defined for multicolumn object. "
                                "Use `.schema` instead")
            else:
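                # single-field record: first() pulls out its only column type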
                return dshape(first(ds.types))
        else:
            return dshape(ds)