How to use the dask.delayed function in dask

To help you get started, we've selected a few dask.delayed examples, based on popular ways it is used in public projects.

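Before the project snippets, here is a minimal sketch of the core idea: wrapping ordinary function calls with dask.delayed builds a lazy task graph, and .compute() runs it. The functions here are illustrative, not taken from any of the projects below.

import dask

def inc(x):
    # plain Python function, nothing dask-specific
    return x + 1

def add(x, y):
    return x + y

# Wrapping the calls builds a task graph instead of executing immediately.
a = dask.delayed(inc)(1)
b = dask.delayed(inc)(2)
total = dask.delayed(add)(a, b)

print(total.compute())  # runs the graph: (1 + 1) + (2 + 1) == 5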

From dask/dask: dask/array/stats.py (view on GitHub)
def normaltest(a, axis=0, nan_policy="propagate"):
    if nan_policy != "propagate":
        raise NotImplementedError(
            "`nan_policy` other than 'propagate' have not been implemented."
        )

    s, _ = skewtest(a, axis)
    k, _ = kurtosistest(a, axis)
    k2 = s * s + k * k
    return delayed(NormaltestResult, nout=2)(k2, delayed(distributions.chi2.sf)(k2, 2))
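
The nout=2 argument above tells dask.delayed that the wrapped call returns two values, so the result can be unpacked into separate lazy objects. A minimal sketch of the same option, using a made-up helper function:

from dask import delayed

def min_and_max(values):
    # returns a 2-tuple, so nout=2 lets the Delayed result be unpacked
    return min(values), max(values)

lo, hi = delayed(min_and_max, nout=2)([3, 1, 4, 1, 5])
print(lo.compute(), hi.compute())  # 1 5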

From SciTools/iris: lib/iris/fileformats/pp.py (view on GitHub)
            @dask.delayed
            def fetch_valid_values_array():
                # Return the data values array (shape+size unknown).
                return proxy[:]
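
Here @dask.delayed is used as a decorator so that reading the proxy's values is deferred until compute time. A sketch of the same decorator pattern; load_array and the file path are hypothetical:

import dask
import numpy as np

@dask.delayed
def load_array(path):
    # hypothetical loader; the read only happens when the graph runs
    return np.load(path)

lazy = load_array("data.npy")   # returns a Delayed immediately, no I/O yet
# arr = lazy.compute()          # triggers the actual np.load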

From simpeg/simpeg: SimPEG/potential_fields/magnetics/simulation.py (view on GitHub)
def Jtvec(self, m, v, f=None):

        if self.coordinate_system == 'cartesian':
            dmudm = self.chiMap.deriv(m)
        else:
            dmudm = self.dSdm * self.chiMap.deriv(m)

        if self.modelType == 'amplitude':

            dfdm_v = dask.delayed(csr.dot)(v, self.dfdm)

            vec = da.from_delayed(dfdm_v, dtype=float, shape=[self.dfdm.shape[0]])

            if getattr(self, '_Mxyz', None) is not None:

                jtvec = da.dot(vec.astype(np.float32), self.G)

                Jtvec = dask.delayed(csr.dot)(jtvec, self.Mxyz)

            else:
                Jtvec = da.dot(vec.astype(np.float32), self.G)

        else:

            Jtvec = da.dot(v.astype(np.float32), self.G)

From radix-ai/graphchain: examples/example_2.py (view on GitHub)
    @dask.delayed
    def goo(*args):
        sleep(1)
        return sum(args) + 1

    @dask.delayed
    def top(argument, argument2):
        sleep(3)
        return argument - argument2

    # Constants
    v1 = dask.delayed(1)
    v2 = dask.delayed(2)
    v3 = dask.delayed(3)
    v4 = dask.delayed(0)
    v5 = dask.delayed(-1)
    v6 = dask.delayed(-2)
    d1 = dask.delayed(-3)
    boo1 = boo(foo(v1), bar(v2), baz(v3))
    goo1 = goo(foo(v4), v6, bar(v5))
    baz2 = baz(boo1, goo1)
    top1 = top(d1, baz2)

    skipkeys = [boo1.key]
    return (top1, -14, skipkeys)  # DAG and expected result
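
The graph above is built from delayed constants (dask.delayed(1) and friends) and decorated functions, and evaluating top1 should give -14. A stripped-down sketch of building and running such a graph, with an illustrative add function:

import dask

@dask.delayed
def add(x, y):
    return x + y

v1, v2 = dask.delayed(1), dask.delayed(2)   # delayed constants, like v1..v6 above
total = add(v1, v2)

print(total.compute())          # 3
# total.visualize("graph.png")  # optional: render the task graph (requires graphviz)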

From DigitalSlideArchive/HistomicsTK: histomicstk/segmentation/positive_pixel_count.py (view on GitHub)
"""
    ts = large_image.getTileSource(slide_path)
    kwargs = dict(format=large_image.tilesource.TILE_FORMAT_NUMPY)
    if region is not None:
        kwargs['region'] = region
    if make_label_image:
        tile = ts.getRegion(**kwargs)[0]
        return count_image(tile, params)
    else:
        results = []
        total_tiles = ts.getSingleTile(**kwargs)['iterator_range']['position']
        for position in range(0, total_tiles, tile_grouping):
            results.append(delayed(_count_tiles)(
                slide_path, params, kwargs, position,
                min(tile_grouping, total_tiles - position)))
        results = delayed(_combine)(results).compute()
    return _totals_to_stats(results),
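
This is a common fan-out/fan-in shape: collect a list of delayed per-chunk tasks, wrap a combining function in delayed around that list, and compute once at the end. A minimal sketch with placeholder process_chunk and combine functions:

from dask import delayed

def process_chunk(chunk):
    return sum(chunk)       # placeholder per-chunk work

def combine(partials):
    return sum(partials)    # placeholder reduction

chunks = [[1, 2], [3, 4], [5, 6]]
partials = [delayed(process_chunk)(c) for c in chunks]   # fan-out
total = delayed(combine)(partials).compute()             # fan-in, single compute
print(total)  # 21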

From radix-ai/graphchain: examples/example_1.py (view on GitHub)
    @dask.delayed
    def goo(*args):
        sleep(1)
        return sum(args) + 1

    @dask.delayed
    def top(argument, argument2):
        sleep(3)
        return argument - argument2

    # Constants
    v1 = dask.delayed(1)
    v2 = dask.delayed(2)
    v3 = dask.delayed(3)
    v4 = dask.delayed(0)
    v5 = dask.delayed(-1)
    v6 = dask.delayed(-2)
    d1 = dask.delayed(-3)

    boo1 = boo(foo(v1), bar(v2), baz(v3))
    goo1 = goo(foo(v4), v6, bar(v5))
    baz2 = baz(boo1, goo1)
    top1 = top(d1, baz2)
    return (top1, -14)  # DAG and expected result

From bioidiap/bob.bio.base: bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py (view on GitHub)
def compute_tnorm_scores(
        self, probe_scores, sampleset_for_tnorm, t_biometric_references, for_zt=False
    ):

        # Reducing all the Z-Scores to compute the stats
        all_scores_for_tnorm = dask.delayed(list)(sampleset_for_tnorm)

        stats = dask.delayed(self.ztnorm._compute_stats)(
            all_scores_for_tnorm, t_biometric_references, axis=1
        )

        return probe_scores.map_partitions(self.ztnorm._tnorm_samplesets, stats, for_zt)
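
Wrapping the builtin list in dask.delayed, as above, gathers a lazy collection into a single graph node whose result can feed further delayed calls. A minimal sketch of that idea with illustrative values:

import dask

lazy_items = [dask.delayed(i * i) for i in range(5)]   # delayed constants
as_list = dask.delayed(list)(lazy_items)               # one node that gathers them
total = dask.delayed(sum)(as_list)                     # downstream delayed call

print(total.compute())  # 0 + 1 + 4 + 9 + 16 == 30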

From pytroll/satpy: satpy/composites/viirs.py (view on GitHub)
        # scaled_radiance = (radiance - minval) / (maxval - minval)
        # radiance = sqrt(scaled_radiance)

        moon_factor1 = 0.7 * (1.0 - moon_illum_fraction)
        moon_factor2 = 0.0022 * lza_data.data
        erf_portion = 1 + erf((sza_data.data - 95.0) / (5.0 * np.sqrt(2.0)))
        max_val = da.power(
            10, -1.7 -
            (2.65 + moon_factor1 + moon_factor2) * erf_portion) * unit_factor
        min_val = da.power(10, -4.0 -
                           (2.95 + moon_factor2) * erf_portion) * unit_factor

        # Update from Curtis Seaman, increase max radiance curve until less
        # than 0.5% is saturated
        if self.saturation_correction:
            delayed = dask.delayed(self._saturation_correction)(output_dataset.data, unit_factor, min_val, max_val)
            output_dataset.data = da.from_delayed(delayed, output_dataset.shape, output_dataset.dtype)
            output_dataset.data = output_dataset.data.rechunk(dnb_data.data.chunks)
        else:
            inner_sqrt = (output_dataset - min_val) / (max_val - min_val)
            # clip negative values to 0 before the sqrt
            inner_sqrt = inner_sqrt.where(inner_sqrt > 0, 0)
            output_dataset.data = np.sqrt(inner_sqrt).data

        info = dnb_data.attrs.copy()
        info.update(self.attrs)
        info["standard_name"] = "equalized_radiance"
        info["mode"] = "L"
        output_dataset.attrs = info
        return output_dataset
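
The saturation correction above pushes a whole-array computation into one delayed call and wraps it back into a dask array with da.from_delayed, which needs the shape and dtype up front; rechunk then restores the original chunking. A sketch of that round trip with a placeholder correct_array function:

import dask
import dask.array as da
import numpy as np

def correct_array(arr):
    # placeholder whole-array operation that needs all of the data at once
    return np.clip(arr, 0.0, 1.0)

x = da.random.random((1000, 1000), chunks=(250, 250))

lazy = dask.delayed(correct_array)(x)                    # one task over the whole array
y = da.from_delayed(lazy, shape=x.shape, dtype=x.dtype)  # back to a dask array (single chunk)
y = y.rechunk(x.chunks)                                  # restore the original chunking
print(y.mean().compute())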

From NCBI-Hackathons/BarcSeek: parallel.py (view on GitHub)
def parallelize(sample_dict:dict, num_lines:int, forward_fastq:str, reverse_fastq:Optional[str] = None):
    orig_home = os.getcwd()

    _sanity_checks_(num_lines, forward_fastq, reverse_fastq)
    master_dict  = _fetch_chunk_files_(num_lines, forward_fastq, reverse_fastq)
    logger.debug(json.dumps(master_dict))
    logger.debug("iterating over partition calls")
    results = []

    for p in sorted(master_dict):
        f_file = master_dict[p]['f_input']
        r_file = master_dict[p]['r_input']
        logger.debug("calling partition")
        result = dask.delayed(_partition_)(sample_dict, f_file, r_file)
        results.append(result)

    logger.debug("calling reduce")
    the_job = dask.delayed(_reduce_)(results)
    the_job.compute(num_workers=4)

    join_out = _join_output_(sample_dict, forward_fastq, reverse_fastq)
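
The final compute(num_workers=4) above shows that scheduler options are chosen when the graph runs, not when it is built. A small sketch of the same idea; the threaded scheduler is just one possible choice:

from dask import delayed

tasks = [delayed(pow)(i, 2) for i in range(8)]
total = delayed(sum)(tasks)

# scheduler and worker count are picked at compute time
result = total.compute(scheduler="threads", num_workers=4)
print(result)  # 140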

From JDASoftwareGroup/kartothek: kartothek/io/dask/dataframe.py (view on GitHub)
store=store,
        factory=factory,
        load_dataset_metadata=False,
    )

    mps = list(
        dispatch_metapartitions_from_factory(dataset_factory, predicates=predicates)
    )
    if mps:
        random.shuffle(mps)
        # ensure that even with sampling at least one metapartition is returned
        cutoff_index = max(1, int(len(mps) * frac))
        mps = mps[:cutoff_index]
        ddf = dd.from_delayed(
            [
                dask.delayed(MetaPartition.get_parquet_metadata)(
                    mp, store=dataset_factory.store_factory, table_name=table_name,
                )
                for mp in mps
            ]
        )
    else:
        df = pd.DataFrame(columns=_METADATA_SCHEMA.keys())
        df = df.astype(_METADATA_SCHEMA)
        ddf = dd.from_pandas(df, npartitions=1)

    return ddf
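
Here dd.from_delayed turns a list of delayed calls, each returning a pandas DataFrame, into a single dask DataFrame. A minimal sketch of the same construction with a placeholder load_partition function:

import dask
import dask.dataframe as dd
import pandas as pd

def load_partition(i):
    # placeholder per-partition loader returning a pandas DataFrame
    return pd.DataFrame({"partition": [i], "value": [i * 10]})

parts = [dask.delayed(load_partition)(i) for i in range(4)]
ddf = dd.from_delayed(parts)   # meta is inferred from the first partition

print(ddf.compute())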