How to use the toolz.unique function in toolz

To help you get started, we’ve selected a few toolz.unique examples, based on popular ways it is used in public projects.

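toolz.unique(seq, key=None) lazily yields the elements of seq in first-seen order, skipping anything already seen; an optional key function decides what counts as a duplicate. A minimal sketch of the basics:

from toolz import unique

# unique is lazy and preserves first-seen order
list(unique([3, 1, 3, 2, 1]))                              # [3, 1, 2]

# key controls what counts as a duplicate
list(unique(['Apple', 'apple', 'pear'], key=str.lower))    # ['Apple', 'pear']

All of the projects below wrap the result in list() or sorted() because unique returns an iterator, not a list.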

github dask / knit / dask_yarn / core.py
        ip = ip or socket.gethostbyname(socket.gethostname())

        self.env = env
        self.application_master_container = None
        self.app_id = None
        self.channels = channels or []
        self.conda_pars = conda_pars

        try:
            self.local_cluster = LocalCluster(n_workers=0, ip=ip)
        except (OSError, IOError):
            self.local_cluster = LocalCluster(n_workers=0, scheduler_port=0,
                                              ip=ip)

        self.packages = list(
            sorted(unique((packages or []) + global_packages, key=first_word)))

        self.knit = Knit(autodetect=autodetect, **kwargs)

        atexit.register(self.stop)
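
Here unique dedupes package specifications with a key function before sorting, so a package requested both explicitly and in global_packages only appears once. A hedged sketch of the same idea, with a simplified stand-in for knit's first_word helper:

from toolz import unique

def first_word(spec):
    # simplified stand-in: the package name before any version specifier
    return spec.split('>')[0].split('<')[0].split('=')[0]

packages = ['numpy>=1.15', 'pandas', 'numpy']
sorted(unique(packages, key=first_word))    # ['numpy>=1.15', 'pandas']
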
github dask / dask / dask / diagnostics / profile_visualize.py
def get_colors(palette, names):
    unique_names = list(sorted(unique(names)))
    n_names = len(unique_names)
    palette_lookup = brewer[palette]
    keys = list(palette_lookup.keys())
    low, high = min(keys), max(keys)
    if n_names > high:
        colors = cycle(palette_lookup[high])
    elif n_names < low:
        colors = palette_lookup[low]
    else:
        colors = palette_lookup[n_names]
    color_lookup = dict(zip(unique_names, colors))
    return [color_lookup[n] for n in names]
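
get_colors relies on the fact that unique preserves first-seen order and returns an iterator: sorting the deduplicated names makes the name-to-color assignment deterministic no matter how the tasks arrive. The key step in isolation:

from toolz import unique

names = ['mul', 'add', 'mul', 'sum', 'add']
sorted(unique(names))    # ['add', 'mul', 'sum'] -- a stable basis for zip(..., colors)
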
github ibis-project / ibis / ibis / expr / datatypes.py
def __repr__(self) -> str:
        return '{}({})'.format(
            self.name,
            ', '.join(
                '{}={!r}'.format(slot, getattr(self, slot))
                for slot in toolz.unique(self.__slots__ + ('nullable',))
            ),
        )
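
toolz.unique is used here so that 'nullable' is not repeated in the repr when it already appears in __slots__, while keeping the declared slot order. The same pattern on its own, with an illustrative slot tuple:

import toolz

slots = ('value_type', 'nullable')
list(toolz.unique(slots + ('nullable',)))    # ['value_type', 'nullable']
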
github ibis-project / ibis / ibis / expr / operations.py
def root_tables(self):
        result = list(
            toolz.unique(
                toolz.concatv(
                    self.expr._root_tables(),
                    distinct_roots(
                        *toolz.concatv(
                            self.window._order_by, self.window._group_by
                        )
                    ),
                )
            )
        )
        return result
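
toolz.concatv chains the window's ordering and grouping roots onto the expression's own root tables, and unique then drops repeats across all of them while keeping first-seen order. A minimal sketch of the combination:

import toolz

roots_a = ['t1', 't2']
roots_b = ['t2', 't3']
list(toolz.unique(toolz.concatv(roots_a, roots_b)))    # ['t1', 't2', 't3']
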
github dask / dask / dask / dataframe / multi.py
result: list
        A list of lists of keys that show which data exist on which
        divisions
    """
    _is_broadcastable = partial(is_broadcastable, dfs)
    dfs1 = [df for df in dfs if isinstance(df, _Frame) and not _is_broadcastable(df)]
    if len(dfs) == 0:
        raise ValueError("dfs contains no DataFrame and Series")
    if not all(df.known_divisions for df in dfs1):
        raise ValueError(
            "Not all divisions are known, can't align "
            "partitions. Please use `set_index` "
            "to set the index."
        )

    divisions = list(unique(merge_sorted(*[df.divisions for df in dfs1])))
    if len(divisions) == 1:  # single value for index
        divisions = (divisions[0], divisions[0])
    dfs2 = [
        df.repartition(divisions, force=True) if isinstance(df, _Frame) else df
        for df in dfs
    ]

    result = list()
    inds = [0 for df in dfs]
    for d in divisions[:-1]:
        L = list()
        for i, df in enumerate(dfs2):
            if isinstance(df, _Frame):
                j = inds[i]
                divs = df.divisions
                if j < len(divs) - 1 and divs[j] == d:
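
The divisions line is where unique does its work: merge_sorted interleaves the already-sorted division tuples of every frame, and unique collapses the boundary values shared between frames, yielding one sorted, duplicate-free list of divisions to repartition against. That step in isolation:

from toolz import merge_sorted, unique

divisions_a = (0, 10, 20)
divisions_b = (0, 15, 20)
list(unique(merge_sorted(divisions_a, divisions_b)))    # [0, 10, 15, 20]
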
github blaze / blaze / blaze / expr / collections.py
def _leaves(self):
        return list(unique(concat(i._leaves() for i in self.children)))
github dask / dask / dask / dataframe / core.py
(..., [('b', 0), ('b', 1)])),
     ('c', 1): ('b', 2),
     ('c', 2): ('b', 3)}
    """

    if not isinstance(b, (list, tuple)):
        raise ValueError('New division must be list or tuple')
    b = list(b)

    if len(b) < 2:
        # minimum division is 2 elements, like [0, 0]
        raise ValueError('New division must be longer than 2 elements')

    if b != sorted(b):
        raise ValueError('New division must be sorted')
    if len(b[:-1]) != len(list(unique(b[:-1]))):
        msg = 'New division must be unique, except for the last element'
        raise ValueError(msg)

    if force:
        if a[0] < b[0]:
            msg = ('left side of the new division must be equal or smaller '
                   'than old division')
            raise ValueError(msg)
        if a[-1] > b[-1]:
            msg = ('right side of the new division must be equal or larger '
                   'than old division')
            raise ValueError(msg)
    else:
        if a[0] != b[0]:
            msg = 'left side of old and new divisions are different'
            raise ValueError(msg)
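
The check near the top uses unique purely for validation: if deduplicating b[:-1] shrinks it, the proposed divisions contain repeated values (only the last boundary may repeat). The same check on its own:

from toolz import unique

b = [0, 10, 10, 20]
len(list(unique(b[:-1]))) != len(b[:-1])    # True -- 0, 10, 10 is not unique
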
github ursa-labs / ursabot / ursabot / configs.py
def _from_projects(self, key, unique=False):
        values = (getattr(p, key) for p in self.projects)
        values = reduce(operator.add, values)
        if unique:
            values = toolz.unique(values)
        return list(values)
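
This helper takes a boolean keyword that is itself called unique, which is one reason the call is written as toolz.unique rather than importing the name directly; deduplication is applied only when the flag is set, and list() forces the lazy iterator at the end. A trimmed-down sketch with a hypothetical Project stand-in:

import operator
from functools import reduce
import toolz

class Project:                 # hypothetical stand-in with a list-valued attribute
    def __init__(self, pollers):
        self.pollers = pollers

projects = [Project(['github', 'cron']), Project(['github'])]
values = reduce(operator.add, (p.pollers for p in projects))
list(toolz.unique(values))     # ['github', 'cron']
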
github holoviz / datashader / datashader / compiler.py
Combine a list of base tuples into a single base tuple. This forms the
        reducing step in a reduction tree.

    ``finalize(aggs)``
        Given a tuple of base numpy arrays, returns the finalized ``DataArray``
        or ``Dataset``.
    """
    reds = list(traverse_aggregation(agg))

    # List of base reductions (actually computed)
    bases = list(unique(concat(r._build_bases(cuda) for r in reds)))
    dshapes = [b.out_dshape(schema) for b in bases]
    # List of tuples of (append, base, input columns, temps)
    calls = [_get_call_tuples(b, d, schema, cuda) for (b, d) in zip(bases, dshapes)]
    # List of unique column names needed
    cols = list(unique(concat(pluck(2, calls))))
    # List of temps needed
    temps = list(pluck(3, calls))

    create = make_create(bases, dshapes, cuda)
    info = make_info(cols)
    append = make_append(bases, cols, calls, glyph)
    combine = make_combine(bases, dshapes, temps)
    finalize = make_finalize(bases, agg, schema, cuda)

    return create, info, append, combine, finalize
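
bases and cols follow the same recipe: pluck pulls one position out of each call tuple, concat flattens the resulting sequences, and unique drops repeats while preserving the order in which the reductions were declared. The column-collection step with simplified stand-in tuples:

from toolz import concat, pluck, unique

# simplified stand-ins for the (append, base, input columns, temps) tuples
calls = [
    ('append_x',  'base_x',  ('x',),     ()),
    ('append_xy', 'base_xy', ('x', 'y'), ()),
]
list(unique(concat(pluck(2, calls))))    # ['x', 'y']
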
github blaze / blaze / blaze / expr / table.py
def active_columns(self):
        return sorted(unique(x._name for x in self.traverse()
                                    if isinstance(x, ScalarSymbol)))