How to use the pandarallel.utils.chunk function in pandarallel

To help you get started, we’ve selected a few pandarallel examples based on popular ways the pandarallel.utils.chunk function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: nalepae / pandarallel / pandarallel / series.py (View on GitHub)
def closure(data, func, **kwargs):
            """Fan ``func`` out over ``data`` via ``Series.worker_map`` on a worker pool.

            The visible lines split ``data.size`` into chunks with ``chunk``,
            store ``data`` through ``plasma_client.put`` and submit one
            argument tuple per chunk with ``pool.amap``.

            ``nb_workers``, ``in_notebook_lab``, ``display_progress_bar``,
            ``plasma_client`` and ``plasma_store_name`` are not defined in this
            block; they are read from an enclosing scope that is not shown.

            NOTE(review): this excerpt ends right after the ``amap`` call, so
            how ``result_workers``, ``values``, ``finished`` and
            ``progress_bar`` are consumed cannot be seen from here.
            """
            pool = ProcessingPool(nb_workers)
            manager = Manager()
            # Queue created through the manager and handed to every worker below;
            # presumably used by workers to report progress -- confirm in the worker.
            queue = manager.Queue()

            # Notebook/Lab progress bars when in_notebook_lab is truthy,
            # console progress bars otherwise.
            ProgressBars = (ProgressBarsNotebookLab if in_notebook_lab
                            else ProgressBarsConsole)

            chunks = chunk(data.size, nb_workers)

            # Length of each chunk (stop - start); passed to ProgressBars below.
            # The loop variable reuses the name ``chunk``; in Python 3 it is local
            # to the comprehension, so the ``chunk`` function is not rebound.
            maxs = [chunk.stop - chunk.start for chunk in chunks]
            values = [0] * nb_workers
            finished = [False] * nb_workers

            # progress_bar is only bound when display_progress_bar is truthy.
            if display_progress_bar:
                progress_bar = ProgressBars(maxs)

            # The argument tuples below carry the returned object id together with
            # plasma_store_name instead of ``data`` itself.
            object_id = plasma_client.put(data)

            # One argument tuple per chunk; ``index`` is the chunk's position in
            # ``chunks``.
            workers_args = [(plasma_store_name, object_id, chunk, func,
                             display_progress_bar, queue, index, kwargs)
                            for index, chunk in enumerate(chunks)]

            # NOTE(review): ``amap`` is presumably the pool's asynchronous map --
            # confirm against the ProcessingPool implementation in use.
            result_workers = pool.amap(Series.worker_map, workers_args)
GitHub: nalepae / pandarallel / pandarallel / series_rolling.py (View on GitHub)
def closure(rolling, func, *args, **kwargs):
            pool = ProcessingPool(nb_workers)
            manager = Manager()
            queue = manager.Queue()

            ProgressBars = (
                ProgressBarsNotebookLab if in_notebook_lab else ProgressBarsConsole
            )

            series = rolling.obj
            window = rolling.window
            chunks = chunk(len(series), nb_workers, window)

            maxs = [chunk.stop - chunk.start for chunk in chunks]
            values = [0] * nb_workers
            finished = [False] * nb_workers

            if display_progress_bar:
                progress_bar = ProgressBars(maxs)

            object_id = plasma_client.put(series)

            attribute2value = {
                attribute: getattr(rolling, attribute)
                for attribute in rolling._attributes
            }

            workers_args = [
GitHub: nalepae / pandarallel / pandarallel / dataframe.py (View on GitHub)
pool = ProcessingPool(nb_workers)
            manager = Manager()
            queue = manager.Queue()

            ProgressBars = (
                ProgressBarsNotebookLab if in_notebook_lab else ProgressBarsConsole
            )

            axis = kwargs.get("axis", 0)
            if axis == "index":
                axis = 0
            elif axis == "columns":
                axis = 1

            opposite_axis = 1 - axis
            chunks = chunk(df.shape[opposite_axis], nb_workers)

            maxs = [chunk.stop - chunk.start for chunk in chunks]
            values = [0] * nb_workers
            finished = [False] * nb_workers

            if display_progress_bar:
                progress_bar = ProgressBars(maxs)

            object_id = plasma_client.put(df)

            workers_args = [
                (
                    plasma_store_name,
                    object_id,
                    chunk,
                    func,
GitHub: nalepae / pandarallel / pandarallel / series.py (View on GitHub)
def closure(series, func, *args, **kwargs):
            """Fan ``func`` out over ``series`` via ``Series.worker_apply`` on a worker pool.

            The visible lines split ``series.size`` into chunks with ``chunk``,
            store ``series`` through ``plasma_client.put`` and submit one
            argument tuple per chunk with ``pool.amap``. Extra positional and
            keyword arguments are forwarded to the workers unchanged, as the
            ``args`` tuple and ``kwargs`` dict.

            ``nb_workers``, ``in_notebook_lab``, ``display_progress_bar``,
            ``plasma_client`` and ``plasma_store_name`` are not defined in this
            block; they are read from an enclosing scope that is not shown.

            NOTE(review): this excerpt ends right after the ``amap`` call, so
            how ``result_workers``, ``values``, ``finished`` and
            ``progress_bar`` are consumed cannot be seen from here.
            """
            pool = ProcessingPool(nb_workers)
            manager = Manager()
            # Queue created through the manager and handed to every worker below;
            # presumably used by workers to report progress -- confirm in the worker.
            queue = manager.Queue()

            # Notebook/Lab progress bars when in_notebook_lab is truthy,
            # console progress bars otherwise.
            ProgressBars = (ProgressBarsNotebookLab if in_notebook_lab
                            else ProgressBarsConsole)

            chunks = chunk(series.size, nb_workers)

            # Length of each chunk (stop - start); passed to ProgressBars below.
            # The loop variable reuses the name ``chunk``; in Python 3 it is local
            # to the comprehension, so the ``chunk`` function is not rebound.
            maxs = [chunk.stop - chunk.start for chunk in chunks]
            values = [0] * nb_workers
            finished = [False] * nb_workers

            # progress_bar is only bound when display_progress_bar is truthy.
            if display_progress_bar:
                progress_bar = ProgressBars(maxs)

            # The argument tuples below carry the returned object id together with
            # plasma_store_name instead of ``series`` itself.
            object_id = plasma_client.put(series)

            # One argument tuple per chunk; ``index`` is the chunk's position in
            # ``chunks``.
            workers_args = [(plasma_store_name, object_id, chunk, func,
                             display_progress_bar, queue, index,
                             args, kwargs)
                            for index, chunk in enumerate(chunks)]

            # NOTE(review): ``amap`` is presumably the pool's asynchronous map --
            # confirm against the ProcessingPool implementation in use.
            result_workers = pool.amap(Series.worker_apply, workers_args)
GitHub: nalepae / pandarallel / pandarallel / rolling_groupby.py (View on GitHub)
def closure(rolling_groupby, func, *args, **kwargs):
            groups = list(rolling_groupby._groupby.groups.items())
            chunks = chunk(len(groups), nb_workers)
            object_id = plasma_client.put(rolling_groupby.obj)
            groups_id = plasma_client.put(groups)

            attribute2value = {
                attribute: getattr(rolling_groupby, attribute)
                for attribute in rolling_groupby._attributes
            }

            worker_args = [
                (
                    plasma_store_name,
                    object_id,
                    groups_id,
                    attribute2value,
                    chunk,
                    func,
GitHub: nalepae / pandarallel / pandarallel / dataframe.py (View on GitHub)
def closure(df, func):
            pool = ProcessingPool(nb_workers)
            manager = Manager()
            queue = manager.Queue()

            ProgressBars = (
                ProgressBarsNotebookLab if in_notebook_lab else ProgressBarsConsole
            )

            chunks = chunk(df.shape[0], nb_workers)

            maxs = [chunk.stop - chunk.start for chunk in chunks]
            values = [0] * nb_workers
            finished = [False] * nb_workers

            if display_progress_bar:
                progress_bar = ProgressBars(maxs)

            object_id = plasma_client.put(df)

            worker_args = [
                (
                    plasma_store_name,
                    object_id,
                    chunk,
                    func,
GitHub: nalepae / pandarallel / pandarallel / dataframe_groupby.py (View on GitHub)
def closure(df_grouped, func, *args, **kwargs):
            """Run ``DataFrameGroupBy.worker`` over chunks of the groups of ``df_grouped``.

            The visible lines materialise ``df_grouped.groups`` as a list of
            items, split that list's length into chunks with ``chunk``, store
            both ``df_grouped.obj`` and the group list through
            ``plasma_client.put``, and map the worker over one argument tuple
            per chunk.

            ``nb_workers``, ``plasma_client`` and ``plasma_store_name`` are not
            defined in this block; they are read from an enclosing scope that
            is not shown.

            NOTE(review): this excerpt stops inside the key-handling branch, so
            how ``result_workers`` and ``keys`` are combined into the final
            result cannot be seen from here.
            """
            groups = list(df_grouped.groups.items())
            # Chunks are computed over the number of groups, not over rows.
            chunks = chunk(len(groups), nb_workers)
            # Workers receive the two object ids below together with
            # plasma_store_name instead of the underlying object and group list.
            object_id = plasma_client.put(df_grouped.obj)
            groups_id = plasma_client.put(groups)

            # One argument tuple per chunk. The loop variable reuses the name
            # ``chunk``; in Python 3 it is local to the comprehension, so the
            # ``chunk`` function is not rebound.
            workers_args = [
                (plasma_store_name, object_id, groups_id, chunk, func, args, kwargs)
                for chunk in chunks
            ]

            # The pool is used as a context manager, and ``pool.map`` is called
            # inside the ``with`` block.
            with ProcessingPool(nb_workers) as pool:
                result_workers = pool.map(DataFrameGroupBy.worker, workers_args)

            if len(df_grouped.grouper.shape) == 1:
                # One element in "by" argument
                # NOTE(review): exact type comparison; a list subclass would not
                # take this branch.
                if type(df_grouped.keys) == list:
                    # "by" argument is a list with only one element
                    keys = df_grouped.keys[0]