How to use the tqdm.auto.tqdm.pandas function in tqdm

To help you get started, we’ve selected a few tqdm examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jmcarpenter2 / swifter / swifter / swifter_tests.py View on Github external
def test_vectorized_math_applymap_on_large_dataframe(self):
        """Check swifter's ``applymap`` against pandas' on a 1,000,000-row frame.

        A two-column DataFrame of random floats is pushed through
        ``math_vec_square`` twice: once with tqdm's ``progress_applymap`` and
        once with swifter's ``applymap``. Both calls are timed. The results
        must compare equal, and when ``self.ncores > 1`` the swifter call must
        be strictly faster than the pandas call.
        """
        LOG.info("test_vectorized_math_applymap_on_large_dataframe")
        df = pd.DataFrame({"x": np.random.normal(size=1_000_000), "y": np.random.uniform(size=1_000_000)})

        # tqdm.pandas() must run before any progress_* method is used; it is
        # kept outside the timed region so only the applymap call is measured.
        tqdm.pandas(desc="Pandas Vec math applymap ~ DF")
        # perf_counter is the high-resolution clock meant for measuring short
        # durations; unlike time.time() it is not skewed by wall-clock changes.
        pd_started = time.perf_counter()
        pd_val = df.progress_applymap(math_vec_square)
        pd_time = time.perf_counter() - pd_started

        swifter_started = time.perf_counter()
        swifter_val = df.swifter.progress_bar(desc="Vec math applymap ~ DF").applymap(math_vec_square)
        swifter_time = time.perf_counter() - swifter_started

        # NOTE(review): assertEqual on pandas objects presumably relies on a
        # type-specific equality function registered elsewhere (e.g. in setUp
        # via addTypeEqualityFunc) -- confirm.
        self.assertEqual(pd_val, swifter_val)  # equality test
        if self.ncores > 1:  # speed test
            self.assertLess(swifter_time, pd_time)
github jmcarpenter2 / swifter / swifter / swifter_tests.py View on Github external
def test_nonvectorized_math_apply_on_large_series(self):
        """Check swifter's Series ``apply`` against pandas' on 10,000,000 values.

        A Series of normally distributed floats is passed through ``math_foo``
        (called with ``compare_to=1``) twice: once with tqdm's
        ``progress_apply`` and once with swifter's ``apply``. Both calls are
        timed. The results must compare equal, and when ``self.ncores > 1`` the
        swifter call must be strictly faster than the pandas call.
        """
        LOG.info("test_nonvectorized_math_apply_on_large_series")
        df = pd.DataFrame({"x": np.random.normal(size=10_000_000)})
        series = df["x"]

        # tqdm.pandas() must run before any progress_* method is used; it is
        # kept outside the timed region so only the apply call is measured.
        tqdm.pandas(desc="Pandas Nonvec math apply ~ Series")
        # perf_counter is the high-resolution clock meant for measuring short
        # durations; unlike time.time() it is not skewed by wall-clock changes.
        pd_started = time.perf_counter()
        pd_val = series.progress_apply(math_foo, compare_to=1)
        pd_time = time.perf_counter() - pd_started

        swifter_started = time.perf_counter()
        swifter_val = series.swifter.progress_bar(desc="Nonvec math apply ~ Series").apply(math_foo, compare_to=1)
        swifter_time = time.perf_counter() - swifter_started

        # NOTE(review): assertEqual on pandas objects presumably relies on a
        # type-specific equality function registered elsewhere (e.g. in setUp
        # via addTypeEqualityFunc) -- confirm.
        self.assertEqual(pd_val, swifter_val)  # equality test
        if self.ncores > 1:  # speed test
            self.assertLess(swifter_time, pd_time)
github jmcarpenter2 / swifter / swifter / swifter_tests.py View on Github external
def test_vectorized_math_apply_on_large_dataframe(self):
        """Check swifter's row-wise ``apply`` against pandas' on a 1,000,000-row frame.

        A two-column DataFrame of random floats is passed through
        ``math_vec_multiply`` with ``axis=1`` twice: once with tqdm's
        ``progress_apply`` and once with swifter's ``apply``. Both calls are
        timed. The results must compare equal, and when ``self.ncores > 1`` the
        swifter call must be strictly faster than the pandas call.
        """
        LOG.info("test_vectorized_math_apply_on_large_dataframe")
        df = pd.DataFrame({"x": np.random.normal(size=1_000_000), "y": np.random.uniform(size=1_000_000)})

        # tqdm.pandas() must run before any progress_* method is used; it is
        # kept outside the timed region so only the apply call is measured.
        tqdm.pandas(desc="Pandas Vec math apply ~ DF")
        # perf_counter is the high-resolution clock meant for measuring short
        # durations; unlike time.time() it is not skewed by wall-clock changes.
        pd_started = time.perf_counter()
        pd_val = df.progress_apply(math_vec_multiply, axis=1)
        pd_time = time.perf_counter() - pd_started

        swifter_started = time.perf_counter()
        swifter_val = df.swifter.progress_bar(desc="Vec math apply ~ DF").apply(math_vec_multiply, axis=1)
        swifter_time = time.perf_counter() - swifter_started

        # NOTE(review): assertEqual on pandas objects presumably relies on a
        # type-specific equality function registered elsewhere (e.g. in setUp
        # via addTypeEqualityFunc) -- confirm.
        self.assertEqual(pd_val, swifter_val)  # equality test
        if self.ncores > 1:  # speed test
            self.assertLess(swifter_time, pd_time)
github jmcarpenter2 / swifter / swifter / test_dataframe.py View on Github external
def test_vectorized_math_apply_on_large_dataframe(self):
        """Compare pandas and swifter row-wise ``apply`` on a large DataFrame.

        Steps:
          1. Build a 1,000,000-row frame with a normal ``x`` column and a
             uniform ``y`` column.
          2. Time ``progress_apply(math_vec_multiply, axis=1)`` (tqdm + pandas).
          3. Time the same call routed through the ``swifter`` accessor.
          4. Assert both results are equal and, if ``self.ncores > 1``, that
             swifter took strictly less time.
        """
        LOG.info("test_vectorized_math_apply_on_large_dataframe")
        df = pd.DataFrame({"x": np.random.normal(size=1_000_000), "y": np.random.uniform(size=1_000_000)})

        # Progress-bar registration happens before the clock starts so it is
        # not counted against the pandas run.
        tqdm.pandas(desc="Pandas Vec math apply ~ DF")
        # Use perf_counter rather than time.time(): it is designed for
        # interval measurement and is immune to wall-clock adjustments.
        t0 = time.perf_counter()
        pd_val = df.progress_apply(math_vec_multiply, axis=1)
        pd_time = time.perf_counter() - t0

        t0 = time.perf_counter()
        swifter_val = df.swifter.progress_bar(desc="Vec math apply ~ DF").apply(math_vec_multiply, axis=1)
        swifter_time = time.perf_counter() - t0

        # NOTE(review): comparing DataFrames with assertEqual presumably
        # depends on an equality function registered for pd.DataFrame outside
        # this method -- confirm.
        self.assertEqual(pd_val, swifter_val)  # equality test
        if self.ncores > 1:  # speed test
            self.assertLess(swifter_time, pd_time)
github jmcarpenter2 / swifter / swifter / test_dataframe.py View on Github external
def test_nonvectorized_math_applymap_on_large_dataframe(self):
        """Compare pandas and swifter ``applymap`` with ``math_foo`` on a large DataFrame.

        Steps:
          1. Build a 5,000,000-row frame with a normal ``x`` column and a
             uniform ``y`` column.
          2. Time ``progress_applymap(math_foo)`` (tqdm + pandas).
          3. Time ``applymap(math_foo)`` routed through the ``swifter``
             accessor.
          4. Assert both results are equal and, if ``self.ncores > 1``, that
             swifter took strictly less time.
        """
        LOG.info("test_nonvectorized_math_applymap_on_large_dataframe")
        df = pd.DataFrame({"x": np.random.normal(size=5_000_000), "y": np.random.uniform(size=5_000_000)})

        # Progress-bar registration happens before the clock starts so it is
        # not counted against the pandas run.
        tqdm.pandas(desc="Pandas Nonvec math applymap ~ DF")
        # Use perf_counter rather than time.time(): it is designed for
        # interval measurement and is immune to wall-clock adjustments.
        t0 = time.perf_counter()
        pd_val = df.progress_applymap(math_foo)
        pd_time = time.perf_counter() - t0

        t0 = time.perf_counter()
        swifter_val = df.swifter.progress_bar(desc="Nonvec math applymap ~ DF").applymap(math_foo)
        swifter_time = time.perf_counter() - t0

        # NOTE(review): comparing DataFrames with assertEqual presumably
        # depends on an equality function registered for pd.DataFrame outside
        # this method -- confirm.
        self.assertEqual(pd_val, swifter_val)  # equality test
        if self.ncores > 1:  # speed test
            self.assertLess(swifter_time, pd_time)
github jmcarpenter2 / swifter / swifter / swifter_tests.py View on Github external
def test_vectorized_math_apply_on_large_series(self):
        """Check swifter's Series ``apply`` against pandas' on 1,000,000 values.

        A Series of normally distributed floats is passed through
        ``math_vec_square`` twice: once with tqdm's ``progress_apply`` and once
        with swifter's ``apply``. Both calls are timed. The results must
        compare equal, and when ``self.ncores > 1`` the swifter call must be
        strictly faster than the pandas call.
        """
        LOG.info("test_vectorized_math_apply_on_large_series")
        df = pd.DataFrame({"x": np.random.normal(size=1_000_000)})
        series = df["x"]

        # tqdm.pandas() must run before any progress_* method is used; it is
        # kept outside the timed region so only the apply call is measured.
        tqdm.pandas(desc="Pandas Vec math apply ~ Series")
        # perf_counter is the high-resolution clock meant for measuring short
        # durations; unlike time.time() it is not skewed by wall-clock changes.
        pd_started = time.perf_counter()
        pd_val = series.progress_apply(math_vec_square)
        pd_time = time.perf_counter() - pd_started

        swifter_started = time.perf_counter()
        # NOTE(review): axis=0 is passed only to the swifter call, not to the
        # pandas call above -- confirm the asymmetry is intentional.
        swifter_val = series.swifter.progress_bar(desc="Vec math apply ~ Series").apply(math_vec_square, axis=0)
        swifter_time = time.perf_counter() - swifter_started

        # NOTE(review): assertEqual on pandas objects presumably relies on a
        # type-specific equality function registered elsewhere (e.g. in setUp
        # via addTypeEqualityFunc) -- confirm.
        self.assertEqual(pd_val, swifter_val)  # equality test
        if self.ncores > 1:  # speed test
            self.assertLess(swifter_time, pd_time)
github jmcarpenter2 / swifter / swifter / swifter.py View on Github external
np.array_equal(sample_df, tmp_df) & (sample_df.shape == tmp_df.shape),
                    error_message="Vectorized function sample doesn't match pandas apply sample.",
                )
            return func(self._obj, *args, **kwds)
        except ERRORS_TO_HANDLE:  # if can't vectorize, estimate time to pandas apply
            wrapped = self._wrapped_apply(func, convert_dtype=convert_dtype, args=args, **kwds)
            timed = timeit.timeit(wrapped, number=N_REPEATS)
            sample_proc_est = timed / N_REPEATS
            est_apply_duration = sample_proc_est / self._SAMPLE_SIZE * self._obj.shape[0]

            # if pandas sample apply takes too long and not performing str processing, use dask
            if (est_apply_duration > self._dask_threshold) and allow_dask_processing:
                return self._dask_apply(func, convert_dtype, *args, **kwds)
            else:  # use pandas
                if self._progress_bar:
                    tqdm.pandas(desc=self._progress_bar_desc or "Pandas Apply")
                    return self._obj.progress_apply(func, convert_dtype=convert_dtype, args=args, **kwds)
                else:
                    return self._obj.apply(func, convert_dtype=convert_dtype, args=args, **kwds)
github jmcarpenter2 / swifter / swifter / swifter.py View on Github external
# if the transformed dataframe is empty, return early using Pandas
        if not self._nrows:
            return self._obj_pd.apply(func, args=args, **kwds)

        # estimate time to pandas apply
        wrapped = self._wrapped_apply(func, *args, **kwds)
        timed = timeit.timeit(wrapped, number=N_REPEATS)
        sample_proc_est = timed / N_REPEATS
        est_apply_duration = sample_proc_est / self._SAMPLE_SIZE * self._nrows

        # No `allow_dask_processing` variable here, because we don't know the dtypes of the transformation
        if est_apply_duration > self._dask_threshold:
            return self._dask_apply(func, *args, **kwds)
        else:  # use pandas
            if self._progress_bar and hasattr(self._obj_pd, "progress_apply"):
                tqdm.pandas(desc=self._progress_bar_desc or "Pandas Apply")
                return self._obj_pd.progress_apply(func, *args, **kwds)
            else:
                return self._obj_pd.apply(func, *args, **kwds)
github mljar / mljar-supervised / supervised / automl.py View on Github external
import sys
import json
import copy
import time
import numpy as np
import pandas as pd

from tqdm.auto import tqdm

tqdm.pandas()

from supervised.models.learner_xgboost import XgbLearner
from supervised.iterative_learner_framework import IterativeLearner
from supervised.callbacks.early_stopping import EarlyStopping
from supervised.callbacks.metric_logger import MetricLogger
from supervised.callbacks.time_constraint import TimeConstraint
from supervised.metric import Metric
from supervised.tuner.random_parameters import RandomParameters
from supervised.tuner.registry import ModelsRegistry
from supervised.tuner.registry import BINARY_CLASSIFICATION
from supervised.tuner.preprocessing_tuner import PreprocessingTuner
from supervised.tuner.hill_climbing import HillClimbing
from supervised.models.ensemble import Ensemble
from supervised.models.compute_additional_metrics import ComputeAdditionalMetrics
from supervised.preprocessing.preprocessing_exclude_missing import (
    PreprocessingExcludeMissingValues,
github jmcarpenter2 / swifter / swifter / swifter.py View on Github external
with TQDMDaskProgressBar(desc=self._progress_bar_desc or "Dask Apply"):
                    return (
                        dd.from_pandas(self._obj, npartitions=self._npartitions)
                        .apply(func, *args, axis=axis, raw=raw, result_type=result_type, meta=meta, **kwds)
                        .compute(scheduler=self._scheduler)
                    )
            else:
                return (
                    dd.from_pandas(self._obj, npartitions=self._npartitions)
                    .apply(func, *args, axis=axis, raw=raw, result_type=result_type, meta=meta, **kwds)
                    .compute(scheduler=self._scheduler)
                )
        except ERRORS_TO_HANDLE:
            # if dask apply doesn't match pandas apply, fallback to pandas
            if self._progress_bar:
                tqdm.pandas(desc=self._progress_bar_desc or "Pandas Apply")
                apply_func = self._obj.progress_apply
            else:
                apply_func = self._obj.apply

            return apply_func(func, axis=axis, raw=raw, result_type=result_type, args=args, **kwds)