How to use the tqdm.notebook.tqdm function in tqdm

To help you get started, we've selected a few tqdm.notebook.tqdm examples based on popular ways it is used in public projects.

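Every example below follows the same basic idea: wrap an iterable (or drive the bar manually) with `tqdm.notebook.tqdm`, which renders an ipywidgets-based progress bar in Jupyter. A minimal sketch of the plain wrapping pattern, assuming a notebook environment with ipywidgets installed:

from tqdm.notebook import tqdm

# Wrapping any sized iterable gives a bar with automatic total and rate.
total = 0
for i in tqdm(range(1000), desc="Summing"):
    total += i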

github scrapinghub / arche / src / arche / rules / others.py (View on GitHub)
def garbage_symbols(df: pd.DataFrame) -> Result:
    """Find unwanted symbols in `np.object` columns.

    Returns:
        A result containing item keys per field which contained any trash symbol
    """
    garbage = (
        r"(?P<spaces>^\s|\s$)"
        r"|(?P<html_entities>&[a-zA-Z]{2,}?;|&#\d*?;)"
        r"|(?P<css>[.#@][^\d{}#.\s][^{}#.]+?{(?:[^:;{}]+?:[^:;{}]+?;)+?\s*?})"
        r"|(?P<html_tags><\w+.*?>|</\w+\s*>)"
    )

    errors = {}
    row_keys: Set = set()
    rule_result = Result("Garbage Symbols", items_count=len(df))

    for column in tqdm(df.select_dtypes([np.object]).columns, desc="Garbage Symbols"):
        matches = df[column].apply(str).str.extractall(garbage, flags=re.IGNORECASE)
        if not matches.empty:
            error_keys = df.loc[matches.unstack().index.values].index
            bad_texts = matches.stack().value_counts().index.sort_values().tolist()
            # escape backslashes for markdown repr, `\n > \\n`
            bad_texts = [
                f"'{codecs.encode(bx, 'unicode_escape').decode()[:20]}'"
                for bx in bad_texts
            ]
            error = (
                f"{len(error_keys)/len(df)*100:.1f}% of '{column}' "
                f"values contain `{', '.join(bad_texts)}`"
            )

            errors[error] = list(error_keys)
            row_keys = row_keys.union(error_keys)
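The loop above wraps `df.select_dtypes([np.object]).columns` so the bar ticks once per scanned column and carries a `desc` label. (Note that `np.object` is deprecated in recent NumPy; plain `"object"` is the modern spelling.) A runnable sketch of the same per-column pattern on a throwaway DataFrame, independent of arche's types:

import pandas as pd
from tqdm.notebook import tqdm

df = pd.DataFrame({"a": ["x ", "y"], "b": ["&amp;", "ok"]})
for column in tqdm(df.select_dtypes(["object"]).columns, desc="Garbage Symbols"):
    # count cells with leading/trailing whitespace or an HTML entity
    print(column, df[column].str.contains(r"^\s|\s$|&\w+;").sum())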
github tqdm / tqdm / tqdm / __init__.py (View on GitHub)
def tqdm_notebook(*args, **kwargs):  # pragma: no cover
    """See tqdm.notebook.tqdm for full documentation"""
    from .notebook import tqdm as _tqdm_notebook
    from warnings import warn
    warn("This function will be removed in tqdm==5.0.0\n"
         "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`",
         TqdmDeprecationWarning, stacklevel=2)
    return _tqdm_notebook(*args, **kwargs)
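The shim simply forwards to the new location while emitting a deprecation warning, so migrating caller code is a one-line import change:

# Deprecated spelling (still works, but warns):
#     from tqdm import tqdm_notebook
#     for x in tqdm_notebook(range(10)): ...

# Recommended spelling; the shim is slated for removal in tqdm==5.0.0:
from tqdm.notebook import tqdm

for x in tqdm(range(10)):
    pass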
github materialsproject / MPContribs / mpcontribs-client / mpcontribs / client / __init__.py (View on GitHub)
def submit_contributions(
        self, contributions, skip_dupe_check=False, ignore_dupes=False, limit=200
    ):
        """Convenience function to submit a list of contributions"""
        # prepare structures/tables
        with tqdm(total=len(contributions)) as pbar:
            existing = {
                "ids": set(),
                "identifiers": set(),
                "structures": set(),
                "tables": set(),
            }
            unique_identifiers = True

            if not skip_dupe_check:
                name = contributions[0]["project"]
                resp = self.projects.get_entry(
                    pk=name, _fields=["unique_identifiers"]
                ).result()
                unique_identifiers = resp["unique_identifiers"]

                pbar.set_description("Get existing contribution(s)")
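This method uses the manual pattern: construct the bar with a known `total`, label the current phase with `set_description`, and advance it with explicit `update` calls. A self-contained sketch of that pattern, with `batch` as a hypothetical stand-in for the client's `limit` parameter:

from tqdm.notebook import tqdm

items = list(range(1000))
batch = 200  # stand-in for `limit`
with tqdm(total=len(items)) as pbar:
    for start in range(0, len(items), batch):
        chunk = items[start:start + batch]
        pbar.set_description(f"Submitting {len(chunk)} item(s)")
        # ... submit `chunk` here ...
        pbar.update(len(chunk))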
github scrapinghub / arche / src / arche / readers / items.py (View on GitHub)
def categorize(df: pd.DataFrame) -> pd.DataFrame:
        """Cast columns with repeating values to `category` type to save memory"""
        if len(df) < 100:
            return
        for c in tqdm(df.columns, desc="Categorizing"):
            try:
                if df[c].nunique(dropna=False) <= 10:
                    df[c] = df[c].astype("category")
            # ignore lists and dicts columns
            except TypeError:
                continue
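The try/except matters because columns holding lists or dicts are unhashable, so `nunique` raises TypeError on them. A runnable sketch of the same loop on a toy frame:

import pandas as pd
from tqdm.notebook import tqdm

df = pd.DataFrame({"color": ["red", "blue"] * 100, "n": range(200)})
for c in tqdm(df.columns, desc="Categorizing"):
    try:
        if df[c].nunique(dropna=False) <= 10:
            df[c] = df[c].astype("category")
    except TypeError:  # unhashable values, e.g. lists
        continue
print(df.dtypes)  # `color` becomes category, `n` stays int64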
github cogent3 / cogent3 / src / cogent3 / util / progress_display.py (View on GitHub)
def f(*args, **kw):
        if getattr(CURRENT, "context", None) is None:
            if sys.stdout.isatty():
                klass = tqdm
            elif using_notebook():
                klass = notebook.tqdm
            elif isinstance(sys.stdout, io.FileIO):
                klass = LogFileOutput
            else:
                klass = None

            if klass is None:
                CURRENT.context = NULL_CONTEXT
            else:
                CURRENT.context = ProgressContext(klass)
        parent = CURRENT.context
        show_progress = kw.pop("show_progress", None)
        if show_progress is False:
            subcontext = NULL_CONTEXT
        else:
            subcontext = parent.subcontext()
        kw["ui"] = CURRENT.context = subcontext
github scrapinghub / arche / src / arche / rules / category.py (View on GitHub)
def get_categories(df: pd.DataFrame, max_uniques: int = 10) -> Result:
    """Find category columns. A category column is the column which holds a limited number
    of possible values, including `NAN`.

    Args:
        df: data
        max_uniques: filter which determines which columns to use. Only columns with
            the number of unique values less than or equal to `max_uniques` are
            category columns.

    Returns:
        A result with stats containing value counts of categorical columns.
    """
    result = Result("Categories")

    columns = find_likely_cats(df, max_uniques)
    result.stats = [
        value_counts
        for value_counts in tqdm(
            map(lambda c: df[c].value_counts(dropna=False), columns),
            desc="Finding categories",
            total=len(columns),
        )
        if len(value_counts) <= max_uniques
    ]
    if not result.stats:
        result.add_info("Categories were not found")
        return result
    result.add_info(f"{len(result.stats)} category field(s)")
    result.outcome = Outcome.INFO
    return result
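Because `map` is lazy and has no `len`, the call passes `total=len(columns)`; without it tqdm would show a running count but no bar. The same point in isolation:

from tqdm.notebook import tqdm

values = [1, 2, 3, 4]
# map() has no len(); total=... lets tqdm render a real bar.
squares = list(tqdm(map(lambda v: v * v, values), desc="Squaring", total=len(values)))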
github materialsproject / MPContribs / mpcontribs-client / mpcontribs / client / __init__.py (View on GitHub)
def delete_contributions(self, project):
        """Convenience function to remove all contributions for a project"""
        resp = self.contributions.get_entries(
            project=project, _fields=["id"], _limit=1
        ).result()
        ncontribs = resp["total_count"]

        if ncontribs:
            has_more, limit = True, 250

            with tqdm(total=ncontribs) as pbar:
                pbar.set_description("Delete contribution(s)")
                while has_more:
                    resp = self.contributions.delete_entries(
                        project=project, _limit=limit
                    ).result()
                    has_more = resp["has_more"]
                    pbar.update(resp["count"])

                if resp["count"]:
                    self.load()
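Here the total is known up front, but each `delete_entries` call removes a server-chosen number of entries, so the bar advances by the reported `count` until `has_more` turns false. A self-contained sketch with `FakeServer` standing in for the API client:

from tqdm.notebook import tqdm

class FakeServer:
    # Hypothetical stand-in for self.contributions.delete_entries(...)
    def __init__(self, n):
        self.remaining = n

    def delete(self, limit):
        count = min(limit, self.remaining)
        self.remaining -= count
        return {"count": count, "has_more": self.remaining > 0}

server = FakeServer(1000)
with tqdm(total=1000) as pbar:
    pbar.set_description("Delete contribution(s)")
    has_more = True
    while has_more:
        resp = server.delete(250)
        has_more = resp["has_more"]
        pbar.update(resp["count"])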