How to use the retentioneering.visualization.plot module in retentioneering

To help you get started, we’ve selected a few retentioneering examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github retentioneering / retentioneering-tools / retentioneering / core / utils.py View on Github external
def core_event_distribution(self, core_events, index_col=None, event_col=None,
                                thresh=None, plotting=True, use_greater=True, **kwargs):
        """
        Compute the share of core events per index value and filter rows by it.

        For every value of the index column the mean of the boolean
        ``is_core_event`` flag is computed, i.e. the fraction of that
        group's rows whose event is one of ``core_events``. Rows belonging
        to groups that pass the threshold test are returned.

        Parameters
        ----------
        core_events : str or list-like
            Event name(s) treated as core events. A single string is
            wrapped into a one-element list.
        index_col : optional
            Forwarded to ``self._init_cols`` through ``locals()``
            (presumably overrides the index column name -- confirm against
            ``_init_cols``).
        event_col : optional
            Forwarded to ``self._init_cols`` through ``locals()``
            (presumably overrides the event column name -- confirm against
            ``_init_cols``).
        thresh : float, optional
            Threshold applied to the per-group core event share. When
            ``None`` no filtering is applied and the rows of every group
            with a computed share are returned.
        plotting : bool
            If true, ``plot.core_event_dist(rates, thresh, **kwargs)`` is
            called before filtering.
        use_greater : bool
            If true keep groups with ``share >= thresh``, otherwise keep
            groups with ``share < thresh``.
        **kwargs
            Passed through to ``plot.core_event_dist``.

        Returns
        -------
        pandas.DataFrame
            Rows of ``self._obj`` for the selected groups, with the index
            reset.

        Notes
        -----
        A boolean ``is_core_event`` column is written to ``self._obj`` as a
        side effect and is therefore also present in the returned frame.
        """
        # Must stay the first statement: locals() has to contain only the
        # call arguments when it is handed over to _init_cols.
        self._init_cols(locals())
        # isinstance also accepts str subclasses, unlike the exact type check.
        if isinstance(core_events, str):
            core_events = [core_events]
        self._obj['is_core_event'] = self._obj[self._event_col()].isin(core_events)
        rates = self._obj.groupby(self._index_col()).is_core_event.mean()
        if plotting:
            plot.core_event_dist(rates, thresh, **kwargs)
        if thresh is None:
            # Comparing the float shares with None is not a valid ordering
            # comparison, so the default threshold means "keep everything".
            selected = rates.index
        elif use_greater:
            selected = set(rates[rates >= thresh].index.values)
        else:
            selected = set(rates[rates < thresh].index.values)
        return self._obj[self._obj[self._index_col()].isin(selected)].reset_index(drop=True)
github retentioneering / retentioneering-tools / retentioneering / core / utils.py View on Github external
[self._event_col()]
                   .value_counts()
                   .loc[top_cluster['index']]
                   / clus2.shape[0]).reset_index()
        cr1 = (
            clus2[
                clus2[self._event_col()] == self.retention_config['positive_target_event']
            ][self._index_col()].nunique()
        ) / clus2[self._index_col()].nunique()
        top_all.columns = [self._event_col(), 'freq', ]
        top_cluster.columns = [self._event_col(), 'freq', ]

        top_all['hue'] = 'all' if cl2 is None else f'cluster {cl2}'
        top_cluster['hue'] = f'cluster {cl1}'

        plot.cluster_event_dist(
            top_all.append(top_cluster, ignore_index=True, sort=False),
            self._event_col(),
            cl1,
            [
                clus[self._index_col()].nunique() / self._obj[self._index_col()].nunique(),
                clus2[self._index_col()].nunique() / self._obj[self._index_col()].nunique(),
             ],
            [cr0, cr1],
            cl2
        )
github retentioneering / retentioneering-tools / retentioneering / core / utils.py View on Github external
features = self.extract_features(**kwargs)
        if not hasattr(self, 'clusters') or refit_cluster:
            clusterer = getattr(clustering, method)
            self.clusters, self._metrics = clusterer(features, **kwargs)
            self._create_cluster_mapping(features.index.values)

        if hasattr(self, 'datatype') and self.datatype == 'features':
            target = kwargs.pop('target')
        else:
            target = self.get_positive_users(**kwargs)
        target = features.index.isin(target)
        self._metrics['homogen'] = clustering.homogeneity_score(target, self.clusters)
        if hasattr(self, '_tsne'):
            features.retention._tsne = self._tsne
        if plot_type:
            func = getattr(plot, plot_type)
            res = func(
                features,
                clustering.aggregate_cl(self.clusters, 7) if method == 'dbscan' else self.clusters,
                target,
                metrics=self._metrics,
                **kwargs
            )
            if res is not None:
                self._tsne = res
        return self.clusters
github retentioneering / retentioneering-tools / retentioneering / core / utils.py View on Github external
f_cur = self._obj[self._event_col()] == event_order[0]
        f_next = self._obj['next_event'] == event_order[1]
        s_next = self._obj[f_cur & f_next].copy()
        s_cur = self._obj[f_cur & (~f_next)].copy()

        s_cur.time_diff[s_cur.time_diff < limit].hist(alpha=0.5, log=True,
                                                      bins=bins, label='Others {:.2f}'.format(
                                                          (s_cur.time_diff < limit).sum() / f_cur.sum()
                                                      ))
        s_next.time_diff[s_next.time_diff < limit].hist(alpha=0.7, log=True,
                                                        bins=bins,
                                                        label='Selected event order {:.2f}'.format(
                                                            (s_next.time_diff < limit).sum() / f_cur.sum()
                                                        ))
        plot.sns.mpl.pyplot.legend()
        plot.sns.mpl.pyplot.show()
        (s_cur.next_event.value_counts() / f_cur.sum()).iloc[:topk].plot.bar()
github retentioneering / retentioneering-tools / retentioneering / core / utils.py View on Github external
elif sample_size is not None:
            features = features.sample(n=sample_size, random_state=0)

        if not (hasattr(self, '_tsne') and not refit):
            self._tsne = feature_extraction.learn_tsne(features, **kwargs)
        if plot_type == 'clusters':
            if kwargs.get('cmethod') is not None:
                kwargs['method'] = kwargs.pop('cmethod')
            old_targs = targets.copy()
            targets = self.get_clusters(plot_type=None, **kwargs)
        elif plot_type == 'targets':
            targets = self._tsne_targets
        else:
            return self._tsne
        if proj_type == '3d':
            plot.tsne_3d(
                self._obj,
                clustering.aggregate_cl(targets, 7) if kwargs.get('method') == 'dbscan' else targets,
                old_targs,
                **kwargs
            )
        else:
            plot.cluster_tsne(
                self._obj,
                clustering.aggregate_cl(targets, 7) if kwargs.get('method') == 'dbscan' else targets,
                targets,
                **kwargs
            )
        return self._tsne
github retentioneering / retentioneering-tools / retentioneering / core / utils.py View on Github external
piv = pd.DataFrame(res.mean(2), index=base.index, columns=base.columns)
        stds = pd.DataFrame(res.std(2), index=base.index, columns=base.columns)

        if not kwargs.get('reverse'):
            for i in self.retention_config['target_event_list']:
                piv = piv.append(self._add_accums(piv, i))
        if kwargs.get('thr'):
            thr = kwargs.pop('thr')
            piv = self._process_thr(piv, thr, kwargs.get('max_steps' or 30), **kwargs)
        if kwargs.get('sorting'):
            piv = self._sort_matrix(piv)
        if not kwargs.get('for_diff'):
            if kwargs.get('reverse'):
                piv.columns = ['n'] + ['n - {}'.format(i - 1) for i in piv.columns[1:]]
        if plot_type:
            plot.step_matrix(
                piv.round(2),
                title=kwargs.get('title',
                                 'Step matrix {}'
                                 .format('reversed' if kwargs.get('reverse') else '')), **kwargs)
            plot.step_matrix(
                stds.round(3),
                title=kwargs.get('title',
                                 'Step matrix std'), **kwargs)
        if kwargs.get('dt_means') is not None:
            means = np.array(self._obj.groupby('event_rank').apply(
                lambda x: (x.next_timestamp - x.event_timestamp).dt.total_seconds().mean()
            ))
            piv = pd.concat([piv, pd.DataFrame([means[:kwargs.get('max_steps' or 30)]],
                                               columns=piv.columns, index=['dt_mean'])])
        return piv, stds
github retentioneering / retentioneering-tools / retentioneering / core / utils.py View on Github external
kwargs['method'] = kwargs.pop('cmethod')
            old_targs = targets.copy()
            targets = self.get_clusters(plot_type=None, **kwargs)
        elif plot_type == 'targets':
            targets = self._tsne_targets
        else:
            return self._tsne
        if proj_type == '3d':
            plot.tsne_3d(
                self._obj,
                clustering.aggregate_cl(targets, 7) if kwargs.get('method') == 'dbscan' else targets,
                old_targs,
                **kwargs
            )
        else:
            plot.cluster_tsne(
                self._obj,
                clustering.aggregate_cl(targets, 7) if kwargs.get('method') == 'dbscan' else targets,
                targets,
                **kwargs
            )
        return self._tsne