How to use the pandas.Series class in pandas

To help you get started, we’ve selected a few pandas.Series examples, drawn from popular ways the class is used in public projects.
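
As a baseline, here is a minimal, self-contained sketch of constructing and indexing a Series (the data, labels, and name are made up for illustration):

import pandas as pd

# a Series is a one-dimensional labeled array
s = pd.Series([10, 20, 30], index=["a", "b", "c"], name="example")
print(s["b"])   # label-based lookup -> 20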


github hidasib / GRU4Rec / baselines.py View on Github
        Returns
        --------
        out : pandas.Series
            Prediction scores for selected items on how likely to be the next item of this session. Indexed by the item IDs.
        
        '''
        if self.prev_session_id != session_id:
            # new session: reset the per-session item counts
            self.prev_session_id = session_id
            self.pers = dict()
        # count how many times each input item has been seen in this session
        v = self.pers.get(input_item_id)
        if v:
            self.pers[input_item_id] = v + 1
        else:
            self.pers[input_item_id] = 1
        preds = np.zeros(len(predict_for_item_ids))
        # global popularity scores for the requested items
        mask = np.in1d(predict_for_item_ids, self.pop_list.index)
        ser = pd.Series(self.pers)
        preds[mask] = self.pop_list[predict_for_item_ids[mask]]
        # add the in-session counts on top, aligned by item ID
        mask = np.in1d(predict_for_item_ids, ser.index)
        preds[mask] += ser[predict_for_item_ids[mask]]
        return pd.Series(data=preds, index=predict_for_item_ids)
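
A minimal, self-contained sketch of the same lookup pattern, with made-up item IDs and counts:

import numpy as np
import pandas as pd

counts = {101: 3, 205: 1}                       # hypothetical per-session item counts
item_ids = np.array([101, 205, 307])            # hypothetical items to score

scores = np.zeros(len(item_ids))
ser = pd.Series(counts)                         # dict keys become the Series index
mask = np.isin(item_ids, ser.index)             # which requested items have a count
scores[mask] += ser[item_ids[mask]].to_numpy()  # label-based lookup by item ID
print(pd.Series(data=scores, index=item_ids))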
 
github bashtage / arch / arch / multivariate / base.py View on Github
def std_err(self):
        """
        Array of parameter standard errors
        """
        return pd.Series(np.sqrt(np.diag(self.param_cov)),
                         index=self._names, name='std_err')
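
A standalone sketch of the same idea, using a hypothetical covariance matrix and parameter names:

import numpy as np
import pandas as pd

param_cov = np.array([[0.04, 0.0], [0.0, 0.09]])   # made-up parameter covariance
names = ["alpha", "beta"]                          # made-up parameter names

std_err = pd.Series(np.sqrt(np.diag(param_cov)), index=names, name="std_err")
print(std_err)   # alpha 0.2, beta 0.3
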
github pydata / patsy / patsy / categorical.py View on Github
                      lambda x: C(pandas.Series(x))]
        for prep in preps:
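
The fragment above is a truncated test; the pattern it exercises is wrapping raw data in pandas.Series before treating it as categorical with patsy's C(). A minimal sketch under that assumption, using C() inside a formula and made-up data:

import pandas as pd
from patsy import dmatrix

s = pd.Series(["a", "b", "a", "c"])   # hypothetical categorical data
# C() inside the formula marks the Series as categorical; "- 1" drops the intercept
print(dmatrix("C(x) - 1", {"x": s}))
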
github zlatko-minev / pyEPR / pyEPR / core_distributed_analysis.py View on Github
def _update_ansys_variables(self, variations=None):
        """
        Updates the list of Ansys HFSS variables for the set of sweeps.
        """
        variations = variations or self.variations
        for variation in variations:
            self._hfss_variables[variation] = pd.Series(
                self.get_variables(variation=variation))
        return self._hfss_variables
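
A standalone sketch of the same dict-to-Series pattern, with hypothetical variable names and values:

import pandas as pd

variables = {"Lj": "12nH", "pad_gap": "30um"}   # made-up design variables
ser = pd.Series(variables)                      # index = variable names
print(ser)
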
github ijmarshall / cochrane-nlp / experiments / ct.gov / cnn / train.py View on Github
def batch_generator():
                """Perform stratified sampling
                
                It is assumed we are doing binary classification because it
                doesn't make sense to do this in the multiclass setting.
                
                """
                ys_train = self.ys_train.flatten() # assume no multi-task
                class_counts = pd.Series(ys_train).value_counts()
                # idxmin/idxmax return the class labels; Series.argmin/argmax
                # return integer positions in modern pandas
                rare_class, common_class = class_counts.idxmin(), class_counts.idxmax()

                rare_idxs = np.argwhere(ys_train == rare_class).flatten()
                common_idxs = np.argwhere(ys_train == common_class).flatten()

                while True:
                    # do stratified sampling
                    rare_batch_idxs = np.random.choice(rare_idxs, size=int(batch_size*mb_ratio) + 1)
                    common_batch_idxs = np.random.choice(common_idxs, size=int(batch_size*(1-mb_ratio)))
                    batch_idxs = np.concatenate([rare_batch_idxs, common_batch_idxs])

                    train_data = {}
                    for label in self.label_names:
                        train_data[label] = self.train_data[label][batch_idxs]

                    train_data['input'] = self.train_data['input'][batch_idxs]
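
A minimal sketch of the counting step, with made-up binary labels (idxmin/idxmax return the class labels themselves):

import numpy as np
import pandas as pd

ys = np.array([0, 0, 0, 1, 0, 1, 0])            # hypothetical binary labels
class_counts = pd.Series(ys).value_counts()     # counts indexed by class label
rare_class, common_class = class_counts.idxmin(), class_counts.idxmax()
rare_idxs = np.argwhere(ys == rare_class).flatten()
print(rare_class, common_class, rare_idxs)      # 1 0 [3 5]
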
github akrherz / iem / htdocs / plotting / auto / scripts / p4.py View on Github
        ets = datetime.date(year, 12, 31)
        today = datetime.date.today()
        if ets > today:
            ets = today
        days = []
        coverage = []
        while now <= ets:
            idx = iemre.daily_offset(now)
            sevenday = np.sum(precip[(idx - period) : idx, :, :], 0)
            pday = np.where(hasdata > 0, sevenday[:, :], -1)
            tots = np.sum(np.where(pday >= (threshold * 25.4), 1, 0))
            days.append(now)
            coverage.append(tots / float(datapts) * 100.0)

            now += datetime.timedelta(days=1)
    df = pd.DataFrame(dict(day=pd.Series(days), coverage=pd.Series(coverage)))

    (fig, ax) = plt.subplots(1, 1)
    ax.bar(days, coverage, fc="g", ec="g")
    ax.set_title(
        (
            "%s IEM Estimated Areal Coverage Percent of %s\n"
            " receiving %.2f inches of rain over trailing %s day period"
        )
        % (year, reference.state_names[state], threshold, period)
    )
    ax.set_ylabel("Areal Coverage [%]")
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%b\n%-d"))
    ax.set_yticks(range(0, 101, 25))
    ax.grid(True)
    return fig, df
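
A standalone sketch of building a DataFrame from two Series, with made-up days and coverage values:

import datetime
import pandas as pd

days = [datetime.date(2024, 1, d) for d in (1, 2, 3)]   # hypothetical dates
coverage = [10.0, 25.5, 40.0]                           # hypothetical percentages
df = pd.DataFrame(dict(day=pd.Series(days), coverage=pd.Series(coverage)))
print(df)
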
github ver228 / tierpsy-tracker / work_in_progress / Curro_Analysis / group_data.py View on Github
def getPValues(control_avg, curr_avg):

    p_values = pd.Series(index=curr_avg.columns, dtype=float)  # explicit dtype; modern pandas otherwise defaults to object
    for feat in curr_avg.columns:
        x = control_avg[feat].values.astype(float)  # np.float alias was removed in NumPy 1.24
        y = curr_avg[feat].values.astype(float)
        x = x[~np.isnan(x)]
        y = y[~np.isnan(y)]
        #if np.all(np.isnan(x)) or np.all(np.isnan(y)):
        #    continue
        _, p_value = ttest_ind(x,y, equal_var=False)
        #_, p_value = ranksums(x,y)
        
        #p_value positive if N2 is larger than the strain
        p_values[feat] = p_value

    p_values = p_values.dropna()
    #correct for false discovery rate using 2-stage Benjamini-Krieger-Yekutieli
    reject, pvals_corrected, alphacSidak, alphacBonf = \
        multipletests(p_values.values, method='fdr_tsbky')  # statsmodels.stats.multitest
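
A minimal, self-contained sketch of the same fill-a-Series-by-label pattern, with hypothetical feature names and random data:

import numpy as np
import pandas as pd
from scipy.stats import ttest_ind

rng = np.random.default_rng(0)
feats = ["speed", "length"]                     # hypothetical feature names
p_values = pd.Series(index=feats, dtype=float)  # NaN-filled float Series
for feat in feats:
    _, p_values[feat] = ttest_ind(rng.normal(size=20),
                                  rng.normal(0.5, 1.0, size=20),
                                  equal_var=False)
print(p_values.dropna())
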
github MITHaystack / scikit-discovery / skdiscovery / utilities / patterns / trend_tools.py View on Github
def interpNaN(data):
    '''
    Interpolate data using a linear interpolation

    @param data: 1d numpy array or pandas Series with possible NaNs
    @return data after interpolation
    '''

    if isinstance(data, np.ndarray):
        data = pd.Series(data)
        return data.interpolate().to_numpy()  # as_matrix() was removed in pandas 1.0

    elif isinstance(data, pd.Series):
        return data.interpolate()
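
A standalone sketch of the interpolation round-trip:

import numpy as np
import pandas as pd

arr = np.array([1.0, np.nan, 3.0, np.nan, 5.0])
filled = pd.Series(arr).interpolate().to_numpy()   # linear interpolation over NaNs
print(filled)                                      # [1. 2. 3. 4. 5.]
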
github dennybritz / reinforcement-learning / lib / plotting.py View on Github
def plot_episode_stats(stats, smoothing_window=10, noshow=False):
    # Plot the episode length over time
    fig1 = plt.figure(figsize=(10,5))
    plt.plot(stats.episode_lengths)
    plt.xlabel("Episode")
    plt.ylabel("Episode Length")
    plt.title("Episode Length over Time")
    if noshow:
        plt.close(fig1)
    else:
        plt.show()  # plt.show() takes no figure argument in modern Matplotlib

    # Plot the episode reward over time
    fig2 = plt.figure(figsize=(10,5))
    rewards_smoothed = pd.Series(stats.episode_rewards).rolling(smoothing_window, min_periods=smoothing_window).mean()
    plt.plot(rewards_smoothed)
    plt.xlabel("Episode")
    plt.ylabel("Episode Reward (Smoothed)")
    plt.title("Episode Reward over Time (Smoothed over window size {})".format(smoothing_window))
    if noshow:
        plt.close(fig2)
    else:
        plt.show()

    # Plot time steps and episode number
    fig3 = plt.figure(figsize=(10,5))
    plt.plot(np.cumsum(stats.episode_lengths), np.arange(len(stats.episode_lengths)))
    plt.xlabel("Time Steps")
    plt.ylabel("Episode")
    plt.title("Episode per time step")
    if noshow:
        plt.close(fig3)
    else:
        plt.show()

    return fig1, fig2, fig3
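
A minimal sketch of the rolling-mean smoothing step, with made-up rewards:

import pandas as pd

rewards = [1.0, 0.0, 2.0, 3.0, 1.0]                # hypothetical per-episode rewards
smoothed = pd.Series(rewards).rolling(3, min_periods=3).mean()
print(smoothed)                                    # NaN until the window fills, then the 3-episode mean
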
github autonomio / autonomio / old / multi_dim.py View on Github
def _prepare_data(self):
        
        if self.data.shape[1] != 4:
            
            print "shape of the data is wrong > you need 4 variable columns"
        
        else:
            self.data['single_input_vector'] = self.data.apply(tuple, axis=1).apply(list)
            self.data['single_input_vector'] = self.data.single_input_vector.apply(lambda x: [list(x)])
            self.data['cumulative_input_vectors'] = self.data.single_input_vector.cumsum()
            self.data['output_vector'] = self.data[[1]].apply(tuple, axis=1).apply(list)

            self.max_sequence_length = self.data['cumulative_input_vectors'].apply(len).max()
            self.padded_sequences = pad_sequences(self.data['cumulative_input_vectors'].tolist(), self.max_sequence_length).tolist()
            self.data['padded_input_vectors'] = pd.Series(self.padded_sequences).apply(np.asarray)

        return 'NULL'
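
A standalone sketch of the final pattern, converting a list of padded sequences into a Series of ndarrays, with made-up data:

import numpy as np
import pandas as pd

padded = [[0, 0, 1], [0, 1, 2], [1, 2, 3]]   # hypothetical padded sequences
col = pd.Series(padded).apply(np.asarray)    # one ndarray per row
print(type(col.iloc[0]), col.iloc[0])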