How to use the pandas.isnull function in pandas

To help you get started, we’ve selected a few pandas.isnull examples, based on popular ways it is used in public projects.
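Before the project snippets, here is a minimal, self-contained sketch of what pandas.isnull does: it returns True for missing values (None, NaN, NaT) and works on scalars, Series and DataFrames alike. The column names below are purely illustrative.

import numpy as np
import pandas as pd

# scalars: NaN, None and NaT are all treated as missing
print(pd.isnull(np.nan))   # True
print(pd.isnull(None))     # True
print(pd.isnull(pd.NaT))   # True
print(pd.isnull(3.0))      # False

# element-wise on a Series or DataFrame
s = pd.Series([1.0, np.nan, 3.0])
print(pd.isnull(s))        # boolean Series: [False, True, False]

df = pd.DataFrame({"a": [1.0, np.nan], "b": [None, "x"]})
print(pd.isnull(df))       # boolean DataFrame of the same shape
print(pd.isnull(df).sum()) # missing-value count per column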


github wateraccounting / wa / Sheets / sheet3.py
    if not pd.isnull(lp_r02c01):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c01
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r02c02']''')[0]
    if not pd.isnull(lp_r02c02):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c02
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r02c03']''')[0]
    if not pd.isnull(lp_r02c03):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c03
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r02c04']''')[0]
    if not pd.isnull(lp_r02c04):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c04
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r02c05']''')[0]
    if not pd.isnull(lp_r02c05):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c05
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r02c06']''')[0]
    if not pd.isnull(lp_r02c06):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c06
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r02c07']''')[0]
    if not pd.isnull(lp_r02c07):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c07
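The snippet repeats the same pattern for every text box: look up an element by id, then write a formatted number if the value is present and a dash if pd.isnull says it is missing. A compact, hypothetical helper capturing that pattern (the fill_text_box name is an assumption, not part of the original project; tree is expected to be an lxml element tree as in the snippet):

import pandas as pd

def fill_text_box(tree, element_id, value, fmt='%.0f'):
    # write a formatted value into the XML text box, or '-' if the value is missing
    box = tree.findall(".//*[@id='%s']" % element_id)[0]
    box.getchildren()[0].text = fmt % value if not pd.isnull(value) else '-'

# usage sketch, mirroring the snippet above (tree2 and the lp_* values come from the caller)
# fill_text_box(tree2, 'lp_r02c01', lp_r02c01)
# fill_text_box(tree2, 'lp_r02c02', lp_r02c02)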
github HealthRex / CDSS / medinfo / dataconversion / FeatureMatrixTransform.py
            self._matrix[feature] = self._matrix[feature].apply(lambda x: distribution() if pd.isnull(x) else x)
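This one-liner imputes each missing value in a column by drawing a fresh sample from a distribution() callable. A self-contained sketch of the same idea (the column name and the normal distribution are assumptions for illustration):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({'lab_value': [1.2, np.nan, 0.8, np.nan, 1.0]})

# draw a new random sample for every missing entry, keep observed values as-is
distribution = lambda: rng.normal(loc=1.0, scale=0.2)
df['lab_value'] = df['lab_value'].apply(lambda x: distribution() if pd.isnull(x) else x)
print(df)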
github BayAreaMetro / fast-trips / scripts / compare_output.py
'path_cost_1':'num paths missing from file2'}, inplace=True)
    df2_only = df_diff.loc[pd.isnull(df_diff.path_cost_1)].groupby(['iteration','passenger_id_num','trip_list_id_num']).agg({'union pathset probability':'max','path_cost_2':'count'})
    df2_only.rename(columns={'union pathset probability':'max prob missing from file1',
                             'path_cost_2':'num paths missing from file1'}, inplace=True)

    df_diff_summary = df_diff_counts.merge(df1_only, how='left', left_index=True, right_index=True)
    df_diff_summary = df_diff_summary.merge(df2_only, how='left', left_index=True, right_index=True)

    # note paths for which we didn't find ANY in one or the other run
    df_diff_summary['only in file1'] = 0
    df_diff_summary.loc[df_diff_summary['num paths missing from file2']==df_diff_summary['num total paths'],'only in file1'] = 1
    df_diff_summary['only in file2'] = 0
    df_diff_summary.loc[df_diff_summary['num paths missing from file1']==df_diff_summary['num total paths'],'only in file2'] = 1
    # NaN means zero
    df_diff_summary.loc[pd.isnull(df_diff_summary['num paths missing from file1']), 'num paths missing from file1'] = 0
    df_diff_summary.loc[pd.isnull(df_diff_summary['num paths missing from file2']), 'num paths missing from file2'] = 0

    # write detailed output
    detail_file = os.path.join(dir1, "ft_compare_pathset.csv")
    df_diff_summary.reset_index().to_csv(detail_file, index=False)
    FastTripsLogger.info("Wrote detailed pathset diff info to %s" % detail_file)

    # Report
    FastTripsLogger.info("                        Average pathset size: %.1f" % df_diff_summary['num total paths'].mean())
    FastTripsLogger.info("          Trips with paths ONLY in pathset 1: %d" % df_diff_summary['only in file1'].sum())
    FastTripsLogger.debug(" -- diffs --\n" + \
                          str(df_diff_summary.loc[df_diff_summary['only in file1']==1]) + "\n")

    FastTripsLogger.info("          Trips with paths ONLY in pathset 2: %d" % df_diff_summary['only in file2'].sum())
    FastTripsLogger.debug(" -- diffs --\n" + \
                          str(df_diff_summary.loc[df_diff_summary['only in file2']==1]) + "\n")
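The two pd.isnull(...) lines above turn the NaNs produced by the left merges back into zeros. A small, hypothetical illustration of that idiom, together with the fillna shortcut that does the same thing, on made-up column names:

import numpy as np
import pandas as pd

summary = pd.DataFrame({'num total paths': [3, 2, 4],
                        'num paths missing from file1': [1.0, np.nan, np.nan]})

# explicit form used in the snippet: select the NaN rows with pd.isnull, then assign 0
summary.loc[pd.isnull(summary['num paths missing from file1']),
            'num paths missing from file1'] = 0

# equivalent shortcut
summary['num paths missing from file1'] = summary['num paths missing from file1'].fillna(0)
print(summary)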
github mljar / mljar-supervised / supervised / preprocessing / preprocessing_missing.py
    def _fit_na_fill(self, X):
        for column in self._columns:
            if np.sum(pd.isnull(X[column]) == True) == 0:
                continue
            self._na_fill_params[column] = self._get_fill_value(X[column])
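_fit_na_fill only records a fill value for columns that actually contain missing data; np.sum(pd.isnull(...) == True) is simply a per-column missing count. A hedged, standalone equivalent (the DataFrame and the median fill are made up for illustration):

import numpy as np
import pandas as pd

X = pd.DataFrame({'age': [25, np.nan, 40], 'height': [170.0, 180.0, 165.0]})

for column in X.columns:
    n_missing = pd.isnull(X[column]).sum()   # same count as np.sum(pd.isnull(...) == True)
    if n_missing == 0:
        continue                             # nothing to impute for this column
    fill_value = X[column].median()          # a plausible stand-in for _get_fill_value
    print(column, n_missing, fill_value)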
github wateraccounting / wa / Sheets / sheet3.py
    if not pd.isnull(lp_r04c07):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c07
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r04c08']''')[0]
    if not pd.isnull(lp_r04c08):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c08
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r04c09']''')[0]
    if not pd.isnull(lp_r04c09):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c09
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r04c10']''')[0]
    if not pd.isnull(lp_r04c10):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c10
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r04c11']''')[0]
    if not pd.isnull(lp_r04c11):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c11
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='lp_r04c12']''')[0]
    if not pd.isnull(lp_r04c12):
        xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c12
    else:
        xml_txt_box.getchildren()[0].text = '-'
    xml_txt_box = tree2.findall('''.//*[@id='wp_r01c01']''')[0]
    if not pd.isnull(wp_r01c01):
        xml_txt_box.getchildren()[0].text = '%.2f' % wp_r01c01
github fja05680 / pinkfish / examples / sma-percent-band / strategy.py
        start_flag = True
        end_flag = False

        for i, row in enumerate(self._ts.itertuples()):

            date = row.Index.to_pydatetime()
            high = row.high
            low = row.low
            close = row.close
            sma = row.sma
            upper_band = sma + sma * self._percent_band
            lower_band = sma - sma * self._percent_band
            end_flag = True if (i == len(self._ts) - 1) else False
            shares = 0

            if pd.isnull(sma) or date < self._start:
                continue
            elif start_flag:
                start_flag = False
                # set start and end
                self._start = date
                self._end = self._ts.index[-1]

            # buy
            if (self._tlog.num_open_trades() == 0
                and close > upper_band
                and not end_flag):

                # enter buy in trade log
                shares = self._tlog.enter_trade(date, close)

            # sell
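In the loop above, pd.isnull(sma) skips the warm-up bars at the start of the series, where a rolling mean has no value yet. A minimal sketch of that warm-up check on synthetic prices (the column names are assumptions):

import numpy as np
import pandas as pd

ts = pd.DataFrame({'close': np.linspace(100, 110, 10)})
ts['sma'] = ts['close'].rolling(5).mean()    # the first 4 rows are NaN

for row in ts.itertuples():
    if pd.isnull(row.sma):
        continue                             # not enough history yet, skip the bar
    print(row.Index, round(row.sma, 2))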
github enigmampc / catalyst / catalyst / data / history_loader.py
            back_sid = back[0]
            dt = tc.previous_session_label(roll_dt)
            if self._frequency == 'minute':
                dt = tc.open_and_close_for_session(dt)[1]
                roll_dt = tc.open_and_close_for_session(roll_dt)[0]
            partitions.append((front_sid,
                               back_sid,
                               dt,
                               roll_dt))
        for partition in partitions:
            front_sid, back_sid, dt, roll_dt = partition
            last_front_dt = self._bar_reader.get_last_traded_dt(
                self._asset_finder.retrieve_asset(front_sid), dt)
            last_back_dt = self._bar_reader.get_last_traded_dt(
                self._asset_finder.retrieve_asset(back_sid), dt)
            if isnull(last_front_dt) or isnull(last_back_dt):
                continue
            front_close = self._bar_reader.get_value(
                front_sid, last_front_dt, 'close')
            back_close = self._bar_reader.get_value(
                back_sid, last_back_dt, 'close')
            adj_loc = dts.searchsorted(roll_dt)
            end_loc = adj_loc - 1
            adj = self._make_adjustment(cf.adjustment,
                                        front_close,
                                        back_close,
                                        end_loc)
            try:
                adjs[adj_loc].append(adj)
            except KeyError:
                adjs[adj_loc] = [adj]
        return adjs
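Here isnull is imported directly from pandas and applied to scalar timestamps: a last-traded datetime of NaT means the asset never traded, and isnull(NaT) is True. A tiny hedged illustration of that scalar check:

from pandas import NaT, Timestamp, isnull

last_front_dt = Timestamp('2017-06-01')
last_back_dt = NaT                      # e.g. the back contract never traded

# skip the partition when either scalar is missing, as in the loop above
if isnull(last_front_dt) or isnull(last_back_dt):
    print('skipping partition')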
github kearnz / autoimpute / autoimpute / imputations / dataframe / single_imputer.py
            if imputer.strategy in self.predictive_strategies:
                preds = self._preds[column]
                if preds == "all":
                    x_ = X.drop(column, axis=1)
                else:
                    x_ = X[preds]

                # isolate missingness
                if isinstance(x_, pd.Series):
                    x_ = x_.to_frame()
                    x_ = x_.loc[imp_ix]
                else:
                    x_ = x_.loc[imp_ix, :]

                # default univariate impute for missing covariates
                mis_cov = pd.isnull(x_).sum()
                mis_cov = mis_cov[mis_cov > 0]
                if any(mis_cov):
                    x_m = mis_cov.index
                    for col in x_m:
                        d = DefaultUnivarImputer()
                        d_imps = d.fit_impute(x_[col], None)
                        if mis_cov[col] == x_.shape[0]:
                            d_imps = 0
                        x_null = x_[col][x_[col].isnull()].index
                        x_.loc[x_null, col] = d_imps

                # handling encoding again for prediction of imputations
                x_ = _one_hot_encode(x_)

            # perform imputation given the specified imputer and value for x_
            X.loc[imp_ix, column] = imputer.impute(x_)
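pd.isnull(x_).sum() above yields one missing-value count per covariate column; keeping only the positive counts identifies which covariates need a default univariate fill before prediction. A standalone sketch of that selection (the frame is made up):

import numpy as np
import pandas as pd

x_ = pd.DataFrame({'a': [1.0, np.nan, 3.0],
                   'b': [np.nan, np.nan, np.nan],
                   'c': [1.0, 2.0, 3.0]})

mis_cov = pd.isnull(x_).sum()        # Series: missing count per column
mis_cov = mis_cov[mis_cov > 0]       # keep only columns that have any missing values
print(list(mis_cov.index))           # ['a', 'b']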
github HealthRex / CDSS / medinfo / dataconversion / FeatureMatrixTransform.py
    def filter_on_feature(self, feature, value):
        # remove rows where feature == value
        if pd.isnull(value): # nan is not comparable, so need different syntax
            rows_to_remove = self._matrix[pd.isnull(self._matrix[feature])].index
        else:
            try:
                rows_to_remove = self._matrix[self._matrix[feature] == value].index
            except TypeError:
                log.info('Cannot filter %s on %s; types are not comparable.' % (feature, str(value)))
                return

        self._matrix.drop(rows_to_remove, inplace = True)
        self._matrix.reset_index(drop=True, inplace = True)

        # return number of rows remaining
        return self._matrix.shape[0]
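The special case at the top of filter_on_feature exists because NaN never compares equal to anything, including itself, so an equality filter cannot match missing rows. A short hedged demonstration:

import numpy as np
import pandas as pd

matrix = pd.DataFrame({'feature': [1.0, np.nan, 2.0]})

print(np.nan == np.nan)                                    # False: equality can't find NaN
print(matrix[matrix['feature'] == np.nan].index.tolist())  # [] -- misses the NaN row
print(matrix[pd.isnull(matrix['feature'])].index.tolist()) # [1] -- pd.isnull finds it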
github rizac / stream2segment / stream2segment / io / db / pd_sql_utils.py
    :param autoincrement_pkey_col: an ORM column of some model mapped to an sql table. The
    column, as the name says, must be a primary key with auto-increment.
    **The column MUST be of sql type INTEGER, otherwise this method should not be used**
    dataframe[A] will have dtype int64, which is fine as we replace (or add) **all** the row values.
    Note that if we replaced only partially some values, then dataframe[A] might still hold the old
    dtype (e.g., float) which **would be bad** as some db (e.g., postgres) are strict and will issue
    an `sqlalchemy.exc.DataError` if inserting/updating a non-nan/non-int value
    (e.g., 6.0 instead of 6)
    :param dataframe: the dataframe with values to be inserted/updated/deleted from the table
    mapped by `autoincrement_pkey_col`
    '''
    max_pkey = (_get_max(session, autoincrement_pkey_col) if max is None else max) + 1
    pkeyname = autoincrement_pkey_col.key
    if not overwrite:
        if pkeyname in dataframe:
            mask = pd.isnull(dataframe[pkeyname])
            nacount = mask.sum()
            if nacount != len(dataframe):
                dataframe.loc[mask, pkeyname] = np.arange(max_pkey, max_pkey+nacount, dtype=int)
                # cast values if we modified only SOME row values of dataframe[pkeyname]
                # This is why we might have had floats (because we had na) and now we still have
                # floats (postgres complains if we add 6.0 instead of 6!)
                return _cast_column(dataframe, autoincrement_pkey_col)

    # if we are here, either we want to set all values of dataframe[pkeyname],
    # or pkeyname is not a column of dataframe,
    # or all dataframe[pkeyname] are na
    # In ALL these cases pandas changes the dtype, so the cast is not needed
    new_pkeys = np.arange(max_pkey, max_pkey+len(dataframe), dtype=int)
    dataframe[pkeyname] = new_pkeys
    return dataframe
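The pd.isnull mask above picks out the rows whose primary key is still missing, so that only those rows receive freshly generated integer ids. A simplified, hypothetical illustration of that masking step (no database involved; max_pkey is hard-coded here):

import numpy as np
import pandas as pd

dataframe = pd.DataFrame({'id': [1.0, np.nan, np.nan], 'value': ['a', 'b', 'c']})
max_pkey = 2  # pretend this came from the database

mask = pd.isnull(dataframe['id'])
nacount = mask.sum()
dataframe.loc[mask, 'id'] = np.arange(max_pkey, max_pkey + nacount, dtype=int)
print(dataframe)   # the missing ids become 2 and 3; the dtype may still need a final cast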