How to use pandas - 10 common examples

To help you get started, we've selected ten pandas examples, drawn from popular ways the library is used in public projects. Each excerpt is taken from a real repository and links back to its source on GitHub.

Example 1: AugurProject/augur / tests/consensus/runtests.py (view on GitHub)
import os

import numpy as np
import pandas as pd

try:
    from colorama import Fore, Style, init
except ImportError:
    pass
from ethereum import tester as t
from pyconsensus import Oracle

ROOT = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                    os.pardir, os.pardir, "consensus")

np.set_printoptions(linewidth=225,
                    suppress=True,
                    formatter={"float": "{: 0.6f}".format})

pd.set_option("display.max_rows", 25)
pd.set_option("display.width", 1000)
pd.set_option('display.float_format', lambda x: '%.8f' % x)

# max_iterations: number of blocks required to complete PCA
verbose = False
max_iterations = 5
tolerance = 0.05
variance_threshold = 0.85
max_components = 5
init()  # initialize colorama terminal colors (assumes the optional import above succeeded)

YES = 2.0
NO = 1.0
BAD = 1.5
NA = 0.0

def BR(string):  # bright red (body truncated in the excerpt; completed with the colorama styles imported above)
    return Fore.RED + Style.BRIGHT + str(string) + Style.RESET_ALL
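
The pandas-specific piece of this snippet is the global display configuration. A minimal, self-contained sketch of the same pattern (the option names are standard pandas options; the values are illustrative):

import pandas as pd

pd.set_option("display.max_rows", 25)    # truncate long frames in console output
pd.set_option("display.width", 1000)     # allow wide rows before wrapping
pd.set_option("display.float_format", lambda x: "%.8f" % x)  # fixed-point floats

# option_context applies settings only inside the block
with pd.option_context("display.max_rows", 5):
    print(pd.DataFrame({"a": range(100)}))
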
Example 2: WheatonCS/Lexos / test/unit_test/test_similarity.py (view on GitHub)
    # (the first two rows of the doc-term matrix are truncated in this excerpt)
    test_dtm = pd.DataFrame(data=[...,
                                  [10.0, 5.0, 5.0, 5.0, 5.0, 0.0, 5.0, 5.0, 5.0]],
                            index=[0, 1, 2])
    test_front_end_option = SimilarityFrontEndOption(
        comp_file_id=2, sort_ascending=True, sort_column=0)
    test_id_table = {0: "F1.txt", 1: "F2.txt", 2: "F3.txt"}
    similarity_model = SimilarityModel(
        test_options=SimilarityTestOption(
            doc_term_matrix=test_dtm,
            front_end_option=test_front_end_option,
            document_label_map=test_id_table
        )
    )

    pd.testing.assert_frame_equal(
        similarity_model._get_similarity_query(),
        pd.DataFrame(index=["Documents", "Cosine Similarity"],
                     data=[["F1.txt", "F2.txt"], [1., 1.]]).transpose()
    )
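
The comparison above relies on pd.testing.assert_frame_equal, which checks values, index, columns, and dtypes, and raises an AssertionError with a diff on mismatch. A minimal sketch:

import pandas as pd

expected = pd.DataFrame({"Documents": ["F1.txt", "F2.txt"],
                         "Cosine Similarity": [1.0, 1.0]})
actual = expected.copy()

# Raises AssertionError with a detailed diff if values, index, columns, or dtypes differ
pd.testing.assert_frame_equal(actual, expected)

# check_like=True ignores row/column ordering; floats compare with a tolerance by default
pd.testing.assert_frame_equal(actual, expected, check_like=True)
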
Example 3: anirudhramanan/plutus-backtest / stockselection/portfolio.py (view on GitHub)
"IOC", "HINDPETRO", "HEROMOTOCO",
            "M&M", "ULTRACEMCO", "BAJAJFINSV",
            "TATASTEEL", "HDFC", "BHARTIARTL",
            "EICHERMOT", "JSWSTEEL", "ASIANPAINT",
            "BAJAJ-AUTO", "AXISBANK", "YESBANK",
            "IBULHSGFIN", "ITC", "LT",
            "UPL", "KOTAKBANK", "HDFCBANK",
            "HINDUNILVR", "ONGC", "TITAN",
            "RELIANCE", "GAIL", "POWERGRID",
            "NTPC", "COALINDIA", "ICICIBANK",
            "SUNPHARMA", "INFRATEL", "GRASIM",
            "SBIN", "HCLTECH", "INFY", "TCS",
            "BAJFINANCE", "ZEEL", "CIPLA", "DRREDDY",
            "WIPRO", "TECHM"]

        self.all_stock_data = pd.DataFrame(columns=['Stock', 'Volume', 'High', 'Low'])

        i = 0
        for stock in self.nifty_50_stocks:
            stock_data = fetch_stock_data(stock, 1, '1d')
            self.all_stock_data.loc[i] = [stock, stock_data['Volume'].mean(), stock_data['High'].mean(),
                                          stock_data['Low'].mean()]
            i = i + 1

        print('Fetched data for all nifty 50 stocks')
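
Filling a frame one row at a time via .loc[i] works, but reallocates on every write; collecting records first and building the DataFrame once is the more idiomatic pattern. A sketch, assuming a fetch function that returns a frame with Volume/High/Low columns (fetch_stock_data is project-specific, so the helper below is hypothetical):

import pandas as pd

def summarize_stocks(stocks, fetch):
    """Build the summary frame in one shot instead of row-by-row .loc writes."""
    records = []
    for stock in stocks:
        data = fetch(stock)  # hypothetical stand-in for the project's fetch_stock_data
        records.append({"Stock": stock,
                        "Volume": data["Volume"].mean(),
                        "High": data["High"].mean(),
                        "Low": data["Low"].mean()})
    return pd.DataFrame.from_records(records, columns=["Stock", "Volume", "High", "Low"])
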
Example 4: jmcarpenter2/swifter / swifter/test_dataframe.py (view on GitHub)
def test_nonvectorized_math_apply_on_large_dataframe_broadcast(self):
        LOG.info("test_nonvectorized_math_apply_on_large_dataframe_broadcast")
        df = pd.DataFrame({"x": np.random.normal(size=1_000_000), "y": np.random.uniform(size=1_000_000)})

        tqdm.pandas(desc="Pandas Nonvec math apply + broadcast ~ DF")
        start_pd = time.time()
        pd_val = df.progress_apply(math_agg_foo, axis=1, result_type="broadcast")
        end_pd = time.time()
        pd_time = end_pd - start_pd

        start_swifter = time.time()
        swifter_val = df.swifter.progress_bar(desc="Nonvec math apply + broadcast ~ DF").apply(
            math_agg_foo, axis=1, result_type="broadcast"
        )
        end_swifter = time.time()
        swifter_time = end_swifter - start_swifter

        self.assertEqual(pd_val, swifter_val)  # equality test
        if self.ncores > 1:  # speed test
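
The pandas feature being benchmarked is DataFrame.apply with result_type="broadcast", which takes each row's scalar result and broadcasts it back across the row's columns. A minimal sketch, with a plain row sum standing in for the project's math_agg_foo:

import numpy as np
import pandas as pd

df = pd.DataFrame({"x": np.random.normal(size=1_000),
                   "y": np.random.uniform(size=1_000)})

# Each row maps to a scalar; "broadcast" copies it back into every column of that row
out = df.apply(lambda row: row.sum(), axis=1, result_type="broadcast")
assert out.shape == df.shape
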
Example 5: antoinecarme/pyaf / tests/HeartRateTimeSeries/HeartRateTimeSeries_series3.py (view on GitHub)
import pandas as pd
import numpy as np
import pyaf.ForecastEngine as autof
import datetime

#get_ipython().magic('matplotlib inline')

trainfile = "https://raw.githubusercontent.com/antoinecarme/TimeSeriesData/master/HeartRateTimeSeries/hr.207"
df = pd.read_csv(trainfile, sep=r',', engine='python', skiprows=0);
df.columns = ['HeartRate']
df['Date'] = range(df.shape[0]);
print(df.head());

lDateVar = 'Date'
lSignalVar = 'HeartRate'

lEngine = autof.cForecastEngine()
lEngine

H = 10;

#lEngine.mOptions.enable_slow_mode();
lEngine.mOptions.mDebugPerformance = True;
lEngine.train(df , lDateVar , lSignalVar, H);
lEngine.getModelInfo();
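
Note that pd.read_csv reads directly from a URL; everything after the load is pyaf-specific. A minimal sketch of the loading step (header=None is an assumption about the file layout, matching the snippet's manual column rename):

import pandas as pd

url = ("https://raw.githubusercontent.com/antoinecarme/TimeSeriesData/"
       "master/HeartRateTimeSeries/hr.207")
df = pd.read_csv(url, header=None, names=["HeartRate"])  # header=None: assumed no header row
df["Date"] = range(df.shape[0])  # synthesize an integer time axis, as in the snippet
print(df.head())
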
Example 6: tidepool-org/data-analytics / projects/iCGM-test-matrix/snapshot_processor.py (view on GitHub)
def get_snapshot(data,
                 file_name,
                 evaluation_point_loc):
    """Main function wrapper to assemble snapshot dataframes"""

    # Start by getting the 48-hour window ± 24hrs around the evaluation point
    evaluation_index = data.index[data.id == evaluation_point_loc]

    data["rounded_local_time"] = \
        pd.to_datetime(data["est.localTime"],
                       utc=True).dt.ceil(freq="5min")

    evaluation_time = \
        pd.to_datetime(data.loc[evaluation_index,
                                'rounded_local_time'].values[0],
                       utc=True)

    df_misc = get_time_to_calculate_at(evaluation_time)

    start_time = evaluation_time - datetime.timedelta(days=1)
    end_time = evaluation_time + datetime.timedelta(days=1)

    snapshot_df = data[(data['rounded_local_time'] >= start_time) &
                       (data['rounded_local_time'] <= end_time)]

    # Get pumpSettings list of active schedules
    active_schedule = get_active_schedule(data,
                                          snapshot_df,
                                          file_name,
                                          evaluation_point_loc,
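
Two pandas idioms do the work here: parsing and rounding timestamps with pd.to_datetime plus Series.dt.ceil, and slicing a time window with a boolean mask. A minimal sketch on synthetic timestamps:

import datetime
import pandas as pd

raw = pd.Series(pd.date_range("2019-01-01", periods=12, freq="3min").astype(str))

# Parse to tz-aware timestamps, then round *up* into 5-minute bins
rounded = pd.to_datetime(raw, utc=True).dt.ceil(freq="5min")

center = rounded.iloc[6]
start = center - datetime.timedelta(days=1)
end = center + datetime.timedelta(days=1)
window = rounded[(rounded >= start) & (rounded <= end)]  # boolean-mask time window
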
Example 7: openeemeter/eemeter / tests/test_caltrack_usage_per_day.py (view on GitHub)
def test_caltrack_sufficiency_criteria_fail_no_gap_and_not_enough_days():
    data_quality = pd.DataFrame(
        {
            "meter_value": [1, 1],
            "temperature_not_null": [1, 5],
            "temperature_null": [0, 5],
            "start": pd.date_range(start="2016-01-02", periods=2, freq="D", tz="UTC"),
        }
    ).set_index("start")
    requested_start = pd.Timestamp("2016-01-02").tz_localize("UTC")
    requested_end = pd.Timestamp("2016-01-04").tz_localize("UTC")
    data_sufficiency = caltrack_sufficiency_criteria(
        data_quality,
        requested_start,
        requested_end,
        num_days=3,
        min_fraction_daily_coverage=0.9,
        min_fraction_hourly_temperature_coverage_per_period=0.9,
    )
    assert data_sufficiency.status == "FAIL"
    assert data_sufficiency.criteria_name == ("caltrack_sufficiency_criteria")
    assert len(data_sufficiency.warnings) == 4

    warning0 = data_sufficiency.warnings[0]
    assert warning0.qualified_name == (
        "eemeter.caltrack_sufficiency_criteria.incorrect_number_of_total_days"
Example 8: man-group/arctic / tests/integration/chunkstore/test_chunkstore.py (view on GitHub)
    df.index.name = 'date'
    chunkstore_lib.write('data', df, audit={'user': 'test_user'})
    df = DataFrame(data={'data': np.random.randint(0, 100, size=10)},
                   index=pd.date_range('2016-01-01', '2016-01-10'))
    df.index.name = 'date'
    chunkstore_lib.write('data', df, audit={'user': 'other_user'})

    assert(len(chunkstore_lib.read_audit_log()) == 2)
    assert(len(chunkstore_lib.read_audit_log(symbol='data')) == 2)
    assert(len(chunkstore_lib.read_audit_log(symbol='none')) == 0)

    chunkstore_lib.append('data', df, audit={'user': 'test_user'})
    assert(chunkstore_lib.read_audit_log()[-1]['appended_rows'] == 10)

    df = DataFrame(data={'data': np.random.randint(0, 100, size=5)},
                   index=pd.date_range('2017-01-01', '2017-01-5'))
    df.index.name = 'date'
    chunkstore_lib.update('data', df, audit={'user': 'other_user'})
    assert(chunkstore_lib.read_audit_log()[-1]['new_chunks'] == 5)

    chunkstore_lib.rename('data', 'data_new', audit={'user': 'temp_user'})
    assert(chunkstore_lib.read_audit_log()[-1]['action'] == 'symbol rename')

    chunkstore_lib.delete('data_new', chunk_range=DateRange('2016-01-01', '2016-01-02'), audit={'user': 'test_user'})
    chunkstore_lib.delete('data_new', audit={'user': 'test_user'})
    assert(chunkstore_lib.read_audit_log()[-1]['action'] == 'symbol delete')
    assert(chunkstore_lib.read_audit_log()[-2]['action'] == 'range delete')
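
Setting arctic's chunkstore API aside, the pandas pattern is a DataFrame carrying a named DatetimeIndex, which chunkstore uses to key its date chunks. A minimal sketch, including the partial-string date slicing such an index enables:

import numpy as np
import pandas as pd

df = pd.DataFrame({"data": np.random.randint(0, 100, size=10)},
                  index=pd.date_range("2016-01-01", "2016-01-10"))
df.index.name = "date"  # the named index becomes the chunking key

# A DatetimeIndex supports partial-string slicing for date ranges
first_two_days = df.loc["2016-01-01":"2016-01-02"]
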
Example 9: pmorissette/bt / tests/test_core.py (view on GitHub)
def test_strategybase_tree_allocate_long_short():
    c1 = SecurityBase('c1')
    c2 = SecurityBase('c2')
    s = StrategyBase('p', [c1, c2])

    c1 = s['c1']
    c2 = s['c2']

    dts = pd.date_range('2010-01-01', periods=3)
    data = pd.DataFrame(index=dts, columns=['c1', 'c2'], data=100)
    data.loc[dts[1], 'c1'] = 105   # .loc avoids chained-assignment pitfalls
    data.loc[dts[1], 'c2'] = 95

    s.setup(data)

    i = 0
    s.update(dts[i], data.loc[dts[i]])  # .ix was removed in pandas 1.0; .loc is the label-based accessor

    s.adjust(1000)
    c1.allocate(500)

    assert c1.position == 5
    assert c1.value == 500
    assert c1.weight == 500.0 / 1000
    assert s.capital == 1000 - 500
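
The price-fixture pattern above translates directly to label-based .loc access in modern pandas. A minimal sketch:

import pandas as pd

dts = pd.date_range("2010-01-01", periods=3)
data = pd.DataFrame(index=dts, columns=["c1", "c2"], data=100.0)

data.loc[dts[1], "c1"] = 105.0  # label-based scalar write
data.loc[dts[1], "c2"] = 95.0

row = data.loc[dts[1]]  # one row as a Series, indexed by column name
assert row["c1"] == 105.0
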
Example 10: pmorissette/bt / tests/test_algos.py (view on GitHub)
def test_select_has_data_preselected():
    algo = algos.SelectHasData(min_count=3, lookback=pd.DateOffset(days=3))

    s = bt.Strategy('s')

    dts = pd.date_range('2010-01-01', periods=3)
    data = pd.DataFrame(index=dts, columns=['c1', 'c2'], data=100.)
    data.loc[dts[0], 'c1'] = np.nan  # .loc replaces the removed .ix accessor
    data.loc[dts[1], 'c1'] = np.nan

    s.setup(data)
    s.update(dts[2])
    s.temp['selected'] = ['c1']

    assert algo(s)
    selected = s.temp['selected']
    assert len(selected) == 0
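
SelectHasData (a bt algo, not pandas itself) rests on two pandas ideas: a pd.DateOffset lookback window and per-column counting of non-NaN observations. A sketch of the underlying check:

import numpy as np
import pandas as pd

dts = pd.date_range("2010-01-01", periods=3)
data = pd.DataFrame(index=dts, columns=["c1", "c2"], data=100.0)
data.loc[dts[0], "c1"] = np.nan
data.loc[dts[1], "c1"] = np.nan

lookback = pd.DateOffset(days=3)
window = data.loc[dts[-1] - lookback:]  # rows inside the lookback window
has_data = window.count() >= 3          # count() ignores NaN, per column
print(has_data)                         # c1: False (only 1 value), c2: True
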