How to use the pandas.DataFrame class in pandas

To help you get started, we’ve selected a few pandas examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github WheatonCS / Lexos / test / unit_test / test_similarity.py View on Github external
10.0, 5.0, 5.0, 5.0, 5.0, 0.0, 5.0, 5.0, 5.0]],
                            index=[0, 1, 2])
    test_front_end_option = SimilarityFrontEndOption(
        comp_file_id=2, sort_ascending=True, sort_column=0)
    test_id_table = {0: "F1.txt", 1: "F2.txt", 2: "F3.txt"}
    similarity_model = SimilarityModel(
        test_options=SimilarityTestOption(
            doc_term_matrix=test_dtm,
            front_end_option=test_front_end_option,
            document_label_map=test_id_table
        )
    )

    pd.testing.assert_frame_equal(
        similarity_model._get_similarity_query(),
        pd.DataFrame(index=["Documents", "Cosine Similarity"],
                     data=[["F1.txt", "F2.txt"], [1., 1.]]).transpose()
    )
github anirudhramanan / plutus-backtest / stockselection / portfolio.py View on Github external
"IOC", "HINDPETRO", "HEROMOTOCO",
            "M&M", "ULTRACEMCO", "BAJAJFINSV",
            "TATASTEEL", "HDFC", "BHARTIARTL",
            "EICHERMOT", "JSWSTEEL", "ASIANPAINT",
            "BAJAJ-AUTO", "AXISBANK", "YESBANK",
            "IBULHSGFIN", "ITC", "LT",
            "UPL", "KOTAKBANK", "HDFCBANK",
            "HINDUNILVR", "ONGC", "TITAN",
            "RELIANCE", "GAIL", "POWERGRID",
            "NTPC", "COALINDIA", "ICICIBANK",
            "SUNPHARMA", "INFRATEL", "GRASIM",
            "SBIN", "HCLTECH", "INFY", "TCS",
            "BAJFINANCE", "ZEEL", "CIPLA", "DRREDDY",
            "WIPRO", "TECHM"]

        self.all_stock_data = pd.DataFrame(columns=['Stock', 'Volume', 'High', 'Low'])

        i = 0
        for stock in self.nifty_50_stocks:
            stock_data = fetch_stock_data(stock, 1, '1d')
            self.all_stock_data.loc[i] = [stock, stock_data['Volume'].mean(), stock_data['High'].mean(),
                                          stock_data['Low'].mean()]
            i = i + 1

        print('Fetched data for all nifty 50 stocks')
github jmcarpenter2 / swifter / swifter / test_dataframe.py View on Github external
def test_nonvectorized_math_apply_on_large_dataframe_broadcast(self):
        LOG.info("test_nonvectorized_math_apply_on_large_dataframe_broadcast")
        df = pd.DataFrame({"x": np.random.normal(size=1_000_000), "y": np.random.uniform(size=1_000_000)})

        tqdm.pandas(desc="Pandas Nonvec math apply + broadcast ~ DF")
        start_pd = time.time()
        pd_val = df.progress_apply(math_agg_foo, axis=1, result_type="broadcast")
        end_pd = time.time()
        pd_time = end_pd - start_pd

        start_swifter = time.time()
        swifter_val = df.swifter.progress_bar(desc="Nonvec math apply + broadcast ~ DF").apply(
            math_agg_foo, axis=1, result_type="broadcast"
        )
        end_swifter = time.time()
        swifter_time = end_swifter - start_swifter

        self.assertEqual(pd_val, swifter_val)  # equality test
        if self.ncores > 1:  # speed test
github geomdata / gda-public / multidim / __init__.py View on Github external
else:
            assert type(idx0) == np.ndarray \
                   and idx0.shape == (n,) \
                   and idx0.dtype == 'int64', \
                   "Wrong type or size for indexing data on pointcloud."

        points = pd.DataFrame({
            'height': hgt0,
            'mass': mas0,
            'pos': pos0,
            'rep': idx0,
        },
                columns=['height', 'mass', 'pos', 'rep'],
                index=idx0)

        self.coords = pd.DataFrame(data_array, index=idx0)
        self.covertree = None

        edges = stratum_maker(1)
        super(self.__class__, self).__init__(stratum={0: points, 1: edges})

        self.labels = np.zeros(shape=(self.coords.shape[0],), dtype=np.int64)
        self.source = np.zeros(shape=(self.coords.shape[0],), dtype=np.int64)
        self.label_info = pd.DataFrame(index=['black'])
        self.label_info['clouds'] = np.array([1], dtype=np.int64)
        self.label_info['points'] = np.array([n], dtype=np.int64)
        self.label_info['tot_mass'] = np.array([self.stratum[0]['mass'].sum()])
        self.label_info['int_index'] = np.array([0], dtype=np.int64)

        self.max_length = max_length
        if self.max_length > 0.0 or self.max_length == -1.0:
            # use covertree to make all appropriate edges.
github adminho / trading-stock-thailand / deep_q / env_trade.py View on Github external
def _getAllHistory(self):
		"""Assemble the recorded trading history and mark signal changes.

		Reads ``self.all_date``, ``self.all_price``, ``self.all_signal`` and
		``self.all_return`` and places them side by side in one DataFrame with
		one row per recorded date. Two further single-column frames, ``sell``
		and ``buy``, receive the price at each row where the signal differs
		from ``old_signal`` and equals SELL or BUY respectively.

		NOTE(review): the visible part of this method has no return statement
		and never reassigns ``old_signal`` inside the loop; the definition
		presumably continues beyond this excerpt -- confirm against the full
		source before relying on this description.
		"""
		total_row = len(self.all_date)
		# One row per recorded step, indexed 0 .. total_row - 1.
		df = pd.DataFrame(index=range(0, total_row))
		df['date'] = pd.DataFrame(self.all_date)
		df['price'] = pd.DataFrame(self.all_price)
		df['signal'] = pd.DataFrame(self.all_signal)
		df['cumulative_return'] = pd.DataFrame(self.all_return)
		
		price = df['price']
		signal = df['signal']
		
		# No data is supplied here, so every cell starts out empty (NaN) and is
		# only filled in at the rows where a trade is marked below.
		df_sell = pd.DataFrame(index=range(0, total_row), columns=['sell'])
		df_buy = pd.DataFrame(index=range(0, total_row) , columns=['buy'])
		
		# The first row has no previous signal to compare with, so it is
		# handled separately: only a BUY is recorded.
		if signal.loc[0] == SELL:
			pass # a SELL on the first row triggers no action
		elif signal.loc[0] == BUY:
			df_buy.loc[0] = price.loc[0]
		
		# Remaining rows: record a price only when the signal differs from
		# old_signal (the first row's signal, as far as the visible code shows).
		old_signal = signal.loc[0]
		for index in range(1, total_row):
			new_signal = signal.loc[index]			
			if new_signal != old_signal:
				if new_signal == BUY:
					df_buy.loc[index] = price.loc[index]
				elif new_signal == SELL:
					df_sell.loc[index] = price.loc[index]
github SanPen / GridCal / Stable / GridCal / grid / PowerFlow.py View on Github external
# new circuit hosting the island grid
            circuit = MultiCircuitPowerFlow(baseMVA, bus_island, gen_island, branch_island, graph, self.solver_type, is_an_island=True)

            # add the circuit to the islands
            island_circuits.append(circuit)
            original_indices.append(original_indices_entry)
            fixed_power_indices.append(fixed_power)

            island_idx += 1

        recalculate_islands = False

        # turn rosetta into a pandas dataframe, it will allow easy querying later)
        cols = ['Original_idx', 'at_island_idx', 'island_idx', 'Fixed']
        bus_rosetta = pd.DataFrame(data=bus_rosetta_vals, columns=cols, dtype=int)
        gen_rosetta = pd.DataFrame(data=gen_rosetta_vals, columns=cols, dtype=int)

        return island_circuits, original_indices, recalculate_islands, fixed_power_indices, bus_rosetta, gen_rosetta
github openpathsampling / openpathsampling / openpathsampling / numerics / histogram.py View on Github external
"histogram" : hist,
            "normalized" : hist.normalized,
            "reverse_cumulative" : hist.reverse_cumulative,
            "cumulative" : hist.cumulative,
            "rebinned" : hist.rebinned
        }[fcn](**fcn_args).values()

        bin_edge = {
            "histogram" : "m",
            "normalized" : "m",
            "reverse_cumulative" : "l",
            "cumulative" : "r"
        }[fcn]
        xvals = hist.xvals(bin_edge)

        frames.append(pd.DataFrame({hist.name : hist_data}, index=xvals))
    all_frames = pd.concat(frames, axis=1)
    return all_frames.fillna(0.0)
github scofield7419 / sequence-labeling-BiLSTM-CRF / engines / BiLSTM_CRFs.py View on Github external
def save_test_out(tokens, labels):
            """Flatten per-sequence tokens and labels and write them as CSV.

            ``tokens`` and ``labels`` are iterated in lockstep. Each pair of
            sequences is appended to a flat token list and a flat label list,
            followed by an empty string in both lists so consecutive
            sequences stay separated in the output. The two lists become the
            ``token`` and ``label`` columns of a DataFrame handed to
            ``save_csv_`` together with ``self.output_test_file`` and
            ``self.configs.delimiter``.
            """
            flat_tokens = []
            flat_labels = []
            for seq_tokens, seq_labels in zip(tokens, labels):
                # The trailing "" is the separator between sequences.
                flat_tokens += [*seq_tokens, ""]
                flat_labels += [*seq_labels, ""]

            column_order = ["token", "label"]
            frame = pd.DataFrame({"token": flat_tokens, "label": flat_labels})
            save_csv_(frame, self.output_test_file, column_order,
                      delimiter=self.configs.delimiter)
github word-fish / wordfish-python / wordfish / nlp.py View on Github external
def get_term_counts_list(terms, text):
    """Tally, per stemmed term, the sentences of ``text`` that contain it.

    ``terms`` is stemmed with ``do_stem`` and the stems become the index of
    a single-column (``count``) DataFrame initialised to 0. Every item of
    ``text`` is wrapped in a ``TextBlob``, its words are stemmed, and the
    rows for the stems that are also in the index are incremented.

    NOTE(review): the increment is a label-based ``.loc`` read-add-write, so
    a stem that occurs several times in one sentence looks like it is bumped
    by 1 rather than once per occurrence -- confirm this is intended.

    Returns the ``count`` DataFrame.
    """
    stems = do_stem(terms)

    # One row per stem, all counts starting at zero.
    counts = pandas.DataFrame(0, columns=["count"], index=stems)

    for sentence in text:
        sentence_stems = do_stem(TextBlob(sentence).words)
        # Keep only stems we are tracking, so .loc never sees unknown labels.
        matched = [stem for stem in sentence_stems if stem in stems]
        counts.loc[matched] = counts.loc[matched] + 1

    return counts
github microscopium / microscopium / microscopium / serve.py View on Github external
def empty_table(df):
    """Return a row-less DataTable whose column headings mirror ``df``.

    The table is backed by a ``ColumnDataSource`` built from an empty
    DataFrame with ``df``'s columns, and gets one ``TableColumn`` per column
    with the column name as both field and title. Width is fixed at 800.
    """
    headings = df.columns
    source = ColumnDataSource(pd.DataFrame(columns=headings))

    table_columns = []
    for heading in headings:
        table_columns.append(TableColumn(field=heading, title=heading))

    return DataTable(source=source, columns=table_columns, width=800)