How to use the datatable.count function in datatable

To help you get started, we’ve selected a few datatable examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github h2oai / driverlessai-recipes / data / airlines_joined_data_flights_in_out_regression.py View on Github external
# add date
        date_col = 'Date'
        X[:, date_col] = dt.f['Year'] * 10000 + dt.f['Month'] * 100 + dt.f['DayofMonth']
        cols_to_keep = ['Date']

        # add number of flights in/out for each airport per given interval
        timeslice_mins = 60
        for name, new_col, col, group in [
            ("out", "CRSDepTime_mod", "CRSDepTime", "Origin"),
            ("in", "CRSArrTime_mod", "CRSArrTime", "Dest")
        ]:
            X[:, new_col] = X[:, dt.f[col] // timeslice_mins]
            group_cols = [date_col, group, new_col]
            new_name = 'flights_%s' % name
            flights = X[:, {new_name: dt.count()}, dt.by(*group_cols)]
            flights.key = group_cols
            cols_to_keep.append(new_name)
            X = X[:, :, dt.join(flights)]

        # Fill NaNs with 0s
        X[dt.isna(dt.f['DepDelay']), 'DepDelay'] = 0
        cols_to_keep.extend([
            'DepDelay',
            'Year',
            'Month',
            'DayofMonth',
            'DayOfWeek',
            'CRSDepTime',
            'UniqueCarrier',
            'FlightNum',
            'TailNum',
github h2oai / datatable / tests / test-groups.py View on Github external
def test_issue_2242(seed):
    """Regression test named after issue 2242.

    Builds a 25000-row frame with a random integer ``AGE`` column (values
    1..50) and a random boolean ``PAY`` column, then evaluates a compound
    expression that combines ``count()`` and ``sum(f.PAY)`` inside
    ``dt.math.log`` for every ``AGE`` group.

    The assertions expect one result row per age 1..50, with the group keys
    in ascending order and every computed value being a Python float.

    NOTE(review): ``sum`` here is presumably datatable's reducer imported at
    module level rather than the builtin -- confirm against the file header.
    """
    # Apply the seed supplied by the fixture so that the random input -- and
    # therefore any failure -- can be reproduced from the reported seed.
    random.seed(seed)
    n = 25000
    X = dt.Frame(AGE=[random.randint(1, 50) for _ in range(n)],
                 PAY=[random.choice([True, False]) for _ in range(n)])
    RES = X[:, dt.math.log((count() + 1)/(sum(f.PAY) + 0.5) - 1), by(f.AGE)]
    assert RES.shape == (50, 2)
    data = RES.to_list()
    # Column 0 holds the group keys, column 1 the computed expression.
    assert data[0] == list(range(1, 51))
    assert all(isinstance(x, float) for x in data[1])
github h2oai / datatable / tests / test-groups.py View on Github external
def test_groups_large2_str(n, seed):
    """Group a column of random short hex strings and check the group count.

    When ``n`` is 0 it is replaced by a random size drawn with
    ``random.expovariate`` (redrawn until non-zero). The grouped frame must
    pass the integrity check and contain exactly one row per distinct string.
    """
    random.seed(seed)
    while n == 0:
        n = int(random.expovariate(0.0005))
    hex_keys = ["%x" % random.getrandbits(6) for _ in range(n)]
    source = dt.Frame({"A": hex_keys})
    grouped = source[:, count(), by("A")]
    frame_integrity_check(grouped)
    n_distinct = len(set(hex_keys))
    assert grouped.nrows == n_distinct
github h2oai / datatable / tests / test_reduce.py View on Github external
def test_count_2d_array_integer():
    """``count()`` called directly on a list of two lists must return 2."""
    first = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]
    second = [0, 1, 0, 5, 3, 8, 1, 0, 2, 5, None, 8, 1]
    result = count([first, second])
    assert result == 2
github h2oai / datatable / tests / test-groups.py View on Github external
def test_group_empty_frame3():
    """Counting an empty float32 column grouped by itself.

    The result must pass the integrity check, have zero rows and two columns,
    and carry the source column's stype followed by ``dt.int64``.
    """
    empty = dt.Frame(A=[], stype=dt.float32)
    grouped = empty[:, count(f.A), by(f.A)]
    frame_integrity_check(grouped)
    assert grouped.shape == (0, 2)
    # First column is the group key, second is the count.
    expected_stypes = (empty.stype, dt.int64)
    assert grouped.stypes == expected_stypes
github h2oai / datatable / tests / test_reduce.py View on Github external
def test_count_dt_groupby_string():
    """Group a string column by itself and compare both flavours of count.

    The expected output shows ``count(f.C0)`` reporting 0 for the ``None``
    group while the argument-less ``count()`` reports its 4 rows; for every
    other group the two agree.
    """
    colors = [None, "blue", "green", "indico", None, None, "orange",
              "red", "violet", "yellow", "green", None, "blue"]
    grouped = dt.Frame(colors)[:, [count(f.C0), count()], "C0"]
    frame_integrity_check(grouped)
    assert grouped.shape == (8, 3)
    assert grouped.ltypes == (ltype.str, ltype.int, ltype.int,)

    expected_keys = [None, "blue", "green", "indico", "orange",
                     "red", "violet", "yellow"]
    expected_column_counts = [0, 2, 2, 1, 1, 1, 1, 1]
    expected_row_counts = [4, 2, 2, 1, 1, 1, 1, 1]
    assert grouped.to_list() == [expected_keys,
                                 expected_column_counts,
                                 expected_row_counts]
github h2oai / datatable / tests / test_reduce.py View on Github external
def test_count_dt_integer_large(numpy):
    """``count()`` over a 12345678-row int32 frame must return the row count.

    The data comes from ``numpy.random.randint``; only the size of the frame
    matters to the assertions, not the generated values.
    """
    nrows = 12345678
    values = numpy.random.randint(2**20, size=nrows, dtype=numpy.int32)
    counted = dt.Frame(values)[:, count()]
    assert counted.shape == (1, 1)
    assert counted.ltypes == (ltype.int,)
    assert counted.to_list() == [[nrows]]
github h2oai / datatable / tests / test_reduce.py View on Github external
def test_count_with_i():
    """``count()`` must honour the row selector of a 100-row frame."""
    # See issue 1316
    DT = dt.Frame(A=range(100))
    # (row selector, expected number of selected rows)
    cases = [
        (slice(None, 5), 5),          # DT[:5, ...]
        (slice(-12, None), 12),       # DT[-12:, ...]
        (slice(None, None, 3), 34),   # DT[::3, ...]
    ]
    for rows, expected in cases:
        assert DT[rows, count()][0, 0] == expected
github h2oai / datatable / tests / test_reduce.py View on Github external
def test_count_2d_dt_integer():
    """Per-column and whole-frame counts on a two-column integer frame.

    Column ``C0`` contains three ``None`` entries and ``C1`` one, out of 13
    rows each, so the expected results are 10, 12 and 13 respectively.
    """
    col0 = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]
    col1 = [0, 1, 0, 5, 3, 8, 1, 0, 2, 5, None, 8, 1]
    frame = dt.Frame([col0, col1])
    counts = frame[:, [count(f.C0), count(f.C1), count()]]
    frame_integrity_check(counts)
    assert counts.shape == (1, 3)
    assert counts.ltypes == (ltype.int, ltype.int, ltype.int)
    assert counts.to_list() == [[10], [12], [13]]
github h2oai / driverlessai-recipes / recipes / ieee_fraud_2019.py View on Github external
)

                return X[:, dt.f["col_cnt"] / dt.f["daily_cnt"]]

            elif self.group_type == 'hour':

                X = dt.Frame(X[:, self.group_col])
                X[:, 'date'] = dt.Frame(ieee_datetime.dt.strftime('%Y%m%d_%H').values)

                # Compute hourly counts
                hourly_cnt = X[:, {"hourly_cnt": dt.count()}, dt.by("date")]
                hourly_cnt.key = ["date"]
                X = X[:, :, dt.join(hourly_cnt)]

                # Compute card count
                col_cnt = X[:, {"col_cnt": dt.count()}, dt.by(*["date", self.group_col])]
                col_cnt.key = ["date", self.group_col]
                X = X[:, :, dt.join(col_cnt)]

                self._output_feature_names = ["IEEEGroupBys_{}_{}".format(self.group_col, self.group_type)]
                self._feature_desc = ["IEEEGroupBys_{}_{}".format(self.group_col, self.group_type)]

                print('=' * 50)
                print("MyIEEEGroupBysTransformers name  {} {}".format(
                    self._output_feature_names, self._feature_desc
                )
                )

                return X[:, dt.f["col_cnt"] / dt.f["hourly_cnt"]]


        else: