Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_no_cache():
    """Setting a key must not leave stale names / types on the Frame.

    The frame's names, ltypes, stypes and a column index are read once before
    the key is assigned and again afterwards; the second round must reflect
    the key column "C" having moved to the front.
    """
    frame = dt.Frame(
        [[1.1] * 4, list("ABCD"), [3, 5, 2, 1]],
        names=["A", "B", "C"],
    )

    # First round of reads, before any key exists.
    assert frame.names == ("A", "B", "C")
    assert frame.ltypes == (dt.ltype.real, dt.ltype.str, dt.ltype.int)
    assert frame.stypes == (dt.float64, dt.str32, dt.int32)
    assert frame.colindex("B") == 1
    frame_integrity_check(frame)

    frame.key = "C"

    # Second round: every property must now report the re-ordered columns.
    assert frame.names == ("C", "A", "B")
    assert frame.ltypes == (dt.ltype.int, dt.ltype.real, dt.ltype.str)
    assert frame.stypes == (dt.int32, dt.float64, dt.str32)
    assert frame.colindex("B") == 2
    frame_integrity_check(frame)
def test_numbers_with_quotes2():
    """fread must parse quoted numbers and numbers with trailing spaces as ints."""
    csv_text = (
        'A,B\n'
        '83 ,"23948"\n'
        '55 ,"20487203497"'
    )
    parsed = dt.fread(csv_text)
    frame_integrity_check(parsed)
    assert parsed.shape == (2, 2)
    assert parsed.ltypes == (dt.ltype.int,) * 2
    assert parsed.names == ("A", "B")
    expected_columns = [[83, 55], [23948, 20487203497]]
    assert parsed.to_list() == expected_columns
@pytest.mark.parametrize("st", dt.ltype.int.stypes)
def test_replace_into_int(st):
    """Replacing 0 and 1 with None in an integer column of stype `st`."""
    values = [0, 5, 9, 0, 3, 1]
    frame = dt.Frame(A=values, stype=st)
    frame.replace([0, 1], None)
    # Every 0 and 1 from the input is expected to come back as None.
    expected = [[None, 5, 9, None, 3, None]]
    assert frame.to_list() == expected
def test_stype_ltypes(c_stypes):
    """Each stype's `.ltype` must be the ltype named in the `c_stypes` entry.

    `c_stypes` is indexed by the stype's `code`; the "ltype" entry is
    lower-cased before being turned into an `ltype` member.
    """
    from datatable import stype, ltype

    for member in stype:
        assert member.ltype is ltype(c_stypes[member.code]["ltype"].lower())
def test_count_2d_dt_integer():
    """count() over two integer columns and over the whole frame.

    The expected counts are 10 and 12 for the two columns (which contain
    3 and 1 None entries out of 13 respectively) and 13 for the bare count().
    """
    first_column = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]
    second_column = [0, 1, 0, 5, 3, 8, 1, 0, 2, 5, None, 8, 1]
    source = dt.Frame([first_column, second_column])

    counts = source[:, [count(f.C0), count(f.C1), count()]]

    frame_integrity_check(counts)
    assert counts.shape == (1, 3)
    assert counts.ltypes == (ltype.int,) * 3
    assert counts.to_list() == [[10], [12], [13]]
def test_1line_not_header():
    """A single input line "C1,C2,3" must be read as one data row."""
    parsed = dt.fread(text="C1,C2,3")
    frame_integrity_check(parsed)
    # One row, three columns: two strings followed by an integer.
    assert parsed.shape == (1, 3)
    expected_ltypes = (ltype.str, ltype.str, ltype.int)
    assert parsed.ltypes == expected_ltypes
    assert parsed.to_list() == [["C1"], ["C2"], [3]]
def test_create_from_list():
    """A flat list of ints becomes a single int column named "C0"."""
    frame = dt.Frame([1, 2, 3])
    frame_integrity_check(frame)
    assert frame.shape == (3, 1)
    assert frame.names == ("C0",)
    assert frame.ltypes == (ltype.int,)
def test_aggregate_view_1d_continuous_float():
    """Aggregate rows 5..10 of a float frame and verify the source is untouched.

    The aggregation is run with `min_rows=0` and `n_bins=5` on a row slice of
    the input; afterwards the input is compared with a copy taken beforehand.
    """
    source = dt.Frame(
        [0.0, 1.1, None, 2.2, None, 3.1, 3.2, 4.1, 4.0, None, 5.1]
    )
    source_snapshot = dt.Frame(source)
    sliced = source[5:11, :]

    exemplars, members = aggregate(sliced, min_rows=0, n_bins=5)

    # Membership frame: one integer id per row of the slice.
    frame_integrity_check(members)
    assert members.shape == (6, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == [[1, 1, 2, 2, 0, 3]]

    # Exemplar frame: a real column followed by an integer column.
    frame_integrity_check(exemplars)
    assert exemplars.shape == (4, 2)
    assert exemplars.ltypes == (ltype.real, ltype.int)
    assert exemplars.to_list() == [[None, 3.1, 4.1, 5.1], [1, 2, 2, 1]]

    # The original frame must be equal to the copy made before aggregating.
    assert_equals(source, source_snapshot)
def test_create_from_string():
    """A multi-line CSV string passed to dt.Frame is parsed into typed columns."""
    # The text starts with an empty line and ends with a trailing newline.
    csv_text = (
        '\n'
        'A,B,C,D\n'
        '1,2,3,boo\n'
        '0,5.5,,bar\n'
        ',NaN,1000,""\n'
    )
    frame = dt.Frame(csv_text)
    frame_integrity_check(frame)
    assert frame.names == ("A", "B", "C", "D")
    expected_ltypes = (
        dt.ltype.bool,
        dt.ltype.real,
        dt.ltype.int,
        dt.ltype.str,
    )
    assert frame.ltypes == expected_ltypes
    expected_columns = [
        [True, False, None],
        [2.0, 5.5, None],
        [3, None, 1000],
        ["boo", "bar", ""],
    ]
    assert frame.to_list() == expected_columns
def get_datetime_from_transactionDT(X: dt.Frame):
    """Turn the integer "TransactionDT" column of `X` into datetimes.

    Each value is used as a number of seconds and added to the fixed start
    date 2017-12-01 00:00:00.

    Returns None when `X` has no "TransactionDT" column or when that column's
    ltype is not int; otherwise returns the result of applying the conversion
    to the column after `to_pandas()`.
    """
    column = "TransactionDT"
    # `or` short-circuits, so the column is only inspected when it exists.
    if column not in X.names or X[:, column].ltypes[0] != dt.ltype.int:
        return None

    # This is the original integer feature
    origin = datetime.datetime.strptime('2017-12-01', "%Y-%m-%d")

    def _offset_from_origin(seconds):
        return origin + datetime.timedelta(seconds=seconds)

    as_pandas = X[:, column].to_pandas()
    return as_pandas[column].apply(_offset_from_origin)