Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
relationship=lambda stat, pvalue, alpha=0.01: (
stat > 0 and pvalue / 2 < alpha
),
relationship_kwargs={"alpha": 0.5}
)
]),
"sex": Column(String),
})
# Happy paths: every passing schema must validate df without raising.
for passing_schema in (
    schema_pass_ttest_on_alpha_val_1,
    schema_pass_ttest_on_alpha_val_2,
    schema_pass_ttest_on_alpha_val_3,
    schema_pass_ttest_on_custom_relationship,
):
    passing_schema.validate(df)

# Two-sample t-test check comparing group "M" against group "F" (grouped by
# the "sex" column) with the "greater_than" relationship at alpha=0.05.
strict_ttest_check = Hypothesis.two_sample_ttest(
    sample1="M",
    sample2="F",
    groupby="sex",
    relationship="greater_than",
    alpha=0.05,
)
# Named as a failing case; the pytest.raises assertions in this test expect
# validation against df to raise SchemaError.
schema_fail_ttest_on_alpha_val_1 = DataFrameSchema({
    "height_in_feet": Column(Float, [strict_ttest_check]),
    "sex": Column(String),
})
schema_fail_ttest_on_alpha_val_2 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis(test=stats.ttest_ind,
samples=["M", "F"],
groupby="sex",
# Two-sample t-test check comparing group "M" against group "F" (grouped by
# the "sex" column) with the "greater_than" relationship at alpha=0.05.
strict_ttest_check = Hypothesis.two_sample_ttest(
    sample1="M",
    sample2="F",
    groupby="sex",
    relationship="greater_than",
    alpha=0.05,
)
schema_fail_ttest_on_alpha_val_3 = DataFrameSchema({
    "height_in_feet": Column(Float, [strict_ttest_check]),
    "sex": Column(String),
})

# Each failing schema must reject df by raising SchemaError.
for failing_schema in (
    schema_fail_ttest_on_alpha_val_1,
    schema_fail_ttest_on_alpha_val_2,
    schema_fail_ttest_on_alpha_val_3,
):
    with pytest.raises(errors.SchemaError):
        failing_schema.validate(df)
# Build the same t-test parameters through two different constructors.

# 1) The Hypothesis.two_sample_ttest constructor, passing alpha directly.
lenient_ttest_check = Hypothesis.two_sample_ttest(
    sample1="M",
    sample2="F",
    groupby="sex",
    relationship="greater_than",
    alpha=0.5,
)
schema_pass_ttest_on_alpha_val_1 = DataFrameSchema({
    "height_in_feet": Column(Float, [lenient_ttest_check]),
    "sex": Column(String),
})

# 2) The generic Hypothesis constructor with an explicit test function
#    (stats is presumably scipy.stats), passing alpha via relationship_kwargs.
generic_ttest_check = Hypothesis(
    test=stats.ttest_ind,
    samples=["M", "F"],
    groupby="sex",
    relationship="greater_than",
    relationship_kwargs={"alpha": 0.5},
)
schema_pass_ttest_on_alpha_val_2 = DataFrameSchema({
    "height_in_feet": Column(Float, [generic_ttest_check]),
    "sex": Column(String),
})
schema_pass_ttest_on_alpha_val_3 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis.two_sample_ttest(
sample1="M",
sample2="F",
groupby="sex",
relationship="greater_than",
# Build the same t-test parameters through two different constructors.

# 1) The Hypothesis.two_sample_ttest constructor, passing alpha directly.
lenient_ttest_check = Hypothesis.two_sample_ttest(
    sample1="M",
    sample2="F",
    groupby="sex",
    relationship="greater_than",
    alpha=0.5,
)
schema_pass_ttest_on_alpha_val_1 = DataFrameSchema({
    "height_in_feet": Column(Float, [lenient_ttest_check]),
    "sex": Column(String),
})

# 2) The generic Hypothesis constructor with an explicit test function
#    (stats is presumably scipy.stats), passing alpha via relationship_kwargs.
generic_ttest_check = Hypothesis(
    test=stats.ttest_ind,
    samples=["M", "F"],
    groupby="sex",
    relationship="greater_than",
    relationship_kwargs={"alpha": 0.5},
)
schema_pass_ttest_on_alpha_val_2 = DataFrameSchema({
    "height_in_feet": Column(Float, [generic_ttest_check]),
    "sex": Column(String),
})
schema_pass_ttest_on_alpha_val_3 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis.two_sample_ttest(
sample1="M",
sample2="F",
groupby="sex",
relationship="greater_than",
# Failing variants: the same "M" vs "F" t-test parameters at alpha=0.05,
# built through two different constructors.

# 1) The Hypothesis.two_sample_ttest constructor, passing alpha directly.
strict_ttest_check = Hypothesis.two_sample_ttest(
    sample1="M",
    sample2="F",
    groupby="sex",
    relationship="greater_than",
    alpha=0.05,
)
schema_fail_ttest_on_alpha_val_1 = DataFrameSchema({
    "height_in_feet": Column(Float, [strict_ttest_check]),
    "sex": Column(String),
})

# 2) The generic Hypothesis constructor with an explicit test function
#    (stats is presumably scipy.stats), passing alpha via relationship_kwargs.
strict_generic_check = Hypothesis(
    test=stats.ttest_ind,
    samples=["M", "F"],
    groupby="sex",
    relationship="greater_than",
    relationship_kwargs={"alpha": 0.05},
)
schema_fail_ttest_on_alpha_val_2 = DataFrameSchema({
    "height_in_feet": Column(Float, [strict_generic_check]),
    "sex": Column(String),
})
schema_fail_ttest_on_alpha_val_3 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis.two_sample_ttest(
sample1="M",
sample2="F",
groupby="sex",
relationship="greater_than",
alpha=0.05),
"height_in_feet": [6.5, 7, 6.1, 5.1, 4],
"sex": ["M", "M", "F", "F", "F"]
})
)
# Build the same t-test parameters through two different constructors.

# 1) The Hypothesis.two_sample_ttest constructor, passing alpha directly.
lenient_ttest_check = Hypothesis.two_sample_ttest(
    sample1="M",
    sample2="F",
    groupby="sex",
    relationship="greater_than",
    alpha=0.5,
)
schema_pass_ttest_on_alpha_val_1 = DataFrameSchema({
    "height_in_feet": Column(Float, [lenient_ttest_check]),
    "sex": Column(String),
})

# 2) The generic Hypothesis constructor with an explicit test function
#    (stats is presumably scipy.stats), passing alpha via relationship_kwargs.
generic_ttest_check = Hypothesis(
    test=stats.ttest_ind,
    samples=["M", "F"],
    groupby="sex",
    relationship="greater_than",
    relationship_kwargs={"alpha": 0.5},
)
schema_pass_ttest_on_alpha_val_2 = DataFrameSchema({
    "height_in_feet": Column(Float, [generic_ttest_check]),
    "sex": Column(String),
})
schema_pass_ttest_on_alpha_val_3 = DataFrameSchema({
"height_in_feet": Column(Float, [
relationship="greater_than",
relationship_kwargs={"alpha": 0.05}),
]),
"sex": Column(String)
})
# Two-sample t-test check comparing group "M" against group "F" (grouped by
# the "sex" column) with the "greater_than" relationship at alpha=0.05.
strict_ttest_check = Hypothesis.two_sample_ttest(
    sample1="M",
    sample2="F",
    groupby="sex",
    relationship="greater_than",
    alpha=0.05,
)
schema_fail_ttest_on_alpha_val_3 = DataFrameSchema({
    "height_in_feet": Column(Float, [strict_ttest_check]),
    "sex": Column(String),
})

# Each failing schema must reject df by raising SchemaError.
for failing_schema in (
    schema_fail_ttest_on_alpha_val_1,
    schema_fail_ttest_on_alpha_val_2,
    schema_fail_ttest_on_alpha_val_3,
):
    with pytest.raises(errors.SchemaError):
        failing_schema.validate(df)
def test_hypothesis():
# Example df for tests:
df = (
pd.DataFrame({
"height_in_feet": [6.5, 7, 6.1, 5.1, 4],
"sex": ["M", "M", "F", "F", "F"]
})
)
# Initialise the different ways of calling a test:
schema_pass_ttest_on_alpha_val_1 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis.two_sample_ttest(
sample1="M",
sample2="F",
groupby="sex",
relationship="greater_than",
alpha=0.5),
]),
"sex": Column(String)
})
schema_pass_ttest_on_alpha_val_2 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis(test=stats.ttest_ind,
samples=["M", "F"],
groupby="sex",
relationship="greater_than",
# Build the same t-test parameters through two different constructors.

# 1) The Hypothesis.two_sample_ttest constructor, passing alpha directly.
lenient_ttest_check = Hypothesis.two_sample_ttest(
    sample1="M",
    sample2="F",
    groupby="sex",
    relationship="greater_than",
    alpha=0.5,
)
schema_pass_ttest_on_alpha_val_1 = DataFrameSchema({
    "height_in_feet": Column(Float, [lenient_ttest_check]),
    "sex": Column(String),
})

# 2) The generic Hypothesis constructor with an explicit test function
#    (stats is presumably scipy.stats), passing alpha via relationship_kwargs.
generic_ttest_check = Hypothesis(
    test=stats.ttest_ind,
    samples=["M", "F"],
    groupby="sex",
    relationship="greater_than",
    relationship_kwargs={"alpha": 0.5},
)
schema_pass_ttest_on_alpha_val_2 = DataFrameSchema({
    "height_in_feet": Column(Float, [generic_ttest_check]),
    "sex": Column(String),
})
schema_pass_ttest_on_alpha_val_3 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis.two_sample_ttest(
sample1="M",
sample2="F",
groupby="sex",
def test_hypothesis():
# Example df for tests:
df = (
pd.DataFrame({
"height_in_feet": [6.5, 7, 6.1, 5.1, 4],
"sex": ["M", "M", "F", "F", "F"]
})
)
# Initialise the different ways of calling a test:
schema_pass_ttest_on_alpha_val_1 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis.two_sample_ttest(
sample1="M",
sample2="F",
groupby="sex",
relationship="greater_than",
alpha=0.5),
]),
"sex": Column(String)
})
schema_pass_ttest_on_alpha_val_2 = DataFrameSchema({
"height_in_feet": Column(Float, [
Hypothesis(test=stats.ttest_ind,
samples=["M", "F"],
groupby="sex",
relationship="greater_than",
relationship_kwargs={"alpha": 0.5}