How to use the snorkel.classification.Operation function in snorkel

To help you get started, we’ve selected a few snorkel examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github snorkel-team / snorkel / test / classification / training / test_trainer.py View on Github external
def create_task(task_name, module_suffixes=("", "")):
    """Build a small two-operation ``Task`` for the trainer tests.

    The task reads the ``"data"`` field of the input, passes it through a
    ``Linear(2, 10)`` + ``ReLU`` module, and feeds that operation's output
    into a ``Linear(10, 2)`` module.

    Args:
        task_name: Name given to the returned task.
        module_suffixes: Indexable pair of suffixes; element 0 is appended to
            ``"linear1"`` and element 1 to ``"linear2"`` to form the module
            names.

    Returns:
        The constructed ``Task``.
    """
    hidden_name = f"linear1{module_suffixes[0]}"
    head_name = f"linear2{module_suffixes[1]}"

    # Modules are created in the same order as the operations that use them.
    modules = {
        hidden_name: nn.Sequential(nn.Linear(2, 10), nn.ReLU()),
        head_name: nn.Linear(10, 2),
    }
    pool = nn.ModuleDict(modules)

    # The head consumes the output of the hidden operation, referenced by name.
    hidden_op = Operation(module_name=hidden_name, inputs=[("_input_", "data")])
    head_op = Operation(module_name=head_name, inputs=[hidden_op.name])

    return Task(name=task_name, module_pool=pool, op_sequence=[hidden_op, head_op])
github snorkel-team / snorkel / test / classification / test_multitask_classifier.py View on Github external
def test_no_input_spec(self):
        """Run a forward pass through an operation declared with no inputs."""
        # An operation with an empty input list must not break the model.
        data = create_dataloader("task", shuffle=False).dataset
        identity_pool = nn.ModuleDict({"identity": nn.Identity()})
        identity_op = Operation("identity", [])
        identity_task = Task(
            name="task", module_pool=identity_pool, op_sequence=[identity_op]
        )
        classifier = MultitaskClassifier(tasks=[identity_task], dataparallel=False)
        forward_result = classifier.forward(data.X_dict, ["task"])
        self.assertIn("_input_", forward_result)
github snorkel-team / snorkel / test / classification / test_classifier_convergence.py View on Github external
def create_task(task_name: str, module_suffixes: List[str]) -> Task:
    """Build a two-operation ``Task`` scored on accuracy.

    The task reads the ``"coordinates"`` field of the input, passes it through
    a ``Linear(2, 20)`` + ``ReLU`` module, and feeds that operation's output
    into a ``Linear(20, 2)`` module.

    Args:
        task_name: Name given to the returned task.
        module_suffixes: Suffixes for the module names; element 0 is appended
            to ``"linear1"`` and element 1 to ``"linear2"``.

    Returns:
        The constructed ``Task``, with a ``Scorer`` reporting ``"accuracy"``.
    """
    hidden_name = f"linear1{module_suffixes[0]}"
    head_name = f"linear2{module_suffixes[1]}"

    # Modules are created in the same order as the operations that use them.
    modules = {
        hidden_name: nn.Sequential(nn.Linear(2, 20), nn.ReLU()),
        head_name: nn.Linear(20, 2),
    }
    pool = nn.ModuleDict(modules)

    # The head consumes the output of the hidden operation, referenced by name.
    hidden_op = Operation(
        module_name=hidden_name, inputs=[("_input_", "coordinates")]
    )
    head_op = Operation(module_name=head_name, inputs=[hidden_op.name])

    return Task(
        name=task_name,
        module_pool=pool,
        op_sequence=[hidden_op, head_op],
        scorer=Scorer(metrics=["accuracy"]),
    )
github snorkel-team / snorkel-tutorials / visual_relation / model.py View on Github external
# define an operation to extract word embeddings for subject and object categories
    word_emb_op = Operation(
        name="word_emb_op",
        module_name="word_emb",
        inputs=[("_input_", "sub_category"), ("_input_", "obj_category")],
    )

    # define an operation to concatenate image features and word embeddings
    concat_op = Operation(
        name="concat_op",
        module_name="feat_concat",
        inputs=["obj_feat_op", "sub_feat_op", "union_feat_op", "word_emb_op"],
    )

    # define an operation to make a prediction over all concatenated features
    prediction_op = Operation(
        name="head_op", module_name="prediction_head", inputs=["concat_op"]
    )

    return [
        sub_feat_op,
        obj_feat_op,
        union_feat_op,
        word_emb_op,
        concat_op,
        prediction_op,
    ]
github snorkel-team / snorkel-tutorials / mtl / multitask_tutorial.py View on Github external
#
# For example, below we define the module pool and task flow for the circle task:

# %%
import torch.nn as nn
from snorkel.classification import Operation

# Define a two-layer MLP module and a one-layer prediction "head" module
base_mlp = nn.Sequential(nn.Linear(2, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
head_module = nn.Linear(4, 2)

# The module pool contains all the modules this task uses, keyed by the names
# that the operations below refer to via `module_name`
module_pool = nn.ModuleDict({"base_mlp": base_mlp, "circle_head_module": head_module})

# "From the input dictionary, pull out 'circle_data' and send it through the `base_mlp` module"
op1 = Operation(
    name="base_mlp", module_name="base_mlp", inputs=[("_input_", "circle_data")]
)

# "From the output of op1 (the input op), pull out the 0th indexed output
# (i.e., the only output) and send it through the head_module"
# Note that the input refers to op1 by its operation name ("base_mlp").
op2 = Operation(
    name="circle_head", module_name="circle_head_module", inputs=[("base_mlp", 0)]
)

# The operations in the order listed: op1 first, then op2, which consumes op1's output
task_flow = [op1, op2]

# %% [markdown]
# The output of the final module in that sequence will then go into a `loss_func()` to calculate the loss (e.g., cross-entropy) during training or an `output_func()` (e.g., softmax) to convert the logits into a prediction. Each of these functions accepts as the first argument the final `module_name` to indicate inputs—we indicate this with the `partial(fn, module_name)` syntax.
#
# Each `Task` also specifies which metrics it supports, which are bundled together in a `Scorer` object. For this tutorial, we'll just look at accuracy.
github snorkel-team / snorkel-tutorials / visual_relation / model.py View on Github external
def get_op_sequence():
    # define feature extractors for each of the (union, subject, and object) image crops
    union_feat_op = Operation(
        name="union_feat_op",
        module_name="feat_extractor",
        inputs=[("_input_", "union_crop")],
    )

    sub_feat_op = Operation(
        name="sub_feat_op",
        module_name="feat_extractor",
        inputs=[("_input_", "sub_crop")],
    )

    obj_feat_op = Operation(
        name="obj_feat_op",
        module_name="feat_extractor",
        inputs=[("_input_", "obj_crop")],
    )

    # define an operation to extract word embeddings for subject and object categories
    word_emb_op = Operation(
        name="word_emb_op",
        module_name="word_emb",
        inputs=[("_input_", "sub_category"), ("_input_", "obj_category")],
    )

    # define an operation to concatenate image features and word embeddings
    concat_op = Operation(
        name="concat_op",
        module_name="feat_concat",
github snorkel-team / snorkel-tutorials / mtl / multitask_tutorial.py View on Github external
# Note that `Task` objects are not dependent on a particular dataset; multiple datasets can be passed through the same modules for pre-training or co-training.

# %% [markdown]
# We'll now define the square task, but more succinctly—for example, using the fact that the default name for an `Operation` is its `module_name` (since most tasks only use their modules once per forward pass).
#
# We'll also define the square task to share the first module in its task flow (`base_mlp`) with the circle task to demonstrate how to share modules. (Note that this is purely for illustrative purposes; for this toy task, it is very possible that this is not the optimal arrangement of modules).
#
# Finally, the most common task definitions we see in practice are classification tasks with cross-entropy loss and softmax on the output of the last module, with accuracy most often the primary metric of interest. These are all the default values, so we can drop them here for brevity.

# %%
# Define the square task, reusing the `base_mlp` module object so that it is
# shared with the circle task, and adding a new `square_head` module.
square_task = Task(
    name="square_task",
    module_pool=nn.ModuleDict({"base_mlp": base_mlp, "square_head": nn.Linear(4, 2)}),
    task_flow=[
        # Pull 'square_data' out of the input dictionary and send it through `base_mlp`
        Operation("base_mlp", [("_input_", "square_data")]),
        # Take the 0th output of the `base_mlp` operation and send it through `square_head`
        Operation("square_head", [("base_mlp", 0)]),
    ],
    # NOTE(review): the preceding text says the default values are dropped for
    # brevity, yet `scorer` is still passed explicitly here — confirm whether
    # this argument is required in this version of the API.
    scorer=Scorer(metrics=["accuracy"]),
)

# %% [markdown]
# ## Model

# %% [markdown]
# With our tasks defined, constructing a model is simple: we pass in the list of tasks, and the model constructs itself using information from the task flows.
#
# Note that the model uses the names of modules (not the modules themselves) to determine whether two modules specified by separate tasks are the same module (and should share weights) or different modules (with separate weights).
# So because both the `square_task` and `circle_task` include "base_mlp" in their module pools, this module will be shared between the two tasks.

# %%
from snorkel.classification import SnorkelClassifier
github snorkel-team / snorkel-tutorials / mtl / multitask_tutorial.py View on Github external
# Define a two-layer MLP module and a one-layer prediction "head" module
base_mlp = nn.Sequential(nn.Linear(2, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())
head_module = nn.Linear(4, 2)

# The module pool contains all the modules this task uses, keyed by the names
# that the operations below refer to via `module_name`
module_pool = nn.ModuleDict({"base_mlp": base_mlp, "circle_head_module": head_module})

# "From the input dictionary, pull out 'circle_data' and send it through the `base_mlp` module"
op1 = Operation(
    name="base_mlp", module_name="base_mlp", inputs=[("_input_", "circle_data")]
)

# "From the output of op1 (the input op), pull out the 0th indexed output
# (i.e., the only output) and send it through the head_module"
# Note that the input refers to op1 by its operation name ("base_mlp").
op2 = Operation(
    name="circle_head", module_name="circle_head_module", inputs=[("base_mlp", 0)]
)

# The operations in the order listed: op1 first, then op2, which consumes op1's output
task_flow = [op1, op2]

# %% [markdown]
# The output of the final module in that sequence will then go into a `loss_func()` to calculate the loss (e.g., cross-entropy) during training or an `output_func()` (e.g., softmax) to convert the logits into a prediction. Each of these functions accepts as the first argument the final `module_name` to indicate inputs—we indicate this with the `partial(fn, module_name)` syntax.
#
# Each `Task` also specifies which metrics it supports, which are bundled together in a `Scorer` object. For this tutorial, we'll just look at accuracy.

# %% [markdown]
# Putting this all together, we define the circle task:

# %%
from functools import partial
github snorkel-team / snorkel-tutorials / multitask / multitask_tutorial.py View on Github external
# %% [markdown]
# ### Again, but faster

# %% [markdown]
# We'll now define the square task, but more succinctly—for example, using the fact that the default name for an `Operation` is its `module_name` (since most tasks only use their modules once per forward pass).
#
# We'll also define the square task to share the first module in its task flow (`base_mlp`) with the circle task to demonstrate how to share modules. (Note that this is purely for illustrative purposes; for this toy task, it is quite possible that this is not the optimal arrangement of modules).
#
# Finally, the most common task definitions we see in practice are classification tasks with cross-entropy loss and softmax on the output of the last module, with accuracy most often the primary metric of interest. These are all the default values, so we can drop them here for brevity.

# %%
# Define the square task, reusing the `base_mlp` module object so that it is
# shared with the circle task, and adding a new `square_head` module.
# No loss, output function, or scorer is passed to `Task` here.
square_task = Task(
    name="square_task",
    module_pool=nn.ModuleDict({"base_mlp": base_mlp, "square_head": nn.Linear(4, 2)}),
    op_sequence=[
        # Pull 'square_data' out of the input dictionary and send it through `base_mlp`
        Operation("base_mlp", [("_input_", "square_data")]),
        # Feed the output of the `base_mlp` operation (referenced by name) into `square_head`
        Operation("square_head", ["base_mlp"]),
    ],
)

# %% [markdown]
# ## Model

# %% [markdown]
# With our tasks defined, constructing a model is simple: we pass in the list of tasks, and the model constructs itself using information from the task flows.
#
# Note that the model uses the names of modules (not the modules themselves) to determine whether two modules specified by separate tasks are the same module (and should share weights) or different modules (with separate weights).
# So because both the `square_task` and `circle_task` include "base_mlp" in their module pools, this module will be shared between the two tasks.

# %%
from snorkel.classification import MultitaskClassifier
github snorkel-team / snorkel-tutorials / visual_relation / model.py View on Github external
obj_feat_op = Operation(
        name="obj_feat_op",
        module_name="feat_extractor",
        inputs=[("_input_", "obj_crop")],
    )

    # define an operation to extract word embeddings for subject and object categories
    word_emb_op = Operation(
        name="word_emb_op",
        module_name="word_emb",
        inputs=[("_input_", "sub_category"), ("_input_", "obj_category")],
    )

    # define an operation to concatenate image features and word embeddings
    concat_op = Operation(
        name="concat_op",
        module_name="feat_concat",
        inputs=["obj_feat_op", "sub_feat_op", "union_feat_op", "word_emb_op"],
    )

    # define an operation to make a prediction over all concatenated features
    prediction_op = Operation(
        name="head_op", module_name="prediction_head", inputs=["concat_op"]
    )

    return [
        sub_feat_op,
        obj_feat_op,
        union_feat_op,
        word_emb_op,
        concat_op,