How to use the openml.flows module in openml

To help you get started, we’ve selected a few openml examples, based on popular ways it is used in public projects.

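Before looking at the project snippets below, here is a minimal sketch of the core openml.flows calls: listing, fetching, and checking for a flow. The flow id used is hypothetical; substitute any id known to the server you are connected to.

import openml

# List a small number of flows registered on the server
# (returned as a dict keyed by flow id).
flows = openml.flows.list_flows(size=10)

# Download the full description of a single flow; 8109 is a hypothetical id.
flow = openml.flows.get_flow(8109)
print(flow.name, flow.external_version)

# Check whether a flow with this name and external version already exists.
print(openml.flows.flow_exists(flow.name, flow.external_version))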

github openml / openml-python / tests / test_runs / test_run_functions.py
"""
        classes_without_random_state = \
            ['sklearn.model_selection._search.GridSearchCV',
             'sklearn.pipeline.Pipeline',
             'sklearn.linear_model.base.LinearRegression',
             ]

        def _remove_random_state(flow):
            if 'random_state' in flow.parameters:
                del flow.parameters['random_state']
            for component in flow.components.values():
                _remove_random_state(component)

        flow = self.extension.model_to_flow(clf)
        flow, _ = self._add_sentinel_to_flow_name(flow, sentinel)
        if not openml.flows.flow_exists(flow.name, flow.external_version):
            flow.publish()
            TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
            TestBase.logger.info("collected from test_run_functions: {}".format(flow.flow_id))

        task = openml.tasks.get_task(task_id)

        X, y = task.get_X_and_y()
        self.assertEqual(np.count_nonzero(np.isnan(X)), n_missing_vals)
        run = openml.runs.run_flow_on_task(
            flow=flow,
            task=task,
            seed=seed,
            avoid_duplicate_runs=openml.config.avoid_duplicate_runs,
        )
        run_ = run.publish()
        TestBase._mark_entity_for_removal('run', run.run_id)
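The test above converts a scikit-learn estimator into a flow, publishes it only if no flow with the same name and external version exists on the server yet, and then runs it on a task. A minimal sketch of that publish-if-missing pattern outside the test harness, assuming a scikit-learn classifier and a hypothetical task id:

import openml
from openml.extensions.sklearn import SklearnExtension
from sklearn.tree import DecisionTreeClassifier

extension = SklearnExtension()
clf = DecisionTreeClassifier(max_depth=3)

# Convert the estimator into an OpenMLFlow description.
flow = extension.model_to_flow(clf)

# Publish only if the server does not already know this name/version pair.
if not openml.flows.flow_exists(flow.name, flow.external_version):
    flow.publish()

# 31 is a hypothetical task id; use any task available on your server.
task = openml.tasks.get_task(31)
run = openml.runs.run_flow_on_task(flow=flow, task=task, avoid_duplicate_runs=False)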
github openml / openml-python / tests / test_flows / test_flow.py
def test_tagging(self):
        flow_list = openml.flows.list_flows(size=1)
        flow_id = list(flow_list.keys())[0]
        flow = openml.flows.get_flow(flow_id)
        tag = "testing_tag_{}_{}".format(self.id(), time.time())
        flow_list = openml.flows.list_flows(tag=tag)
        self.assertEqual(len(flow_list), 0)
        flow.push_tag(tag)
        flow_list = openml.flows.list_flows(tag=tag)
        self.assertEqual(len(flow_list), 1)
        self.assertIn(flow_id, flow_list)
        flow.remove_tag(tag)
        flow_list = openml.flows.list_flows(tag=tag)
        self.assertEqual(len(flow_list), 0)
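The same tagging round-trip is useful outside of tests, for example to group the flows belonging to one experiment. A small sketch, where the tag name is yours to choose:

import time
import openml

# Pick an arbitrary flow to tag (any existing flow id works).
flow_list = openml.flows.list_flows(size=1)
flow = openml.flows.get_flow(list(flow_list.keys())[0])

# A timestamped tag avoids collisions between repeated runs of this script.
tag = "my_experiment_{}".format(int(time.time()))
flow.push_tag(tag)

# list_flows can filter by tag, so the freshly tagged flow shows up here.
assert flow.flow_id in openml.flows.list_flows(tag=tag)

flow.remove_tag(tag)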
github openml / openml-python / openml / setups / functions.py
def initialize_model(setup_id: int) -> Any:
    """
    Initialize a model based on a setup_id (i.e., using the exact
    same parameter settings)

    Parameters
    ----------
    setup_id : int
        The OpenML setup_id

    Returns
    -------
    model
    """
    setup = get_setup(setup_id)
    flow = openml.flows.get_flow(setup.flow_id)

    # Instead of using scikit-learn's or any other library's "set_params" function, we override
    # the OpenMLFlow object's default parameter values so we can utilize the
    # Extension.flow_to_model() function to reinitialize the flow with the set defaults.
    for hyperparameter in setup.parameters.values():
        structure = flow.get_structure('flow_id')
        if len(structure[hyperparameter.flow_id]) > 0:
            subflow = flow.get_subflow(structure[hyperparameter.flow_id])
        else:
            subflow = flow
        subflow.parameters[hyperparameter.parameter_name] = \
            hyperparameter.value

    model = flow.extension.flow_to_model(flow)
    return model
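In most cases you do not need to reimplement this logic yourself: openml.setups.initialize_model wraps it and returns a ready-to-use model. A short sketch, with a hypothetical setup id:

import openml

# 9130 is a hypothetical setup id; use any setup stored on the server.
setup = openml.setups.get_setup(9130)
flow = openml.flows.get_flow(setup.flow_id)
print(flow.name)

# initialize_model applies the parameter overrides shown above and asks the
# extension to turn the flow back into a model object.
model = openml.setups.initialize_model(9130)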
github openml / openml-python / circle_drop / _downloads / flows_and_runs_tutorial.py
# pprint(vars(run), depth=2)

############################################################################
# Share the run on the OpenML server
#
# So far the run is only available locally. By calling the publish function, the run is sent to the OpenML server:

myrun = run.publish()
# For this tutorial, our configuration publishes to the test server
# so as not to pollute the main server.
print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id))

############################################################################
# We can now also inspect the flow object which was automatically created:

flow = openml.flows.get_flow(run.flow_id)
pprint(vars(flow), depth=1)

############################################################################
# It also works with pipelines
# ############################
#
# When you need to handle 'dirty' data, build pipelines to model them automatically.
task = openml.tasks.get_task(115)
pipe = pipeline.Pipeline(steps=[
    ('Imputer', preprocessing.Imputer(strategy='median')),
    ('OneHotEncoder', preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore')),
    ('Classifier', ensemble.RandomForestClassifier())
])
flow = openml.flows.sklearn_to_flow(pipe)

run = openml.runs.run_flow_on_task(flow, task, avoid_duplicate_runs=False)
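As with the plain classifier earlier in the tutorial, the pipeline run can be published; if the server does not yet know the flow describing the pipeline, it is created automatically during publishing. A hedged continuation of the snippet above, reusing its run object:

# Publishing uploads the pipeline's flow (if necessary) and then the run itself.
myrun = run.publish()
print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id))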
github openml / openml.org / src / dashboard / layouts.py
def get_flow_overview():
    """

    :return: overview page for flows
    """

    df = flows.list_flows(output_format='dataframe')
    count = pd.DataFrame(df["name"].value_counts()).reset_index()
    count.columns = ["name", "count"]
    count = count[0:1000]
    short = []
    for name in count["name"]:
        try:
            short.append(SklearnExtension.trim_flow_name(name))
        except Exception:
            # Skip flow names that cannot be trimmed (e.g. non-sklearn flows).
            pass
    count["name"] = short
    fig = go.Figure(data=[go.Bar(y=count["name"].values, x=count["count"].values,
                                 marker=dict(color='blue',
                                             opacity=0.8),
                                 orientation="h")])
    fig.update_layout(
                      yaxis=dict(autorange="reversed"),
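The dashboard code above boils down to two openml calls: list_flows with a pandas output and the SklearnExtension helper that shortens flow names for display. A stripped-down sketch of just that part:

import openml
from openml.extensions.sklearn import SklearnExtension

# Fetch all flows as a pandas DataFrame and count how often each name occurs.
df = openml.flows.list_flows(output_format="dataframe")
counts = df["name"].value_counts()

for name in counts.index[:5]:
    try:
        # trim_flow_name shortens verbose flow names so they fit into a label.
        print(SklearnExtension.trim_flow_name(name))
    except Exception:
        # Non-sklearn flow names may not be trimmable, as guarded above.
        print(name)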
github openml / openml-python / openml / runs / run.py
def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
        """ Collect all information to display in the __repr__ body. """
        fields = {"Uploader Name": self.uploader_name,
                  "Metric": self.task_evaluation_measure,
                  "Run ID": self.run_id,
                  "Task ID": self.task_id,
                  "Task Type": self.task_type,
                  "Task URL": openml.tasks.OpenMLTask.url_for_id(self.task_id),
                  "Flow ID": self.flow_id,
                  "Flow Name": self.flow_name,
                  "Flow URL": openml.flows.OpenMLFlow.url_for_id(self.flow_id),
                  "Setup ID": self.setup_id,
                  "Setup String": self.setup_string,
                  "Dataset ID": self.dataset_id,
                  "Dataset URL": openml.datasets.OpenMLDataset.url_for_id(self.dataset_id)}
        if self.uploader is not None:
            fields["Uploader Profile"] = "{}/u/{}".format(openml.config.get_server_base_url(),
                                                          self.uploader)
        if self.run_id is not None:
            fields["Run URL"] = self.openml_url
        if self.evaluations is not None and self.task_evaluation_measure in self.evaluations:
            fields["Result"] = self.evaluations[self.task_evaluation_measure]

        # determines the order in which the information will be printed
        order = ["Uploader Name", "Uploader Profile", "Metric", "Result", "Run ID", "Run URL",
                 "Task ID", "Task Type", "Task URL", "Flow ID", "Flow Name", "Flow URL",
                 "Setup ID", "Setup String", "Dataset ID", "Dataset URL"]
github openml / openml-python / openml / runs / run.py
"(This should never happen.) "
            )
        if self.flow_id is None:
            if self.flow is None:
                raise PyOpenMLError(
                    "OpenMLRun object does not contain a flow id or reference to OpenMLFlow "
                    "(these should have been added while executing the task). "
                )
            else:
                # publish the linked Flow before publishing the run.
                self.flow.publish()
                self.flow_id = self.flow.flow_id

        if self.parameter_settings is None:
            if self.flow is None:
                self.flow = openml.flows.get_flow(self.flow_id)
            self.parameter_settings = self.flow.extension.obtain_parameter_values(
                self.flow,
                self.model,
            )

        file_elements = {'description': ("description.xml", self._to_xml())}

        if self.error_message is None:
            predictions = arff.dumps(self._generate_arff_dict())
            file_elements['predictions'] = ("predictions.arff", predictions)

        if self.trace is not None:
            trace_arff = arff.dumps(self.trace.trace_to_arff())
            file_elements['trace'] = ("trace.arff", trace_arff)
        return file_elements
github openml / openml-python / openml / extensions / sklearn / extension.py
flow : OpenMLFlow
            OpenMLFlow object (containing flow ids, i.e., it has to be downloaded from the server)

        model: Any, optional (default=None)
            The model from which to obtain the parameter values. Must match the flow signature.
            If None, use the model specified in ``OpenMLFlow.model``.

        Returns
        -------
        list
            A list of dicts, where each dict has the following entries:
            - ``oml:name`` : str: The OpenML parameter name
            - ``oml:value`` : mixed: A representation of the parameter value
            - ``oml:component`` : int: flow id to which the parameter belongs
        """
        openml.flows.functions._check_flow_for_server_id(flow)

        def get_flow_dict(_flow):
            flow_map = {_flow.name: _flow.flow_id}
            for subflow in _flow.components:
                flow_map.update(get_flow_dict(_flow.components[subflow]))
            return flow_map

        def extract_parameters(_flow, _flow_dict, component_model,
                               _main_call=False, main_id=None):
            def is_subcomponent_specification(values):
                # checks whether the current value can be a specification of
                # subcomponents, as for example the value for steps parameter
                # (in Pipeline) or transformers parameter (in
                # ColumnTransformer). These are always lists/tuples of lists/
                # tuples, size bigger than 2 and an OpenMLFlow item involved.
                if not isinstance(values, (tuple, list)):
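The helper above walks a flow and its subflows and returns one dict per hyperparameter, using the oml:name, oml:value, and oml:component keys described in the docstring. A minimal sketch of calling it through the scikit-learn extension, assuming a hypothetical flow id and a matching scikit-learn version installed locally:

import openml
from openml.extensions.sklearn import SklearnExtension

extension = SklearnExtension()

# The flow must carry server ids, so fetch a published flow; 8109 is a
# hypothetical flow id. reinstantiate=True also rebuilds flow.model, which
# is used when no explicit model argument is passed.
flow = openml.flows.get_flow(8109, reinstantiate=True)

parameters = extension.obtain_parameter_values(flow)
for p in parameters:
    print(p["oml:name"], p["oml:value"], p["oml:component"])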