How to use the openml.runs module in openml

To help you get started, we’ve selected a few openml examples, based on popular ways it is used in public projects.

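Most of the examples below follow the same basic pattern: fetch a task, run a scikit-learn model on it (which creates a flow), and publish the resulting run. Here is a minimal sketch of that pattern, assuming an OpenML API key is configured; the task id and classifier are placeholders, not taken from any particular example.

import openml
from sklearn import tree

# Fetch a supervised classification task (115 is a placeholder id)
task = openml.tasks.get_task(115)

# Run a scikit-learn classifier on the task; the flow is created implicitly
clf = tree.DecisionTreeClassifier()
run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False)

# Publish the run to the configured OpenML server (requires an API key)
run = run.publish()
print(run.run_id)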

openml/openml-python: tests/test_setups/test_setup_functions.py (view on GitHub)
        # setups (yet) as it hasn't been run
        setup_id = openml.setups.setup_exists(flow)
        self.assertFalse(setup_id)
        setup_id = openml.setups.setup_exists(flow)
        self.assertFalse(setup_id)

        # now run the flow on an easy task:
        task = openml.tasks.get_task(115)  # diabetes
        run = openml.runs.run_flow_on_task(flow, task)
        # spoof flow id, otherwise the sentinel is ignored
        run.flow_id = flow.flow_id
        run.publish()
        TestBase._mark_entity_for_removal('run', run.run_id)
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id))
        # download the run, as it contains the right setup id
        run = openml.runs.get_run(run.run_id)

        # execute the function we are interested in
        setup_id = openml.setups.setup_exists(flow)
        self.assertEqual(setup_id, run.setup_id)
openml/openml-python: tests/test_study/test_study_functions.py (view on GitHub)
def test_study_attach_illegal(self):
        run_list = openml.runs.list_runs(size=10)
        self.assertEqual(len(run_list), 10)
        run_list_more = openml.runs.list_runs(size=20)
        self.assertEqual(len(run_list_more), 20)

        study = openml.study.create_study(
            alias=None,
            benchmark_suite=None,
            name='study with illegal runs',
            description='none',
            run_ids=list(run_list.keys())
        )
        study.publish()
        TestBase._mark_entity_for_removal('study', study.id)
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], study.id))
        study_original = openml.study.get_study(study.id)

        with self.assertRaisesRegex(openml.exceptions.OpenMLServerException,
                                    'Problem attaching entities.'):
openml/openml-python: tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py (view on GitHub)
def test_openml_param_name_to_sklearn(self):
        scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
        boosting = sklearn.ensemble.AdaBoostClassifier(
            base_estimator=sklearn.tree.DecisionTreeClassifier())
        model = sklearn.pipeline.Pipeline(steps=[
            ('scaler', scaler), ('boosting', boosting)])
        flow = self.extension.model_to_flow(model)
        task = openml.tasks.get_task(115)
        run = openml.runs.run_flow_on_task(flow, task)
        run = run.publish()
        TestBase._mark_entity_for_removal('run', run.run_id)
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id))
        run = openml.runs.get_run(run.run_id)
        setup = openml.setups.get_setup(run.setup_id)

        # make sure to test enough parameters
        self.assertGreater(len(setup.parameters), 15)

        for parameter in setup.parameters.values():
            sklearn_name = self.extension._openml_param_name_to_sklearn(parameter, flow)

            # test the inverse. Currently, OpenML stores the hyperparameter
            # fullName as flow.name + flow.version + parameter.name on the
            # server (but this behaviour is not documented and might or might
            # not change in the future. Hence, we won't offer this
openml/openml-python: tests/test_runs/test_run.py (view on GitHub)
def test_tagging(self):

        runs = openml.runs.list_runs(size=1)
        run_id = list(runs.keys())[0]
        run = openml.runs.get_run(run_id)
        tag = "testing_tag_{}_{}".format(self.id(), time())
        run_list = openml.runs.list_runs(tag=tag)
        self.assertEqual(len(run_list), 0)
        run.push_tag(tag)
        run_list = openml.runs.list_runs(tag=tag)
        self.assertEqual(len(run_list), 1)
        self.assertIn(run_id, run_list)
        run.remove_tag(tag)
        run_list = openml.runs.list_runs(tag=tag)
        self.assertEqual(len(run_list), 0)
openml/openml-python: tests/test_runs/test_run_functions.py (view on GitHub)
            del flow.parameters['random_state']
            for component in flow.components.values():
                _remove_random_state(component)

        flow = self.extension.model_to_flow(clf)
        flow, _ = self._add_sentinel_to_flow_name(flow, sentinel)
        if not openml.flows.flow_exists(flow.name, flow.external_version):
            flow.publish()
            TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
            TestBase.logger.info("collected from test_run_functions: {}".format(flow.flow_id))

        task = openml.tasks.get_task(task_id)

        X, y = task.get_X_and_y()
        self.assertEqual(np.count_nonzero(np.isnan(X)), n_missing_vals)
        run = openml.runs.run_flow_on_task(
            flow=flow,
            task=task,
            seed=seed,
            avoid_duplicate_runs=openml.config.avoid_duplicate_runs,
        )
        run_ = run.publish()
        TestBase._mark_entity_for_removal('run', run.run_id)
        TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
        self.assertEqual(run_, run)
        self.assertIsInstance(run.dataset_id, int)

        # This is only a smoke check right now
        # TODO add a few asserts here
        run._to_xml()
        if run.trace is not None:
            # This is only a smoke check right now
openml/openml.org: src/client/dash/callbacks.py (view on GitHub)
        for data_id in df["data_id"].values:
            link = "<a href="\&quot;https://www.openml.org/d/&quot;">"
            tick_text.append(link)
        hover_text = []
        if parameter == 'None':
            color = [1] * 1000
            hover_text = df["value"]
            marker = dict(opacity=0.8, symbol='diamond',
                                       color=color,  # set color equal to a variable
                                       colorscale='Jet')
            print ('None')
        else:
            color = []
            for run_id in df.run_id[:1000]:
                p = pd.DataFrame(runs.get_runs([run_id])[0].parameter_settings)
                row = p[p['oml:name'] == parameter]
                if row.empty:
                    color.append('0')
                else:
                    color.append(row['oml:value'].values[0])
                    hover_text.append(row['oml:value'].values[0])


            if color[0].isdigit():
                print(color)
                color = list(map(int, color))
            else:
                color = pd.DataFrame(color)[0].astype('category').cat.codes
            marker = dict(opacity=0.8, symbol='diamond',
                          color=color,  # set color equal to a variable
                          colorscale='Jet', colorbar=dict(title='Colorbar'))
openml/openml-python: examples/flows_and_runs_tutorial.py (view on GitHub)
############################################################################
# Runs: Easily explore models
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
# We can run (many) scikit-learn algorithms on (many) OpenML tasks.

# Get a task
task = openml.tasks.get_task(403)

# Build any classifier or pipeline
clf = tree.ExtraTreeClassifier()

# Create a flow
flow = openml.flows.sklearn_to_flow(clf)

# Run the flow
run = openml.runs.run_flow_on_task(flow, task)

# pprint(vars(run), depth=2)

############################################################################
# Share the run on the OpenML server
#
# So far the run is only available locally. By calling the publish function, the run is sent to the OpenML server:

myrun = run.publish()
# For this tutorial, our configuration publishes to the test server
# so as not to pollute the main server.
print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id))

############################################################################
# We can now also inspect the flow object which was automatically created:
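# A minimal sketch of that inspection (an assumption, not necessarily the
# tutorial's exact code): the flow created above by sklearn_to_flow can be
# printed directly, and its name reflects the wrapped scikit-learn estimator.
print(flow.name)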
openml/openml-python: develop/_downloads/6b1e091fbd3ac8d106b6552c91cf05cc/run_setup_tutorial.py (view on GitHub)
model_original.set_params(**hyperparameters_original)

# solve the task and upload the result (this implicitly creates the flow)
run = openml.runs.run_model_on_task(
    model_original,
    task,
    avoid_duplicate_runs=False)
run_original = run.publish()  # this implicitly uploads the flow

###############################################################################
# 2) Download the flow and solve the same task again.
###############################################################################

# obtain setup id (note that the setup id is assigned by the OpenML server -
# therefore it was not yet available in our local copy of the run)
run_downloaded = openml.runs.get_run(run_original.run_id)
setup_id = run_downloaded.setup_id

# after this, we can easily reinstantiate the model
model_duplicate = openml.setups.initialize_model(setup_id)
# it will automatically have all the hyperparameters set

# and run the task again
run_duplicate = openml.runs.run_model_on_task(
    model_duplicate, task, avoid_duplicate_runs=False)


###############################################################################
# 3) We will verify that the obtained results are exactly the same.
###############################################################################

# the run has stored all predictions in the field data content
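# A minimal sketch of that check (an assumption, not the tutorial's exact
# code): compare the stored predictions of the original and duplicate runs.
assert run_original.data_content == run_duplicate.data_content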
openml/openml-python: openml/__init__.py (view on GitHub)
"""
    if task_ids is not None:
        for task_id in task_ids:
            tasks.functions.get_task(task_id)

    if dataset_ids is not None:
        for dataset_id in dataset_ids:
            datasets.functions.get_dataset(dataset_id)

    if flow_ids is not None:
        for flow_id in flow_ids:
            flows.functions.get_flow(flow_id)

    if run_ids is not None:
        for run_id in run_ids:
            runs.functions.get_run(run_id)
openml/openml-python: circle_drop/_downloads/introduction_tutorial.py (view on GitHub)
# * Add the line **cachedir = 'MYDIR'** to the config file, replacing 'MYDIR' with the path to the cache directory. By default, OpenML will use **~/.openml/cache** as the cache directory.
# * Run the code below, replacing 'YOURDIR' with the path to the cache directory.

import os
# Uncomment and set your OpenML cache directory
# openml.config.cache_directory = os.path.expanduser('YOURDIR')

############################################################################
# Simple Example
# ^^^^^^^^^^^^^^
# Download the OpenML task for the eeg-eye-state dataset.
task = openml.tasks.get_task(403)
data = openml.datasets.get_dataset(task.dataset_id)
clf = neighbors.KNeighborsClassifier(n_neighbors=5)
flow = openml.flows.sklearn_to_flow(clf)
run = openml.runs.run_flow_on_task(flow, task, avoid_duplicate_runs=False)
# Publish the experiment on OpenML (optional, requires an API key).
# For this tutorial, our configuration publishes to the test server
# so as not to pollute the main server.
myrun = run.publish()
print("kNN on %s: http://test.openml.org/r/%d" % (data.name, myrun.run_id))