How to use the openml.runs.run_model_on_task function in openml

To help you get started, we've selected a few openml examples based on popular ways openml.runs.run_model_on_task is used in public projects.
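Before the project examples, here is a minimal sketch of the basic call pattern. It assumes the test-server configuration the tutorials below use; task id 119 is borrowed from the tests further down purely as an illustration.

import openml
from sklearn.tree import DecisionTreeClassifier

# Switch to the public OpenML test server so that example runs do not
# crowd the main server (the tutorials below do the same).
openml.config.start_using_configuration_for_example()

task = openml.tasks.get_task(119)          # any classification task id works
clf = DecisionTreeClassifier(max_depth=5)  # any scikit-learn estimator

# Evaluate the model on the task's predefined splits; nothing is uploaded yet.
run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False)

# run.publish()  # optional upload; requires openml.config.apikey to be set
openml.config.stop_using_configuration_for_example()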


github openml / openml-python / tests / test_runs / test_run_functions.py
def test_local_run_swapped_parameter_order_model(self):

        # construct a scikit-learn classifier
        clf = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                              ('estimator', RandomForestClassifier())])

        # download task
        task = openml.tasks.get_task(7)

        # invoke OpenML run
        run = openml.runs.run_model_on_task(
            task, clf,
            avoid_duplicate_runs=False,
            upload_flow=False,
        )

        self._test_local_evaluations(run)
github openml / openml-python / tests / test_runs / test_run.py
def test_to_from_filesystem_search(self):

        model = Pipeline([
            ('imputer', SimpleImputer(strategy='mean')),
            ('classifier', DecisionTreeClassifier(max_depth=1)),
        ])
        model = GridSearchCV(
            estimator=model,
            param_grid={
                "classifier__max_depth": [1, 2, 3, 4, 5],
                "imputer__strategy": ['mean', 'median'],
            }
        )

        task = openml.tasks.get_task(119)
        run = openml.runs.run_model_on_task(
            model=model,
            task=task,
            add_local_measures=False,
            avoid_duplicate_runs=False,
        )

        cache_path = os.path.join(
            self.workdir,
            'runs',
            str(random.getrandbits(128)),
        )
        run.to_filesystem(cache_path)

        run_prime = openml.runs.OpenMLRun.from_filesystem(cache_path)
        self._test_run_obj_equals(run, run_prime)
        run_prime.publish()
github openml / openml-python / develop / _downloads / 9e0617073c8209f15abf91f273871776 / flows_and_runs_tutorial.py
# Full pipeline: impute and one-hot-encode the nominal features, then classify.
# nominal_feature_indices holds the indices of the nominal columns and is
# defined earlier in the tutorial.
pipe = pipeline.Pipeline(steps=[
    (
        'Preprocessing',
        compose.ColumnTransformer([
            (
                'Nominal',
                pipeline.Pipeline([
                    ('Imputer', impute.SimpleImputer(strategy='most_frequent')),
                    (
                        'Encoder',
                        preprocessing.OneHotEncoder(
                            sparse=False, handle_unknown='ignore',
                        )
                    ),
                ]),
                nominal_feature_indices,
             ),
        ]),
    ),
    ('Classifier', ensemble.RandomForestClassifier(n_estimators=10))
])

run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False)
myrun = run.publish()
print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id))

###############################################################################
# Running flows on tasks offline for later upload
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# For scenarios without internet access, it is possible to run a model on a
# task without immediately uploading the results or the flow to the server.

# For the following call to work offline, the task must have been downloaded
# beforehand so that it is cached in the local OpenML cache directory:
task = openml.tasks.get_task(6)

# The following lines can then be executed offline:
run = openml.runs.run_model_on_task(
    pipe,
    task,
    # keep everything local: skip the server-side duplicate check and
    # do not upload the flow
    avoid_duplicate_runs=False,
    upload_flow=False,
)
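Once the run exists locally, it can be saved and published later. A small sketch using the to_filesystem / from_filesystem round trip shown in the test above; the directory name is a placeholder.

# Save the offline run to disk ('./my_offline_run' is a placeholder path).
run.to_filesystem('./my_offline_run')

# Later, with internet access, reload the run and upload it to the server.
run_reloaded = openml.runs.OpenMLRun.from_filesystem('./my_offline_run')
run_reloaded.publish()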
github openml / openml-python / examples / sklearn / openml_run_example.py
# Uncomment and set your OpenML key. Don't share your key with others.
# openml.config.apikey = 'YOURKEY'

# Define a scikit-learn pipeline
clf = pipeline.Pipeline(
    steps=[
        ('imputer', impute.SimpleImputer()),
        ('estimator', tree.DecisionTreeClassifier())
    ]
)
############################################################################
# Download the OpenML task for the German credit card dataset.
task = openml.tasks.get_task(97)
############################################################################
# Run the scikit-learn model on the task (requires an API key).
run = openml.runs.run_model_on_task(clf, task)
# Publish the experiment on OpenML (optional, requires an API key).
run.publish()

print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))

############################################################################
openml.config.stop_using_configuration_for_example()
github openml / openml-python / master / _downloads / 911f16d4db6b665d864c4483331b062a / introduction_tutorial.py
# * Set the cache directory to 'MYDIR' in the OpenML config file, replacing
#   'MYDIR' with the path to the cache directory. By default, OpenML
#   will use **~/.openml/cache** as the cache directory.
# * Run the code below, replacing 'YOURDIR' with the path to the cache directory.

# Uncomment and set your OpenML cache directory
# import os
# openml.config.cache_directory = os.path.expanduser('YOURDIR')

############################################################################
# Simple Example
# ^^^^^^^^^^^^^^
# Download the OpenML task for the eeg-eye-state dataset.
task = openml.tasks.get_task(403)
data = openml.datasets.get_dataset(task.dataset_id)
clf = neighbors.KNeighborsClassifier(n_neighbors=5)
run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False)
# Publish the experiment on OpenML (optional, requires an API key).
# For this tutorial, our configuration publishes to the test server
# so as not to crowd the main server with runs created by examples.
myrun = run.publish()
print("kNN on %s: http://test.openml.org/r/%d" % (data.name, myrun.run_id))

############################################################################
openml.config.stop_using_configuration_for_example()
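The tutorials wrap their uploads in start/stop_using_configuration_for_example(), which points the client at the test server. A minimal sketch of configuring the client yourself; the key and directory values are placeholders.

import os
import openml

# The tutorial helper switches to the test server (and back again at the end
# with openml.config.stop_using_configuration_for_example()).
openml.config.start_using_configuration_for_example()

# Outside the tutorials, set the API key and cache directory explicitly;
# both values below are placeholders.
# openml.config.apikey = 'YOURKEY'
# openml.config.cache_directory = os.path.expanduser('YOURDIR')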
github openml / openml-python / examples / 30_extended / study_tutorial.py
openml.config.start_using_configuration_for_example()

# A very simple classifier that ignores the feature types
clf = sklearn.pipeline.Pipeline(steps=[
    ('imputer', sklearn.impute.SimpleImputer()),
    ('estimator', sklearn.tree.DecisionTreeClassifier(max_depth=5)),
])

suite = openml.study.get_suite(1)
# We'll create a study with one run on each of three random tasks from the suite
tasks = np.random.choice(suite.tasks, size=3, replace=False)
run_ids = []
for task_id in tasks:
    task = openml.tasks.get_task(task_id)
    run = openml.runs.run_model_on_task(clf, task)
    run.publish()
    run_ids.append(run.run_id)

# The study needs a machine-readable and unique alias. To obtain this,
# we simply generate a random uuid.
alias = uuid.uuid4().hex

new_study = openml.study.create_study(
    name='Test-Study',
    description='Test study for the Python tutorial on studies',
    run_ids=run_ids,
    alias=alias,
    benchmark_suite=suite.study_id,
)
new_study.publish()
print(new_study)
github openml / openml-python / develop / _downloads / 6b1e091fbd3ac8d106b6552c91cf05cc / run_setup_tutorial.py
# Let's change some hyperparameters. Of course, in any good application we
# would tune them using, e.g., Random Search or Bayesian Optimization, but for
# the purpose of this tutorial we set them to some specific values that might
# or might not be optimal.
hyperparameters_original = {
    'simpleimputer__strategy': 'median',
    'randomforestclassifier__criterion': 'entropy',
    'randomforestclassifier__max_features': 0.2,
    'randomforestclassifier__min_samples_leaf': 1,
    'randomforestclassifier__n_estimators': 16,
    'randomforestclassifier__random_state': 42,
}
model_original.set_params(**hyperparameters_original)

# solve the task and upload the result (this implicitly creates the flow)
run = openml.runs.run_model_on_task(
    model_original,
    task,
    avoid_duplicate_runs=False)
run_original = run.publish()  # this implicitly uploads the flow

###############################################################################
# 2) Download the flow and solve the same task again.
###############################################################################

# obtain the setup id (the setup id is assigned by the OpenML server,
# so it was not yet available in our local copy of the run)
run_downloaded = openml.runs.get_run(run_original.run_id)
setup_id = run_downloaded.setup_id

# after this, we can easily reinstantiate the model
model_duplicate = openml.setups.initialize_model(setup_id)
# it will automatically have all the hyperparameters set

# and run the task again
run_duplicate = openml.runs.run_model_on_task(
    model_duplicate, task, avoid_duplicate_runs=False)


###############################################################################
# 3) We will verify that the obtained results are exactly the same.
###############################################################################

# the run stores all predictions in its data_content field
np.testing.assert_array_equal(run_original.data_content,
                              run_duplicate.data_content)

###############################################################################

openml.config.stop_using_configuration_for_example()