How to use the openml.testing.TestBase._mark_entity_for_removal function in openml

To help you get started, we've selected a few openml examples based on popular ways TestBase._mark_entity_for_removal is used in public projects.
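In openml-python's own test suite, TestBase._mark_entity_for_removal records each entity (flow, run, ...) that a test publishes to the test server so the suite's cleanup can delete it again afterwards. Below is a minimal sketch of the pattern, assuming a TestBase subclass that, like the tests excerpted further down, exposes a scikit-learn extension as self.extension; the class name and classifier are illustrative, not taken from the examples:

import sklearn.tree
from openml.testing import TestBase


class TestFlowCleanupExample(TestBase):  # hypothetical test class

    def test_publish_flow_with_cleanup(self):
        clf = sklearn.tree.DecisionTreeClassifier()
        flow = self.extension.model_to_flow(clf)
        flow.publish()
        # queue the published flow for deletion during teardown
        TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
        TestBase.logger.info("collected from {}: {}".format(
            __file__.split('/')[-1], flow.flow_id))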


github openml / openml-python / tests / test_flows / test_flow.py
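This excerpt exercises flow.publish() against mocked server responses and expects a ValueError when the server's copy of the flow differs from the local one; the _mark_entity_for_removal call sits inside the assertRaises block, so it is only reached if publishing unexpectedly succeeds: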
        flow.publish()
        # Not collecting flow_id for deletion since this is a test for failed upload

        self.assertEqual(api_call_mock.call_count, 1)
        self.assertEqual(get_flow_mock.call_count, 1)
        self.assertEqual(flow_exists_mock.call_count, 1)

        flow_copy = copy.deepcopy(flow)
        flow_copy.name = flow_copy.name[:-1]
        get_flow_mock.return_value = flow_copy
        flow_exists_mock.return_value = 1

        with self.assertRaises(ValueError) as context_manager:
            flow.publish()
            TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
            TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                                flow.flow_id))

        fixture = (
            "The flow on the server is inconsistent with the local flow. "
            "The server flow ID is 1. Please check manually and remove "
            "the flow if necessary! Error is:\n"
            "'Flow sklearn.ensemble.forest.RandomForestClassifier: "
            "values for attribute 'name' differ: "
            "'sklearn.ensemble.forest.RandomForestClassifier'"
            "\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'"
        )

        self.assertEqual(context_manager.exception.args[0], fixture)
        self.assertEqual(get_flow_mock.call_count, 2)
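Note that statements placed after flow.publish() inside the assertRaises block never execute when the expected exception is raised; the marking call there is defensive, covering only the case where the publish unexpectedly goes through.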
github openml / openml-python / tests / test_setups / test_setup_functions.py
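The next excerpt tracks both entity types: the test publishes a sentinel-named flow and marks it, checks that no setup exists for it yet, then runs the flow on a task and publishes and marks the resulting run: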
    def _existing_setup_exists(self, classif):

        flow = self.extension.model_to_flow(classif)
        flow.name = 'TEST%s%s' % (get_sentinel(), flow.name)
        flow.publish()
        TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id))

        # although the flow exists, we can be sure there are no
        # setups (yet) as it hasn't been run
        setup_id = openml.setups.setup_exists(flow)
        self.assertFalse(setup_id)
        setup_id = openml.setups.setup_exists(flow)
        self.assertFalse(setup_id)

        # now run the flow on an easy task:
        task = openml.tasks.get_task(115)  # diabetes
        run = openml.runs.run_flow_on_task(flow, task)
        # spoof flow id, otherwise the sentinel is ignored
        run.flow_id = flow.flow_id
        run.publish()
        TestBase._mark_entity_for_removal('run', run.run_id)
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id))
        # download the run, as it contains the right setup id
        run = openml.runs.get_run(run.run_id)

        # execute the function we are interested in
        setup_id = openml.setups.setup_exists(flow)
        self.assertEqual(setup_id, run.setup_id)
github openml / openml-python / tests / test_flows / test_flow.py
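Here the server is mocked to report that the flow already exists, so publish(raise_error_if_exists=True) raises and the marking call inside the assertRaises block is skipped: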
    def test_publish_existing_flow(self, flow_exists_mock):
        clf = sklearn.tree.DecisionTreeClassifier(max_depth=2)
        flow = self.extension.model_to_flow(clf)
        flow_exists_mock.return_value = 1

        with self.assertRaises(openml.exceptions.PyOpenMLError) as context_manager:
            flow.publish(raise_error_if_exists=True)
            TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
            TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                                flow.flow_id))

        self.assertTrue('OpenMLFlow already exists' in context_manager.exception.message)
github openml / openml-python / tests / test_study / test_study_examples.py
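This excerpt from the benchmark-suite example test runs a scikit-learn pipeline on the tasks of a suite, publishes each run, and marks it for removal (the opening of the pipeline construction is only partly visible in the excerpt):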
        clf = sklearn.pipeline.Pipeline(
            steps=[
                # earlier pipeline steps are clipped from this excerpt
                ('estimator', sklearn.tree.DecisionTreeClassifier())
            ]
        )  # build a sklearn classifier
        for task_id in benchmark_suite.tasks[:1]:  # iterate over all tasks
            task = openml.tasks.get_task(task_id)  # download the OpenML task
            X, y = task.get_X_and_y()  # get the data (not used in this example)
            openml.config.apikey = openml.config.apikey  # set the OpenML Api Key
            run = openml.runs.run_model_on_task(
                clf, task, avoid_duplicate_runs=False
            )  # run classifier on splits (requires API key)
            score = run.get_metric_fn(
                sklearn.metrics.accuracy_score
            )  # print accuracy score
            print('Data set: %s; Accuracy: %0.2f' % (task.get_dataset().name, score.mean()))
            run.publish()  # publish the experiment on OpenML (optional)
            TestBase._mark_entity_for_removal('run', run.run_id)
            TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                                run.run_id))
            print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))
github openml / openml-python / tests / test_runs / test_run_functions.py
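In the following test the flow is published and marked only if it does not yet exist on the server, while the run produced from it is always published and marked: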
            ['sklearn.model_selection._search.GridSearchCV',
             'sklearn.pipeline.Pipeline',
             'sklearn.linear_model.base.LinearRegression',
             ]

        def _remove_random_state(flow):
            if 'random_state' in flow.parameters:
                del flow.parameters['random_state']
            for component in flow.components.values():
                _remove_random_state(component)

        flow = self.extension.model_to_flow(clf)
        flow, _ = self._add_sentinel_to_flow_name(flow, sentinel)
        if not openml.flows.flow_exists(flow.name, flow.external_version):
            flow.publish()
            TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
            TestBase.logger.info("collected from test_run_functions: {}".format(flow.flow_id))

        task = openml.tasks.get_task(task_id)

        X, y = task.get_X_and_y()
        self.assertEqual(np.count_nonzero(np.isnan(X)), n_missing_vals)
        run = openml.runs.run_flow_on_task(
            flow=flow,
            task=task,
            seed=seed,
            avoid_duplicate_runs=openml.config.avoid_duplicate_runs,
        )
        run_ = run.publish()
        TestBase._mark_entity_for_removal('run', run.run_id)
        TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
        self.assertEqual(run_, run)
github openml / openml-python / tests / test_runs / test_run_functions.py
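This loop seeds the server with one run per classifier, treating an "already exists" error as success, and marks every run it manages to publish before checking that the flow, setup, and run can all be found again: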
        task = openml.tasks.get_task(115)

        for clf in clfs:
            try:
                # first populate the server with this run.
                # skip run if it was already performed.
                run = openml.runs.run_model_on_task(
                    model=clf,
                    task=task,
                    seed=rs,
                    avoid_duplicate_runs=True,
                    upload_flow=True
                )
                run.publish()
                TestBase._mark_entity_for_removal('run', run.run_id)
                TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
            except openml.exceptions.PyOpenMLError:
                # run already existed. Great.
                pass

            flow = self.extension.model_to_flow(clf)
            flow_exists = openml.flows.flow_exists(flow.name, flow.external_version)
            self.assertGreater(flow_exists, 0)
            # Do NOT use get_flow reinitialization; it can set
            # hyperparameter values incorrectly. Use the local model instead.
            downloaded_flow = openml.flows.get_flow(flow_exists)
            downloaded_flow.model = clf
            setup_exists = openml.setups.setup_exists(downloaded_flow)
            self.assertGreater(setup_exists, 0)
            run_ids = run_exists(task.task_id, setup_exists)
            self.assertTrue(run_ids, msg=(run_ids, clf))
github openml / openml-python / tests / test_runs / test_run_functions.py
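After publishing and marking a run, this test reinitializes the model both from the run and from its setup, and asserts that the resulting flows match the local one: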
    def test_initialize_model_from_run(self):
        clf = sklearn.pipeline.Pipeline(steps=[
            ('Imputer', SimpleImputer(strategy='median')),
            ('VarianceThreshold', VarianceThreshold(threshold=0.05)),
            ('Estimator', GaussianNB())])
        task = openml.tasks.get_task(11)
        run = openml.runs.run_model_on_task(
            model=clf,
            task=task,
            avoid_duplicate_runs=False,
        )
        run_ = run.publish()
        TestBase._mark_entity_for_removal('run', run_.run_id)
        TestBase.logger.info("collected from test_run_functions: {}".format(run_.run_id))
        run = openml.runs.get_run(run_.run_id)

        modelR = openml.runs.initialize_model_from_run(run_id=run.run_id)
        modelS = openml.setups.initialize_model(setup_id=run.setup_id)

        flowR = self.extension.model_to_flow(modelR)
        flowS = self.extension.model_to_flow(modelS)
        flowL = self.extension.model_to_flow(clf)
        openml.flows.assert_flows_equal(flowR, flowL)
        openml.flows.assert_flows_equal(flowS, flowL)

        self.assertEqual(flowS.components['Imputer'].
                         parameters['strategy'], '"median"')
        self.assertEqual(flowS.components['VarianceThreshold'].
                         parameters['threshold'], '0.05')
github openml / openml-python / tests / test_runs / test_run_functions.py
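The final excerpt round-trips a run through the filesystem and expects publishing to fail because the flow is missing from the server; as above, the marking call is reached only if publish() succeeds: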
            upload_flow=False
        )

        cache_path = os.path.join(
            self.workdir,
            'runs',
            str(random.getrandbits(128)),
        )
        run.to_filesystem(cache_path)
        loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path)

        expected_message_regex = ("Flow does not exist on the server, "
                                  "but 'flow.flow_id' is not None.")
        with self.assertRaisesRegex(openml.exceptions.PyOpenMLError, expected_message_regex):
            loaded_run.publish()
            TestBase._mark_entity_for_removal('run', loaded_run.run_id)
            TestBase.logger.info("collected from test_run_functions: {}".format(loaded_run.run_id))