How to use the openml.testing.TestBase.logger.info function in openml

To help you get started, we’ve selected a few openml examples, based on popular ways this function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github openml / openml-python / tests / test_flows / test_flow.py View on Github external
self.assertEqual(
            flow.upload_date,
            flow.components['lr'].upload_date,
            msg=(
                flow.name,
                flow.flow_id,
                flow.components['lr'].name, flow.components['lr'].flow_id,
            ),
        )

        clf1 = sklearn.tree.DecisionTreeClassifier(max_depth=2)
        flow1 = self.extension.model_to_flow(clf1)
        flow1, sentinel = self._add_sentinel_to_flow_name(flow1, None)
        flow1.publish()
        TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                            flow1.flow_id))

        # In order to assign different upload times to the flows!
        time.sleep(1)

        clf2 = sklearn.ensemble.VotingClassifier(
            [('dt', sklearn.tree.DecisionTreeClassifier(max_depth=2))])
        flow2 = self.extension.model_to_flow(clf2)
        flow2, _ = self._add_sentinel_to_flow_name(flow2, sentinel)
        flow2.publish()
        TestBase._mark_entity_for_removal('flow', (flow2.flow_id, flow2.name))
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                            flow2.flow_id))
        # If one component was published before the other, the components in
        # the flow should have different upload dates
        self.assertNotEqual(flow2.upload_date,
github openml / openml-python / tests / test_datasets / test_dataset_functions.py View on Github external
language='English',
            licence='MIT',
            default_target_attribute='col_{}'.format(data.shape[1] - 1),
            row_id_attribute=None,
            ignore_attribute=None,
            citation='None',
            attributes=attributes,
            data=data,
            version_label='test',
            original_data_url='http://openml.github.io/openml-python',
            paper_url='http://openml.github.io/openml-python'
        )

        dataset.publish()
        TestBase._mark_entity_for_removal('data', dataset.id)
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                            dataset.id))

        self.assertEqual(
            _get_online_dataset_arff(dataset.id),
            dataset._dataset,
            "Uploaded arff does not match original one"
        )
        self.assertEqual(
            _get_online_dataset_format(dataset.id),
            'arff',
            "Wrong format for dataset"
        )
github openml / openml-python / tests / test_flows / test_flow_functions.py View on Github external
def test_sklearn_to_flow_list_of_lists(self):
        """Round-trip an OrdinalEncoder whose ``categories`` parameter is a list of lists."""
        from sklearn.preprocessing import OrdinalEncoder

        sklearn_ext = openml.extensions.sklearn.SklearnExtension()
        encoder = OrdinalEncoder(categories=[[0, 1], [0, 1]])

        # Serialization: converting the model to a flow must not raise.
        local_flow = sklearn_ext.model_to_flow(encoder)

        # Upload: the server must accept the serialized flow.
        self._add_sentinel_to_flow_name(local_flow)
        local_flow.publish()
        TestBase._mark_entity_for_removal('flow', (local_flow.flow_id, local_flow.name))
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], local_flow.flow_id))
        # Deserialization: re-download and check the parameter round-trips intact.
        remote_flow = openml.flows.get_flow(local_flow.flow_id, reinstantiate=True)
        self.assertEqual(remote_flow.parameters['categories'], '[[0, 1], [0, 1]]')
        self.assertEqual(remote_flow.model.categories, local_flow.model.categories)
github openml / openml-python / tests / test_datasets / test_dataset_functions.py View on Github external
contributor=None,
            collection_date=None,
            language='English',
            licence=None,
            default_target_attribute='y',
            row_id_attribute=None,
            ignore_attribute=None,
            citation=None,
            attributes=column_names,
            data=sparse_data,
            version_label='test',
        )

        xor_dataset.publish()
        TestBase._mark_entity_for_removal('data', xor_dataset.id)
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                            xor_dataset.id))
        self.assertEqual(
            _get_online_dataset_arff(xor_dataset.id),
            xor_dataset._dataset,
            "Uploaded ARFF does not match original one"
        )
        self.assertEqual(
            _get_online_dataset_format(xor_dataset.id),
            'sparse_arff',
            "Wrong format for dataset"
        )
github openml / openml-python / tests / test_setups / test_setup_functions.py View on Github external
def _existing_setup_exists(self, classif):

        flow = self.extension.model_to_flow(classif)
        flow.name = 'TEST%s%s' % (get_sentinel(), flow.name)
        flow.publish()
        TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], flow.flow_id))

        # although the flow exists, we can be sure there are no
        # setups (yet) as it hasn't been ran
        setup_id = openml.setups.setup_exists(flow)
        self.assertFalse(setup_id)
        setup_id = openml.setups.setup_exists(flow)
        self.assertFalse(setup_id)

        # now run the flow on an easy task:
        task = openml.tasks.get_task(115)  # diabetes
        run = openml.runs.run_flow_on_task(flow, task)
        # spoof flow id, otherwise the sentinel is ignored
        run.flow_id = flow.flow_id
        run.publish()
        TestBase._mark_entity_for_removal('run', run.run_id)
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id))
github openml / openml-python / tests / test_flows / test_flow.py View on Github external
('hotencoding', sklearn.preprocessing.OneHotEncoder(**ohe_params)),
            (
                'variencethreshold',
                sklearn.feature_selection.VarianceThreshold(),
            ),
            ('classifier', sklearn.tree.DecisionTreeClassifier())
        ]
        complicated = sklearn.pipeline.Pipeline(steps=steps)

        for classifier in [nb, complicated]:
            flow = self.extension.model_to_flow(classifier)
            flow, _ = self._add_sentinel_to_flow_name(flow, None)
            # publish the flow
            flow = flow.publish()
            TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
            TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                                flow.flow_id))
            # redownload the flow
            flow = openml.flows.get_flow(flow.flow_id)

            # check if flow exists can find it
            flow = openml.flows.get_flow(flow.flow_id)
            downloaded_flow_id = openml.flows.flow_exists(
                flow.name,
                flow.external_version,
            )
            self.assertEqual(downloaded_flow_id, flow.flow_id)
github openml / openml-python / tests / test_flows / test_flow.py View on Github external
flow.publish()
        # Not collecting flow_id for deletion since this is a test for failed upload

        self.assertEqual(api_call_mock.call_count, 1)
        self.assertEqual(get_flow_mock.call_count, 1)
        self.assertEqual(flow_exists_mock.call_count, 1)

        flow_copy = copy.deepcopy(flow)
        flow_copy.name = flow_copy.name[:-1]
        get_flow_mock.return_value = flow_copy
        flow_exists_mock.return_value = 1

        with self.assertRaises(ValueError) as context_manager:
            flow.publish()
            TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
            TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                                flow.flow_id))

        fixture = (
            "The flow on the server is inconsistent with the local flow. "
            "The server flow ID is 1. Please check manually and remove "
            "the flow if necessary! Error is:\n"
            "'Flow sklearn.ensemble.forest.RandomForestClassifier: "
            "values for attribute 'name' differ: "
            "'sklearn.ensemble.forest.RandomForestClassifier'"
            "\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'"
        )

        self.assertEqual(context_manager.exception.args[0], fixture)
        self.assertEqual(get_flow_mock.call_count, 2)
github openml / openml-python / tests / test_datasets / test_dataset_functions.py View on Github external
def test_upload_dataset_with_url(self):
        """Publish a dataset whose data is referenced only through a remote URL."""
        sentinel_name = "%s-UploadTestWithURL" % self._get_sentinel()
        url_dataset = OpenMLDataset(
            sentinel_name,
            "test",
            data_format="arff",
            version=1,
            url="https://www.openml.org/data/download/61/dataset_61_iris.arff",
        )

        url_dataset.publish()
        # Register the upload so the test teardown can delete it from the server again.
        TestBase._mark_entity_for_removal('data', url_dataset.dataset_id)
        TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1],
                                                            url_dataset.dataset_id))
        # A successful publish assigns a numeric dataset id.
        self.assertIsInstance(url_dataset.dataset_id, int)
github openml / openml-python / tests / test_runs / test_run_functions.py View on Github external
# from the past
        try:
            # in case the run did not exists yet
            run = openml.runs.run_model_on_task(
                model=clf,
                task=task,
                avoid_duplicate_runs=True,
            )

            self.assertEqual(
                len(run.trace.trace_iterations),
                num_iterations * num_folds,
            )
            run = run.publish()
            TestBase._mark_entity_for_removal('run', run.run_id)
            TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
            self._wait_for_processed_run(run.run_id, 200)
            run_id = run.run_id
        except openml.exceptions.OpenMLRunsExistError as e:
            # The only error we expect, should fail otherwise.
            run_ids = [int(run_id) for run_id in e.run_ids]
            self.assertGreater(len(run_ids), 0)
            run_id = random.choice(list(run_ids))

        # now the actual unit test ...
        run_trace = openml.runs.get_run_trace(run_id)
        self.assertEqual(len(run_trace.trace_iterations), num_iterations * num_folds)