How to use the openml.flows.OpenMLFlow class in openml

To help you get started, we’ve selected a few openml examples that show how OpenMLFlow is used in popular public projects.

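For orientation, here is a minimal, hedged sketch of constructing and publishing a flow by hand. The constructor arguments mirror the calls in the snippets below; the names, parameter and version strings are placeholders rather than anything from a real project, and publish() requires a configured OpenML API key.

from collections import OrderedDict

import openml

flow = openml.flows.OpenMLFlow(
    name='my_example_flow',                     # placeholder name
    description='A manually defined example flow.',
    model=None,                                 # no in-memory model attached
    components=OrderedDict(),                   # no subflows
    parameters=OrderedDict(max_iter='100'),     # parameter values are strings
    parameters_meta_info=OrderedDict(
        max_iter=OrderedDict(description='maximum number of iterations',
                             data_type='int'),
    ),
    external_version='my_tool==1.0',
    tags=['example'],
    language='English',
    dependencies='my_tool==1.0',
)
# flow.publish()  # uploads the flow to the OpenML server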

github openml / automlbenchmark / scripts / create_flows.py
            memory='32',
            cores='8'
        ),
        parameters_meta_info=OrderedDict(
            cores=OrderedDict(description='number of available cores', data_type='int'),
            memory=OrderedDict(description='memory in gigabytes', data_type='int'),
            time=OrderedDict(description='time in minutes', data_type='int'),
        ),
        language='English',
        tags=['amlb', 'benchmark', 'study_218'],
        dependencies='amlb==0.9',
        model=None
    )

    autosklearn_flow = openml.flows.get_flow(15275)  # auto-sklearn 0.5.1
    autosklearn_amlb_flow = openml.flows.OpenMLFlow(
        name='automlbenchmark_autosklearn',
        description=('Auto-sklearn as set up by the AutoML Benchmark. '
                     'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
        components=OrderedDict(automl_tool=autosklearn_flow),
        **standard_kwargs
    )
    autosklearn_amlb_flow.publish()
    print(f'autosklearn flow created: {autosklearn_amlb_flow.flow_id}')
    # for dev purposes, since we're rerunning this often, we want to double-check no new flows are created
    assert autosklearn_amlb_flow.flow_id == 15509, "! NEW FLOW CREATED UNEXPECTEDLY!"

    tpot_flow = openml.flows.get_flow(15508)  # TPOT 0.9.6
    tpot_amlb_flow = openml.flows.OpenMLFlow(
        name='automlbenchmark_tpot',
        description=('TPOT as set up by the AutoML Benchmark. '
                     'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
github openml / openml-python / openml / extensions / sklearn / extension.py
            def is_subcomponent_specification(values):
                # Checks whether the given value can be a specification of
                # subcomponents, e.g. the value of the `steps` parameter of a
                # Pipeline or the `transformers` parameter of a
                # ColumnTransformer. These are always lists/tuples of
                # lists/tuples in which each item has at least two elements
                # and the second element is an OpenMLFlow.
                if not isinstance(values, (tuple, list)):
                    return False
                for item in values:
                    if not isinstance(item, (tuple, list)):
                        return False
                    if len(item) < 2:
                        return False
                    if not isinstance(item[1], openml.flows.OpenMLFlow):
                        return False
                return True
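For orientation, a hedged illustration of the shape this nested helper accepts: a list of (identifier, OpenMLFlow) pairs, which is how a Pipeline's steps parameter looks once every step has been converted to a flow. The helper is nested inside the extension and not importable, so the expected results are noted in comments; placeholder_flow is a made-up stand-in that simply mirrors the OpenMLFlow constructor calls shown elsewhere on this page.

from collections import OrderedDict

import openml

def placeholder_flow(name):
    # Minimal stand-in flow, for illustration only.
    return openml.flows.OpenMLFlow(
        name=name, description='placeholder', model=None,
        components=OrderedDict(), parameters=OrderedDict(),
        parameters_meta_info=OrderedDict(), external_version='example==0.1',
        tags=[], language='English', dependencies='',
    )

steps = [('scaler', placeholder_flow('scaler')),
         ('clf', placeholder_flow('clf'))]
# is_subcomponent_specification(steps)          -> True
# is_subcomponent_specification([('scaler',)])  -> False (item shorter than 2)
# is_subcomponent_specification('not a list')   -> False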
github openml / openml-python / openml / runs / run.py
        trace_path = os.path.join(directory, 'trace.arff')
        model_path = os.path.join(directory, 'model.pkl')

        if not os.path.isfile(description_path):
            raise ValueError('Could not find description.xml')
        if not os.path.isfile(predictions_path):
            raise ValueError('Could not find predictions.arff')
        if not os.path.isfile(model_path) and expect_model:
            raise ValueError('Could not find model.pkl')

        with open(description_path, 'r') as fht:
            xml_string = fht.read()
        run = openml.runs.functions._create_run_from_xml(xml_string, from_server=False)

        if run.flow_id is None:
            flow = openml.flows.OpenMLFlow.from_filesystem(directory)
            run.flow = flow
            run.flow_name = flow.name

        with open(predictions_path, 'r') as fht:
            predictions = arff.load(fht)
            run.data_content = predictions['data']

        if os.path.isfile(model_path):
            # note that it will load the model if the file exists, even if
            # expect_model is False
            with open(model_path, 'rb') as fhb:
                run.model = pickle.load(fhb)

        if os.path.isfile(trace_path):
            run.trace = openml.runs.OpenMLRunTrace._from_filesystem(trace_path)
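The snippet above reconstructs a flow with OpenMLFlow.from_filesystem when a locally stored run has no flow id. A hedged sketch of that round-trip in isolation, assuming to_filesystem is the symmetric writer (as in current openml-python) and reusing flow id 15275 from the create_flows.py example:

import tempfile

import openml

flow = openml.flows.get_flow(15275)  # auto-sklearn 0.5.1, as used above
with tempfile.TemporaryDirectory() as directory:
    flow.to_filesystem(directory)    # write the flow description to disk
    restored = openml.flows.OpenMLFlow.from_filesystem(directory)
    print(restored.name)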
github openml / openml-python / openml / flows / functions.py
    flow2 : OpenMLFlow

    ignore_parameter_values_on_older_children : str (optional)
        If set to ``OpenMLFlow.upload_date``, ignores parameters in a child
        flow if its upload date predates the upload date of the parent flow.

    ignore_parameter_values : bool
        Whether to ignore parameter values when comparing flows.

    ignore_custom_name_if_none : bool
        Whether to ignore the custom name field if either flow has `custom_name` equal to `None`.

    check_description : bool
        Whether to check that the flow descriptions match.
    """
    if not isinstance(flow1, OpenMLFlow):
        raise TypeError('Argument 1 must be of type OpenMLFlow, but is %s' %
                        type(flow1))

    if not isinstance(flow2, OpenMLFlow):
        raise TypeError('Argument 2 must be of type OpenMLFlow, but is %s' %
                        type(flow2))

    # TODO as they are actually now saved during publish, it might be good to
    # check for the equality of these as well.
    generated_by_the_server = ['flow_id', 'uploader', 'version', 'upload_date',
                               # Tags aren't directly created by the server,
                               # but the uploader has no control over them!
                               'tags']
    ignored_by_python_api = ['binary_url', 'binary_format', 'binary_md5',
                             'model', '_entity_id']
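A hedged usage sketch for the comparison helper documented above: the name assert_flows_equal and its import path are assumed from openml-python's public API, and flow id 15275 is reused from the create_flows.py example on this page.

import openml
from openml.flows import assert_flows_equal

flow_a = openml.flows.get_flow(15275)
flow_b = openml.flows.get_flow(15275)

# Raises ValueError if the two flows differ on any compared attribute.
assert_flows_equal(flow_a, flow_b, ignore_parameter_values=True)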
github openml / openml-python / openml / extensions / sklearn / extension.py
        external_version = self._get_external_version_string(model, subcomponents)

        dependencies = '\n'.join([
            self._format_external_version(
                'sklearn',
                sklearn.__version__,
            ),
            'numpy>=1.6.1',
            'scipy>=0.9',
        ])

        sklearn_version = self._format_external_version('sklearn', sklearn.__version__)
        sklearn_version_formatted = sklearn_version.replace('==', '_')

        sklearn_description = self._get_sklearn_description(model)
        flow = OpenMLFlow(name=name,
                          class_name=class_name,
                          custom_name=short_name,
                          description=sklearn_description,
                          model=model,
                          components=subcomponents,
                          parameters=parameters,
                          parameters_meta_info=parameters_meta_info,
                          external_version=external_version,
                          tags=['openml-python', 'sklearn', 'scikit-learn',
                                'python', sklearn_version_formatted,
                                # TODO: add more tags based on the scikit-learn
                                # module a flow is in? For example automatically
                                # annotate a class of sklearn.svm.SVC() with the
                                # tag svm?
                                ],
                          extension=self,
github openml / openml-python / openml / utils.py
def _get_rest_api_type_alias(oml_object: 'OpenMLBase') -> str:
    """ Return the alias of the openml entity as it is defined for the REST API. """
    rest_api_mapping = [
        (openml.datasets.OpenMLDataset, 'data'),
        (openml.flows.OpenMLFlow, 'flow'),
        (openml.tasks.OpenMLTask, 'task'),
        (openml.runs.OpenMLRun, 'run'),
        ((openml.study.OpenMLStudy, openml.study.OpenMLBenchmarkSuite), 'study')
    ]  # type: List[Tuple[Union[Type, Tuple], str]]
    _, api_type_alias = [(python_type, api_alias)
                         for (python_type, api_alias) in rest_api_mapping
                         if isinstance(oml_object, python_type)][0]
    return api_type_alias
github openml / openml-python / openml / flows / sklearn_converter.py
        sub_components_names += "," + sub_components[key].name

    if sub_components_names:
        # slice operation on string in order to get rid of leading comma
        name = '%s(%s)' % (class_name, sub_components_names[1:])
    else:
        name = class_name

    # Get the external versions of all sub-components
    external_version = _get_external_version_string(model, sub_components)

    dependencies = [_format_external_version('sklearn', sklearn.__version__),
                    'numpy>=1.6.1', 'scipy>=0.9']
    dependencies = '\n'.join(dependencies)

    flow = OpenMLFlow(name=name,
                      class_name=class_name,
                      description='Automatically created scikit-learn flow.',
                      model=model,
                      components=sub_components,
                      parameters=parameters,
                      parameters_meta_info=parameters_meta_info,
                      external_version=external_version,
                      tags=[],
                      language='English',
                      # TODO fill in dependencies!
                      dependencies=dependencies)

    return flow
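In practice you rarely call a converter like this directly; flows for scikit-learn models are usually produced through the sklearn extension. A hedged sketch, assuming the SklearnExtension.model_to_flow entry point of current openml-python (the printed values are indicative only):

from sklearn.tree import DecisionTreeClassifier

from openml.extensions.sklearn import SklearnExtension

flow = SklearnExtension().model_to_flow(DecisionTreeClassifier())
print(flow.name)          # fully qualified class name of the estimator
print(flow.dependencies)  # sklearn / numpy / scipy version pins, as built above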
github openml / openml-python / openml / flows / functions.py
        flow if its upload date predates the upload date of the parent flow.

    ignore_parameter_values : bool
        Whether to ignore parameter values when comparing flows.

    ignore_custom_name_if_none : bool
        Whether to ignore the custom name field if either flow has `custom_name` equal to `None`.

    check_description : bool
        Whether to check that the flow descriptions match.
    """
    if not isinstance(flow1, OpenMLFlow):
        raise TypeError('Argument 1 must be of type OpenMLFlow, but is %s' %
                        type(flow1))

    if not isinstance(flow2, OpenMLFlow):
        raise TypeError('Argument 2 must be of type OpenMLFlow, but is %s' %
                        type(flow2))

    # TODO as they are actually now saved during publish, it might be good to
    # check for the equality of these as well.
    generated_by_the_server = ['flow_id', 'uploader', 'version', 'upload_date',
                               # Tags aren't directly created by the server,
                               # but the uploader has no control over them!
                               'tags']
    ignored_by_python_api = ['binary_url', 'binary_format', 'binary_md5',
                             'model', '_entity_id']

    for key in set(flow1.__dict__.keys()).union(flow2.__dict__.keys()):
        if key in generated_by_the_server + ignored_by_python_api:
            continue
        attr1 = getattr(flow1, key, None)
github openml / automlbenchmark / scripts / create_flows.py
    print(f'h2o flow created: {h2o_amlb_flow.flow_id}')
    assert h2o_amlb_flow.flow_id == 16115, "! NEW FLOW CREATED UNEXPECTEDLY!"

    autoweka_amlb_flow = openml.flows.OpenMLFlow(
        name='automlbenchmark_autoweka',
        description=('Auto-WEKA 2.6 as set up by the AutoML Benchmark. '
                     'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
        components=OrderedDict(),
        **standard_kwargs
    )
    autoweka_amlb_flow.publish()
    print(f'autoweka flow created: {autoweka_amlb_flow.flow_id}')
    assert autoweka_amlb_flow.flow_id == 16116, "! NEW FLOW CREATED UNEXPECTEDLY!"

    rf_flow = openml.flows.get_flow(16117)
    rf_amlb_flow = openml.flows.OpenMLFlow(
        name='automlbenchmark_randomforest',
        description=('Random Forest baseline as set up by the AutoML Benchmark. '
                     'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
        components=OrderedDict(randomforest=rf_flow),
        **standard_kwargs
    )
    rf_amlb_flow.publish()
    print(f'rf flow created: {rf_amlb_flow.flow_id}')
    assert rf_amlb_flow.flow_id == 16118, "! NEW FLOW CREATED UNEXPECTEDLY!"

    trf_amlb_flow = openml.flows.OpenMLFlow(
        name='automlbenchmark_tunedrandomforest',
        description=('Tuned Random Forest baseline as set up by the AutoML Benchmark. '
                     'Source: https://github.com/openml/automlbenchmark/releases/tag/v0.9'),
        components=OrderedDict(randomforest=rf_flow),
        **standard_kwargs