How to use the openml.utils._create_cache_directory_for_id function in openml

To help you get started, we’ve selected a few openml examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github openml / openml-python / openml / tasks / functions.py View on Github external
task_id : int or str
        The OpenML task id.
    download_data : bool
        Option to trigger download of data along with the meta data.

    Returns
    -------
    task
    """
    try:
        task_id = int(task_id)
    except (ValueError, TypeError):
        raise ValueError("Dataset ID is neither an Integer nor can be "
                         "cast to an Integer.")

    tid_cache_dir = openml.utils._create_cache_directory_for_id(
        TASKS_CACHE_DIR_NAME, task_id,
    )

    try:
        task = _get_task_description(task_id)
        dataset = get_dataset(task.dataset_id, download_data)
        # List of class labels available in dataset description
        # Including class labels as part of task meta data handles
        #   the case where data download was initially disabled
        if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
            task.class_labels = \
                dataset.retrieve_class_labels(task.target_name)
        # Clustering tasks do not have class labels
        # and do not offer download_split
        if download_data:
            if isinstance(task, OpenMLSupervisedTask):
github openml / openml-python / openml / datasets / functions.py View on Github external
Returns
    -------
    dataset : :class:`openml.OpenMLDataset`
        The downloaded dataset.
    """
    if isinstance(dataset_id, str):
        try:
            dataset_id = int(dataset_id)
        except ValueError:
            dataset_id = _name_to_id(dataset_id, version, error_if_multiple)  # type: ignore
    elif not isinstance(dataset_id, int):
        raise TypeError("`dataset_id` must be one of `str` or `int`, not {}."
                        .format(type(dataset_id)))

    did_cache_dir = _create_cache_directory_for_id(
        DATASETS_CACHE_DIR_NAME, dataset_id,
    )

    try:
        remove_dataset_cache = True
        description = _get_dataset_description(did_cache_dir, dataset_id)
        features = _get_dataset_features(did_cache_dir, dataset_id)

        try:
            qualities = _get_dataset_qualities(did_cache_dir, dataset_id)
        except OpenMLServerException as e:
            if e.code == 362 and str(e) == 'No qualities found - None':
                logger.warning("No qualities found for dataset {}".format(dataset_id))
                qualities = None
            else:
                raise
github openml / openml-python / openml / datasets / functions.py View on Github external
def _get_cached_dataset_description(dataset_id):
    """Load the cached XML description of a dataset.

    Parameters
    ----------
    dataset_id : int
        The OpenML dataset id.

    Returns
    -------
    dict
        The parsed ``oml:data_set_description`` section of the cached
        ``description.xml``.

    Raises
    ------
    OpenMLCacheException
        If no description is cached for the given dataset id.
    """
    did_cache_dir = _create_cache_directory_for_id(
        DATASETS_CACHE_DIR_NAME, dataset_id,
    )
    description_file = os.path.join(did_cache_dir, "description.xml")
    try:
        # IOError has been an alias of OSError since Python 3.3, so a single
        # OSError clause covers every file-access failure here.
        with open(description_file, encoding='utf8') as fh:
            dataset_xml = fh.read()
        return xmltodict.parse(dataset_xml)["oml:data_set_description"]
    except OSError:
        raise OpenMLCacheException(
            "Dataset description for dataset id %d not "
            "cached" % dataset_id)
github openml / openml-python / openml / flows / functions.py View on Github external
Parameters
    ----------
    flow_id : int
        The OpenML flow id.

    Returns
    -------
    OpenMLFlow
    """
    try:
        return _get_cached_flow(flow_id)
    except OpenMLCacheException:

        xml_file = os.path.join(
            openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id),
            "flow.xml",
        )

        flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id, request_method='get')
        with io.open(xml_file, "w", encoding='utf8') as fh:
            fh.write(flow_xml)

        return _create_flow_from_xml(flow_xml)
github openml / openml-python / openml / tasks / functions.py View on Github external
def _get_task_description(task_id):
    """Return the task description, downloading and caching it on a miss.

    The local cache is consulted first; if the task is not cached, the task
    XML is fetched from the server, written to the task's cache directory,
    and parsed.

    Parameters
    ----------
    task_id : int
        The OpenML task id.

    Returns
    -------
    OpenMLTask
        The task parsed from the (possibly freshly downloaded) XML.
    """
    try:
        return _get_cached_task(task_id)
    except OpenMLCacheException:
        xml_file = os.path.join(
            openml.utils._create_cache_directory_for_id(
                TASKS_CACHE_DIR_NAME,
                task_id,
            ),
            "task.xml",
        )
        # Pass the HTTP method by keyword for consistency with the other
        # _perform_api_call sites in this package (e.g. the flow download).
        task_xml = openml._api_calls._perform_api_call(
            "task/%d" % task_id, request_method='get',
        )

        with open(xml_file, "w", encoding='utf8') as fh:
            fh.write(task_xml)
        return _create_task_from_xml(task_xml)
github openml / openml-python / openml / runs / functions.py View on Github external
Parameters
    ----------
    run_id : int

    ignore_cache : bool
        Whether to ignore the cache. If ``true`` this will download and overwrite the run xml
        even if the requested run is already cached.

    ignore_cache

    Returns
    -------
    run : OpenMLRun
        Run corresponding to ID, fetched from the server.
    """
    run_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME,
                                                          run_id)
    run_file = os.path.join(run_dir, "description.xml")

    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    try:
        if not ignore_cache:
            return _get_cached_run(run_id)
        else:
            raise OpenMLCacheException(message='dummy')

    except OpenMLCacheException:
        run_xml = openml._api_calls._perform_api_call("run/%d" % run_id, 'get')
        with io.open(run_file, "w", encoding='utf8') as fh:
            fh.write(run_xml)
github openml / openml-python / openml / datasets / functions.py View on Github external
output_filename : string
        Location of ARFF file.
    """
    if isinstance(description, dict):
        md5_checksum_fixture = description.get("oml:md5_checksum")
        url = description['oml:url']
        did = description.get('oml:id')
    elif isinstance(description, OpenMLDataset):
        md5_checksum_fixture = description.md5_checksum
        url = description.url
        did = description.dataset_id
    else:
        raise TypeError("`description` should be either OpenMLDataset or Dict.")

    if cache_directory is None:
        cache_directory = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, did)
    output_file_path = os.path.join(cache_directory, "dataset.arff")

    try:
        openml._api_calls._download_text_file(
            source=url,
            output_path=output_file_path,
            md5_checksum=md5_checksum_fixture
        )
    except OpenMLHashException as e:
        additional_info = " Raised when downloading dataset {}.".format(did)
        e.args = (e.args[0] + additional_info,)
        raise

    return output_file_path
github openml / openml-python / openml / flows / functions.py View on Github external
def _get_cached_flow(fid: int) -> OpenMLFlow:
    """Load the flow with the given id from the local cache.

    Parameters
    ----------
    fid : int
        Flow id.

    Returns
    -------
    OpenMLFlow.

    Raises
    ------
    OpenMLCacheException
        If the flow XML is missing or unreadable; the cache directory for
        this id is removed before raising.
    """
    cache_dir = openml.utils._create_cache_directory_for_id(
        FLOWS_CACHE_DIR_NAME,
        fid,
    )
    cached_xml_path = os.path.join(cache_dir, "flow.xml")

    try:
        with io.open(cached_xml_path, encoding='utf8') as flow_handle:
            return _create_flow_from_xml(flow_handle.read())
    except (OSError, IOError):
        # An unreadable file means the cache entry is unusable; drop the
        # directory so a later download can repopulate it cleanly.
        openml.utils._remove_cache_dir_for_id(FLOWS_CACHE_DIR_NAME, cache_dir)
        raise OpenMLCacheException("Flow file for fid %d not "
                                   "cached" % fid)