How to use the mlblocks.datasets.Dataset function in mlblocks

To help you get started, we’ve selected a few mlblocks examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
"""UMLs Dataset.

    The data consists of information about a 135 Graph and the relations between
    their nodes given as a DataFrame with three columns, source, target and type,
    indicating which nodes are related and with which type of link. The target is
    a 1d numpy binary integer array indicating whether the indicated link exists
    or not.
    """
    dataset_path = _load('umls')

    X = _load_csv(dataset_path, 'data')
    y = X.pop('label').values

    graph = nx.Graph(nx.read_gml(os.path.join(dataset_path, 'graph.gml')))

    return Dataset(load_umls.__doc__, X, y, accuracy_score, stratify=True, graph=graph)
github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
def load_boston():
    """Boston House Prices Dataset."""
    # The docstring above is handed to Dataset as its description, so keep it in sync.
    # NOTE(review): scikit-learn deprecated `load_boston` in 1.0 and removed it in 1.2 —
    # confirm the pinned scikit-learn version still provides it.
    boston = datasets.load_boston()
    features, prices = boston.data, boston.target
    return Dataset(load_boston.__doc__, features, prices, r2_score)
github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
def load_newsgroups():
    """20 News Groups Dataset.

    The data of this dataset is a 1d numpy array vector containing the texts
    from 11314 newsgroups posts, and the target is a 1d numpy integer array
    containing the label of one of the 20 topics that they are about.
    """
    newsgroups = datasets.fetch_20newsgroups()

    # The fetched texts are wrapped in a numpy array before being handed to Dataset.
    texts = np.array(newsgroups.data)
    topics = newsgroups.target

    # The docstring above doubles as the Dataset description.
    return Dataset(
        load_newsgroups.__doc__,
        texts,
        topics,
        accuracy_score,
        stratify=True,
    )
github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
def load_jester():
    """Ratings from the Jester Online Joke Recommender System.

    This dataset consists of over 1.7 million instances of (user_id, item_id, rating)
    triples, which is split 50-50 into train and test data.

    source: "University of California Berkeley, CA"
    sourceURI: "http://eigentaste.berkeley.edu/dataset/"
    """
    jester_dir = _load('jester')
    ratings = _load_csv(jester_dir, 'data')

    # pop() is expected to take the 'rating' column out of the table so that it is
    # used only as the target (pandas semantics — confirm `_load_csv` returns a DataFrame).
    target = ratings.pop('rating').values

    # The docstring above doubles as the Dataset description.
    return Dataset(load_jester.__doc__, ratings, target, r2_score)
github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
def load_usps():
    """USPs Digits Dataset.

    The data of this dataset is a 3d numpy array vector with shape (224, 224, 3)
    containing 9298 224x224 RGB photos of handwritten digits, and the target is
    a 1d numpy integer array containing the label of the digit represented in
    the image.
    """
    usps_dir = _load('usps')
    index = _load_csv(usps_dir, 'data')

    # The CSV's `image` column tells `_load_images` what to read from the
    # 'images' subfolder; its `label` column provides the target.
    images_dir = os.path.join(usps_dir, 'images')
    images = _load_images(images_dir, index.image)
    labels = index.label.values

    # The docstring above doubles as the Dataset description.
    return Dataset(load_usps.__doc__, images, labels, accuracy_score, stratify=True)
github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
"""Amazon product co-purchasing network and ground-truth communities.

    Network was collected by crawling Amazon website. It is based on Customers Who Bought
    This Item Also Bought feature of the Amazon website. If a product i is frequently
    co-purchased with product j, the graph contains an undirected edge from i to j.
    Each product category provided by Amazon defines each ground-truth community.
    """

    dataset_path = _load('amazon')

    X = _load_csv(dataset_path, 'data')
    y = X.pop('label').values

    graph = nx.Graph(nx.read_gml(os.path.join(dataset_path, 'graph.gml')))

    return Dataset(load_amazon.__doc__, X, y, normalized_mutual_info_score, graph=graph)
github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
vocabulary = _load_csv(dataset_path, 'vocabulary', set_index=True)

    entities = {
        'data': (data, 'd3mIndex', None),
        'questions': (questions, 'qIndex', None),
        'sentences': (sentences, 'sIndex', None),
        'vocabulary': (vocabulary, 'index', None)
    }
    relationships = [
        ('questions', 'qIndex', 'data', 'qIndex'),
        ('sentences', 'sIndex', 'data', 'sIndex')
    ]

    target = data.pop('isAnswer').values

    return Dataset(load_wikiqa.__doc__, data, target, accuracy_score, startify=True,
                   entities=entities, relationships=relationships)
github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
def load_iris():
    """Iris Dataset."""
    # The docstring above doubles as the Dataset description.
    iris = datasets.load_iris()
    measurements, species = iris.data, iris.target
    return Dataset(
        load_iris.__doc__,
        measurements,
        species,
        accuracy_score,
        stratify=True,
    )
github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
def load_handgeometry():
    """Hand Geometry Dataset.

    The data of this dataset is a 3d numpy array vector with shape (224, 224, 3)
    containing 112 224x224 RGB photos of hands, and the target is a 1d numpy
    float array containing the width of the wrist in centimeters.
    """
    hands_dir = _load('handgeometry')
    index = _load_csv(hands_dir, 'data')

    # The CSV's `image` column tells `_load_images` what to read from the
    # 'images' subfolder; its `target` column provides the regression target.
    images_dir = os.path.join(hands_dir, 'images')
    photos = _load_images(images_dir, index.image)
    wrist_widths = index.target.values

    # The docstring above doubles as the Dataset description.
    return Dataset(load_handgeometry.__doc__, photos, wrist_widths, r2_score)
github HDI-Project / MLBlocks / mlblocks / datasets.py View on Github external
y = X.pop('label').values

    graph1 = nx.Graph(nx.read_gml(os.path.join(dataset_path, 'graph1.gml')))
    graph2 = nx.Graph(nx.read_gml(os.path.join(dataset_path, 'graph2.gml')))

    graph = graph1.copy()
    graph.add_nodes_from(graph2.nodes(data=True))
    graph.add_edges_from(graph2.edges)
    graph.add_edges_from(X[['graph1', 'graph2']].values)

    graphs = {
        'graph1': graph1,
        'graph2': graph2,
    }

    return Dataset(load_dic28.__doc__, X, y, accuracy_score,
                   stratify=True, graph=graph, graphs=graphs)