How to use the ampligraph.datasets.AmpligraphDatasetAdapter class in ampligraph

To help you get started, we’ve selected a few ampligraph examples, based on popular ways it is used in public projects.
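The snippets below show how AmpligraphDatasetAdapter subclasses are used inside AmpliGraph itself. As a quick orientation, here is a minimal sketch (assuming AmpliGraph 1.x, where NumpyDatasetAdapter and ComplEx are importable as shown) of passing either a plain numpy array or an adapter instance to fit():

import numpy as np
from ampligraph.datasets import NumpyDatasetAdapter
from ampligraph.latent_features import ComplEx

# A toy knowledge graph as an (n, 3) array of (subject, predicate, object) triples.
X_train = np.array([['a', 'likes', 'b'],
                    ['b', 'likes', 'c'],
                    ['a', 'knows', 'c']])

# Option 1: pass the raw numpy array; fit() wraps it in a NumpyDatasetAdapter internally.
model = ComplEx(batches_count=1, epochs=5, k=10, eta=1)
model.fit(X_train)

# Option 2: wrap the data in an adapter yourself and pass the adapter to fit().
adapter = NumpyDatasetAdapter()
adapter.set_data(X_train, "train")
model_2 = ComplEx(batches_count=1, epochs=5, k=10, eta=1)
model_2.fit(adapter)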


From Accenture/AmpliGraph: ampligraph/evaluation/protocol.py
dataset_handle.use_mappings(model.rel_to_idx, model.ent_to_idx)
            dataset_handle.set_data(X_test, "test")
        elif isinstance(X, AmpligraphDatasetAdapter):
            dataset_handle = X
        else:
            msg = "X must be either a numpy array or an AmpligraphDatasetAdapter."
            logger.error(msg)
            raise ValueError(msg)

        if filter_triples is not None:
            if isinstance(filter_triples, np.ndarray):
                logger.debug('Getting filtered triples.')
                filter_triples = filter_unseen_entities(filter_triples, model, verbose=verbose, strict=strict)
                dataset_handle.set_filter(filter_triples)
                model.set_filter_for_eval()
            elif isinstance(X, AmpligraphDatasetAdapter):
                if not isinstance(filter_triples, bool):
                    raise Exception('Expected a boolean type')
                if filter_triples is True:
                    model.set_filter_for_eval()
            else:
                raise Exception('Invalid datatype for filter. Expected a numpy array or preset data in the adapter.')

        eval_dict = {'default_protocol': False}

        if use_default_protocol:
            corrupt_side = 's+o'
            eval_dict['default_protocol'] = True

        if entities_subset is not None:
            idx_entities = np.asarray([idx for uri, idx in model.ent_to_idx.items() if uri in entities_subset])
            eval_dict['corruption_entities'] = idx_entities
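The branch above shows that the surrounding evaluation routine accepts an adapter in place of X, turns a numpy filter into an adapter-level filter, and maps entities_subset to internal indices before corruptions are generated. A hedged usage sketch (parameter names taken from the snippet; load_wn18, ComplEx, evaluate_performance and mrr_score assumed from the public AmpliGraph API):

import numpy as np
from ampligraph.datasets import load_wn18
from ampligraph.latent_features import ComplEx
from ampligraph.evaluation import evaluate_performance, mrr_score

X = load_wn18()
model = ComplEx(batches_count=100, epochs=20, k=50, eta=5)
model.fit(X['train'])

# Filter all known triples out of the corruptions, and corrupt only with a subset of entities.
filter_triples = np.concatenate((X['train'], X['valid'], X['test']))
ranks = evaluate_performance(X['test'][:100],
                             model=model,
                             filter_triples=filter_triples,
                             entities_subset=X['test'][:100, 2].tolist(),
                             verbose=True)
print(mrr_score(ranks))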
From Accenture/AmpliGraph: ampligraph/latent_features/models/EmbeddingModel.py
"""
        try:
            self.x_valid = self.early_stopping_params['x_valid']

            if isinstance(self.x_valid, np.ndarray):
                if self.x_valid.ndim <= 1 or (np.shape(self.x_valid)[1]) != 3:
                    msg = 'Invalid size for input x_valid. Expected (n,3):  got {}'.format(np.shape(self.x_valid))
                    logger.error(msg)
                    raise ValueError(msg)

                # store the validation data in the data handler
                self.x_valid = to_idx(self.x_valid, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
                self.train_dataset_handle.set_data(self.x_valid, "valid", mapped_status=True)
                self.eval_dataset_handle = self.train_dataset_handle

            elif isinstance(self.x_valid, AmpligraphDatasetAdapter):
                # this assumes that the validation data has already been set in the adapter
                self.eval_dataset_handle = self.x_valid
            else:
                msg = 'Invalid type for input X. Expected ndarray/AmpligraphDataset object, \
                       got {}'.format(type(self.x_valid))
                logger.error(msg)
                raise ValueError(msg)
        except KeyError:
            msg = 'x_valid must be passed for early stopping.'
            logger.error(msg)
            raise KeyError(msg)

        self.early_stopping_criteria = self.early_stopping_params.get(
            'criteria', constants.DEFAULT_CRITERIA_EARLY_STOPPING)
        if self.early_stopping_criteria not in ['hits10', 'hits1', 'hits3',
                                                'mrr']:
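Per the elif branch above, x_valid may itself be an AmpligraphDatasetAdapter whose validation split has already been set. A hedged sketch of that usage (assuming a NumpyDatasetAdapter carrying both splits; load_wn18 and ComplEx assumed from the public API):

from ampligraph.datasets import load_wn18, NumpyDatasetAdapter
from ampligraph.latent_features import ComplEx

X = load_wn18()
adapter = NumpyDatasetAdapter()
adapter.set_data(X['train'], "train")
adapter.set_data(X['valid'][:500], "valid")

model = ComplEx(batches_count=100, epochs=200, k=50, eta=5)
# Passing the adapter as x_valid skips the ndarray checks above:
# the validation split is read directly from the data handler.
model.fit(adapter,
          early_stopping=True,
          early_stopping_params={'x_valid': adapter, 'criteria': 'mrr'})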
From Accenture/AmpliGraph: ampligraph/latent_features/models.py
- **'stop_interval'**: int : Stop if criteria is performing worse over n consecutive checks (default: 3)
                - **'corruption_entities'**: List of entities to be used for corruptions. If 'all',
                  it uses all entities (default: 'all')
                - **'corrupt_side'**: Specifies which side to corrupt. 's', 'o', 's+o' (default)

                Example: ``early_stopping_params={'x_valid': X['valid'], 'criteria': 'mrr'}``

        """
        self.train_dataset_handle = None
        # try-except block is mainly to handle clean up in case of exception or manual stop in jupyter notebook
        try:
            if isinstance(X, np.ndarray):
                # Adapt the numpy data in the internal format - to generalize
                self.train_dataset_handle = NumpyDatasetAdapter()
                self.train_dataset_handle.set_data(X, "train")
            elif isinstance(X, AmpligraphDatasetAdapter):
                self.train_dataset_handle = X
            else:
                msg = 'Invalid type for input X. Expected ndarray/AmpligraphDataset object, got {}'.format(type(X))
                logger.error(msg)
                raise ValueError(msg)

            # create internal IDs mappings
            self.rel_to_idx, self.ent_to_idx = self.train_dataset_handle.generate_mappings()
            prefetch_batches = 1

            if len(self.ent_to_idx) > ENTITY_THRESHOLD:
                self.dealing_with_large_graphs = True
                prefetch_batches = 0

                logger.warning('Your graph has a large number of distinct entities. '
                               'Found {} distinct entities'.format(len(self.ent_to_idx)))
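Putting the documented keys together, here is a hedged, runnable version of the docstring example above (load_wn18 and ComplEx assumed from the public API; the values shown are the documented defaults):

from ampligraph.datasets import load_wn18
from ampligraph.latent_features import ComplEx

X = load_wn18()
model = ComplEx(batches_count=100, epochs=200, k=50, eta=5)

# Early stopping checks the chosen criterion on x_valid and stops after
# 'stop_interval' consecutive checks without improvement.
model.fit(X['train'],
          early_stopping=True,
          early_stopping_params={'x_valid': X['valid'][:500],
                                 'criteria': 'mrr',
                                 'stop_interval': 3,
                                 'corruption_entities': 'all',
                                 'corrupt_side': 's+o'})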
From Accenture/AmpliGraph: ampligraph/evaluation/protocol.py
    >>> mrr_score(ranks)
    0.24049691297347323
    >>> hits_at_n_score(ranks, n=10)
    0.4
    """
    dataset_handle = None
    # try-except block is mainly to handle clean up in case of exception or manual stop in jupyter notebook
    try:
        logger.debug('Evaluating the performance of the embedding model.')
        if isinstance(X, np.ndarray):
            X_test = filter_unseen_entities(X, model, verbose=verbose, strict=strict)

            dataset_handle = NumpyDatasetAdapter()
            dataset_handle.use_mappings(model.rel_to_idx, model.ent_to_idx)
            dataset_handle.set_data(X_test, "test")
        elif isinstance(X, AmpligraphDatasetAdapter):
            dataset_handle = X
        else:
            msg = "X must be either a numpy array or an AmpligraphDatasetAdapter."
            logger.error(msg)
            raise ValueError(msg)

        if filter_triples is not None:
            if isinstance(filter_triples, np.ndarray):
                logger.debug('Getting filtered triples.')
                filter_triples = filter_unseen_entities(filter_triples, model, verbose=verbose, strict=strict)
                dataset_handle.set_filter(filter_triples)
                model.set_filter_for_eval()
            elif isinstance(X, AmpligraphDatasetAdapter):
                if not isinstance(filter_triples, bool):
                    raise Exception('Expected a boolean type')
                if filter_triples is True:
From Accenture/AmpliGraph: ampligraph/datasets/sqlite_adapter.py
import numpy as np
from ..datasets import AmpligraphDatasetAdapter
import tempfile
import sqlite3
import time
import os
import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


class SQLiteAdapter(AmpligraphDatasetAdapter):
    '''SQLite adapter
    '''
    def __init__(self, existing_db_name=None, ent_to_idx=None, rel_to_idx=None):
        """Initialize the class variables
        Parameters
        ----------
        existing_db_name : string
            Name of an existing database to use.
            Assumes that the database has the schema required by the adapter and that the persisted data is already mapped.
        ent_to_idx : dictionary of mappings
            Mappings of entity to idx
        rel_to_idx : dictionary of mappings
            Mappings of relation to idx
        """
        super(SQLiteAdapter, self).__init__()
        # persistence status of the data
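A hedged sketch of using SQLiteAdapter as a drop-in data handler (assuming it exposes the same set_data interface used by the other adapters on this page; the adapter keeps the triples in a SQLite database rather than in memory):

import numpy as np
from ampligraph.datasets import SQLiteAdapter
from ampligraph.latent_features import ComplEx

X_train = np.array([['a', 'likes', 'b'],
                    ['b', 'likes', 'c'],
                    ['a', 'knows', 'c']])

# Backed by a temporary SQLite database instead of an in-memory numpy array.
adapter = SQLiteAdapter()
adapter.set_data(X_train, "train")

model = ComplEx(batches_count=1, epochs=5, k=10, eta=1)
model.fit(adapter)  # fit() accepts any AmpligraphDatasetAdapter, as shown above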
From Accenture/AmpliGraph: ampligraph/latent_features/models/EmbeddingModel.py
- **'stop_interval'**: int : Stop if criteria is performing worse over n consecutive checks (default: 3)
                - **'corruption_entities'**: List of entities to be used for corruptions. If 'all',
                  it uses all entities (default: 'all')
                - **'corrupt_side'**: Specifies which side to corrupt. 's', 'o', 's+o' (default)

                Example: ``early_stopping_params={'x_valid': X['valid'], 'criteria': 'mrr'}``

        """
        self.train_dataset_handle = None
        # try-except block is mainly to handle clean up in case of exception or manual stop in jupyter notebook
        try:
            if isinstance(X, np.ndarray):
                # Adapt the numpy data in the internal format - to generalize
                self.train_dataset_handle = NumpyDatasetAdapter()
                self.train_dataset_handle.set_data(X, "train")
            elif isinstance(X, AmpligraphDatasetAdapter):
                self.train_dataset_handle = X
            else:
                msg = 'Invalid type for input X. Expected ndarray/AmpligraphDataset object, got {}'.format(type(X))
                logger.error(msg)
                raise ValueError(msg)

            # create internal IDs mappings
            self.rel_to_idx, self.ent_to_idx = self.train_dataset_handle.generate_mappings()
            prefetch_batches = 1

            if len(self.ent_to_idx) > ENTITY_THRESHOLD:
                self.dealing_with_large_graphs = True

                logger.warning('Your graph has a large number of distinct entities. '
                               'Found {} distinct entities'.format(len(self.ent_to_idx)))
From Accenture/AmpliGraph: ampligraph/latent_features/models.py
"""
        try:
            self.x_valid = self.early_stopping_params['x_valid']
            
            if isinstance(self.x_valid, np.ndarray):
                if self.x_valid.ndim <= 1 or (np.shape(self.x_valid)[1]) != 3:
                    msg = 'Invalid size for input x_valid. Expected (n,3):  got {}'.format(np.shape(self.x_valid))
                    logger.error(msg)
                    raise ValueError(msg)
                
                # store the validation data in the data handler 
                self.x_valid = to_idx(self.x_valid, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
                self.train_dataset_handle.set_data(self.x_valid, "valid", mapped_status=True)
                self.eval_dataset_handle = self.train_dataset_handle
                
            elif isinstance(self.x_valid, AmpligraphDatasetAdapter):
                # this assumes that the validation data has already been set in the adapter
                self.eval_dataset_handle = self.x_valid
            else:
                msg = 'Invalid type for input X. Expected ndarray/AmpligraphDataset object, \
                       got {}'.format(type(self.x_valid))
                logger.error(msg)
                raise ValueError(msg)
        except KeyError:
            msg = 'x_valid must be passed for early stopping.'
            logger.error(msg)
            raise KeyError(msg)

        self.early_stopping_criteria = self.early_stopping_params.get(
            'criteria', DEFAULT_CRITERIA_EARLY_STOPPING)
        if self.early_stopping_criteria not in ['hits10', 'hits1', 'hits3',
                                                'mrr']:
From Accenture/AmpliGraph: ampligraph/datasets/numpy_adapter.py
import numpy as np
from ..datasets import AmpligraphDatasetAdapter, SQLiteAdapter


class NumpyDatasetAdapter(AmpligraphDatasetAdapter):
    def __init__(self):
        """Initialize the class variables
        """
        super(NumpyDatasetAdapter, self).__init__()
        # NumpyDatasetAdapter uses SQLiteAdapter to filter (if filters are set)
        self.filter_adapter = None
    
    def generate_mappings(self, use_all=False):
        """Generate mappings from either train set or use all dataset to generate mappings
        Parameters
        ----------
        use_all : boolean
            If True, it generates mapping from all the data. If False, it only uses training set to generate mappings
            
        Returns
        -------
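A hedged sketch of the mapping workflow this method supports (the return order rel_to_idx, ent_to_idx follows the fit() snippets above):

import numpy as np
from ampligraph.datasets import NumpyDatasetAdapter

X_train = np.array([['a', 'likes', 'b'],
                    ['b', 'knows', 'c']])

adapter = NumpyDatasetAdapter()
adapter.set_data(X_train, "train")

# Build relation/entity -> integer index dictionaries from the training split only.
rel_to_idx, ent_to_idx = adapter.generate_mappings(use_all=False)
print(sorted(ent_to_idx))   # ['a', 'b', 'c']
print(sorted(rel_to_idx))   # ['knows', 'likes']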