dataset_handle.use_mappings(model.rel_to_idx, model.ent_to_idx)
dataset_handle.set_data(X_test, "test")
elif isinstance(X, AmpligraphDatasetAdapter):
dataset_handle = X
else:
msg = "X must be either a numpy array or an AmpligraphDatasetAdapter."
logger.error(msg)
raise ValueError(msg)
if filter_triples is not None:
if isinstance(filter_triples, np.ndarray):
logger.debug('Getting filtered triples.')
filter_triples = filter_unseen_entities(filter_triples, model, verbose=verbose, strict=strict)
dataset_handle.set_filter(filter_triples)
model.set_filter_for_eval()
elif isinstance(X, AmpligraphDatasetAdapter):
if not isinstance(filter_triples, bool):
raise Exception('Expected a boolean type')
if filter_triples is True:
model.set_filter_for_eval()
else:
raise Exception('Invalid datatype for filter. Expected a numpy array or preset data in the adapter.')
eval_dict = {'default_protocol': False}
if use_default_protocol:
corrupt_side = 's+o'
eval_dict['default_protocol'] = True
if entities_subset is not None:
idx_entities = np.asarray([idx for uri, idx in model.ent_to_idx.items() if uri in entities_subset])
eval_dict['corruption_entities'] = idx_entities
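# --- Minimal usage sketch of the evaluation routine above (hedged, not part of the library source) ---
# Assumes a trained `model` and a dataset dict `X` with 'train'/'valid'/'test' splits, as in the
# AmpliGraph docs; the entity URI passed to `entities_subset` is purely illustrative.
import numpy as np
from ampligraph.evaluation import evaluate_performance

filter_triples = np.concatenate((X['train'], X['valid'], X['test']))  # known true triples to filter out
ranks = evaluate_performance(X['test'],
                             model=model,
                             filter_triples=filter_triples,
                             use_default_protocol=True,      # corrupt both subject and object ('s+o')
                             entities_subset=['/m/027rn'],   # illustrative: restrict corruption entities
                             verbose=True)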
"""
try:
self.x_valid = self.early_stopping_params['x_valid']
if isinstance(self.x_valid, np.ndarray):
if self.x_valid.ndim <= 1 or (np.shape(self.x_valid)[1]) != 3:
msg = 'Invalid size for input x_valid. Expected (n,3): got {}'.format(np.shape(self.x_valid))
logger.error(msg)
raise ValueError(msg)
# store the validation data in the data handler
self.x_valid = to_idx(self.x_valid, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
self.train_dataset_handle.set_data(self.x_valid, "valid", mapped_status=True)
self.eval_dataset_handle = self.train_dataset_handle
elif isinstance(self.x_valid, AmpligraphDatasetAdapter):
# this assumes that the validation data has already been set in the adapter
self.eval_dataset_handle = self.x_valid
else:
msg = 'Invalid type for input X. Expected ndarray/AmpligraphDataset object, got {}'.format(type(self.x_valid))
logger.error(msg)
raise ValueError(msg)
except KeyError:
msg = 'x_valid must be passed for early stopping.'
logger.error(msg)
raise KeyError(msg)
self.early_stopping_criteria = self.early_stopping_params.get(
'criteria', constants.DEFAULT_CRITERIA_EARLY_STOPPING)
if self.early_stopping_criteria not in ['hits10', 'hits1', 'hits3', 'mrr']:
- **'stop_interval'**: int : Stop if the criterion performs worse over n consecutive checks (default: 3)
- **'corruption_entities'**: List of entities to be used for corruptions. If 'all',
it uses all entities (default: 'all')
- **'corrupt_side'**: Specifies which side to corrupt. 's', 'o', 's+o' (default)
Example: ``early_stopping_params={'x_valid': X['valid'], 'criteria': 'mrr'}``
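A fuller sketch combining these keys (hedged: ``model`` and the dataset dict ``X`` are assumed
to exist as in the library examples; the values are purely illustrative):
>>> early_stopping_params = {'x_valid': X['valid'],
...                          'criteria': 'mrr',
...                          'stop_interval': 4,
...                          'corrupt_side': 's+o'}
>>> model.fit(X['train'], early_stopping=True, early_stopping_params=early_stopping_params)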
"""
self.train_dataset_handle = None
# try-except block is mainly to handle cleanup in case of an exception or a manual stop in a Jupyter notebook
try:
if isinstance(X, np.ndarray):
# Adapt the numpy data to the internal format, to generalize across input types
self.train_dataset_handle = NumpyDatasetAdapter()
self.train_dataset_handle.set_data(X, "train")
elif isinstance(X, AmpligraphDatasetAdapter):
self.train_dataset_handle = X
else:
msg = 'Invalid type for input X. Expected ndarray/AmpligraphDataset object, got {}'.format(type(X))
logger.error(msg)
raise ValueError(msg)
# create internal ID mappings
self.rel_to_idx, self.ent_to_idx = self.train_dataset_handle.generate_mappings()
prefetch_batches = 1
if len(self.ent_to_idx) > ENTITY_THRESHOLD:
self.dealing_with_large_graphs = True
prefetch_batches = 0
logger.warning('Your graph has a large number of distinct entities. '
'Found {} distinct entities'.format(len(self.ent_to_idx)))
>>> mrr_score(ranks)
0.24049691297347323
>>> hits_at_n_score(ranks, n=10)
0.4
"""
dataset_handle = None
# try-except block is mainly to handle cleanup in case of an exception or a manual stop in a Jupyter notebook
try:
logger.debug('Evaluating the performance of the embedding model.')
if isinstance(X, np.ndarray):
X_test = filter_unseen_entities(X, model, verbose=verbose, strict=strict)
dataset_handle = NumpyDatasetAdapter()
dataset_handle.use_mappings(model.rel_to_idx, model.ent_to_idx)
dataset_handle.set_data(X_test, "test")
elif isinstance(X, AmpligraphDatasetAdapter):
dataset_handle = X
else:
msg = "X must be either a numpy array or an AmpligraphDatasetAdapter."
logger.error(msg)
raise ValueError(msg)
if filter_triples is not None:
if isinstance(filter_triples, np.ndarray):
logger.debug('Getting filtered triples.')
filter_triples = filter_unseen_entities(filter_triples, model, verbose=verbose, strict=strict)
dataset_handle.set_filter(filter_triples)
model.set_filter_for_eval()
elif isinstance(X, AmpligraphDatasetAdapter):
if not isinstance(filter_triples, bool):
raise Exception('Expected a boolean type')
if filter_triples is True:
import numpy as np
from ..datasets import AmpligraphDatasetAdapter
import tempfile
import sqlite3
import time
import os
import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
class SQLiteAdapter(AmpligraphDatasetAdapter):
'''SQLite adapter
'''
def __init__(self, existing_db_name=None, ent_to_idx=None, rel_to_idx=None):
"""Initialize the class variables
Parameters
----------
existing_db_name : string
Name of an existing database to use.
Assumes that the database has the schema required by the adapter and that the persisted data is already mapped.
ent_to_idx : dictionary of mappings
Mappings of entity to idx
rel_to_idx : dictionary of mappings
Mappings of relation to idx
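Example
-------
A hedged sketch (not from the library docs); the database file name is illustrative and the
mappings are assumed to come from an already trained ``model``:
>>> adapter = SQLiteAdapter(existing_db_name='kg_triples.db',
...                         ent_to_idx=model.ent_to_idx,
...                         rel_to_idx=model.rel_to_idx)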
"""
super(SQLiteAdapter, self).__init__()
# persistence status of the data
- **'stop_interval'**: int : Stop if the criterion performs worse over n consecutive checks (default: 3)
- **'corruption_entities'**: List of entities to be used for corruptions. If 'all',
it uses all entities (default: 'all')
- **'corrupt_side'**: Specifies which side to corrupt. 's', 'o', 's+o' (default)
Example: ``early_stopping_params={'x_valid': X['valid'], 'criteria': 'mrr'}``
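For instance, to restrict early-stopping corruptions to a few entities and to the object side only
(a hedged sketch; ``X['valid']`` and the entity names are illustrative):
>>> early_stopping_params = {'x_valid': X['valid'],
...                          'criteria': 'hits10',
...                          'corruption_entities': ['a', 'b', 'c'],
...                          'corrupt_side': 'o'}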
"""
self.train_dataset_handle = None
# try-except block is mainly to handle cleanup in case of an exception or a manual stop in a Jupyter notebook
try:
if isinstance(X, np.ndarray):
# Adapt the numpy data to the internal format, to generalize across input types
self.train_dataset_handle = NumpyDatasetAdapter()
self.train_dataset_handle.set_data(X, "train")
elif isinstance(X, AmpligraphDatasetAdapter):
self.train_dataset_handle = X
else:
msg = 'Invalid type for input X. Expected ndarray/AmpligraphDataset object, got {}'.format(type(X))
logger.error(msg)
raise ValueError(msg)
# create internal ID mappings
self.rel_to_idx, self.ent_to_idx = self.train_dataset_handle.generate_mappings()
prefetch_batches = 1
if len(self.ent_to_idx) > ENTITY_THRESHOLD:
self.dealing_with_large_graphs = True
logger.warning('Your graph has a large number of distinct entities. '
'Found {} distinct entities'.format(len(self.ent_to_idx)))
"""
try:
self.x_valid = self.early_stopping_params['x_valid']
if isinstance(self.x_valid, np.ndarray):
if self.x_valid.ndim <= 1 or (np.shape(self.x_valid)[1]) != 3:
msg = 'Invalid size for input x_valid. Expected (n,3): got {}'.format(np.shape(self.x_valid))
logger.error(msg)
raise ValueError(msg)
# store the validation data in the data handler
self.x_valid = to_idx(self.x_valid, ent_to_idx=self.ent_to_idx, rel_to_idx=self.rel_to_idx)
self.train_dataset_handle.set_data(self.x_valid, "valid", mapped_status=True)
self.eval_dataset_handle = self.train_dataset_handle
elif isinstance(self.x_valid, AmpligraphDatasetAdapter):
# this assumes that the validation data has already been set in the adapter
self.eval_dataset_handle = self.x_valid
else:
msg = 'Invalid type for input X. Expected ndarray/AmpligraphDataset object, got {}'.format(type(self.x_valid))
logger.error(msg)
raise ValueError(msg)
except KeyError:
msg = 'x_valid must be passed for early stopping.'
logger.error(msg)
raise KeyError(msg)
self.early_stopping_criteria = self.early_stopping_params.get(
'criteria', DEFAULT_CRITERIA_EARLY_STOPPING)
if self.early_stopping_criteria not in ['hits10', 'hits1', 'hits3', 'mrr']:
import numpy as np
from ..datasets import AmpligraphDatasetAdapter, SQLiteAdapter
class NumpyDatasetAdapter(AmpligraphDatasetAdapter):
def __init__(self):
"""Initialize the class variables
"""
super(NumpyDatasetAdapter, self).__init__()
# NumpyDatasetAdapter uses SQLiteAdapter to filter (if filters are set)
self.filter_adapter = None
def generate_mappings(self, use_all=False):
"""Generate mappings from either train set or use all dataset to generate mappings
Parameters
----------
use_all : boolean
If True, it generates the mappings from all the data. If False, it uses only the training set to generate the mappings
Returns
-------
rel_to_idx : dictionary
Relation to idx mapping dictionary
ent_to_idx : dictionary
Entity to idx mapping dictionary
"""
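# --- Minimal usage sketch for the adapter above (hedged, not part of the library source) ---
# Assumes `X_train` is an (n, 3) numpy array of raw (subject, predicate, object) triples;
# the variable name is illustrative.
adapter = NumpyDatasetAdapter()
adapter.set_data(X_train, "train")
rel_to_idx, ent_to_idx = adapter.generate_mappings(use_all=False)  # build mappings from the train split only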