# Vendor banner (Snyk) accidentally pasted as bare text; kept as a comment so the file stays parseable.
def test_compositional_feature_1():
class FakeFeaturizer(BaseCompositionFeaturizer):
    """Toy featurizer: fraction-weighted average of elemental properties."""

    @property
    def feature_labels(self):
        # One ``min:<column>`` label per elemental-property column.
        return ['min:{}'.format(col) for col in self._elements]

    def mix_function(self, elems, nums):
        # Normalize the raw counts to atomic fractions, then average the
        # selected property rows with those fractions.
        rows = self._elements.loc[elems, :]
        fractions = nums / np.sum(nums)
        return fractions.dot(rows)
desc = FakeFeaturizer(n_jobs=1)
tmp = desc.fit_transform([{'H': 2}])
assert isinstance(tmp, list)
with pytest.raises(KeyError):
"""
super().__init__(n_jobs=n_jobs,
on_errors=on_errors,
return_type=return_type)
def mix_function(self, elems, _):
    """Column-wise maximum over the rows selected by ``elems``.

    The second argument (the composition counts) is deliberately ignored:
    max-pooling does not depend on stoichiometry.
    """
    # ``self._elements`` appears to be indexed by element symbol — pick the
    # requested rows and pool each property column to its maximum.
    return self._elements.loc[elems, :].max().values
@property
def feature_labels(self):
    """One ``max:<column>`` label per elemental-property column."""
    return ['max:{}'.format(col) for col in self._elements]
class MinPooling(BaseCompositionFeaturizer):
def __init__(self, *, n_jobs=-1, on_errors='raise', return_type='any'):
"""
Parameters
----------
n_jobs: int
The number of jobs to run in parallel for both fit and predict.
Set -1 to use all cpu cores (default).
Inputs ``X`` will be split into some blocks then run on each cpu cores.
on_errors: string
How to handle exceptions in feature calculations. Can be 'nan', 'keep', 'raise'.
When 'nan', return a column with ``np.nan``.
The length of column corresponding to the number of feature labs.
When 'keep', return a column with exception objects.
The default is 'raise' which will raise up the exception.
return_type: str
on_errors=on_errors,
return_type=return_type)
def mix_function(self, elems, nums):
    """Fraction-weighted variance of the selected elemental properties."""
    props = self._elements.loc[elems, :].values
    weights = nums / np.sum(nums)
    # Center each row on the weighted mean, then take the weighted
    # mean of the squared deviations.
    deviations = props - weights.dot(props)
    return weights.dot(deviations ** 2)
@property
def feature_labels(self):
    """One ``var:<column>`` label per elemental-property column."""
    return ['var:{}'.format(col) for col in self._elements]
class MaxPooling(BaseCompositionFeaturizer):
def __init__(self, *, n_jobs=-1, on_errors='raise', return_type='any'):
"""
Parameters
----------
n_jobs: int
The number of jobs to run in parallel for both fit and predict.
Set -1 to use all cpu cores (default).
Inputs ``X`` will be split into some blocks then run on each cpu cores.
on_errors: string
How to handle exceptions in feature calculations. Can be 'nan', 'keep', 'raise'.
When 'nan', return a column with ``np.nan``.
The length of column corresponding to the number of feature labs.
When 'keep', return a column with exception objects.
The default is 'raise' which will raise up the exception.
return_type: str
super().__init__(n_jobs=n_jobs,
on_errors=on_errors,
return_type=return_type)
def mix_function(self, elems, nums):
    """Weighted sum of the elemental properties.

    Uses the raw composition counts as weights — unlike the averaging
    featurizers, the counts are NOT normalized to fractions here.
    """
    props = self._elements.loc[elems, :].values
    return np.asarray(nums).dot(props)
@property
def feature_labels(self):
    """One ``sum:<column>`` label per elemental-property column."""
    return ['sum:{}'.format(col) for col in self._elements]
class GeometricMean(BaseCompositionFeaturizer):
def __init__(self, *, n_jobs=-1, on_errors='raise', return_type='any'):
"""
Parameters
----------
n_jobs: int
The number of jobs to run in parallel for both fit and predict.
Set -1 to use all cpu cores (default).
Inputs ``X`` will be split into some blocks then run on each cpu cores.
on_errors: string
How to handle exceptions in feature calculations. Can be 'nan', 'keep', 'raise'.
When 'nan', return a column with ``np.nan``.
The length of column corresponding to the number of feature labs.
When 'keep', return a column with exception objects.
The default is 'raise' which will raise up the exception.
return_type: str
on_errors=on_errors,
return_type=return_type)
def mix_function(self, elems, nums):
    """Weighted harmonic mean: ``sum(w) / sum(w / x)``.

    NOTE(review): zero-valued properties would divide by zero here —
    presumably the elemental tables never contain zeros; confirm upstream.
    """
    reciprocals = 1 / self._elements.loc[elems, :].values
    weights = np.array(nums)
    return sum(weights) / weights.dot(reciprocals)
@property
def feature_labels(self):
    """One ``hmean:<column>`` label per elemental-property column."""
    return ['hmean:{}'.format(col) for col in self._elements]
class WeightedVariance(BaseCompositionFeaturizer):
def __init__(self, *, n_jobs=-1, on_errors='raise', return_type='any'):
"""
Parameters
----------
n_jobs: int
The number of jobs to run in parallel for both fit and predict.
Set -1 to use all cpu cores (default).
Inputs ``X`` will be split into some blocks then run on each cpu cores.
on_errors: string
How to handle exceptions in feature calculations. Can be 'nan', 'keep', 'raise'.
When 'nan', return a column with ``np.nan``.
The length of column corresponding to the number of feature labs.
When 'keep', return a column with exception objects.
The default is 'raise' which will raise up the exception.
return_type: str
def mix_function(self, elems, nums):
    """Map a composition onto the fixed element vocabulary ``self._elems``.

    Returns an integer vector with one slot per known element: 1 at each
    present element when ``one_hot_vec`` is set, otherwise the raw count
    from ``nums``.
    """
    # Fix: ``np.int`` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin ``int`` is the documented replacement and produces the
    # same default integer dtype.
    vec = np.zeros(len(self._elems), dtype=int)
    one_hot = self.one_hot_vec  # loop-invariant flag, hoisted out of the loop
    for i, e in enumerate(elems):
        vec[self._elems.index(e)] = 1 if one_hot else nums[i]
    return vec
@property
def feature_labels(self):
    # The label list is the element vocabulary itself: one output column
    # per known element symbol.
    return self._elems
class WeightedAverage(BaseCompositionFeaturizer):
def __init__(self, *, n_jobs=-1, on_errors='raise', return_type='any'):
"""
Parameters
----------
n_jobs: int
The number of jobs to run in parallel for both fit and predict.
Set -1 to use all cpu cores (default).
Inputs ``X`` will be split into some blocks then run on each cpu cores.
on_errors: string
How to handle exceptions in feature calculations. Can be 'nan', 'keep', 'raise'.
When 'nan', return a column with ``np.nan``.
The length of column corresponding to the number of feature labs.
When 'keep', return a column with exception objects.
The default is 'raise' which will raise up the exception.
return_type: str
super().__init__(n_jobs=n_jobs,
on_errors=on_errors,
return_type=return_type)
def mix_function(self, elems, nums):
    """Weighted geometric mean: ``(prod_i x_i ** w_i) ** (1 / sum(w))``."""
    props = self._elements.loc[elems, :].values
    weights = np.array(nums).reshape(-1, 1)  # column vector → row-wise broadcast
    weighted_product = (props ** weights).prod(axis=0)
    return np.power(weighted_product, 1 / sum(weights))
@property
def feature_labels(self):
    """One ``gmean:<column>`` label per elemental-property column."""
    return ['gmean:{}'.format(col) for col in self._elements]
class HarmonicMean(BaseCompositionFeaturizer):
def __init__(self, *, n_jobs=-1, on_errors='raise', return_type='any'):
"""
Parameters
----------
n_jobs: int
The number of jobs to run in parallel for both fit and predict.
Set -1 to use all cpu cores (default).
Inputs ``X`` will be split into some blocks then run on each cpu cores.
on_errors: string
How to handle exceptions in feature calculations. Can be 'nan', 'keep', 'raise'.
When 'nan', return a column with ``np.nan``.
The length of column corresponding to the number of feature labs.
When 'keep', return a column with exception objects.
The default is 'raise' which will raise up the exception.
return_type: str
super().__init__(n_jobs=n_jobs,
on_errors=on_errors,
return_type=return_type)
def mix_function(self, elems, nums):
    """Fraction-weighted mean of the selected elemental properties."""
    props = self._elements.loc[elems, :].values
    # Raw counts → atomic fractions, then a weighted row average.
    fractions = nums / np.sum(nums)
    return fractions.dot(props)
@property
def feature_labels(self):
    """One ``ave:<column>`` label per elemental-property column."""
    return ['ave:{}'.format(col) for col in self._elements]
class WeightedSum(BaseCompositionFeaturizer):
def __init__(self, *, n_jobs=-1, on_errors='raise', return_type='any'):
"""
Parameters
----------
n_jobs: int
The number of jobs to run in parallel for both fit and predict.
Set -1 to use all cpu cores (default).
Inputs ``X`` will be split into some blocks then run on each cpu cores.
on_errors: string
How to handle exceptions in feature calculations. Can be 'nan', 'keep', 'raise'.
When 'nan', return a column with ``np.nan``.
The length of column corresponding to the number of feature labs.
When 'keep', return a column with exception objects.
The default is 'raise' which will raise up the exception.
return_type: str
# license that can be found in the LICENSE file.
from typing import Union, List
import numpy as np
from xenonpy.descriptor.base import BaseDescriptor, BaseCompositionFeaturizer
# Public API of this module: star-imports expose only these featurizers.
__all__ = [
'Compositions', 'Counting', 'WeightedAverage', 'WeightedSum',
'WeightedVariance', 'HarmonicMean', 'GeometricMean', 'MaxPooling',
'MinPooling'
]
class Counting(BaseCompositionFeaturizer):
def __init__(self,
*,
one_hot_vec=False,
n_jobs=-1,
on_errors='raise',
return_type='any'):
"""
Parameters
----------
one_hot_vec : bool
Set ``true`` to using one-hot-vector encoding.
n_jobs: int
The number of jobs to run in parallel for both fit and predict.
Set -1 to use all cpu cores (default).
Inputs ``X`` will be split into some blocks then run on each cpu cores.