Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _fit_hyper_transformer(self, data):
"""Create and return a new ``rdt.HyperTransformer`` instance.
First get the ``dtypes`` and then use them to build a transformer dictionary
to be used by the ``HyperTransformer``.
Returns:
rdt.HyperTransformer
"""
dtypes = self.get_dtypes(ids=False)
transformers_dict = self._get_transformers(dtypes)
self._hyper_transformer = rdt.HyperTransformer(transformers=transformers_dict)
self._hyper_transformer.fit(data[list(dtypes.keys())])
metavar='FILENAME')
options, args = parser.parse_args()
if options.url is None:
print('You must enter a url to download\n')
parser.print_help()
exit()
if not options.file:
options.file = re.search('[^/]*$', options.url).group(0)
if not options.file or len(options.file) < 1:
print('Invalid file name specified\n')
parser.print_help()
exit()
log.startLogging(sys.stdout)
factory = RTSPClientFactory(options.url, options.file)
factory.protocol = RDTClient
factory.bandwidth = 99999999999
factory.deferred.addCallback(success).addErrback(error)
reactor.connectTCP(factory.host, factory.port, factory)
reactor.callLater(1, progress, factory)
reactor.run()
def _get_transformers(self, dtypes):
"""Create the transformer instances needed to process the given dtypes.
Args:
dtypes (dict):
mapping of field names and dtypes.
Returns:
dict:
mapping of field names and transformer instances.
"""
transformer_templates = {
'i': rdt.transformers.NumericalTransformer(dtype=int),
'f': rdt.transformers.NumericalTransformer(dtype=float),
'O': rdt.transformers.CategoricalTransformer,
'b': rdt.transformers.BooleanTransformer,
'M': rdt.transformers.DatetimeTransformer,
}
transformer_templates.update(self._transformer_templates)
transformers = dict()
for name, dtype in dtypes.items():
transformer_template = transformer_templates[np.dtype(dtype).kind]
if isinstance(transformer_template, type):
transformer = transformer_template()
else:
transformer = copy.deepcopy(transformer_template)
LOGGER.info('Loading transformer %s for field %s',
transformer.__class__.__name__, name)
Args:
dtypes (dict):
mapping of field names and dtypes.
pii_fields (dict):
mapping of pii field names and categories.
Returns:
dict:
mapping of field names and transformer instances.
"""
transformers_dict = dict()
for name, dtype in dtypes.items():
dtype = np.dtype(dtype)
if dtype.kind == 'i':
transformer = transformers.NumericalTransformer(dtype=int)
elif dtype.kind == 'f':
transformer = transformers.NumericalTransformer(dtype=float)
elif dtype.kind == 'O':
anonymize = pii_fields.get(name)
transformer = transformers.CategoricalTransformer(anonymize=anonymize)
elif dtype.kind == 'b':
transformer = transformers.BooleanTransformer()
elif dtype.kind == 'M':
transformer = transformers.DatetimeTransformer()
else:
raise ValueError('Unsupported dtype: {}'.format(dtype))
LOGGER.info('Loading transformer %s for field %s',
transformer.__class__.__name__, name)
transformers_dict[name] = transformer
dtypes (dict):
mapping of field names and dtypes.
pii_fields (dict):
mapping of pii field names and categories.
Returns:
dict:
mapping of field names and transformer instances.
"""
transformers_dict = dict()
for name, dtype in dtypes.items():
dtype = np.dtype(dtype)
if dtype.kind == 'i':
transformer = transformers.NumericalTransformer(dtype=int)
elif dtype.kind == 'f':
transformer = transformers.NumericalTransformer(dtype=float)
elif dtype.kind == 'O':
anonymize = pii_fields.get(name)
transformer = transformers.CategoricalTransformer(anonymize=anonymize)
elif dtype.kind == 'b':
transformer = transformers.BooleanTransformer()
elif dtype.kind == 'M':
transformer = transformers.DatetimeTransformer()
else:
raise ValueError('Unsupported dtype: {}'.format(dtype))
LOGGER.info('Loading transformer %s for field %s',
transformer.__class__.__name__, name)
transformers_dict[name] = transformer
return transformers_dict
mapping of pii field names and categories.
Returns:
dict:
mapping of field names and transformer instances.
"""
transformers_dict = dict()
for name, dtype in dtypes.items():
dtype = np.dtype(dtype)
if dtype.kind == 'i':
transformer = transformers.NumericalTransformer(dtype=int)
elif dtype.kind == 'f':
transformer = transformers.NumericalTransformer(dtype=float)
elif dtype.kind == 'O':
anonymize = pii_fields.get(name)
transformer = transformers.CategoricalTransformer(anonymize=anonymize)
elif dtype.kind == 'b':
transformer = transformers.BooleanTransformer()
elif dtype.kind == 'M':
transformer = transformers.DatetimeTransformer()
else:
raise ValueError('Unsupported dtype: {}'.format(dtype))
LOGGER.info('Loading transformer %s for field %s',
transformer.__class__.__name__, name)
transformers_dict[name] = transformer
return transformers_dict
def _get_transformers(self, dtypes):
"""Create the transformer instances needed to process the given dtypes.
Args:
dtypes (dict):
mapping of field names and dtypes.
Returns:
dict:
mapping of field names and transformer instances.
"""
transformer_templates = {
'i': rdt.transformers.NumericalTransformer(dtype=int),
'f': rdt.transformers.NumericalTransformer(dtype=float),
'O': rdt.transformers.CategoricalTransformer,
'b': rdt.transformers.BooleanTransformer,
'M': rdt.transformers.DatetimeTransformer,
}
transformer_templates.update(self._transformer_templates)
transformers = dict()
for name, dtype in dtypes.items():
transformer_template = transformer_templates[np.dtype(dtype).kind]
if isinstance(transformer_template, type):
transformer = transformer_template()
else:
transformer = copy.deepcopy(transformer_template)
LOGGER.info('Loading transformer %s for field %s',
transformer.__class__.__name__, name)
transformers[name] = transformer
},
'categorical_transformer': {
'type': 'str',
'default': 'categoircal_fuzzy',
'description': 'Type of transformer to use for the categorical variables',
'choices': [
'categorical',
'categorical_fuzzy',
'one_hot_encoding',
'label_encoding'
]
}
}
DEFAULT_TRANSFORMER = 'one_hot_encoding'
CATEGORICAL_TRANSFORMERS = {
'categorical': rdt.transformers.CategoricalTransformer(fuzzy=False),
'categorical_fuzzy': rdt.transformers.CategoricalTransformer(fuzzy=True),
'one_hot_encoding': rdt.transformers.OneHotEncodingTransformer,
'label_encoding': rdt.transformers.LabelEncodingTransformer,
}
TRANSFORMER_TEMPLATES = {
'O': rdt.transformers.OneHotEncodingTransformer
}
def __init__(self, distribution=None, categorical_transformer=None, *args, **kwargs):
super().__init__(*args, **kwargs)
if self._metadata is not None and 'model_kwargs' in self._metadata._metadata:
model_kwargs = self._metadata._metadata['model_kwargs']
if distribution is None:
distribution = model_kwargs['distribution']
'categorical_transformer': {
'type': 'str',
'default': 'categoircal_fuzzy',
'description': 'Type of transformer to use for the categorical variables',
'choices': [
'categorical',
'categorical_fuzzy',
'one_hot_encoding',
'label_encoding'
]
}
}
DEFAULT_TRANSFORMER = 'one_hot_encoding'
CATEGORICAL_TRANSFORMERS = {
'categorical': rdt.transformers.CategoricalTransformer(fuzzy=False),
'categorical_fuzzy': rdt.transformers.CategoricalTransformer(fuzzy=True),
'one_hot_encoding': rdt.transformers.OneHotEncodingTransformer,
'label_encoding': rdt.transformers.LabelEncodingTransformer,
}
TRANSFORMER_TEMPLATES = {
'O': rdt.transformers.OneHotEncodingTransformer
}
def __init__(self, distribution=None, categorical_transformer=None, *args, **kwargs):
super().__init__(*args, **kwargs)
if self._metadata is not None and 'model_kwargs' in self._metadata._metadata:
model_kwargs = self._metadata._metadata['model_kwargs']
if distribution is None:
distribution = model_kwargs['distribution']
if categorical_transformer is None:
"""Create the transformer instances needed to process the given dtypes.
Args:
dtypes (dict):
mapping of field names and dtypes.
Returns:
dict:
mapping of field names and transformer instances.
"""
transformer_templates = {
'i': rdt.transformers.NumericalTransformer(dtype=int),
'f': rdt.transformers.NumericalTransformer(dtype=float),
'O': rdt.transformers.CategoricalTransformer,
'b': rdt.transformers.BooleanTransformer,
'M': rdt.transformers.DatetimeTransformer,
}
transformer_templates.update(self._transformer_templates)
transformers = dict()
for name, dtype in dtypes.items():
transformer_template = transformer_templates[np.dtype(dtype).kind]
if isinstance(transformer_template, type):
transformer = transformer_template()
else:
transformer = copy.deepcopy(transformer_template)
LOGGER.info('Loading transformer %s for field %s',
transformer.__class__.__name__, name)
transformers[name] = transformer
return transformers