Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_register(self):
    """Check that ``formats.register`` adds a format class to the registry.

    The registry object is fetched once, before registration, and the same
    object is inspected again afterwards.
    """
    format_name = 'trt'
    known_formats = formats.get_registry()
    # Precondition: the custom format class is not registered yet.
    assert_false(CustomFormat in known_formats.values())
    formats.register(format_name, CustomFormat)
    # Both the class and the name it was registered under must now appear.
    assert_in(CustomFormat, known_formats.values())
    assert_in(format_name, known_formats.keys())
def test_detect_csv(self):
    """Check that ``formats.detect`` returns ``formats.CSV`` for CSV_FILE."""
    with open(CSV_FILE) as csv_handle:
        detected = formats.detect(csv_handle)
    # Detection yields the format class itself, which is compared directly.
    assert_equal(detected, formats.CSV)
def test_init_with_custom_format(self):
    """Build a classifier from a custom format registered under 'redis'.

    The mock format's constructor requires ``port``, which the test supplies
    as an extra keyword argument to ``NaiveBayesClassifier``. The resulting
    classifier's ``train_set`` must equal the data the format yields.
    """
    training_data = [('I like turtles', 'pos'), ('I hate turtles', 'neg')]

    # Stand-in format: stores its constructor arguments and always yields the
    # fixed training data defined above.
    class MockRedisFormat(formats.BaseFormat):
        def __init__(self, client, port):
            self.client, self.port = client, port

        @classmethod
        def detect(cls, stream):
            return True

        def to_iterable(self):
            return training_data

    formats.register('redis', MockRedisFormat)
    fake_client = mock.Mock()
    classifier = NaiveBayesClassifier(fake_client, format='redis', port=1234)
    assert_equal(classifier.train_set, training_data)
def test_register(self):
    """Registering ``CustomFormat`` as 'trt' must make it visible in the registry."""
    format_registry = formats.get_registry()

    # Before registration the class must be absent from the registry values.
    already_present = CustomFormat in format_registry.values()
    assert_false(already_present)

    formats.register('trt', CustomFormat)

    # After registration both the class and its key are expected to be found
    # through the registry object obtained at the start of the test.
    assert_in(CustomFormat, format_registry.values())
    assert_in('trt', format_registry.keys())
# NOTE(review): the `def` line that owns these statements is not present in
# this excerpt; they look like the body of a JSON-detection test -- confirm
# against the full file before relying on their placement.
# The full text of JSON_FILE is read into a string and passed to
# formats.JSON.detect, which must return a truthy value.
with open(JSON_FILE, 'r') as fp:
stream = fp.read()
assert_true(formats.JSON.detect(stream))
# The full text of CSV_FILE must be rejected (falsy result) by the same
# JSON detector.
with open(CSV_FILE, 'r') as fp:
stream = fp.read()
assert_false(formats.JSON.detect(stream))
def test_to_iterable(self):
    """Check that the first text yielded by ``formats.JSON`` is ``unicode``.

    The first record of the iterable is indexed at positions 0 (text) and
    1 (label); only the text's type is asserted.
    """
    with open(JSON_FILE) as json_handle:
        json_format = formats.JSON(json_handle)
        # Kept inside the ``with`` so the file is still open if the format
        # reads lazily.
        records = json_format.to_iterable()
        first_record = records[0]
        text, label = first_record[0], first_record[1]
        assert_true(isinstance(text, unicode))
class CustomFormat(formats.BaseFormat):
    """Minimal format class used by the registry tests.

    It yields two fixed (text, label) pairs and reports every stream as
    matching.
    """

    def to_iterable(self):
        """Return the fixed list of (text, label) training pairs.

        The method takes ``self`` so that it can be called on an instance;
        without it, ``instance.to_iterable()`` raises ``TypeError``.
        """
        return [
            ('I like turtles', 'pos'),
            ('I hate turtles', 'neg'),
        ]

    @classmethod
    def detect(cls, stream):
        """Return True for any ``stream``; the argument is not inspected."""
        return True
class TestRegistry(unittest.TestCase):
    """Test case covering the format registry."""

    def setUp(self):
        """No per-test preparation is performed."""
        return None

    def test_register(self):
        """Fetch the format registry."""
        # NOTE(review): no assertions follow in this excerpt; the body may be
        # truncated -- confirm against the full file.
        current_registry = formats.get_registry()
def test_read_from_filename(self):
    """Construct a ``formats.CSV`` object from the opened CSV_FILE."""
    # NOTE(review): nothing is asserted in this excerpt; the body may be
    # truncated -- confirm against the full file.
    with open(CSV_FILE) as csv_handle:
        csv_format = formats.CSV(csv_handle)
def _read_data(self, dataset, format=None):
    """Read a data source and return an iterable usable as testing or
    training data.

    :param dataset: the data source handed to the format class.
    :param format: key into ``formats.AVAILABLE``; when falsy, the format
        class is chosen by ``formats.detect(dataset)`` instead.
    :raises ValueError: if ``format`` is given but is not a key of
        ``formats.AVAILABLE``.
    """
    if format:
        # An explicit format name must be one of the known formats.
        if format not in formats.AVAILABLE.keys():
            raise ValueError("'{0}' format not supported.".format(format))
        reader_class = formats.AVAILABLE[format]
    else:
        # No name given: let the formats module pick a class for the data.
        reader_class = formats.detect(dataset)
    reader = reader_class(dataset)
    return reader.to_iterable()
def _read_data(self, dataset, format=None):
    """Read a data source and return an iterable that can be used as
    testing or training data.

    :param dataset: the data source handed to the format class.
    :param format: name of a registered format; when falsy, the format
        class is chosen by ``formats.detect(dataset)`` instead.
    :raises FormatError: if no name is given and detection returns a falsy
        value.
    :raises ValueError: if ``format`` is given but is not in the registry.
    """
    if format:
        # Explicit name: look the class up in the format registry.
        available = formats.get_registry()
        if format not in available.keys():
            raise ValueError("'{0}' format not supported.".format(format))
        reader_class = available[format]
    else:
        # No name given: fall back to automatic detection.
        reader_class = formats.detect(dataset)
        if not reader_class:
            raise FormatError('Could not automatically detect format for the given '
                              'data source.')
    # ``self.format_kwargs`` is passed on to the format class as keyword
    # arguments.
    reader = reader_class(dataset, **self.format_kwargs)
    return reader.to_iterable()
def _read_data(self, dataset, format=None):
    """Load ``dataset`` through a format class and return its iterable of
    testing or training data.

    :param dataset: the data source handed to the format class.
    :param format: name of a registered format; a falsy value triggers
        ``formats.detect(dataset)``.
    :raises FormatError: if detection is attempted and yields a falsy value.
    :raises ValueError: if ``format`` is not a key of the format registry.
    """
    if not format:
        # Attempt to detect the format from the data source itself.
        loader_class = formats.detect(dataset)
        if not loader_class:
            message = ('Could not automatically detect format for the given '
                       'data source.')
            raise FormatError(message)
    else:
        known_formats = formats.get_registry()
        if format not in known_formats.keys():
            message = "'{0}' format not supported.".format(format)
            raise ValueError(message)
        loader_class = known_formats[format]
    # The instance's ``format_kwargs`` are passed as keyword arguments to the
    # format class.
    return loader_class(dataset, **self.format_kwargs).to_iterable()