How to use the textblob.formats module in textblob

To help you get started, we’ve selected a few textblob examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github sloria / TextBlob / tests / test_formats.py View on Github external
def test_register(self):
        """Registering ``CustomFormat`` under 'trt' adds it to the registry."""
        known_formats = formats.get_registry()
        # Precondition: the custom format is not among the registered values.
        already_present = CustomFormat in known_formats.values()
        assert_false(already_present)

        formats.register('trt', CustomFormat)

        # The mapping fetched before registration is checked again, for both
        # the registered class and the name it was registered under.
        assert_in(CustomFormat, known_formats.values())
        assert_in('trt', known_formats.keys())
github sloria / TextBlob / tests / test_formats.py View on Github external
def test_detect_csv(self):
        """``formats.detect`` on the opened CSV fixture yields ``formats.CSV``."""
        with open(CSV_FILE) as csv_handle:
            detected = formats.detect(csv_handle)
        # The comparison happens after the file has been closed.
        assert_equal(detected, formats.CSV)
github sloria / TextBlob / tests / test_classifiers.py View on Github external
def test_init_with_custom_format(self):
        """A classifier built with a registered custom format uses its data.

        A stub format is registered under 'redis'; the classifier is then
        constructed with ``format='redis'`` plus an extra ``port`` keyword,
        and its ``train_set`` is compared with the stub's data.
        """
        expected_train_set = [
            ('I like turtles', 'pos'),
            ('I hate turtles', 'neg'),
        ]

        class MockRedisFormat(formats.BaseFormat):
            """Stub format that stores its arguments and serves fixed data."""

            def __init__(self, client, port):
                self.client = client
                self.port = port

            def to_iterable(self):
                # Always hand back the list captured from the enclosing test.
                return expected_train_set

            @classmethod
            def detect(cls, stream):
                # Claims to match any stream.
                return True

        formats.register('redis', MockRedisFormat)
        redis_client = mock.Mock()
        classifier = NaiveBayesClassifier(redis_client, format='redis', port=1234)
        assert_equal(classifier.train_set, expected_train_set)
github sloria / TextBlob / tests / test_formats.py View on Github external
def test_register(self):
        """Check that ``formats.register`` makes 'trt' -> ``CustomFormat`` visible."""
        format_registry = formats.get_registry()
        # Before registering, the class must be absent from the registry values.
        is_registered = CustomFormat in format_registry.values()
        assert_false(is_registered)

        formats.register('trt', CustomFormat)

        # Re-inspect the previously fetched mapping: both the class and its
        # registration name are expected to be present now.
        assert_in(CustomFormat, format_registry.values())
        assert_in('trt', format_registry.keys())
github sloria / TextBlob / tests / test_formats.py View on Github external
with open(JSON_FILE, 'r') as fp:
            stream = fp.read()
            assert_true(formats.JSON.detect(stream))
        with open(CSV_FILE, 'r') as fp:
            stream = fp.read()
            assert_false(formats.JSON.detect(stream))

    def test_to_iterable(self):
        """The first item from ``formats.JSON(...).to_iterable()`` has unicode text."""
        with open(JSON_FILE) as json_handle:
            json_format = formats.JSON(json_handle)
        # to_iterable() is called after the file handle has been closed.
        records = json_format.to_iterable()
        head = records[0]
        # Both positions of the first record are read; only the text is checked.
        text, label = head[0], head[1]
        text_is_unicode = isinstance(text, unicode)
        assert_true(text_is_unicode)

class CustomFormat(formats.BaseFormat):
    """Minimal format with hard-coded data, registered under 'trt' by the tests."""

    def to_iterable(self):
        """Return a fixed list of two ``(text, label)`` pairs.

        Takes ``self`` so it can be called on an instance, which is how
        format classes are invoked (``format_class(dataset).to_iterable()``);
        without it an instance call raises TypeError.
        """
        return [
            ('I like turtles', 'pos'),
            ('I hate turtles', 'neg'),
        ]

    @classmethod
    def detect(cls, stream):
        """Report a match for any ``stream``; the argument is not inspected."""
        return True


class TestRegistry(unittest.TestCase):
    """Test case covering the format registry exposed by ``formats``."""

    def setUp(self):
        # No per-test fixtures are prepared.
        pass

    def test_register(self):
        # NOTE(review): only the first statement of this test is visible in
        # this excerpt; the remaining assertions are presumably cut off.
        registry = formats.get_registry()
github sloria / TextBlob / tests / test_formats.py View on Github external
def test_read_from_filename(self):
        """Build a ``formats.CSV`` from the opened CSV fixture.

        No assertion follows in the visible code; the test only passes an
        open file object (not a filename) to the constructor.
        """
        with open(CSV_FILE) as csv_handle:
            data = formats.CSV(csv_handle)
github markuskiller / textblob-de / textblob_de / classifiers.py View on Github external
def _read_data(self, dataset, format=None):
        """Read a data file and return an iterable usable as testing or
        training data.

        :param dataset: The data source handed to the format class.
        :param format: Optional key into ``formats.AVAILABLE``. When falsy,
            ``formats.detect`` is asked to pick the format class.
        :raises ValueError: If ``format`` is given but is not a key of
            ``formats.AVAILABLE``.
        """
        if format:
            # Explicit format name: it must be one of the available keys.
            if format not in formats.AVAILABLE.keys():
                raise ValueError("'{0}' format not supported.".format(format))
            reader_class = formats.AVAILABLE[format]
        else:
            # No format specified: attempt to detect it from the data source.
            reader_class = formats.detect(dataset)
        reader = reader_class(dataset)
        return reader.to_iterable()
github sloria / TextBlob / textblob / classifiers.py View on Github external
def _read_data(self, dataset, format=None):
        """Read a data source and return an iterable usable as testing or
        training data.

        :param dataset: The data source handed to the format class.
        :param format: Optional name of a registered format. When falsy,
            the format is detected with ``formats.detect``.
        :raises FormatError: If no format is given and none is detected.
        :raises ValueError: If ``format`` is given but is not in the registry.
        """
        if format:
            # Explicit format name: look it up among the registered formats.
            available = formats.get_registry()
            if format not in available.keys():
                raise ValueError("'{0}' format not supported.".format(format))
            reader_class = available[format]
        else:
            # No format specified: attempt to detect it from the data source.
            reader_class = formats.detect(dataset)
            if not reader_class:
                raise FormatError('Could not automatically detect format for the given '
                                  'data source.')
        # Extra keyword arguments stored on the instance go to the format class.
        reader = reader_class(dataset, **self.format_kwargs)
        return reader.to_iterable()
github sloria / TextBlob / textblob / classifiers.py View on Github external
def _read_data(self, dataset, format=None):
        """Load ``dataset`` via a format class and return its iterable form,
        suitable as testing or training data.

        :param dataset: The data source passed to the chosen format class.
        :param format: Optional registry key naming the format; a falsy value
            triggers detection through ``formats.detect``.
        :raises FormatError: If detection is attempted and yields nothing.
        :raises ValueError: If ``format`` is not a key of the registry.
        """
        if format:
            # A named format has to be present in the registry.
            registered = formats.get_registry()
            if format not in registered.keys():
                raise ValueError("'{0}' format not supported.".format(format))
            chosen_format = registered[format]
        else:
            # Fall back to automatic detection when no name is supplied.
            chosen_format = formats.detect(dataset)
            if not chosen_format:
                raise FormatError('Could not automatically detect format for the given '
                                  'data source.')
        # Instance-level format keyword arguments are forwarded unchanged.
        return chosen_format(dataset, **self.format_kwargs).to_iterable()