How to use scrubadub - 10 common examples

To help you get started, we’ve selected a few scrubadub examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github datascopeanalytics / scrubadub / tests / test_canonical_string_set.py View on Github external
def test_init(self):
        """differently-cased duplicates given to the constructor collapse to one entry"""
        variants = ['TKTK', 'tKtK', 'Tktk']
        canonical = CanonicalStringSet(variants)
        # the lower-case spelling must be present, and it must be the only member
        self.assertTrue('tktk' in canonical)
        self.assertEqual(len(canonical), 1)
github datascopeanalytics / scrubadub / tests / test_canonical_string_set.py View on Github external
def test_add(self):
        """adding differently-cased spellings one at a time yields a single entry"""
        canonical = CanonicalStringSet()
        for variant in ('TKTK', 'tKtK', 'Tktk'):
            canonical.add(variant)
        # the lower-case spelling must be present, and it must be the only member
        self.assertTrue('tktk' in canonical)
        self.assertEqual(len(canonical), 1)
github datascopeanalytics / scrubadub / tests / test_canonical_string_set.py View on Github external
def test_update(self):
        """update() with differently-cased spellings yields a single entry"""
        variants = ['TKTK', 'tKtK', 'Tktk']
        canonical = CanonicalStringSet()
        canonical.update(variants)
        # the lower-case spelling must be present, and it must be the only member
        self.assertTrue('tktk' in canonical)
        self.assertEqual(len(canonical), 1)
github datascopeanalytics / scrubadub / tests / test_canonical_string_set.py View on Github external
def test_contains(self):
        """membership checks succeed for any casing of a stored lower-case string"""
        canonical = CanonicalStringSet(['tktk'])
        for variant in ('TKTK', 'Tktk', 'tKtK'):
            self.assertTrue(variant in canonical)
github datascopeanalytics / scrubadub / tests / test_canonical_string_set.py View on Github external
def test_remove(self):
        """removing an upper-case spelling drops the stored lower-case entry"""
        canonical = CanonicalStringSet(['tktk'])
        canonical.remove('TKTK')
        self.assertFalse('tktk' in canonical)
github datascopeanalytics / scrubadub / tests / test_canonical_string_set.py View on Github external
def test_discard(self):
        """discarding several casings of the same string leaves the entry absent"""
        canonical = CanonicalStringSet(['tktk'])
        # discard is called repeatedly with different casings; after the
        # sequence the lower-case entry must no longer be a member
        for variant in ('TKTK', 'TkTk', 'Tktk'):
            canonical.discard(variant)
        self.assertFalse('tktk' in canonical)
github datascopeanalytics / scrubadub / tests / test_advanced.py View on Github external
def test_disable_email(self):
        """
        BEFORE: contact Joe Duffy at joe@example.com
        AFTER:  contact {{NAME}} {{NAME}} at joe@example.com
        """
        # NOTE(review): get_before_after() presumably reads the BEFORE/AFTER
        # lines of the docstring above -- keep them verbatim; confirm against
        # the base test case.
        before, after = self.get_before_after()
        import scrubadub
        scrubber = scrubadub.Scrubber()
        # with the 'email' detector removed, the address is expected to
        # remain in the cleaned text (see AFTER above)
        scrubber.remove_detector('email')
        cleaned = scrubber.clean(before)
        self.check_equal(after, cleaned)
github datascopeanalytics / scrubadub / tests / test_scrubbers.py View on Github external
def test_filth_merge_placeholder(self):
        """every filth found is a MergedFilth whose placeholder names SKYPE and EMAIL"""
        text = "you can skype me at john.doe"
        scrubber = scrubadub.Scrubber()
        # NOTE(review): if iter_filth() yields nothing, the loop body never
        # runs and this test passes without checking anything.
        for found in scrubber.iter_filth(text):
            self.assertIsInstance(found, MergedFilth)
            for label in ('SKYPE', 'EMAIL'):
                # the placeholder doubles as the failure message
                self.assertTrue(label in found.placeholder, found.placeholder)
github datascopeanalytics / scrubadub / tests / test_scrubbers.py View on Github external
def test_filth_ordering(self):
        """filth boundaries come back already sorted by position"""
        scrubber = scrubadub.Scrubber()
        text = (
            "Alan can be reached by email alan@example.com or "
            "phone +1.312.456.6421"
        )
        # flatten each filth's (beg, end) pair into one sequence, in the
        # order the scrubber yields them
        positions = [
            boundary
            for filth in scrubber.iter_filth(text)
            for boundary in (filth.beg, filth.end)
        ]
        self.assertEqual(sorted(positions), positions)
github datascopeanalytics / scrubadub / tests / test_advanced.py View on Github external
def test_customize_filth_identification(self):
        """
        BEFORE: contact Joe Duffy at joe@example.com
        AFTER:  contact <b>NAME</b> <b>NAME</b> at <b>EMAIL</b>
        """
        # NOTE(review): get_before_after() presumably reads the BEFORE/AFTER
        # lines of the docstring above -- keep them verbatim; confirm against
        # the base test case.
        before, after = self.get_before_after()
        import scrubadub
        filth_cls = scrubadub.filth.base.Filth
        # remember the class-level markers so they can be put back afterwards
        prefix = filth_cls.prefix
        suffix = filth_cls.suffix
        try:
            filth_cls.prefix = u'<b>'
            filth_cls.suffix = u'</b>'
            scrubber = scrubadub.Scrubber()
            self.check_equal(after, scrubber.clean(before))
        finally:
            # prefix/suffix are class attributes shared by the whole process;
            # restore them even when the assertion fails so that a failure
            # here does not leak '<b>'/'</b>' into later tests
            filth_cls.prefix = prefix
            filth_cls.suffix = suffix