Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_detector_filth_cls(self):
    """Detector.filth_cls should always exist.

    Walks every detector class stored in ``scrubadub.detectors.types`` and
    asserts that each one exposes a truthy ``filth_cls`` attribute.
    """
    # ``dict.iteritems()`` only exists on Python 2; ``values()`` works on
    # both Python 2 and 3, and the dictionary keys were never used here.
    for detector_cls in scrubadub.detectors.types.values():
        self.assertTrue(
            getattr(detector_cls, 'filth_cls', False),
            '%s does not have a filth_cls set' % detector_cls
        )
def test_add_duplicate_detector(self):
    """Adding a detector class that is already present must raise KeyError."""
    scrubber_under_test = scrubadub.Scrubber()

    # the test expects this detector to be present on a fresh Scrubber,
    # so handing it to add_detector a second time should be rejected
    duplicate_cls = scrubadub.detectors.email.EmailDetector
    with self.assertRaises(KeyError):
        scrubber_under_test.add_detector(duplicate_cls)
"""scrubadub has some very conservative defaults (high recall) for identifying
filth. One of the key ways in which scrubadub can be customized is in improving
the precision of filth detection.
For example, if a user knows that the word 'iPhone' is not a person's name, but
a product, then a user should be able to easily adapt how scrubadub identifies
names.
"""
import scrubadub
# fine-tune how scrubadub detects names and omit product names
# https://github.com/deanmalmgren/scrubadub/issues/6
class MyNameDetector(scrubadub.detectors.NameDetector):
    """Name detector that does not treat the product name 'iPhone' as a name."""

    def iter_filth(self, text):
        """Yield the parent detector's filth, skipping matches on 'iPhone'.

        :param text: the text handed on to the parent ``iter_filth``
        """
        for filth in super(MyNameDetector, self).iter_filth(text):
            # compare the matched text, not the Filth object itself: a Filth
            # instance would not equal a plain string, so the previous
            # `filth != "iPhone"` check never filtered anything out
            if filth.text != "iPhone":
                yield filth
# Instantiate a scrubber and swap in our custom class under the 'name' key
# of its detectors mapping, replacing whatever was stored there.
scrubber = scrubadub.Scrubber()
scrubber.detectors['name'] = MyNameDetector()
# These methods on a Scrubber object should have identical behavior to the
# scrubadub.clean convenience function. Each call below overwrites
# clean_text, so only the result of the last call is kept.
# NOTE(review): `text` and `lookup` are not defined in the visible part of
# this example -- confirm they are supplied elsewhere before running it.
clean_text = scrubber.clean(text)
clean_text = scrubber.clean(text, replace_with="placeholder")
clean_text = scrubber.clean(text, replace_with="surrogate")
clean_text = scrubber.clean(text, replace_with="identifier", lookup=lookup)
def __init__(self, *args, **kwargs):
    """Set up the scrubber with one detector per available detector class.

    Positional and keyword arguments are forwarded untouched to the parent
    initializer.
    """
    super(Scrubber, self).__init__(*args, **kwargs)

    # start with an empty detector mapping, then hand every class yielded
    # by detectors.iter_detector_clss() to add_detector; by default that
    # covers all of the detectors in the detectors.types dictionary
    self._detectors = {}
    for cls_to_register in detectors.iter_detector_clss():
        self.add_detector(cls_to_register)