Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_disable_email(self):
    """
    BEFORE: contact Joe Duffy at joe@example.com
    AFTER: contact {{NAME}} {{NAME}} at joe@example.com
    """
    # NOTE(review): the two marker lines in the docstring above are presumably
    # the fixture consumed by self.get_before_after(); keep them verbatim.
    dirty, expected = self.get_before_after()
    import scrubadub
    scrubber = scrubadub.Scrubber()
    # With the email detector removed, the address is expected to survive
    # scrubbing while the name is still replaced.
    scrubber.remove_detector('email')
    cleaned = scrubber.clean(dirty)
    self.check_equal(expected, cleaned)
def test_filth_merge_placeholder(self):
    """Merged filth must carry a placeholder naming each merged type.

    The sample text is expected to trigger both the skype and the email
    detectors on overlapping spans, so every filth yielded should be a
    MergedFilth whose placeholder mentions both SKYPE and EMAIL.
    """
    text = "you can skype me at john.doe"
    scrubber = scrubadub.Scrubber()
    filths = list(scrubber.iter_filth(text))
    # Guard against a vacuous pass: if nothing is detected, the loop below
    # would run zero times and the test would succeed without checking anything.
    self.assertTrue(filths, "no filth detected in %r" % text)
    for filth in filths:
        self.assertIsInstance(filth, MergedFilth)
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn('SKYPE', filth.placeholder)
        self.assertIn('EMAIL', filth.placeholder)
def test_filth_ordering(self):
    """Filth must be yielded in ascending order of position in the text."""
    text = (
        "Alan can be reached by email alan@example.com or "
        "phone +1.312.456.6421"
    )
    scrubber = scrubadub.Scrubber()
    # Flatten every (beg, end) pair into one list; if the filth arrives in
    # order, that list is already sorted.
    positions = []
    for filth in scrubber.iter_filth(text):
        positions.extend((filth.beg, filth.end))
    self.assertEqual(sorted(positions), positions)
def test_customize_filth_identification(self):
    """
    BEFORE: contact Joe Duffy at joe@example.com
    AFTER: contact <b>NAME</b> <b>NAME</b> at <b>EMAIL</b>
    """
    # NOTE(review): the two marker lines in the docstring above are presumably
    # the fixture consumed by self.get_before_after(); keep them verbatim.
    before, after = self.get_before_after()
    import scrubadub
    filth_cls = scrubadub.filth.base.Filth
    # prefix/suffix are class attributes, i.e. process-wide state shared with
    # every other test, so remember the originals before overriding them.
    prefix, suffix = filth_cls.prefix, filth_cls.suffix
    try:
        filth_cls.prefix = u'<b>'
        filth_cls.suffix = u'</b>'
        scrubber = scrubadub.Scrubber()
        self.check_equal(after, scrubber.clean(before))
    finally:
        # Restore even when the assertion fails; otherwise the custom
        # markers would leak into subsequently executed tests.
        filth_cls.prefix = prefix
        filth_cls.suffix = suffix
def test_add_non_detector(self):
    """Registering a class that is not a Detector must raise TypeError."""
    scrubber = scrubadub.Scrubber()

    class NotDetector(object):
        pass

    # Callable form of assertRaises: invokes add_detector(NotDetector) and
    # checks that it raises TypeError.
    self.assertRaises(TypeError, scrubber.add_detector, NotDetector)
def test_filth_merge(self):
    """Overlapping filth must collapse into a single piece of filth."""
    # 'me at john.doe' looks like an email address, and the sentence also
    # looks like a skype handle, so the detections overlap.
    text = "you can skype me at john.doe"
    scrubber = scrubadub.Scrubber()
    merged = list(scrubber.iter_filth(text))
    self.assertEqual(len(merged), 1)
def test_add_duplicate_detector(self):
    """Adding a detector that the scrubber already has must raise KeyError."""
    scrubber = scrubadub.Scrubber()
    with self.assertRaises(KeyError):
        # Expected to be registered on a fresh Scrubber already, so adding
        # it a second time is the duplicate under test.
        duplicate = scrubadub.detectors.email.EmailDetector
        scrubber.add_detector(duplicate)
"""scrubadub ships with a very good method for resolving conflicts between
overlapping pieces of filth. There may be cases where it is necessary to
resolve these conflicts in a customized way to account for additional
information that someone might have.
For example, a user may preferentially want to remove any hint of a name from
text.
"""
import scrubadub
from scrubadub.filth import NameFilth
class MyScrubber(scrubadub.Scrubber):
    """Scrubber that lets name filth win whenever filth conflicts."""

    def resolve_conflicting_filth(self, *filths):
        """Pick the filth to keep among conflicting ``filths``.

        Returns the first NameFilth instance if there is one; otherwise
        defers to the base class's conflict resolution.
        """
        name_filth = next(
            (candidate for candidate in filths
             if isinstance(candidate, NameFilth)),
            None,
        )
        if name_filth is not None:
            return name_filth
        return super(MyScrubber, self).resolve_conflicting_filth(*filths)
# these methods on a Scrubber object should have identical behavior to the
# scrubadub.clean convenience function
# NOTE(review): `text` is not defined in this excerpt; it must be bound before
# this point for the calls below to run. The cleaned results are discarded
# here -- the calls only demonstrate the available `replace_with` modes.
scrubber = MyScrubber()
scrubber.clean(text)  # no replace_with given, so the library default applies
scrubber.clean(text, replace_with="placeholder")
scrubber.clean(text, replace_with="identifier")
scrubber.clean(text, replace_with="surrogate")
a product, then a user should be able to easily adapt how scrubadub identifies
names.
"""
import scrubadub
# fine-tune how scrubadub detects names and omit product names
# https://github.com/deanmalmgren/scrubadub/issues/6
class MyNameDetector(scrubadub.detectors.NameDetector):
    """Name detector that does not report the product name "iPhone"."""

    def iter_filth(self, text):
        """Yield the base detector's filth for ``text``, skipping "iPhone".

        Everything the parent NameDetector finds is passed through unchanged
        except filth whose matched text is exactly "iPhone".
        """
        for filth in super(MyNameDetector, self).iter_filth(text):
            # Compare the matched text rather than the filth object itself:
            # a filth object is not a string, so `filth != "iPhone"` would
            # presumably always be true and filter nothing.
            if filth.text != "iPhone":
                yield filth
# instantiate a scrubber and change the name detector to use our custom class
scrubber = scrubadub.Scrubber()
scrubber.detectors['name'] = MyNameDetector()
# these methods on a Scrubber object should have identical behavior to the
# scrubadub.clean convenience function
# NOTE(review): `text` and `lookup` are not defined in this excerpt; they must
# be bound before this point. `clean_text` is overwritten by each call, so
# only the result of the last call is kept.
clean_text = scrubber.clean(text)
clean_text = scrubber.clean(text, replace_with="placeholder")
clean_text = scrubber.clean(text, replace_with="surrogate")
clean_text = scrubber.clean(text, replace_with="identifier", lookup=lookup)