Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def add_detector(self, detector_cls):
    """Register a ``Detector`` subclass with this object.

    The detector is instantiated with no arguments and stored in
    ``self._detectors`` under the key ``detector_cls.filth_cls.type``.

    :param detector_cls: the ``Detector`` subclass (the class itself, not
        an instance) to register.
    :raises TypeError: if ``detector_cls`` is not a subclass of
        ``detectors.base.Detector``.
    :raises KeyError: if a detector is already registered under the same
        filth type name.
    """
    is_detector = issubclass(detector_cls, detectors.base.Detector)
    if not is_detector:
        type_message = '"%(detector_cls)s" is not a subclass of Detector'
        raise TypeError(type_message % {'detector_cls': detector_cls})

    # TODO: should add tests to make sure filth_cls is actually a proper
    # filth_cls
    name = detector_cls.filth_cls.type

    # Refuse to silently replace a detector that is already registered.
    if name in self._detectors:
        duplicate_message = (
            'can not add Detector "%(name)s"---it already exists. '
            'Try removing it first.'
        )
        raise KeyError(duplicate_message % {'name': name})

    self._detectors[name] = detector_cls()
import re
from .. import exceptions
from ..filth import Filth, RegexFilth
class Detector(object):
    """Base class for detectors.

    Subclasses set ``filth_cls`` and provide their own ``iter_filth``.
    """

    # Filth class associated with the detector; the base class has none.
    filth_cls = None

    def iter_filth(self, text):
        """Placeholder that subclasses replace; always raises here.

        :param text: the text to inspect (unused in the base class).
        :raises NotImplementedError: always.
        """
        message = 'must be overridden by base classes'
        raise NotImplementedError(message)
class RegexDetector(Detector):
    """Detector that finds filth by running ``filth_cls.regex`` over text.

    ``filth_cls`` must be a subclass of ``RegexFilth``.
    """

    def iter_filth(self, text):
        """Yield one ``filth_cls`` instance per regex match in ``text``.

        Nothing is yielded when ``filth_cls.regex`` is ``None``.

        :param text: the text searched with ``filth_cls.regex.finditer``.
        :raises exceptions.UnexpectedFilth: if ``filth_cls`` is not a
            subclass of ``RegexFilth`` (raised when iteration starts, since
            this is a generator).
        """
        if not issubclass(self.filth_cls, RegexFilth):
            raise exceptions.UnexpectedFilth(
                'RegexFilth required for RegexDetector'
            )

        # End the generator with a plain ``return``. Under PEP 479 (the
        # default from Python 3.7) an explicit ``raise StopIteration`` inside
        # a generator body is converted into a RuntimeError instead of
        # simply finishing the iteration.
        if self.filth_cls.regex is None:
            return

        for match in self.filth_cls.regex.finditer(text):
            yield self.filth_cls(match)
import phonenumbers
from .base import Detector
from ..filth import PhoneFilth
class PhoneDetector(Detector):
"""Remove phone numbers from dirty dirty ``text`` using
`python-phonenumbers
`_, a port of a
Google project to correctly format phone numbers in text.
``region`` specifies the best guess region to start with (default:
``"US"``). Specify ``None`` to only consider numbers with a leading
``+`` to be considered.
"""
filth_cls = PhoneFilth
region = 'US'
def iter_filth(self, text):
# create a copy of text to handle multiple phone numbers correctly
for match in phonenumbers.PhoneNumberMatcher(text, self.region):
yield PhoneFilth(
def iter_detector_clss():
    """Iterate over the detector classes shipped in this sub-package.

    Convenience helper for picking up new Detectors as they are added over
    time; it is used by the unit tests and by ``Scrubber.__init__``.

    :returns: the result of ``iter_subclasses`` called with this module's
        directory, the ``Detector`` base class and
        ``_is_abstract_detector``.
    """
    # Directory containing this module.
    package_dir = os.path.dirname(os.path.abspath(__file__))
    return iter_subclasses(package_dir, Detector, _is_abstract_detector)