How to use the scrubadub.detectors.base.Detector class in scrubadub

To help you get started, we’ve selected a few scrubadub examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github datascopeanalytics / scrubadub / scrubadub / scrubbers.py View on Github external
def add_detector(self, detector_cls):
        """Register a ``Detector`` class with this scrubber.

        ``detector_cls`` is instantiated with no arguments and the instance is
        stored in ``self._detectors`` under ``detector_cls.filth_cls.type``.

        Raises ``TypeError`` if ``detector_cls`` is not a subclass of
        ``detectors.base.Detector`` and ``KeyError`` if a detector is already
        stored under the same key.
        """
        is_detector = issubclass(detector_cls, detectors.base.Detector)
        if not is_detector:
            type_message = '"%(detector_cls)s" is not a subclass of Detector'
            raise TypeError(type_message % {'detector_cls': detector_cls})

        # TODO: should add tests to make sure filth_cls is actually a proper
        # filth_cls
        filth_type = detector_cls.filth_cls.type
        already_registered = filth_type in self._detectors
        if already_registered:
            key_message = (
                'can not add Detector "%(name)s"---it already exists. '
                'Try removing it first.'
            )
            raise KeyError(key_message % {'name': filth_type})

        # Store an instance, not the class itself.
        self._detectors[filth_type] = detector_cls()
github datascopeanalytics / scrubadub / scrubadub / detectors / base.py View on Github external
import re

from .. import exceptions
from ..filth import Filth, RegexFilth


class Detector(object):
    """Base class for detectors.

    Subclasses are expected to set ``filth_cls`` and to override
    ``iter_filth``.
    """

    # Filth class associated with this detector; unset on the base class.
    filth_cls = None

    def iter_filth(self, text):
        """Iterate over the filth found in ``text``.

        The base implementation only signals that the method is missing.

        Raises ``NotImplementedError`` unconditionally.
        """
        # It is subclasses, not base classes, that must supply this method.
        raise NotImplementedError('must be overridden by subclasses')


class RegexDetector(Detector):
    """Detector that finds filth by running ``filth_cls.regex`` over text."""

    def iter_filth(self, text):
        """Yield one ``filth_cls`` instance per regex match in ``text``.

        Nothing is yielded when ``filth_cls.regex`` is ``None``.

        Raises ``exceptions.UnexpectedFilth`` if ``filth_cls`` is not a
        subclass of ``RegexFilth``. Because this is a generator, the check
        runs when iteration starts, not when the method is called.
        """
        if not issubclass(self.filth_cls, RegexFilth):
            raise exceptions.UnexpectedFilth(
                'RegexFilth required for RegexDetector'
            )
        # End the generator with a plain ``return``. Raising StopIteration
        # inside a generator body is turned into RuntimeError by PEP 479
        # (the default behaviour since Python 3.7).
        if self.filth_cls.regex is None:
            return
        for match in self.filth_cls.regex.finditer(text):
            yield self.filth_cls(match)
github datascopeanalytics / scrubadub / scrubadub / detectors / phone.py View on Github external
import phonenumbers

from .base import Detector
from ..filth import PhoneFilth


class PhoneDetector(Detector):
    """Remove phone numbers from dirty dirty ``text`` using
    `python-phonenumbers
    `_, a port of a
    Google project to correctly format phone numbers in text.

    ``region`` specifies the best guess region to start with (default:
    ``"US"``). Specify ``None`` to only consider numbers with a leading
    ``+`` to be considered.
    """
    filth_cls = PhoneFilth
    region = 'US'

    def iter_filth(self, text):
        # create a copy of text to handle multiple phone numbers correctly
        for match in phonenumbers.PhoneNumberMatcher(text, self.region):
            yield PhoneFilth(
github datascopeanalytics / scrubadub / scrubadub / detectors / __init__.py View on Github external
def iter_detector_clss():
    """Iterate over the detectors that ship with this sub-package.

    The directory containing this file is handed to ``iter_subclasses``
    together with ``Detector`` and ``_is_abstract_detector``. It is a
    convenience for picking up Detectors as they are added over time, and is
    used by the unit tests and by ``Scrubber.__init__``.
    """
    this_file = os.path.abspath(__file__)
    package_dir = os.path.dirname(this_file)
    return iter_subclasses(package_dir, Detector, _is_abstract_detector)