How to use the scrubadub.filth.base.Filth class in scrubadub

To help you get started, we’ve selected a few scrubadub examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github datascopeanalytics / scrubadub / tests / test_detector.py View on Github external
def test_regex_filth(self):
        """RegexDetector must refuse a ``filth_cls`` that is not a RegexFilth."""

        # A filth type that derives from the plain base class, not RegexFilth.
        class MyFilth(Filth):
            pass

        class MyDetector(RegexDetector):
            filth_cls = MyFilth

        detector = MyDetector()
        with self.assertRaises(UnexpectedFilth):
            # Exhaust the iterable so the error surfaces even if iter_filth
            # only does its work lazily.
            list(detector.iter_filth('dirty dirty text'))
github datascopeanalytics / scrubadub / scrubadub / filth / phone.py View on Github external
from .base import Filth


class PhoneFilth(Filth):
    """Filth subclass tagged with the type ``'phone'``."""
    # NOTE(review): presumably marks detected phone numbers -- confirm
    # against the matching detector.
    type = 'phone'
github datascopeanalytics / scrubadub / scrubadub / filth / base.py View on Github external
def replace_with(self, replace_with='placeholder', **kwargs):
        """Return the replacement text for this filth.

        ``replace_with`` selects which attribute is wrapped in ``prefix`` and
        ``suffix``: ``'placeholder'`` (the default) or ``'identifier'``. Any
        other value raises ``exceptions.InvalidReplaceWith``. Extra keyword
        arguments are accepted and ignored.
        """
        # 'surrogate' is not implemented, so it is rejected like any other
        # unknown mode.
        if replace_with == 'placeholder':
            attr_name = 'placeholder'
        elif replace_with == 'identifier':
            attr_name = 'identifier'
        else:
            raise exceptions.InvalidReplaceWith(replace_with)
        return self.prefix + getattr(self, attr_name) + self.suffix

    def merge(self, other_filth):
        """Combine this filth with ``other_filth`` into a new ``MergedFilth``."""
        return MergedFilth(self, other_filth)


class MergedFilth(Filth):
    """This class takes care of merging different types of filth"""

    def __init__(self, a_filth, b_filth):
        """Start from ``a_filth``'s span and text, then fold in ``b_filth``."""
        initial_span = dict(
            beg=a_filth.beg,
            end=a_filth.end,
            text=a_filth.text,
        )
        super(MergedFilth, self).__init__(**initial_span)
        # Track the member filths; b_filth is merged in by _update_content.
        self.filths = [a_filth]
        self._update_content(b_filth)

    def _update_content(self, other_filth):
        """this updates the bounds, text and placeholder for the merged
        filth
        """
        if self.end < other_filth.beg or other_filth.end < self.beg:
github datascopeanalytics / scrubadub / scrubadub / filth / __init__.py View on Github external
def iter_filth_clss():
    """Iterate over every filth class included in this sub-package.

    A convenience for capturing new ``Filth`` classes as they are added over
    time.
    """
    # Search the directory that holds this module.
    package_dir = os.path.dirname(os.path.abspath(__file__))
    return iter_subclasses(package_dir, Filth, _is_abstract_filth)
github datascopeanalytics / scrubadub / scrubadub / filth / base.py View on Github external
self.filths.append(other_filth)
        self._placeholder = '+'.join([filth.type for filth in self.filths])

    @property
    def placeholder(self):
        """Return the stored ``_placeholder`` string converted to upper case."""
        return self._placeholder.upper()

    def merge(self, other_filth):
        """Be smart about merging filth in this case to avoid nesting merged
        filths.

        ``other_filth`` is folded into this instance via ``_update_content``
        and ``self`` is returned, so no new object is created.
        """
        self._update_content(other_filth)
        return self


class RegexFilth(Filth):
    """``Filth`` built directly from a regular expression match object."""

    # Kept on the class so that groups in the regular expression can be used
    # to configure the placeholder.
    regex = None

    def __init__(self, match):
        """Record ``match`` and derive beg, end and text from its span."""
        self.match = match
        start, stop = match.start(), match.end()
        super(RegexFilth, self).__init__(
            beg=start,
            end=stop,
            text=match.string[start:stop],
        )