Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_nonoverlapping_filth(self):
    """Merging filth that does not overlap must fail in either order."""
    left = Filth(beg=0, end=3, text="the")
    right = Filth(beg=4, end=7, text="end")
    # The spans 0-3 and 4-7 do not touch, so neither direction may merge.
    for source, other in ((left, right), (right, left)):
        with self.assertRaises(FilthMergeError):
            source.merge(other)
def test_text_merge(self):
    """Merged filth carries the text of the whole combined span."""

    class SomeFilth(Filth):
        type = 'something'

    expected = "the end"
    # Two overlapping pieces of the same string: span 0-3 and span 1-7.
    head = SomeFilth(beg=0, end=3, text=expected[:3])
    tail = SomeFilth(beg=1, end=7, text=expected[1:])
    self.assertEqual(head.merge(tail).text, expected)
def test_nonoverlapping_filth(self):
    """Disjoint filth spans cannot be merged, whichever side starts."""
    earlier = Filth(beg=0, end=3, text="the")
    later = Filth(beg=4, end=7, text="end")
    # Check both merge directions; each one must raise FilthMergeError.
    for receiver, argument in ((earlier, later), (later, earlier)):
        with self.assertRaises(FilthMergeError):
            receiver.merge(argument)
def test_disallowed_replace_with(self):
    """Unsupported ``replace_with`` values raise InvalidReplaceWith."""
    blank = Filth()
    # Both values are expected to be rejected with the same exception.
    for rejected in ('surrogate', 'something_invalid'):
        with self.assertRaises(InvalidReplaceWith):
            blank.replace_with(rejected)
def clean(self, text, **kwargs):
    """Return ``text`` with every piece of detected filth replaced.

    The filth is taken from ``self.iter_filth(text)`` in the order it is
    yielded. Each piece is swapped for the value returned by its
    ``replace_with`` method, and the untouched text between consecutive
    pieces is copied through as-is. All keyword arguments are forwarded
    unchanged to ``Filth.replace_with`` to fine-tune the replacement.

    Raises ``exceptions.UnicodeRequired`` on Python 2 when ``text`` is not
    a ``unicode`` instance.
    """
    # Python 3 strings are always unicode, so the type check only matters
    # on Python 2. The short-circuit also keeps the ``unicode`` name from
    # being looked up on Python 3, where it no longer exists.
    if sys.version_info < (3, 0) and not isinstance(text, unicode):
        raise exceptions.UnicodeRequired

    pieces = []
    # Placeholder filth: its ``end`` marks where copying of clean text
    # starts (presumably 0 for a default Filth -- confirm in Filth).
    previous = Filth()
    for current in self.iter_filth(text):
        pieces.extend((
            text[previous.end:current.beg],   # clean text before this filth
            current.replace_with(**kwargs),   # the filth's replacement
        ))
        previous = current
    # Whatever follows the last filth (or the whole text if none was found).
    pieces.append(text[previous.end:])
    return u''.join(pieces)
def iter_filth(self, text):
"""Iterate over the different types of filth that can exist.
"""
# currently doing this by aggregating all_filths and then sorting
# inline instead of with a Filth.__cmp__ method, which is apparently
# much slower http://stackoverflow.com/a/988728/564709
#
# NOTE: we could probably do this in a more efficient way by iterating
# over all detectors simultaneously. just trying to get something
# working right now and we can worry about efficiency later
all_filths = []
for detector in self._detectors.values():
for filth in detector.iter_filth(text):
if not isinstance(filth, Filth):
raise TypeError('iter_filth must always yield Filth')
all_filths.append(filth)
# Sort by start position. If two filths start in the same place then
# return the longer one first
all_filths.sort(key=lambda f: (f.beg, -f.end))
# this is where the Scrubber does its hard work and merges any
# overlapping filths.
if not all_filths:
raise StopIteration
filth = all_filths[0]
for next_filth in all_filths[1:]:
if filth.end < next_filth.beg:
yield filth
filth = next_filth