Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_init(self):
    """Check that __init__ canonicalises case.

    Three case variants of the same word are passed to the constructor;
    the resulting set must report 'tktk' as a member and have length 1.
    """
    s = CanonicalStringSet(['TKTK', 'tKtK', 'Tktk'])
    # assertIn names the member and the container when it fails, whereas
    # assertTrue(x in s) only reports "False is not true".
    self.assertIn('tktk', s)
    self.assertEqual(len(s), 1)
def test_add(self):
    """Check that add() canonicalises case.

    Three case variants of the same word are added one at a time; the set
    must report 'tktk' as a member and have length 1.
    """
    s = CanonicalStringSet()
    s.add('TKTK')
    s.add('tKtK')
    s.add('Tktk')
    # assertIn gives a descriptive failure message, unlike
    # assertTrue(x in s).
    self.assertIn('tktk', s)
    self.assertEqual(len(s), 1)
def test_update(self):
    """Check that update() canonicalises case.

    Three case variants of the same word are added in a single update()
    call; the set must report 'tktk' as a member and have length 1.
    """
    s = CanonicalStringSet()
    s.update(['TKTK', 'tKtK', 'Tktk'])
    # assertIn gives a descriptive failure message, unlike
    # assertTrue(x in s).
    self.assertIn('tktk', s)
    self.assertEqual(len(s), 1)
def test_contains(self):
    """Check that membership tests (``in`` / __contains__) ignore case.

    The set is built from the lower-case word only; every other casing of
    that word must still be reported as a member.
    """
    s = CanonicalStringSet(['tktk'])
    # assertIn still evaluates ``member in container`` (so __contains__ is
    # exercised) but names the missing member when it fails.
    self.assertIn('TKTK', s)
    self.assertIn('Tktk', s)
    self.assertIn('tKtK', s)
def test_remove(self):
    """Check that remove() canonicalises case.

    Removing the upper-case spelling must remove the lower-case member the
    set was built with.
    """
    s = CanonicalStringSet(['tktk'])
    s.remove('TKTK')
    # assertNotIn names the unexpected member on failure, unlike
    # assertFalse(x in s) which only reports "True is not false".
    self.assertNotIn('tktk', s)
def test_discard(self):
    """Check that discard() canonicalises case.

    discard() is called with three different casings of the one word the
    set was built with; afterwards 'tktk' must no longer be a member.
    """
    s = CanonicalStringSet(['tktk'])
    s.discard('TKTK')
    s.discard('TkTk')
    s.discard('Tktk')
    # assertNotIn names the unexpected member on failure, unlike
    # assertFalse(x in s).
    self.assertNotIn('tktk', s)
def add(self, element):
    """Add ``element`` to the set after passing it through
    ``self._cast_as_lower``.

    Returns whatever the parent class's ``add`` returns.
    """
    canonical = self._cast_as_lower(element)
    parent = super(CanonicalStringSet, self)
    return parent.add(canonical)
import textblob
from .base import RegexDetector
from ..filth import NameFilth
from ..utils import CanonicalStringSet
class NameDetector(RegexDetector):
    """Use part of speech tagging to clean proper nouns out of the dirty dirty
    ``text``. Words that should never be treated as names can be excluded by
    adding them to the ``NameDetector.disallowed_nouns`` set.
    """
    # NOTE(review): presumably the filth type this detector reports; confirm
    # against RegexDetector, which is not visible here.
    filth_cls = NameFilth
    # Words that iter_filth skips even when they are tagged as proper nouns.
    # Must stay a CanonicalStringSet: iter_filth raises TypeError otherwise.
    disallowed_nouns = CanonicalStringSet(["skype"])

    def iter_filth(self, text):
        """Collect the proper nouns found in ``text``.

        Raises:
            TypeError: if ``disallowed_nouns`` has been replaced by something
                that is not a ``CanonicalStringSet``.

        NOTE(review): the code visible here stops after building the set of
        proper nouns; what is done with that set is not shown in this
        excerpt.
        """
        # Guard against callers rebinding disallowed_nouns to another type.
        if not isinstance(self.disallowed_nouns, CanonicalStringSet):
            raise TypeError(
                'NameDetector.disallowed_nouns must be CanonicalStringSet'
            )
        # find the set of proper nouns using textblob.
        proper_nouns = set()
        blob = textblob.TextBlob(text)
        # blob.tags is iterated as (word, part-of-speech tag) pairs.
        for word, part_of_speech in blob.tags:
            # "NNP" and "NNPS" are the tags this code treats as proper nouns.
            is_proper_noun = part_of_speech in ("NNP", "NNPS")
            # The word is lower-cased only for the disallowed-nouns lookup;
            # its original casing is what gets stored.
            if is_proper_noun and word.lower() not in self.disallowed_nouns:
                proper_nouns.add(word)
def iter_filth(self, text):
if not isinstance(self.disallowed_nouns, CanonicalStringSet):
raise TypeError(
'NameDetector.disallowed_nouns must be CanonicalStringSet'
)
# find the set of proper nouns using textblob.
proper_nouns = set()
blob = textblob.TextBlob(text)
for word, part_of_speech in blob.tags:
is_proper_noun = part_of_speech in ("NNP", "NNPS")
if is_proper_noun and word.lower() not in self.disallowed_nouns:
proper_nouns.add(word)
# use a regex to replace the proper nouns by first escaping any
# lingering punctuation in the regex
# http://stackoverflow.com/a/4202559/564709
if proper_nouns: