Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
self.word_delimiter = word_delimiter
def process_string(self, s: str):
    """
    Split a single string on the configured word delimiter.

    :param s: the string to split
    :return: the list returned by ``str.split`` when called with
        ``self.word_delimiter`` as the separator
    """
    delimiter = self.word_delimiter
    pieces = s.split(delimiter)
    return pieces
def process_list(self, inp: List[str]):
    """
    Run ``self.process_string`` on every sentence and flatten the results.

    :param inp: the sentences to process
    :return: one flat list holding the items produced for each sentence,
        in input order
    """
    return [
        word
        for sentence in inp
        for word in self.process_string(sentence)
    ]
class RemoveSpecificWords(BaseRemoveTransform):
    """
    Remove-transform configured with an explicit list of words.

    The constructor only forwards its argument to ``BaseRemoveTransform``;
    how the removal is performed is defined by that base class, which is
    not part of this excerpt.
    """

    def __init__(self, words_to_remove: List[str]):
        """
        :param words_to_remove: the words handed to the base class
            constructor as its first positional argument
        """
        super().__init__(words_to_remove)
class RemoveWhiteSpace(BaseRemoveTransform):
    """
    Remove-transform whose tokens are the characters of
    ``string.whitespace``.

    The actual removal logic lives in ``BaseRemoveTransform``, which is not
    part of this excerpt.
    """

    def __init__(self, replace_by_space: bool = False):
        """
        :param replace_by_space: when truthy, a single space is passed to
            the base class as ``replace_token``; otherwise the empty string
        """
        whitespace_chars = list(string.whitespace)
        substitute = " " if replace_by_space else ""
        super().__init__(whitespace_chars, replace_token=substitute)
def process_list(self, inp: List[str]):
    """
    Run ``self.process_string`` on every sentence and flatten the results.

    :param inp: the sentences to process
    :return: one flat list holding the items produced for each sentence,
        in input order
    """
    return [
        word
        for sentence in inp
        for word in self.process_string(sentence)
    ]
class RemoveSpecificWords(BaseRemoveTransform):
    """
    Remove-transform configured with an explicit list of words.

    The constructor only forwards its argument to ``BaseRemoveTransform``;
    how the removal is performed is defined by that base class, which is
    not part of this excerpt.
    """

    def __init__(self, words_to_remove: List[str]):
        """
        :param words_to_remove: the words handed to the base class
            constructor as its first positional argument
        """
        super().__init__(words_to_remove)
class RemoveWhiteSpace(BaseRemoveTransform):
    """
    Remove-transform whose tokens are the characters of
    ``string.whitespace``.

    The actual removal logic lives in ``BaseRemoveTransform``, which is not
    part of this excerpt.
    """

    def __init__(self, replace_by_space: bool = False):
        """
        :param replace_by_space: when truthy, a single space is passed to
            the base class as ``replace_token``; otherwise the empty string
        """
        whitespace_chars = list(string.whitespace)
        substitute = " " if replace_by_space else ""
        super().__init__(whitespace_chars, replace_token=substitute)
class RemovePunctuation(BaseRemoveTransform):
    """
    Remove-transform whose tokens are the characters of
    ``string.punctuation``.

    No replacement token is passed, so the base class default applies; the
    removal logic itself lives in ``BaseRemoveTransform``, which is not part
    of this excerpt.
    """

    def __init__(self):
        punctuation_chars = list(string.punctuation)
        super().__init__(punctuation_chars)
super().__init__(words_to_remove)
class RemoveWhiteSpace(BaseRemoveTransform):
    """
    Remove-transform whose tokens are the characters of
    ``string.whitespace``.

    The actual removal logic lives in ``BaseRemoveTransform``, which is not
    part of this excerpt.
    """

    def __init__(self, replace_by_space: bool = False):
        """
        :param replace_by_space: when truthy, a single space is passed to
            the base class as ``replace_token``; otherwise the empty string
        """
        whitespace_chars = list(string.whitespace)
        substitute = " " if replace_by_space else ""
        super().__init__(whitespace_chars, replace_token=substitute)
class RemovePunctuation(BaseRemoveTransform):
    """
    Remove-transform whose tokens are the characters of
    ``string.punctuation``.

    No replacement token is passed, so the base class default applies; the
    removal logic itself lives in ``BaseRemoveTransform``, which is not part
    of this excerpt.
    """

    def __init__(self):
        punctuation_chars = list(string.punctuation)
        super().__init__(punctuation_chars)
class RemoveMultipleSpaces(AbstractTransform):
    """
    Transform that collapses runs of whitespace into a single space.
    """

    def process_string(self, s: str):
        """
        Replace every run of two or more whitespace characters with one
        space. A lone whitespace character is left untouched.

        :param s: the string to clean up
        :return: the string with whitespace runs collapsed
        """
        whitespace_run = re.compile(r"\s\s+")
        return whitespace_run.sub(" ", s)

    def process_list(self, inp: List[str]):
        """
        Apply :meth:`process_string` to each sentence.

        :param inp: the sentences to clean up
        :return: a new list with one processed string per input sentence
        """
        return list(map(self.process_string, inp))
class Strip(AbstractTransform):
def process_string(self, s: str):