Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def extract_text(self):
    """Return the plain-text rendering of the RTF held in ``self.data``.

    ``self.data`` is decoded with ``DEFAULT_TEXT_ENCODING``, patched for one
    paragraph-break quirk, and then passed through ``rtf_to_text``.
    """
    decoded = self.data.decode(DEFAULT_TEXT_ENCODING)
    # Hack for Apple's extensions to the RTF format: when two
    # backslash-newline pairs appear back to back, rewrite the second one
    # as an explicit \par control word before converting.
    doubled_break = "\\\n\\\n"
    explicit_par = "\\\n\\par\n"
    normalized = decoded.replace(doubled_break, explicit_par)
    return rtf_to_text(normalized)
import glob
from striprtf.striprtf import rtf_to_text
if __name__ == '__main__':
    # Convert every RTF file in Documents/ into a sibling plain-text file.
    rtf_suffix = '.rtf'
    for rtf_name in glob.glob('Documents/*.rtf'):
        # Swap only the trailing extension; str.replace would also rewrite
        # any earlier ".rtf" that happens to occur inside the file name.
        txt_name = rtf_name[:-len(rtf_suffix)] + '.txt'
        # Read and convert before creating the output file, so a failed
        # conversion does not leave an empty .txt file behind. The context
        # manager guarantees the input handle is closed.
        with open(rtf_name) as rtf_file:
            content_txt = rtf_to_text(rtf_file.read())
        # Mode "x" refuses to overwrite an existing .txt file
        # (raises FileExistsError); the handle is closed even if the
        # write fails.
        with open(txt_name, "x") as txt_file:
            txt_file.write(content_txt)
def cleanup_message_body(
body: AnyStr, body_type: BodyType, size_threshold: int = 0
) -> str:
# Decode first
body = decode(body)
if body_type is BodyType.RTF:
# Strip formatting
body = rtf_to_text(body)
elif body_type is BodyType.HTML:
# Strip markup
body = BeautifulSoup(body, "html.parser").get_text()
# Strip what might be lines of base64 encoded data
if len(body) > size_threshold:
body = re.sub(r"^[>\s]*[A-Za-z0-9+/]{76,}\n?", "", body, flags=re.MULTILINE)
# Strip uuencoded attachments
if len(body) > size_threshold:
body = re.sub(r"begin [0-7]{3}.*?end", "", body, flags=re.DOTALL)
# Strip notes/calendar data
if len(body) > size_threshold:
body = re.sub(