Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _calc_emoji_offset(to_calc) -> int:
    """Return the extra UTF-16 offset contributed by emoji in *to_calc*.

    Offsets are counted in UTF-16 code units: an emoji encoded as a
    single code unit adds nothing, while multi-unit emoji (surrogate
    pairs, skin-tone modifiers, ...) shift later offsets along.  Summing
    (utf-16 length - 1) per emoji gives the total correction.
    """
    total = 0
    for match in emoji.get_emoji_regexp().finditer(to_calc):
        # Two bytes per UTF-16 code unit; subtract one so a plain
        # single-unit emoji contributes zero offset.
        utf16_units = len(match.group(0).encode('utf-16-le')) // 2
        total += utf16_units - 1
    return total
def _handle_message(self, msg):
    """Normalise one message row and emit it as an org-mode subitem.

    Mutates *msg* in place: rewrites the phone number, resolves the
    contact name, maps the numeric type to verb/direction labels, and
    optionally demojizes or strips emoji from the text before writing.
    """
    # Drop the "@server" suffix and prefix "00" for international format.
    msg['number'] = '00' + msg['number'].split('@')[0]
    msg['name'] = self._numberdict.get(msg['number'], msg['number'])
    # Read the truthy/falsy type for the verb first, because the next
    # assignment overwrites the field with its label form.
    msg['verb'] = 'to' if msg['type'] else 'from'
    msg['type'] = 'OUTGOING' if msg['type'] else 'INCOMING'
    msg['handler'] = self._args.handler
    if msg['text']:
        body = msg['text']
        if self._args.demojize:
            body = emoji.demojize(body)
        if self._args.skip_emoji:
            body = re.sub(emoji.get_emoji_regexp(), '', body)
        msg['text'] = body
    # Timestamps are stored in milliseconds since the epoch.
    ts = datetime.datetime.fromtimestamp(msg['timestamp'] / 1000)
    props = OrgProperties(data_for_hashing=json.dumps(msg))
    props.add('NUMBER', msg['number'])
    props.add('TYPE', msg['type'])
    formatted = self._args.output_format.format(**msg)
    if msg['text'] and not self._is_ignored(msg):
        self._writer.write_org_subitem(
            timestamp=OrgFormat.date(ts, show_time=True),
            output=formatted,
            properties=props)
#!/usr/bin/python3
"""
Count the number of emojis in the input
"""
import sys
import emoji
import re
# Classic ASCII emoticons such as :-), 8D, ;P.
txt_emoji_regex = re.compile(r'(8|:|;|=)(\^|\'|-)?(\)|\(|D|P|p)')
# Unicode emoji characters.
utf8_emoji_regex = emoji.get_emoji_regexp()

N = 0
try:
    for line in sys.stdin:
        # Count each whitespace-separated token containing either an
        # ASCII emoticon or a Unicode emoji.
        N += sum(
            1
            for token in line.strip().split()
            if txt_emoji_regex.search(token) or utf8_emoji_regex.search(token)
        )
except (KeyboardInterrupt, EOFError):
    # Stop counting cleanly on Ctrl-C / end of input.
    pass
finally:
    print(N)
def translate_view(request):
log('translate_view(request)')
import json
import bleach
from googletrans import Translator
import emoji
translator = Translator()
with open('_database/templates/languages.json') as json_file:
language_codes = json.load(json_file)
if request.GET.get('q', None) and request.GET.get('language', None):
text = emoji.get_emoji_regexp().sub(u'', request.GET.get('q', None))
response = JsonResponse({'text': translator.translate(
text=text,
dest=request.GET.get('language', None)).text
})
elif request.GET.get('q', None):
LANGUAGES = Config('WEBSITE.LANGUAGES').value
languages = {}
text = emoji.get_emoji_regexp().sub(u'', request.GET.get('q', None))
for language in LANGUAGES:
if len(LANGUAGES) > 1:
languages[language] = translator.translate(
text=text,
from immp.hook.command import command
from immp.hook.database import BaseModel, DatabaseHook
from immp.hook.identity import IdentityProvider
# Optional dependency: template rendering is unavailable without jinja2.
try:
    from jinja2 import Template
except ImportError:
    Template = None
# Optional dependency: emoji stripping is unavailable without the emoji package.
try:
    from emoji import get_emoji_regexp
except ImportError:
    EMOJI_REGEX = None
else:
    _EMOJI_REGEX_RAW = get_emoji_regexp()
    # Wrap the raw emoji pattern so the surrounding whitespace is captured
    # too, letting the replacement callback collapse spacing around
    # removed emoji.
    EMOJI_REGEX = re.compile(r"(\s*)({})+(\s*)".format(_EMOJI_REGEX_RAW.pattern))
log = logging.getLogger(__name__)
def _emoji_replace(match):
# Add correct spacing around removed emoji in a string.
left, *_, right = match.groups()
return " " if left and right else ""
class SyncBackRef(BaseModel):
"""
One of a set of references, each pointing to a representation of a source message.
def find_emojis(self):
    """Find emojis, symbols, pictographs, map symbols and flags.

    Stores the list of matched emoji in ``self.state``.

    Returns:
        Chepy: The Chepy object.
    """
    text = self._convert_to_str()
    self.state = emoji.get_emoji_regexp().findall(text)
    return self
def create_post(str_headline, str_text, str_category):
    """Create a new Discourse topic via the posts API.

    Args:
        str_headline: topic title; HTML entities are unescaped and emoji
            stripped before sending.
        str_text: raw post body.
        str_category: category name, resolved via get_category_id().

    Returns:
        The URL of the created topic, or None on failure.
    """
    log('create_post()')
    from html import unescape
    import emoji
    if not BOOLEAN__key_exists('DISCOURSE.API_KEY'):
        log('--> Failed: DISCOURSE.API_KEY not set')
        return None
    # NOTE(review): sending the API key as a query parameter can leak it
    # through logs and proxies; Discourse recommends the Api-Key /
    # Api-Username request headers instead — confirm before changing.
    response = requests.post(DISCOURSE_URL+'posts.json',
                             headers={
                                 'content-type': 'application/json'
                             }, params={
                                 'api_key': STR__get_key('DISCOURSE.API_KEY'),
                                 'api_username': STR__get_key('DISCOURSE.API_USERNAME'),
                                 # Strip emoji from the title before posting.
                                 'title': emoji.get_emoji_regexp().sub(u'', unescape(str_headline)),
                                 'raw': str_text,
                                 'category': get_category_id(str_category)
                                 # TODO add event details
                                 # 'event': {'start': '2019-12-13T15:00:00+00:00', 'end': '2019-12-13T19:00:00+00:00'}
                             })
    if response.status_code != 200:
        print(response.status_code)
        return None
    # Join base URL and topic path without duplicating the branch bodies:
    # normalise the trailing slash once, then build the URL.
    base = DISCOURSE_URL if DISCOURSE_URL.endswith('/') else DISCOURSE_URL + '/'
    url = base + 't/' + str(response.json()['topic_id'])
    log('--> Created Discourse post: '+url)
    return url
import os
import re
import sys
import regex
import click
import emoji
import logging
import json
import importlib
import gzip
import contextlib
import itertools as it
# Word tokenizer: an emoji, a run of word-ish characters (letters, digits,
# connector/dash punctuation, apostrophe, @, #), or a run of other
# punctuation/symbol characters.
# The \p{...} Unicode property classes must live in a RAW string —
# non-raw '\p' is an invalid escape sequence (SyntaxWarning on modern
# Python); the regex pattern itself is unchanged.
WORD_TOKENIZER = regex.compile(
    emoji.get_emoji_regexp().pattern +
    r'''|[\p{L}\p{N}\p{Pc}\p{Pd}'@#]+|[\p{P}\p{S}]+'''
)
'''Our basic word tokenizer regex.'''

# '.' does not match newline even with MULTILINE, so match '\n' explicitly
# to cover every character.
CHARACTER_TOKENIZER = regex.compile(
    '.|\n', flags=regex.MULTILINE
)
'''A Unicode character tokenizer regex.'''
def shell_docstring(command, name):
'''Utility for creating docstrings:
__doc__ += shell_docstring(cli, 'command-name')
'''