How to use the emoji.get_emoji_regexp function in emoji

To help you get started, we’ve selected a few emoji.get_emoji_regexp examples, based on popular ways it is used in public projects.

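Before diving into the examples, here is a minimal sketch of what the function returns: a compiled regular expression that matches any emoji known to the emoji package, usable with the usual re methods such as findall and sub. Note that get_emoji_regexp was deprecated and then removed in emoji 2.0.0, so the snippets below assume an older release of the package. The input string here is hypothetical.

import emoji

text = "Deploy finished 🎉🚀 ship it"        # hypothetical input string
emoji_pattern = emoji.get_emoji_regexp()     # compiled regex matching any known emoji

found = emoji_pattern.findall(text)          # ['🎉', '🚀']
stripped = emoji_pattern.sub('', text)       # emoji removed, surrounding text kept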

github AyraHikari / EmiliaHikari / emilia / modules / helper_funcs / string_handling.py
def _calc_emoji_offset(to_calc) -> int:
    # Get all emoji in text.
    emoticons = emoji.get_emoji_regexp().finditer(to_calc)
    # Check the utf16 length of the emoji to determine the offset it caused.
    # A normal emoji that fits in a single UTF-16 code unit adds no extra offset, hence the -1.
    # Multi-code-unit emoji (e.g. non-BMP emoji, or sequences such as a face with a
    # skin-tone modifier) each contribute their surplus code units as extra offset.
    return sum(len(e.group(0).encode('utf-16-le')) // 2 - 1 for e in emoticons)
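
A brief usage sketch of the helper above (input string hypothetical): the result is the number of extra UTF-16 code units contributed by emoji, which matters when computing entity offsets for APIs that count positions in UTF-16, such as Telegram's.

extra = _calc_emoji_offset("hi 👍")  # 👍 occupies two UTF-16 code units, so this returns 1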
github novoid / Memacs / memacs / whatsapp.py
def _handle_message(self, msg):
        """parse a single message row"""

        msg['number'] = '00' + msg['number'].split('@')[0]
        msg['name'] = self._numberdict.get(msg['number'],msg['number'])
        msg['verb'] = 'to' if msg['type'] else 'from'
        msg['type'] = 'OUTGOING' if msg['type'] else 'INCOMING'
        msg['handler'] = self._args.handler

        if msg['text']:
            if self._args.demojize:
                msg['text'] = emoji.demojize(msg['text'])

            if self._args.skip_emoji:
                msg['text'] = re.sub(emoji.get_emoji_regexp(), '', msg['text'])

        timestamp = datetime.datetime.fromtimestamp(msg['timestamp'] / 1000)

        properties = OrgProperties(data_for_hashing=json.dumps(msg))
        properties.add('NUMBER', msg['number'])
        properties.add('TYPE', msg['type'])

        output = self._args.output_format.format(**msg)

        if msg['text'] and not self._is_ignored(msg):
            self._writer.write_org_subitem(timestamp=OrgFormat.date(timestamp, show_time=True),
                                           output=output, properties=properties)
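
The emoji handling is the interesting part here: demojize rewrites emoji as ':name:' codes so they survive plain-text storage, while the skip_emoji branch removes them entirely via the regexp. A minimal sketch of both options in isolation (message text hypothetical):

import re
import emoji

raw = "On my way 🚗💨"                               # hypothetical message text
as_names = emoji.demojize(raw)                        # emoji replaced by ':name:' codes
no_emoji = re.sub(emoji.get_emoji_regexp(), '', raw)  # emoji stripped, as in skip_emoji above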
github pmichel31415 / mtnt / analysis / count_emojis.py
#!/usr/bin/python3
"""
Count the number of emojis in the input
"""
import sys
import emoji
import re

txt_emoji_regex = re.compile(r'(8|:|;|=)(\^|\'|-)?(\)|\(|D|P|p)')
utf8_emoji_regex = emoji.get_emoji_regexp()


N = 0
try:
    for line in sys.stdin:
        for w in line.strip().split():
            if txt_emoji_regex.search(w) or utf8_emoji_regex.search(w):
                N += 1
except (KeyboardInterrupt, EOFError):
    pass
finally:
    print(N)
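
The script reads from standard input, so a typical invocation pipes a text file through it, for example cat tweets.txt | python3 count_emojis.py (file name hypothetical). Note that it counts whitespace-separated tokens that contain an emoticon or emoji, not individual emoji.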
github marcoEDU / HackerspaceTemplatePackage / _website / views_old.py
def translate_view(request):
    log('translate_view(request)')
    import json
    import bleach
    from googletrans import Translator
    import emoji
    translator = Translator()

    with open('_database/templates/languages.json') as json_file:
        language_codes = json.load(json_file)

    if request.GET.get('q', None) and request.GET.get('language', None):
        text = emoji.get_emoji_regexp().sub(u'', request.GET.get('q', None))

        response = JsonResponse({'text': translator.translate(
            text=text,
            dest=request.GET.get('language', None)).text
        })

    elif request.GET.get('q', None):
        LANGUAGES = Config('WEBSITE.LANGUAGES').value
        languages = {}

        text = emoji.get_emoji_regexp().sub(u'', request.GET.get('q', None))

        for language in LANGUAGES:
            if len(LANGUAGES) > 1:
                languages[language] = translator.translate(
                    text=text,
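
The core emoji-related step in this view is stripping emoji from the query string before handing it to googletrans. A minimal sketch of that pattern (variable names and target language hypothetical):

import emoji
from googletrans import Translator

translator = Translator()
query = "Good morning 🌞 team"                       # hypothetical user input
clean = emoji.get_emoji_regexp().sub('', query)       # drop emoji before translating
translated = translator.translate(text=clean, dest='de').text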
github Terrance / IMMP / immp / hook / sync.py
from immp.hook.command import command
from immp.hook.database import BaseModel, DatabaseHook
from immp.hook.identity import IdentityProvider


try:
    from jinja2 import Template
except ImportError:
    Template = None

try:
    from emoji import get_emoji_regexp
except ImportError:
    EMOJI_REGEX = None
else:
    _EMOJI_REGEX_RAW = get_emoji_regexp()
    EMOJI_REGEX = re.compile(r"(\s*)({})+(\s*)".format(_EMOJI_REGEX_RAW.pattern))


log = logging.getLogger(__name__)


def _emoji_replace(match):
    # Add correct spacing around removed emoji in a string.
    left, *_, right = match.groups()
    return " " if left and right else ""


class SyncBackRef(BaseModel):
    """
    One of a set of references, each pointing to a representation of a source message.
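
A brief sketch of how EMOJI_REGEX and _emoji_replace above work together, removing emoji while collapsing the surrounding whitespace (input string hypothetical, and only when the emoji package is installed so EMOJI_REGEX is not None):

text = "great 👍👍 work"                             # hypothetical input
if EMOJI_REGEX is not None:
    cleaned = EMOJI_REGEX.sub(_emoji_replace, text)  # -> "great work"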
github securisec / chepy / chepy / modules / language.py
def find_emojis(self):
        """Find emojis, symbols, pictographs, map symbols and flags
        
        Returns:
            Chepy: The Chepy object.
        """
        self.state = emoji.get_emoji_regexp().findall(self._convert_to_str())
        return self
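
A short usage sketch, assuming the usual Chepy call chain where the constructor takes the input data and .o exposes the current state:

from chepy import Chepy

found = Chepy("release day 🎉🚀").find_emojis().o  # state now holds the matched emoji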
github marcoEDU / HackerspaceTemplatePackage / hackerspace / APIs / discourse.py
def create_post(str_headline, str_text, str_category):
    log('create_post()')
    from html import unescape
    import emoji

    if BOOLEAN__key_exists('DISCOURSE.API_KEY') == False:
        log('--> Failed: DISCOURSE.API_KEY not set')
        return None

    response = requests.post(DISCOURSE_URL+'posts.json',
                             headers={
                                 'content-type': 'application/json'
                             }, params={
                                 'api_key': STR__get_key('DISCOURSE.API_KEY'),
                                 'api_username': STR__get_key('DISCOURSE.API_USERNAME'),
                                 'title': emoji.get_emoji_regexp().sub(u'', unescape(str_headline)),
                                 'raw': str_text,
                                 'category': get_category_id(str_category)
                                 # TODO add event details
                                 #  'event': {'start': '2019-12-13T15:00:00+00:00', 'end': '2019-12-13T19:00:00+00:00'}
                             })
    if response.status_code == 200:
        if DISCOURSE_URL.endswith('/'):
            url = DISCOURSE_URL+'t/'+str(response.json()['topic_id'])
            log('--> Created Discourse post: '+url)
            return url
        else:
            url = DISCOURSE_URL+'/t/'+str(response.json()['topic_id'])
            log('--> Created Discourse post: '+url)
            return url
    else:
        print(response.status_code)
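
The relevant detail here is the post title: HTML entities are unescaped and emoji are stripped before the post is created. A minimal sketch of just that step (headline hypothetical):

from html import unescape
import emoji

headline = "Python meetup &amp; workshop 🐍"   # hypothetical headline
title = emoji.get_emoji_regexp().sub('', unescape(headline))  # 'Python meetup & workshop '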
github microsoft / LMChallenge / lmchallenge / core / common.py
import os
import re
import sys
import regex
import click
import emoji
import logging
import json
import importlib
import gzip
import contextlib
import itertools as it


WORD_TOKENIZER = regex.compile(
    emoji.get_emoji_regexp().pattern +
    '''|[\p{L}\p{N}\p{Pc}\p{Pd}'@#]+|[\p{P}\p{S}]+'''
)
'''Our basic word tokenizer regex.'''


CHARACTER_TOKENIZER = regex.compile(
    '.|\n', flags=regex.MULTILINE
)
'''A Unicode character tokenizer regex.'''


def shell_docstring(command, name):
    '''Utility for creating docstrings:

      __doc__ += shell_docstring(cli, 'command-name')
    '''
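
A quick sketch of WORD_TOKENIZER defined above on a mixed string (input hypothetical): emoji are captured by the emoji branch of the pattern, words and numbers by the letter/number class, and punctuation separately.

tokens = [m.group(0) for m in WORD_TOKENIZER.finditer("Nice work 👍 @ 5pm!")]
# roughly: ['Nice', 'work', '👍', '@', '5pm', '!']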