How to use the regex.compile function in regex

To help you get started, we’ve selected a few examples of `regex.compile`, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github psf / black / tests / test_black.py View on Github external
def test_symlink_out_of_root_directory(self) -> None:
        """Check that a symlinked child outside ``root`` raises no ValueError.

        A mocked directory yields a single mocked child that reports itself
        as a symlink resolving to ``/a/b/c``. The generator returned by
        ``black.gen_python_files_in_dir`` is fully consumed; the test fails
        if a ``ValueError`` escapes from it.
        """
        path = MagicMock()
        root = THIS_DIR
        child = MagicMock()
        include = re.compile(black.DEFAULT_INCLUDES)
        exclude = re.compile(black.DEFAULT_EXCLUDES)
        report = black.Report()
        gitignore = PathSpec.from_lines("gitwildmatch", [])
        # `child` should behave like a symlink whose resolved path is clearly
        # outside of the `root` directory.
        path.iterdir.return_value = [child]
        child.resolve.return_value = Path("/a/b/c")
        child.as_posix.return_value = "/a/b/c"
        child.is_symlink.return_value = True
        try:
            # The call returns a generator; list() forces it to run to the end.
            list(
                black.gen_python_files_in_dir(
                    path, root, include, exclude, report, gitignore
                )
            )
        except ValueError as ve:
            # Report the function that was actually called (gen_*, not get_*).
            self.fail(f"`gen_python_files_in_dir()` failed: {ve}")
github osm-fr / osmose-backend / plugins / Josm_unnecessary.py View on Github external
def init(self, logger):
        """Register this plugin's issue classes and precompile its patterns.

        Delegates to the parent ``init`` first, then fills ``self.errors``
        for classes 9010001-9010003 and stores one compiled regular
        expression per ``self.re_*`` attribute.

        :param logger: passed through unchanged to the parent ``init``.
        """
        super().init(logger)
        tags = capture_tags = {} # noqa
        self.errors[9010001] = self.def_class(item = 9010, level = 3, tags = ["tag"], title = mapcss.tr(u'unnecessary tag'))
        self.errors[9010002] = self.def_class(item = 9010, level = 3, tags = ["tag"], title = mapcss.tr(u'{0} makes no sense', mapcss._tag_uncapture(capture_tags, u'{0.tag}')))
        self.errors[9010003] = self.def_class(item = 9010, level = 3, tags = ["tag"], title = mapcss.tr(u'descriptive name'))

        # The global inline flag (?i) must be the first thing in a pattern:
        # Python 3.6+ warns about it anywhere else and Python 3.11+ raises
        # re.error. Placing it before ^ keeps the matching behaviour the same.
        self.re_017d2728 = re.compile(r'(?i)^(restaurant)$')
        self.re_053f39fb = re.compile(r'(?i)^(house|casa|rumah|vivienda)$')
        self.re_0a40c79a = re.compile(r'(?i)^(Аптека|farmacia|pharmacy|pharmacie)$')
        self.re_106eed50 = re.compile(r'(?i)^(shop|boutique)$')
        self.re_10870b34 = re.compile(r'(?i)^(parc|park)$')
        self.re_14b2be23 = re.compile(r'(?i)^(lycée)$')
        self.re_1b9641aa = re.compile(r'(?i)^(post office)$')
        self.re_1ba0f749 = re.compile(r'(?i)^(pond)$')
        self.re_1e5aeb3d = re.compile(r'^(footway|pedestrian)$')
        self.re_2335ac87 = re.compile(r'(?i)^(house|casa|maison|rumah|vivienda)$')
        self.re_251cae80 = re.compile(r'(?i)^(parking|parkplatz)$')
        self.re_2b5b04af = re.compile(r'(?i)^(cemetery|cementerio|cimetière|cmentarz|friedhof)$')
        self.re_337f006b = re.compile(r'(?i)^(school|école|Школа)$')
        self.re_33dfa05b = re.compile(r'(?i)^(church|église|biserica)$')
        self.re_3ad2c525 = re.compile(r'(?i)^(école primaire)$')
        self.re_3ad9e1f5 = re.compile(r'^(motorway|motorway_link|trunk|trunk_link|primary|primary_link|secondary|secondary_link|tertiary|tertiary_link|unclassified|residential|service|living_street)$')
        self.re_47aaa0f7 = re.compile(r'^(yes|designated)$')
        self.re_480c7ba6 = re.compile(r'(?i)^(building|bangunan)$')
        self.re_480ecdbb = re.compile(r'(?i)^(école élémentaire)$')
        self.re_519078ac = re.compile(r'(?i)^(collège)$')
        self.re_56dafa68 = re.compile(r'(?i)^(hydrant)$')
        self.re_577104db = re.compile(r'(?i)^(kiosk)$')
        self.re_5b729ae4 = re.compile(r'(?i)^(toilets?)$')
        self.re_644827a8 = re.compile(r'(?i)^(jalan)$')
github ELS-RD / anonymisation / generate_trainset / match_patterns.py View on Github external
import regex
from acora import AcoraBuilder

from generate_trainset.match_acora import get_matches
from generate_trainset.modify_strings import org_types, get_first_last_name, remove_org_type

# Pattern for a company name: an organisation type from `org_types`
# (case-insensitive), a space, any number of French connector words
# (case-insensitive), then one or more tokens that each start with an
# upper-case letter, "&", "'" (or an opening parenthesis on the first token),
# optionally joined by further connectors or punctuation.
# Every fragment is a raw string so that the regex escapes (\(, \w, \-, \.,
# \)) reach the regex engine untouched instead of being treated as invalid
# Python string escapes, which emit a SyntaxWarning on Python 3.12+.
find_corp = regex.compile(
    r"(((?i)" + org_types + r") "
    r"((?i)"
    r"(de |le |la |les |pour |l'|et |en |des |d'|au |du )"
    r")*"
    r"((\()?[A-ZÉÈ&']+[\w\-'\.\)]*)"
    r"( (de |le |la |les |pour |l'|et |en |des |d'|au |du |\(|& |/ ?|\- ?)*"
    r"[A-ZÉÈ\-&']+[\w\-'\.\)]*"
    r")*"
    r")",
    flags=regex.VERSION1,
)


def get_company_names(text: str) -> list:
    """
    Extract company names from string text
    :param text: original text
    :return: a list of offsets
    """
github chuanconggao / extratools / extratools / strtools.py View on Github external
def tagstats(tags: Iterable[str], lines: Iterable[str], separator: str = None) -> Mapping[str, int]:
    tagmatches.tagstats.tokenizer = None if separator is None else re.compile(separator)

    return {
        tag: sum(matches)
        for tag, matches in tagmatches.compute(
            lines,
            {tag: [tag] for tag in tags}
        ).items()
github insightfinder / InsightAgent / servicenow / getlogs_servicenow.py View on Github external
to_send_data_dict = dict()
    to_send_data_dict['userName'] = if_config_vars['user_name']
    to_send_data_dict['licenseKey'] = if_config_vars['license_key']
    to_send_data_dict['projectName'] = if_config_vars['project_name']
    to_send_data_dict['instanceName'] = HOSTNAME
    to_send_data_dict['agentType'] = get_agent_type_from_project_type()
    if 'METRIC' in if_config_vars['project_type'] and 'sampling_interval' in if_config_vars:
        to_send_data_dict['samplingInterval'] = str(if_config_vars['sampling_interval'])
    logger.debug(to_send_data_dict)
    return to_send_data_dict


if __name__ == "__main__":
    # Patterns and constants defined only when the file is run as a script.
    # `regex` and `socket` are names brought into scope elsewhere in the file.

    # 'T' optionally followed by 'RUE', case-insensitive.
    TRUE = regex.compile(r"T(RUE)?", regex.IGNORECASE)
    # 'F' optionally followed by 'ALSE', case-insensitive.
    FALSE = regex.compile(r"F(ALSE)?", regex.IGNORECASE)
    # Runs of one or more of the same kind of character.
    SPACES = regex.compile(r"\s+")
    SLASHES = regex.compile(r"\/+")
    UNDERSCORE = regex.compile(r"\_+")
    COLONS = regex.compile(r"\:+")
    # Single literal punctuation characters.
    LEFT_BRACE = regex.compile(r"\[")
    RIGHT_BRACE = regex.compile(r"\]")
    PERIOD = regex.compile(r"\.")
    COMMA = regex.compile(r"\,")
    # Any single character that is not an ASCII letter or digit.
    NON_ALNUM = regex.compile(r"[^a-zA-Z0-9]")
    # Either a signed four-digit offset with an optional colon (e.g. +0100,
    # -05:30) or three whitespace-separated words.
    # NOTE(review): the name suggests strptime's %z directive - confirm.
    PCT_z_FMT = regex.compile(r"[\+\-][0-9]{2}[\:]?[0-9]{2}|\w+\s+\w+\s+\w+")
    # Three or four upper-case ASCII letters.
    # NOTE(review): the name suggests strptime's %Z directive - confirm.
    PCT_Z_FMT = regex.compile(r"[A-Z]{3,4}")
    # Shortest possible text between '{' and '}', captured as group 1.
    FORMAT_STR = regex.compile(r"{(.*?)}")
    # Local host name truncated at the first '.'.
    HOSTNAME = socket.gethostname().partition('.')[0]
    # Three strptime-style timestamp formats plus the literal marker 'epoch'.
    ISO8601 = ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S', '%Y%m%dT%H%M%SZ', 'epoch']
    JSON_LEVEL_DELIM = '.'
    # Uncompiled regex source: a comma or a tab.
    CSV_DELIM = r",|\t"
github norbusan / calibre-debian / src / calibre / ebooks / oeb / polish / spell.py View on Github external
def __init__(self):
        """Compile the regular expressions stored on this instance.

        Four patterns are built with the ``regex`` module and kept as
        ``sanitize_invisible_pat``, ``split_pat``, ``digit_pat`` and
        ``fr_elision_pat``.
        """
        import regex

        # Flag combinations shared by several of the patterns below.
        unicode_v1 = regex.VERSION1 | regex.UNICODE
        word_aware = unicode_v1 | regex.WORD

        # Soft hyphens, zero-width spaces and control codes (to be removed)
        self.sanitize_invisible_pat = regex.compile(
            r'[\u00ad\u200b\u200c\u200d\ufeff\0-\x08\x0b\x0c\x0e-\x1f\x7f]',
            flags=unicode_v1)
        self.split_pat = regex.compile(r'\W+', flags=word_aware | regex.FULLCASE)
        self.digit_pat = regex.compile(r'^\d+$', flags=word_aware)
        # French elided prefixes (l', d', qu', ...): words carrying them are
        # reduced to the stem word so each word shows up only once in the
        # word list
        self.fr_elision_pat = regex.compile(
            u"^(?:l|d|m|t|s|j|c|ç|lorsqu|puisqu|quoiqu|qu)['’]",
            flags=unicode_v1 | regex.IGNORECASE)
github InQuest / python-iocextract / iocextract.py View on Github external
""" + SEPARATOR_DEFANGS + r"""
            )*

            # Domain/path characters.
            \w
            \S+?

            # CISCO ESA style defangs followed by domain/path characters.
            (?:\x20[\/\.][^\.\/\s]\S*?)*
        )
    """ + END_PUNCTUATION + r"""
        (?=\s|$)
    """, re.IGNORECASE | re.VERBOSE | re.UNICODE)

# Get some obfuscated urls, main anchor is brackets around the period.
BRACKET_URL_RE = re.compile(r"""
        \b
        (
            [\.\:\/\\\w\[\]\(\)-]+
            (?:
                \x20?
                [\(\[]
                \x20?
                \.
                \x20?
                [\]\)]
                \x20?
                \S*?
            )+
        )
    """ + END_PUNCTUATION + r"""
        (?=\s|$)
github ryanInf / Time-NLPY / TimeUnit.py View on Github external
rule = "明(?!年)"
        pattern = re.compile(rule)
        match = pattern.search(self.exp_time)
        if match is not None:
            flag[2] = True
            cur = cur.shift(days=1)

        rule = "(?
github osm-fr / osmose-backend / plugins / Josm_numeric.py View on Github external
self.errors[9006002] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'{0} value with + sign', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006003] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'{0} should be an integer value between -5 and 5', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006004] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'{0} should have numbers only with optional .5 increments', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006008] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'{0} must be a numeric value', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006009] = {'item': 9006, 'level': 2, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'{0} must be a positive integer number', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006010] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'unusual value of {0}', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006011] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'{0} must be a numeric value, in meters and without units', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006013] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'voltage should be in volts with no units/delimiter/spaces')}
        self.errors[9006017] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'unusual value of {0}: use . instead of , as decimal separator', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006018] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'unusual value of {0}: meters is default; point is decimal separator; if units, put space then unit', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006019] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'unusual value of {0}: tonne is default; point is decimal separator; if units, put space then unit', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006020] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'unusual value of {0}: kilometers is default; point is decimal separator; if units, put space then unit', mapcss._tag_uncapture(capture_tags, u'{0.key}'))}
        self.errors[9006021] = {'item': 9006, 'level': 3, 'tag': ["tag", "value"], 'desc': mapcss.tr(u'Unnecessary amount of decimal places')}

        self.re_035d45f0 = re.compile(r'^(([0-9]+\.?[0-9]*( (t|kg|lbs))?)|([0-9]+\'[0-9]+\.?[0-9]*\"))$')
        self.re_066203d3 = re.compile(r'^[0-9]+$')
        self.re_08f211f3 = re.compile(r'^([0-9][0-9]?|[0-9][0-9]:[0-5][0-9](:[0-9][0-9])?)$')
        self.re_0ae2edfd = re.compile(r'^(signals|none|unposted|variable|walk|[1-9][0-9]*( [a-z]+)?|[A-Z][A-Z]:(urban|rural|living_street|motorway))$')
        self.re_0b0f0f56 = re.compile(r'^0$|^(-|\+)?[1-5]$')
        self.re_18424cc6 = re.compile(r'^[0-9]+,[0-9][0-9]?( (m|ft))?$')
        self.re_1d428b19 = re.compile(r'^(([0-9]+\.?[0-9]*( (m|ft))?)|([0-9]+\'[0-9]+\.?[0-9]*\"))$')
        self.re_1e934345 = re.compile(r'^[0-9]+,[0-9][0-9]?( (t|kg|lbs))?$')
        self.re_288e587a = re.compile(r'^\+\d')
        self.re_29d73dcf = re.compile(r'^(([1-9][0-9]*(\.[0-9]+)?( (m|ft))?)|([0-9]+\'(([0-9]|10|11)(\.[0-9]*)?\")?)|none|default|below_default)$')
        self.re_2a784076 = re.compile(r'^(([0-9]|[1-9][0-9]*)(\.5)?)$')
        self.re_2b84c9ab = re.compile(r'^[0-9]+,[0-9][0-9]?$')
        self.re_43c55ce5 = re.compile(r'(.*[A-Za-z].*)|.*,.*|.*( ).*')
        self.re_45b46d60 = re.compile(r'^-?[0-9]+(\.[0-9]+)?$')
        self.re_45e73e1b = re.compile(r'^(up|down|-?([0-9]+?(\.[1-9]%)?|100)[%°]?)$')
        self.re_49888e30 = re.compile(r'^(([0-9]+\.?[0-9]*( [a-z]+)?)|([0-9]+\'([0-9]+\.?[0-9]*\")?))$')
        self.re_4b9c2b6a = re.compile(r'^(([0-9]+\.?[0-9]*( (m|km|mi|nmi))?)|([0-9]+\'[0-9]+\.?[0-9]*\"))$')
        self.re_4d44d8e0 = re.compile(r'^(0|[1-9][0-9]*(\.[0-9]+)?)( (kHz|MHz|GHz|THz))?$')