How to use regex - 10 common examples

To help you get started, we’ve selected a few regex examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github psf / black / tests / test_black.py View on Github external
def test_symlink_out_of_root_directory(self) -> None:
        """Check that a symlink resolving outside the root raises no ValueError.

        A mocked directory yields a single mocked child that reports itself as
        a symlink whose resolved path is ``/a/b/c``.  Walking that directory
        with ``black.gen_python_files_in_dir`` must not raise ``ValueError``;
        if it does, the test is failed explicitly with the error text.
        """
        path = MagicMock()
        root = THIS_DIR
        child = MagicMock()
        include = re.compile(black.DEFAULT_INCLUDES)
        exclude = re.compile(black.DEFAULT_EXCLUDES)
        report = black.Report()
        gitignore = PathSpec.from_lines("gitwildmatch", [])
        # `child` should behave like a symlink whose resolved path is clearly
        # outside of the `root` directory.
        path.iterdir.return_value = [child]
        child.resolve.return_value = Path("/a/b/c")
        child.as_posix.return_value = "/a/b/c"
        child.is_symlink.return_value = True
        try:
            # list() consumes the returned iterable so that any error raised
            # while walking the directory surfaces inside this try block.
            list(
                black.gen_python_files_in_dir(
                    path, root, include, exclude, report, gitignore
                )
            )
        except ValueError as ve:
            # Name the function that was actually called above.
            self.fail(f"`gen_python_files_in_dir()` failed: {ve}")
github intentionet / netconan / tests / unit / test_ip_anonymization.py View on Github external
# Make sure all addresses to be checked are in ip_tree and generate reference mapping
    for ip_addr_raw in ip_v4_list:
        ip_addr = anonymizer_v4.make_addr(ip_addr_raw)
        ip_int = int(ip_addr)
        ip_int_anon = anonymizer_v4.anonymize(ip_int)
        ip_addr_anon = str(ipaddress.IPv4Address(ip_int_anon))
        ip_mapping[str(ip_addr)] = ip_addr_anon

    filename = str(tmpdir.mkdir("test").join("test_dump_iptree.txt"))
    with open(filename, 'w') as f_tmp:
        anonymizer_v4.dump_to_file(f_tmp)

    with open(filename, 'r') as f_tmp:
        # Build mapping dict from the output of the ip_tree dump
        for line in f_tmp.readlines():
            m = regex.match(r'\s*(\d+\.\d+.\d+.\d+)\s+(\d+\.\d+.\d+.\d+)\s*', line)
            ip_addr = m.group(1)
            ip_addr_anon = m.group(2)
            ip_mapping_from_dump[ip_addr] = ip_addr_anon

    for ip_addr in ip_mapping:
        # Confirm anon addresses from ip_tree dump match anon addresses from _convert_to_anon_ip
        assert(ip_mapping[ip_addr] == ip_mapping_from_dump[ip_addr])
github facelessuser / Rummage / tests / test_rumcore.py View on Github external
rc._regex_pattern(
                r"test",
                rc.DOTALL | rc.IGNORECASE | rc.MULTILINE | rc.WORD |
                rc.BESTMATCH | rc.ENHANCEMATCH | rc.REVERSE | rc.FULLCASE | rc.POSIX
            ).flags,
            regex.V0 | regex.ASCII | regex.DOTALL | regex.IGNORECASE | regex.MULTILINE |
            regex.WORD | regex.ENHANCEMATCH | regex.BESTMATCH | regex.REVERSE | regex.FULLCASE |
            regex.POSIX
        )
        # With rc.UNICODE and rc.VERSION1 requested alongside the other rc
        # flags, the compiled pattern's `.flags` is expected to equal
        # regex.V1 | regex.UNICODE combined with the matching regex flags.
        self.assertEqual(
            rc._regex_pattern(
                r"test",
                rc.UNICODE | rc.DOTALL | rc.IGNORECASE | rc.MULTILINE | rc.FULLCASE |
                rc.WORD | rc.BESTMATCH | rc.ENHANCEMATCH | rc.REVERSE | rc.VERSION1 | rc.POSIX
            ).flags,
            regex.V1 | regex.UNICODE | regex.DOTALL | regex.IGNORECASE | regex.MULTILINE |
            regex.WORD | regex.ENHANCEMATCH | regex.BESTMATCH | regex.REVERSE | regex.FULLCASE |
            regex.POSIX
        )
github facelessuser / backrefs / tests / test_bregex.py View on Github external
def test_infinite_loop_catch(self):
        """Verify that looping patterns are rejected with `LoopException`."""

        # Each entry holds the positional arguments for one
        # `bregex.compile_search` call that must raise.
        looping_calls = (
            (r'(?-x:(?x))', regex.V0 | regex.VERBOSE),
            (r'(?V1)(?V0)',),
        )

        for call_args in looping_calls:
            with pytest.raises(_bregex_parse.LoopException):
                bregex.compile_search(*call_args)
github larryhastings / gilectomy / Lib / dos-8x3 / test_reg.py View on Github external
from test_support import verbose
import regex
from regex_syntax import *

# Exercise the legacy `regex` module: matching, searching, syntax switching
# and error reporting.  Results are printed with a label describing the
# outcome the script expects.

# Pattern shared by the first match/search/compile checks.
re = 'a+b+c+'
# Labelled as an expected non-match -- presumably because match() only tries
# at the start of the string; confirm against the regex module docs.
print 'no match:', regex.match(re, 'hello aaaabcccc world')
print 'successful search:', regex.search(re, 'hello aaaabcccc world')
# Prefixing the pattern with '\(' is expected to make compile() raise
# regex.error; the else branch reports the case where it does not.
try:
    cre = regex.compile('\(' + re)
except regex.error:
    print 'caught expected exception'
else:
    print 'expected regex.error not raised'

# Run the same search before, during and after switching to RE_SYNTAX_AWK.
# set_syntax() returns the previous syntax, which is restored afterwards.
print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb')
prev = regex.set_syntax(RE_SYNTAX_AWK)
print 'successful awk syntax:', regex.search('(a+)|(b+)', 'cdb')
regex.set_syntax(prev)
print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb')

# Two '\(...\)' groups of digits separated by optional spaces.
re = '\([0-9]+\) *\([0-9]+\)'
print 'matching with group names and compile()'
cre = regex.compile(re)
print cre.match('801 999')
# Looking up the group by the name 'one' is expected to raise regex.error
# here -- presumably because compile() defines no symbolic group names.
try:
    print cre.group('one')
except regex.error:
    print 'caught expected exception'
else:
    print 'expected regex.error not raised'

print 'matching with group names and symcomp()'
github osm-fr / osmose-backend / plugins / Josm_unnecessary.py View on Github external
def init(self, logger):
        """Register the error classes and compile the regexes used by the rules.

        Calls the parent ``init`` with *logger*, fills ``self.errors`` with the
        classes 9010001-9010003 and stores the compiled patterns as
        ``self.re_<hash>`` attributes.

        The inline ``(?i)`` flag is written at the very start of each pattern:
        Python 3.11+ rejects a global inline flag placed after ``^`` with
        "global flags not at the start of the expression".  The flag applies
        to the whole pattern either way, so matching is unchanged.
        """
        super().init(logger)
        tags = capture_tags = {} # noqa
        self.errors[9010001] = self.def_class(item = 9010, level = 3, tags = ["tag"], title = mapcss.tr(u'unnecessary tag'))
        self.errors[9010002] = self.def_class(item = 9010, level = 3, tags = ["tag"], title = mapcss.tr(u'{0} makes no sense', mapcss._tag_uncapture(capture_tags, u'{0.tag}')))
        self.errors[9010003] = self.def_class(item = 9010, level = 3, tags = ["tag"], title = mapcss.tr(u'descriptive name'))

        self.re_017d2728 = re.compile(r'(?i)^(restaurant)$')
        self.re_053f39fb = re.compile(r'(?i)^(house|casa|rumah|vivienda)$')
        self.re_0a40c79a = re.compile(r'(?i)^(Аптека|farmacia|pharmacy|pharmacie)$')
        self.re_106eed50 = re.compile(r'(?i)^(shop|boutique)$')
        self.re_10870b34 = re.compile(r'(?i)^(parc|park)$')
        self.re_14b2be23 = re.compile(r'(?i)^(lycée)$')
        self.re_1b9641aa = re.compile(r'(?i)^(post office)$')
        self.re_1ba0f749 = re.compile(r'(?i)^(pond)$')
        self.re_1e5aeb3d = re.compile(r'^(footway|pedestrian)$')
        self.re_2335ac87 = re.compile(r'(?i)^(house|casa|maison|rumah|vivienda)$')
        self.re_251cae80 = re.compile(r'(?i)^(parking|parkplatz)$')
        self.re_2b5b04af = re.compile(r'(?i)^(cemetery|cementerio|cimetière|cmentarz|friedhof)$')
        self.re_337f006b = re.compile(r'(?i)^(school|école|Школа)$')
        self.re_33dfa05b = re.compile(r'(?i)^(church|église|biserica)$')
        self.re_3ad2c525 = re.compile(r'(?i)^(école primaire)$')
        self.re_3ad9e1f5 = re.compile(r'^(motorway|motorway_link|trunk|trunk_link|primary|primary_link|secondary|secondary_link|tertiary|tertiary_link|unclassified|residential|service|living_street)$')
        self.re_47aaa0f7 = re.compile(r'^(yes|designated)$')
        self.re_480c7ba6 = re.compile(r'(?i)^(building|bangunan)$')
        self.re_480ecdbb = re.compile(r'(?i)^(école élémentaire)$')
        self.re_519078ac = re.compile(r'(?i)^(collège)$')
        self.re_56dafa68 = re.compile(r'(?i)^(hydrant)$')
        self.re_577104db = re.compile(r'(?i)^(kiosk)$')
        self.re_5b729ae4 = re.compile(r'(?i)^(toilets?)$')
        self.re_644827a8 = re.compile(r'(?i)^(jalan)$')
github andychase / reparse / reparse / util.py View on Github external
def separate_string(string):
    """
    >>> separate_string("test <2>")
    (['test ', ''], ['2'])
    """
    string_list = regex.split(r'<(?![!=])', regex.sub(r'>', '<', string))
    return string_list[::2], string_list[1::2]  # Returns even and odd elements
github microsoft / Recognizers-Text / Python / libraries / recognizers-date-time / recognizers_date_time / date_time / base_duration.py View on Github external
def parse_number_space_unit(self, source: str) -> DateTimeResolutionResult:
        """Parse a duration written as a number followed by a unit.

        Args:
            source: The text to parse.

        Returns:
            A fresh, unfilled ``DateTimeResolutionResult`` when the text does
            not contain exactly one cardinal number or when the unit is not
            in ``self.config.unit_map``.

        NOTE(review): the visible code sets ``timex`` and ``future_value`` but
        shows no return statement on the success path -- confirm against the
        full method.
        """
        result = DateTimeResolutionResult()

        # if there are spaces between number and unit
        ers = self.config.cardinal_extractor.extract(source)
        # Only handle text with exactly one extracted number.
        if len(ers) != 1:
            return result

        # Defaults used when the unit pattern does not match below.
        suffix = source
        source_unit = ''
        er = ers[0]
        pr = self.config.number_parser.parse(er)
        # Text after the parsed number, trimmed and lower-cased.
        no_num = source[pr.start + pr.length:].strip().lower()
        match = regex.search(self.config.followed_unit, no_num)

        if match is not None:
            suffix = RegExpUtility.get_group(match, Constants.SUFFIX_GROUP_NAME)
            source_unit = RegExpUtility.get_group(match, Constants.UNIT)

        # Unknown (or missing) unit: give up with the unfilled result.
        if source_unit not in self.config.unit_map:
            return result

        # Add whatever the suffix contributes to the parsed number.
        num = float(pr.value) + self.parse_number_with_unit_and_suffix(suffix)
        unit = self.config.unit_map[source_unit]

        num = QueryProcessor.float_or_int(num)
        # A 'T' is inserted after 'P' for units shorter than a day; only the
        # first character of the mapped unit is used in the timex string.
        is_time = 'T' if self.is_less_than_day(unit) else ''
        result.timex = f'P{is_time}{num}{unit[0]}'
        # Scale the number by the per-unit value from unit_value_map.
        result.future_value = QueryProcessor.float_or_int(
            num * self.config.unit_value_map[source_unit])
github delph-in / pydelphin / delphin / repp.py View on Github external
directory (str, optional): the directory in which to search
                for submodules
        """
        # Expand a leading `~` and require the config to be an existing file.
        path = Path(path).expanduser()
        if not path.is_file():
            raise REPPError(f'REPP config file not found: {path!s}')
        # Directory that contains the config file.
        confdir = path.parent

        # TODO: can TDL parsing be repurposed for this variant?
        conf = path.read_text(encoding='utf-8')
        # Drop everything from `;` to the end of each line (presumably
        # comments -- confirm), then join the lines with spaces so a setting
        # can be matched across line breaks.
        conf = re.sub(r';.*', '', conf).replace('\n', ' ')
        # Each setting has the form `name := value .`; the capture group holds
        # the value (one or more whitespace-separated names, or a single name
        # for the tokenizer).
        m = re.search(
            r'repp-modules\s*:=\s*((?:[-\w]+\s+)*[-\w]+)\s*\.', conf)
        t = re.search(
            r'repp-tokenizer\s*:=\s*([-\w]+)\s*\.', conf)
        a = re.search(
            r'repp-calls\s*:=\s*((?:[-\w]+\s+)*[-\w]+)\s*\.', conf)
        # f = re.search(
        #     r'format\s*:=\s*(\w+)\s*\.', conf)
        # The directory value is free text up to a final `.` at the end of the
        # (joined) config text.
        d = re.search(
            r'repp-directory\s*:=\s*(.*)\.\s*$', conf)

        # repp-modules and repp-tokenizer are mandatory settings.
        if m is None:
            raise REPPError('repp-modules option must be set')
        if t is None:
            raise REPPError('repp-tokenizer option must be set')

        # mods = m.group(1).split()
        tok = t.group(1).strip()
        # repp-calls is optional; None means it was not given.
        active = a.group(1).split() if a is not None else None
        # fmt = f.group(1).strip() if f is not None else None