How to use the regex.search function in Python's third-party regex module

To help you get started, we’ve selected a few regex examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github microsoft / Recognizers-Text / Python / libraries / recognizers-date-time / recognizers_date_time / date_time / base_duration.py View on Github external
def parse_number_space_unit(self, source: str) -> DateTimeResolutionResult:
        """Parse a duration phrase written as "<number> <unit>" with a space.

        Expects exactly one cardinal number in *source*; anything else yields
        an empty (unresolved) result. On success, builds an ISO-8601-style
        duration timex (``P{num}{unit}``, with a ``T`` designator for
        sub-day units) and a numeric future value scaled by the unit.
        """
        result = DateTimeResolutionResult()

        # if there are spaces between number and unit
        ers = self.config.cardinal_extractor.extract(source)
        # Exactly one number must be present; otherwise bail out unresolved.
        if len(ers) != 1:
            return result

        suffix = source
        source_unit = ''
        er = ers[0]
        pr = self.config.number_parser.parse(er)
        # Text following the parsed number; expected to start with the unit.
        no_num = source[pr.start + pr.length:].strip().lower()
        match = regex.search(self.config.followed_unit, no_num)

        if match is not None:
            suffix = RegExpUtility.get_group(match, Constants.SUFFIX_GROUP_NAME)
            source_unit = RegExpUtility.get_group(match, Constants.UNIT)

        # Unknown unit word: return the still-empty result.
        if source_unit not in self.config.unit_map:
            return result

        num = float(pr.value) + self.parse_number_with_unit_and_suffix(suffix)
        unit = self.config.unit_map[source_unit]

        num = QueryProcessor.float_or_int(num)
        # Sub-day units take the ISO 'T' time designator (e.g. 'PT3H' vs 'P3D').
        is_time = 'T' if self.is_less_than_day(unit) else ''
        result.timex = f'P{is_time}{num}{unit[0]}'
        result.future_value = QueryProcessor.float_or_int(
            num * self.config.unit_value_map[source_unit])
github delph-in / pydelphin / delphin / repp.py View on Github external
directory (str, optional): the directory in which to search
                for submodules
        """
        path = Path(path).expanduser()
        if not path.is_file():
            raise REPPError(f'REPP config file not found: {path!s}')
        confdir = path.parent

        # TODO: can TDL parsing be repurposed for this variant?
        conf = path.read_text(encoding='utf-8')
        conf = re.sub(r';.*', '', conf).replace('\n', ' ')
        m = re.search(
            r'repp-modules\s*:=\s*((?:[-\w]+\s+)*[-\w]+)\s*\.', conf)
        t = re.search(
            r'repp-tokenizer\s*:=\s*([-\w]+)\s*\.', conf)
        a = re.search(
            r'repp-calls\s*:=\s*((?:[-\w]+\s+)*[-\w]+)\s*\.', conf)
        # f = re.search(
        #     r'format\s*:=\s*(\w+)\s*\.', conf)
        d = re.search(
            r'repp-directory\s*:=\s*(.*)\.\s*$', conf)

        if m is None:
            raise REPPError('repp-modules option must be set')
        if t is None:
            raise REPPError('repp-tokenizer option must be set')

        # mods = m.group(1).split()
        tok = t.group(1).strip()
        active = a.group(1).split() if a is not None else None
        # fmt = f.group(1).strip() if f is not None else None
github neulab / cmu-ner / utils / segnerfts / segnerfts.py View on Github external
'deu': lambda ws: [any([
        re.search('[rR]epublik$', w),
        re.search('land$', w),
        re.search('stan$', w),
        re.search('[sS]tadt$', w),
        re.search('heim$', w),
        re.search('dorf$', w),
        re.search('hausen$', w),
        re.search('burg$', w),
        re.search('berg$', w),
        re.search('gau$', w),
        re.search('[pP]rovinz$', w)
    ]) for w in ws],
    'amh': lambda ws: [w in {
github sebix / python-textile / textile / core.py View on Github external
out = []

        for line in text:
            # the line is just whitespace, add it to the output, and move on
            if not line.strip():
                if not eat_whitespace:
                    out.append(line)
                continue

            eat_whitespace = False

            pattern = (r'^(?P{0})(?P{1}{2})\.(?P\.?)'
                    r'(?::(?P<cite>\S+))? (?P<content>.*)$'.format(tre,
                        align_re_s, cls_re_s))
            match = re.search(pattern, line, flags=re.S | re.U)
            # tag specified on this line.
            if match:
                # if we had a previous extended tag but not this time, close up
                # the tag
                if ext and out:
                    # it's out[-2] because the last element in out is the
                    # whitespace that preceded this line
                    if not escaped:
                        content = encode_html(out[-2], quotes=True)
                        escaped = True
                    else:
                        content = out[-2]

                    if not multiline_para:
                        content = generate_tag(block.inner_tag, content,
                                block.inner_atts)
github microsoft / Recognizers-Text / Python / libraries / recognizers-date-time / recognizers_date_time / date_time / french / time_parser_config.py View on Github external
def adjust_by_prefix(self, prefix: str, adjust: AdjustParams) -> None:
        """Apply a French minute-offset prefix to *adjust*.minute.

        Fixed fraction words map directly: 'demie' -> 30, 'quart'/'un quart'
        -> 15, 'trois quarts' -> 45. Otherwise the ``less_than_one_hour``
        regex is tried, which captures an explicit minute amount either as
        digits ('deltamin' group) or as a number word ('deltaminnum' group,
        looked up in ``self.numbers``). A prefix ending in 'à' negates the
        offset; a resulting negative minute is wrapped back into [0, 60).
        """
        delta_min = 0
        trimmed_prefix = prefix.strip().lower()

        # Fixed fraction-of-hour words first. Note 'trois quarts' ends in
        # 'quarts' (with 's'), so it cannot be shadowed by the 'quart' branch.
        if trimmed_prefix.endswith('demie'):
            delta_min = 30
        elif trimmed_prefix.endswith('un quart') or trimmed_prefix.endswith('quart'):
            delta_min = 15
        elif trimmed_prefix.endswith('trois quarts'):
            delta_min = 45
        else:
            # Fall back to the regex for an explicit minute amount.
            match = regex.search(self.less_than_one_hour, trimmed_prefix)
            if match:
                min_str = RegExpUtility.get_group(match, 'deltamin')
                if min_str:
                    delta_min = int(min_str)
                else:
                    # Minutes spelled out as a word; map it via the
                    # language's number table.
                    min_str = RegExpUtility.get_group(
                        match, 'deltaminnum').lower()
                    delta_min = self.numbers.get(min_str)

        # A prefix ending in 'à' counts backwards from the hour.
        if trimmed_prefix.endswith('à'):
            delta_min = delta_min * -1

        adjust.minute += delta_min

        # Wrap a negative minute back into the valid [0, 60) range.
        # (The scraped original had the HTML entity '&lt;' here, which is a
        # syntax error in Python; decoded to '<'.)
        if adjust.minute < 0:
            adjust.minute += 60
github abuccts / wikt2pron / pywiktionary / IPA / cmn_pron.py View on Github external
return ""
    text = re.sub(
        unicodedata.normalize("NFD", "ü"),
        "ü",
        re.sub(
            unicodedata.normalize("NFD", "ê"),
            "ê",
            unicodedata.normalize("NFD", text)
        )
    )
    if re.search(
            "[aeiouêü]" + tones + "[aeiou]?[aeiouêü]" + tones + "",
            text.lower()):
        return ""
    text = text.lower()
    if not re.search(tones, text) and re.match("[1-5]", text):
        return re.sub("(\d)(\p{Ll})", "\1 \2", text)
    if re.search("[一不,.?]", text):
        text = re.sub(
            "([一不])$",
            lambda x: " yī" if x.group() == "一" else " bù",
            text
        )
        text = re.sub("([一不])", r" \1 ", text)
        text = re.sub("([,.?])", r" \1 ", text)
        text = re.sub(" +", " ", text)
        text = re.sub("^ ", "", text)
        text = re.sub(" $", "", text)
        text = re.sub("\. \. \.", "...", text)
    text = re.sub("['\-]", " ", text)
    text = re.sub(
        "([aeiouêü]" + tones + "?n?g?r?)([bpmfdtnlgkhjqxzcsywr]h?)",
github neulab / cmu-ner / utils / segnerfts_2.py View on Github external
'deu': lambda ws: [any([
        re.search('[rR]epublik$', w),
        re.search('land$', w),
        re.search('stan$', w),
        re.search('[sS]tadt$', w),
        re.search('heim$', w),
        re.search('dorf$', w),
        re.search('hausen$', w),
        re.search('burg$', w),
        re.search('berg$', w),
        re.search('gau$', w),
        re.search('[pP]rovinz$', w)
    ]) for w in ws],
    'amh': lambda ws: [w in {
github parklab / MosaicForecast / Phase.py View on Github external
elif int(pileupcolumn.pos)==int(pos)-1 and str(querybase)==str(minor_allele): #and pileupread.alignment.mapping_quality&gt;=10:
								minor_ids.append(pileupread.alignment.query_name)
								minor_num+=1
						except:
							continue
#			elif len(major_allele)&gt;1 and len(minor_allele)==1:
			elif len(major_allele)&gt; len(minor_allele):
				state="DEL"
				#context1[name]=reference[chrom][int(pos)-2:int(pos)+1]
				context1=reference[chrom][max(1,int(pos)-11):min(int(pos)+1,int(chr_sizes[chrom]))]
				context2=reference[chrom][max(1,int(pos)-1):min(int(pos)+10,int(chr_sizes[chrom]))]
				context=reference[chrom][max(1,int(pos)-11):min(int(pos)+10,int(chr_sizes[chrom]))]
	
				if_homopolymer="No"
				for item in homopolymers:
					if re.search(str(item), str(context1)) or re.search(str(item),str(context2)):
						if_homopolymer="Yes"
						break
				if if_homopolymer=="No":
					for read in a.fetch(chrom,start-length, end+length):
						try:
							#if read.cigar[0][0]==4 and read.cigar[0][1]&lt;=length and read.reference_start&gt;= pos-1 and read.reference_start-read.query_alignment_start&lt; pos-1:
							if (read.cigar[0][0]==4 or read.cigar[0][0]==5) and read.reference_start&gt;= pos-2 and read.reference_start-read.query_alignment_start&lt; pos-1:
								query_clipped = read.query_sequence[:read.query_alignment_start][:length]
								if re.search(query_clipped, major_allele):
									minor_ids.append(read.query_name)
									minor_num+=1
							#elif read.cigar[-1][0]==4 and read.cigar[-1][1]&lt;=length and read.reference_end &lt;= pos-1 and (read.reference_end + read.query_length-read.query_alignment_end&gt;pos-1):
							elif (read.cigar[-1][0]==4 or read.cigar[-1][0]==5) and read.reference_end &lt;= pos and (read.reference_end + read.query_length-read.query_alignment_end&gt;pos-1):
								query_clipped = read.query_sequence[read.query_alignment_end:][-length:]
								if re.search(query_clipped, major_allele):
									minor_ids.append(read.query_name)
github andreikop / enki / enki / plugins / preview / approx_match.py View on Github external
def regexFuzzySearch(searchText, targetText):
    """Fuzzily locate *searchText* inside *targetText*.

    *searchText* is treated as literal text, NOT as a regular expression:
    any regex metacharacters in it are escaped before matching. The
    regex_ library's fuzzy-matching syntax is then used to find the
    closest approximate occurrence in *targetText*.
    """
    # Per the regex_ manual:
    #
    # - ``(item){e}`` performs a fuzzy match of ``item``, allowing
    #   insertions, deletions, or substitutions.
    # - The BESTMATCH flag returns the best possible match rather than
    #   the first one found.
    pattern = '(' + regex.escape(searchText) + '){e}'
    return regex.search(pattern, targetText, regex.BESTMATCH)
github microsoft / Recognizers-Text / Python / libraries / recognizers-date-time / recognizers_date_time / date_time / base_datetime.py View on Github external
if not parse_result1.value or not parse_result2.value:
            return result

        future_date: datetime = parse_result1.value.future_value
        past_date: datetime = parse_result1.value.past_value
        time: datetime = parse_result2.value.future_value

        hour = time.hour
        minute = time.minute
        second = time.second

        # handle morning, afternoon
        if regex.search(self.config.pm_time_regex, source) and hour < 12:
            hour += 12
        elif regex.search(self.config.am_time_regex, source) and hour >= 12:
            hour -= 12

        time_str = parse_result2.timex_str
        if time_str.endswith(Constants.AM_PM_GROUP_NAME):
            time_str = time_str[:-4]

        time_str = f'T{hour:02d}{time_str[3:]}'
        result.timex = parse_result1.timex_str + time_str

        val = parse_result2.value

        has_am_pm = regex.search(self.config.pm_time_regex, source) and regex.search(
            self.config.am_time_regex, source)
        if hour &lt;= 12 and not has_am_pm and val.comment:
            result.comment = Constants.AM_PM_GROUP_NAME