How to use the regex.split function in regex

To help you get started, we’ve selected a few regex.split examples, based on popular ways it is used in public projects.

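A quick primer before the project code: regex.split works like re.split from the standard library, but comes from the third-party regex package. The examples below lean heavily on one idiom, splitting with a capture group so the delimiters are kept, which also produces empty strings that then get filtered out. A minimal sketch:

import regex

# A plain split consumes the pattern.
print(regex.split(r'[ .]', 'alpha.beta gamma'))   # ['alpha', 'beta', 'gamma']

# Wrapping the pattern in a capture group keeps each delimiter
# in the result list, interleaved with the fields.
print(regex.split(r'(\d+)', 'ab12cd'))            # ['ab', '12', 'cd']

# Splitting on a single-character group yields empty strings between
# matches, which is why several projects below wrap the call in filter(None, ...).
print(regex.split('(.)', 'abc'))                  # ['', 'a', '', 'b', '', 'c', '']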

github cbuijs / unbound-dns-filter / unbound-dns-filter.py
        elif is_ip6.search(eip):
            prefix = '128'

    if prefix:
        prefix = int(prefix)
        if is_ip4.search(eip):
            if prefix in (8, 16, 24, 32):
                revip = '.'.join(eip.split('.')[0:int(prefix / 8)][::-1]) + '.in-addr.arpa.'
            elif delimiter:
                octs = eip.split('.')[::-1]
                octs[3 - int(prefix / 8)] = octs[3 - int(prefix / 8)] + delimiter + str(prefix)
                revip = '.'.join(octs[3 - int(prefix / 8):]) + '.in-addr.arpa.'

        elif is_ip6.search(eip):
            if prefix in range(4, 129, 4):
                revip = '.'.join(filter(None, regex.split('(.)', regex.sub(':', '', eip))))[0:int(prefix / 4) * 2][::-1].strip('.') + '.ip6.arpa.'
            elif delimiter:
                nibs = list(filter(None, regex.split('(.)', regex.sub(':', '', eip))))[::-1]
                nibs[31 - int(prefix / 4)] = nibs[31 - int(prefix / 4)] + delimiter + str(prefix)
                revip = '.'.join(nibs[31 - int(prefix / 4):]) + '.ip6.arpa.'

    return revip
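The regex.split('(.)', ...) calls above split the compacted address into single characters, and filter(None, ...) drops the empty strings the capture-group split interleaves. A minimal sketch of the same step, with a made-up pre-expanded address:

import regex

eip = '20010db8000000000000000000000001'  # expanded 2001:db8::1, colons removed
nibbles = list(filter(None, regex.split('(.)', eip)))
revip = '.'.join(nibbles[::-1]) + '.ip6.arpa.'
# '1.0.0.0. ... .8.b.d.0.1.0.0.2.ip6.arpa.'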
github scrapinghub / dateparser / dateparser / freshness_date_parser.py
def _are_all_words_units(self, date_string):
        skip = [_UNITS,
                r'ago|in|\d+',
                r':|[ap]m']

        date_string = re.sub(r'\s+', ' ', date_string.strip())

        words = filter(None, re.split(r'\W', date_string))
        words = filter(lambda x: not re.match(r'%s' % '|'.join(skip), x), words)
        return not list(words)
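The approach here is split-then-prune: re.split(r'\W', ...) breaks the string on every non-word character (producing empty strings that the first filter drops), then a second filter removes anything matching the skip patterns. A standalone sketch, with a hypothetical stand-in for dateparser's _UNITS regex:

import re

_UNITS = r'year|month|week|day|hour|minute|second'  # hypothetical stand-in
skip = [_UNITS, r'ago|in|\d+', r':|[ap]m']

words = [w for w in re.split(r'\W', '2 days ago') if w]
# ['2', 'days', 'ago']
leftovers = [w for w in words if not re.match('|'.join(skip), w)]
# [] -> every word is a unit or skip word, so the string is "all units"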
github YangVincent / SmartContractTester / Flask / app / oyente.py
def parse_output(self, result):

        info = []
        errors = []
        lines = result.split('INFO:symExec:')
        start_marker = '============ Results ==========='
        end_marker = '====== Analysis Completed ======'
        scanning = False
        for line in lines[:10]:
            if start_marker in line:     
                scanning = True
                continue

            s = re.split(r'([a-zA-Z]+\.sol:\d+:\d+:)', line)
            if len(s) == 1 and scanning:
                if end_marker in line:
                    scanning = False
                    continue

                row_res = line.split(':')
                for i in range(len(row_res)):
                    row_res[i] = row_res[i].strip()
                info.append(tuple(row_res))

        for line in lines[10:]:
            line = line.strip()
            s = re.split(r'([a-zA-Z]+\.sol:\d+:\d+:)', line)
            if len(s) > 1:
                for i in range(1, len(s)-1, 2):
                    errors.append((s[i], s[i+1]))
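Because the delimiter pattern is wrapped in a capture group, re.split keeps each file:line:column marker in the output list; the markers land at the odd indices and can be paired with the message that follows them:

import re

line = 'token.sol:10:5: Warning ... token.sol:42:1: Error ...'
s = re.split(r'([a-zA-Z]+\.sol:\d+:\d+:)', line)
# ['', 'token.sol:10:5:', ' Warning ... ', 'token.sol:42:1:', ' Error ...']
pairs = [(s[i], s[i + 1]) for i in range(1, len(s) - 1, 2)]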
github jamesmeneghello / pynab / pynab / api.py
if offset and int(offset) > 0:
                    offset = int(offset)
                else:
                    offset = 0

            except Exception as e:
                # normally a try block this long would make me shudder
                # but we don't distinguish between errors, so it's fine
                log.error('Incorrect API Parameter or parsing error: {}'.format(e))
                return api_error(201)

            search_terms = request.query.q or None
            if search_terms:
                # we're searching specifically for a show or something
                for term in regex.split(r'[ .]', search_terms):
                    query = query.filter(Release.search_name.ilike('%{}%'.format(term)))

            if config.api.get('postprocessed_only', False):
                query = query.filter(Release.passworded!='UNKNOWN')

            query = query.order_by(Release.posted.desc())

            query = query.limit(limit)
            query = query.offset(offset)

            total = query.count()
            results = query.all()

            dataset['releases'] = results
            dataset['offset'] = offset
            dataset['total'] = total
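The split itself is the simple part: the query string is broken on spaces and dots so every fragment becomes its own ILIKE filter. A sketch of just that step:

import regex

for term in regex.split(r'[ .]', 'doctor.who 2005'):
    print(term)   # doctor / who / 2005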
github TaoMiner / JointTextKgLearning / preprocess2.py
        if items[0] != "":
            ent_dic.add(items[0])
    print("#%d anchors." % (line_count // 2))
    print("#%d unique entities." % len(ent_dic))

with codecs.open(kg_file, 'r', encoding='UTF-8') as fin_kg:
    with codecs.open(kg_sample, 'w', encoding='UTF-8') as fout_kg:
        line_count = 0
        tmp_kg = []
        for line in fin_kg:
            tmp_line = []
            line = line.lower()
            line_count += 1
            if line_count % 100000 == 0:
                print("has processed: %d entities." % line_count)
            nodes = re.split(r'\t\t', line)
            if nodes[0] in ent_dic:
                tmp_line.append(nodes[0])
                for _ in re.split(r';', nodes[1]):
                    if _ != "" and _ in ent_dic:
                        tmp_line.append(_)
                if len(tmp_line) > 1:
                    tmp_kg.append(tmp_line[0] + "\t\t" + ";".join(tmp_line[1:]) + "\n")
                    if len(tmp_kg) >= 10000:
                        fout_kg.writelines(tmp_kg)
                        del tmp_kg[:]
        if len(tmp_kg) > 0:
            fout_kg.writelines(tmp_kg)
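Each knowledge-graph line has the shape head\t\tneighbor1;neighbor2;..., so the code splits once on the double tab and again on semicolons. A sketch with made-up data:

import re

line = 'paris\t\tfrance;europe;seine\n'
nodes = re.split(r'\t\t', line)              # ['paris', 'france;europe;seine\n']
neighbors = [n for n in re.split(r';', nodes[1].strip()) if n]
# ['france', 'europe', 'seine']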
github cbuijs / unbound-dns-filter / unbound-dns-filter.py
log_info('{0}-KARMA-SCORE: {1} = {2}'.format(valuetype, testvalue, karmascore))

        # Check against domain
        if is_dom.search(testvalue):
            if check_dom(valuetype, testvalue, wl_dom, 'WHITELIST'): # Whitelisted
                return False

            elif check_dom(valuetype, testvalue, bl_dom, 'BLACKLIST'): # Blacklisted
                return True

            # Check if Domain is a rev-arpa domain, if it is, check its IP value
            ip = False
            if ip4arpa.search(testvalue):
                ip = '.'.join(testvalue.strip('.').split('.')[0:4][::-1]) # IPv4
            elif ip6arpa.search(testvalue):
                ip = ':'.join(filter(None, regex.split('(.{4,4})', ''.join(testvalue.strip('.').split('.')[0:32][::-1])))) # IPv6

            if ip:
                checkip = True
                testvalue = ip

    # Check against IP4
    if checkip and is_ip4.search(testvalue):
        # Check if IPv4 is whitelisted
        if check_ip(valuetype, testvalue, orgtestvalue, wl_ip4, 'WHITELIST', False):
            return False
        # Check if IPv4 is blacklisted
        elif check_ip(valuetype, testvalue, orgtestvalue, bl_ip4, 'BLACKLIST', True):
            return True

    # Check against IP6
    elif checkip and is_ip6.search(testvalue):
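The regex.split('(.{4,4})', ...) call above doubles as a chunker: splitting on a captured group of exactly four characters returns the four-character chunks themselves (plus empty strings that filter(None, ...) prunes), which is a compact way to regroup 32 reversed nibbles into IPv6 quartets:

import regex

packed = '20010db8000000000000000000000001'  # 32 hex nibbles of 2001:db8::1
ip6 = ':'.join(filter(None, regex.split('(.{4})', packed)))
# '2001:0db8:0000:0000:0000:0000:0000:0001'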
github parklab / MosaicForecast / old / mosaicpc / cli / phase.py
MT2_phasing_num[mosaic_name]['doubt']=MT2_phasing_num[mosaic_name].get('doubt',0)
            for i in range(0,len(v)):
                inforSNPs_pos_list.append(int(inforSNPs[k][i].split(';')[2]))
                inforSNPs_alleles_list.append([inforSNPs[k][i].split(';')[3],inforSNPs[k][i].split(';')[4]])
            samfile=pysam.AlignmentFile(bam_dir+"/"+sample+".bam", "rb")
            M=defaultdict(dict)
            for read in samfile.fetch(chr, min(inforSNPs_pos_list),max(inforSNPs_pos_list)):
                readID=read.query_name
                for i in range(0,len(v)):
                    M[readID][str(i)]=M[readID].get(str(i),".")
                    try:
                        if int(inforSNPs[k][i].split(';')[2]) - 1 > int(read.reference_start):
                            # ... (the rest of this condition and the offset/distance
                            # bookkeeping are garbled in the source listing)
                            pass
github MalwareCantFly / Vba2Graph / vba2graph.py
networkx.DiGraph: Directed Graph with highlighted Change triggers
    """
    # Find all the objects that have a _Change event
    # like TextBox1_Change
    changed_objects = []
    for func_name in vba_func_dict:
        if "_Change" in func_name:
            changed_object = func_name.replace("_Change", "")
            changed_objects.append(changed_object)

    # Find pieces of code that assign to an object, which would
    # cause a _Change event Trigger
    for func_name in vba_func_dict:
        func_code = vba_func_dict[func_name]
        # split function code into lines
        func_code_lines = filter(None, re.split("\n", func_code))
        for func_line in func_code_lines:
            for changed_object in changed_objects:
                # look for .[changed_object] pattern, followed by "="
                found_loc = func_line.find("." + changed_object)
                if found_loc > -1:
                    if func_line.find("=", found_loc) > -1:
                        # we found object with Change event that was assigned a value

                        # show this connection as a function call
                        DG.add_edge(func_name, changed_object + "_Change", label="Triggers", fontcolor=color_scheme["COLOR_TRIGGERED_CALL_EDGE"])
    return DG
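filter(None, re.split("\n", func_code)) is simply a blank-line-free line iterator (str.splitlines plus the same filter would do the same job), which is easy to see in isolation:

import re

func_code = 'Sub Foo()\n\n  x = 1\nEnd Sub\n'
lines = list(filter(None, re.split('\n', func_code)))
# ['Sub Foo()', '  x = 1', 'End Sub']  -- blank lines are dropped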
github MalwareCantFly / Vba2Graph / vba2graph.py
def find_keywords_in_graph(vba_func_dict, DG):
    """Find and highlight possible malicious keywords in graph
    
    Args:
        vba_func_dict (dict[func_name]=func_code): Functions dictionary
        DG (networkx.DiGraph): Generated directed graph
    
    Returns:
        networkx.DiGraph: Directed Graph with keywords highlighted in red
    """
    # analyze function calls
    for func_name in vba_func_dict:

        func_code = vba_func_dict[func_name]
        # split function code into lines
        func_code_lines = filter(None, re.split("\n", func_code))

        # handle malicious keywords
        keywords_re_sensetive = "(" + ")|(".join(lst_mal_case_sensetive) + ")"
        keywords_re_insensetive = "(" + ")|(".join(lst_mal_case_insensetive) + ")"

        # iterate over all the words in func_code and match mal_regexes
        dict_items = {}
        for token in func_code_lines:
            match_findall_sensetive = re.findall(keywords_re_sensetive, token)
            match_findall_insensetive = re.findall(keywords_re_insensetive, token, re.IGNORECASE)
            match_findall = match_findall_sensetive + match_findall_insensetive
            if match_findall:
                for match in match_findall:
                    match_list = list(match)

                    # use dictionary dict_items to count occurrences of keywords
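Each keyword gets its own capture group, so re.findall returns one tuple per match with only the matching group filled in; that is why the snippet flattens every match with list(match) before counting. A sketch with made-up keyword lists:

import re

lst_keywords = ['Shell', 'CreateObject']            # hypothetical keywords
keywords_re = '(' + ')|('.join(lst_keywords) + ')'  # '(Shell)|(CreateObject)'
print(re.findall(keywords_re, 'x = CreateObject("WScript.Shell")'))
# [('', 'CreateObject'), ('Shell', '')]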
github microsoft / Recognizers-Text / Python / libraries / recognizers-number / recognizers_number / number / cjk_parsers.py
            if any(x for x in ['k', 'K', 'ｋ', 'Ｋ'] if x in double_text):
                power = 1000
            elif any(x for x in ['M', 'Ｍ'] if x in double_text):
                power = 1000000
            elif any(x for x in ['G', 'Ｇ'] if x in double_text):
                power = 1000000000
            elif any(x for x in ['T', 'Ｔ'] if x in double_text):
                power = 1000000000000
            result.value = self.get_digit_value(double_text, power)

        else:
            double_match = regex.search(
                self.config.percentage_regex, source_text)
            double_text = self.replace_unit(double_match.group())

            split_result = regex.split(self.config.point_regex, double_text)
            if split_result[0] == '':
                split_result[0] = self.config.zero_char

            double_value = self.get_int_value(split_result[0])
            if len(split_result) == 2:
                if regex.search(self.config.negative_number_sign_regex, split_result[0]) is not None:
                    double_value -= self.get_point_value(split_result[1])
                else:
                    double_value += self.get_point_value(split_result[1])
            result.value = double_value

        result.resolution_str = self.__format(result.value) + '%'
        return result
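The point_regex split separates the integer and fractional parts so each side can be converted on its own. A minimal sketch, assuming point_regex simply matches the decimal separator:

import regex

point_regex = regex.compile(r'\.')   # assumption: matches the decimal point
split_result = regex.split(point_regex, '3.14')
# ['3', '14'] -> integer part and fractional part converted separately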