How to use the serpextract.serpextract.ExtractResult class in serpextract

To help you get started, we’ve selected a few serpextract examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: Parsely/serpextract, serpextract/serpextract.py (view on GitHub)
if url_parts.fragment:
                path_with_query_and_frag += '#{}'.format(url_parts.fragment)
            for path in self.hidden_keyword_paths:
                if not isinstance(path, string_types):
                    if path.search(path_with_query_and_frag):
                        keyword = False
                        break
                elif path == path_with_query_and_frag:
                    keyword = False
                    break

        if keyword is not None:
            # Replace special placeholder with blank string
            if keyword is False:
                keyword = ''
            return ExtractResult(engine_name, keyword, self)
Source: Parsely/serpextract, serpextract/serpextract.py (view on GitHub)
result = None
    if parser is None:
        parser = get_parser(url_parts)

    if parser is None:
        if not use_naive_method:
            return None  # Tried to get keyword from non SERP URL

        # Try to use naive method of detection
        if _naive_re.search(url_parts.netloc):
            query = _unicode_parse_qs(url_parts.query, keep_blank_values=True)
            for param in _naive_params:
                if param in query:
                    tld_res = tldextract.extract(url_parts.netloc)
                    return ExtractResult(tld_res.domain,
                                         query[param][0],
                                         None)

        return None  # Naive method could not detect a keyword either

    result = parser.parse(url_parts)

    if result is None:
        return None

    # Post-process the extracted keyword according to the caller's
    # normalization flags; each step rewrites result.keyword in place.
    if lower_case:
        result.keyword = result.keyword.lower()
    if trimmed:
        result.keyword = result.keyword.strip()
    if collapse_whitespace:
        # UNICODE must be passed via ``flags=``: the fourth positional
        # parameter of re.sub() is ``count``, so a positional re.UNICODE
        # (integer value 32) would be read as "replace at most 32 runs"
        # and the flag itself would never be applied.
        result.keyword = re.sub(r'\s+', ' ', result.keyword,
                                flags=re.UNICODE)
Source: Parsely/serpextract, serpextract/serpextract.py (view on GitHub)
keyword = ' '.join(keys).strip()

        if engine_name == 'Google':
            # Check for usage of Google's top bar menu
            tbm = query.get('tbm', [None])[0]
            if tbm == 'isch':
                engine_name = 'Google Images'
            elif tbm == 'vid':
                engine_name = 'Google Video'
            elif tbm == 'shop':
                engine_name = 'Google Shopping'

        if keyword is not None:
            # Edge case found a keyword, exit quickly
            return ExtractResult(engine_name, keyword, self)

        # Otherwise we keep looking through the defined extractors
        for extractor in self.keyword_extractor:
            if not isinstance(extractor, string_types):
                # Regular expression extractor
                match = extractor.search(url_parts.path)
                if match:
                    keyword = match.group(1)
                    break
            else:
                # Search for keywords in query string
                if extractor in query:
                    # Take the last param in the qs because it should be the
                    # most recent
                    keyword = query[extractor][-1]