How to use the tldextract.tldextract.TLDExtract function in tldextract

To help you get started, we’ve selected a few tldextract examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

john-kurkowski / tldextract / tldextract / cli.py View on Github

prog='tldextract',
        description='Parse hostname from a url or fqdn')

    parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)  # pylint: disable=no-member
    parser.add_argument('input', metavar='fqdn|url',
                        type=unicode, nargs='*', help='fqdn or url')

    parser.add_argument('-u', '--update', default=False, action='store_true',
                        help='force fetch the latest TLD definitions')
    parser.add_argument('-c', '--cache_file',
                        help='use an alternate TLD definition file')
    parser.add_argument('-p', '--private_domains', default=False, action='store_true',
                        help='Include private domains')

    args = parser.parse_args()
    tld_extract = TLDExtract(include_psl_private_domains=args.private_domains)

    if args.cache_file:
        tld_extract.cache_file = args.cache_file

    if args.update:
        tld_extract.update(True)
    elif not args.input:
        parser.print_usage()
        sys.exit(1)
        return

    for i in args.input:
        print(' '.join(tld_extract(i)))  # pylint: disable=superfluous-parens

robbielynch / RoblySearch / tldextract / tldextract.py View on Github

if sys.version_info &lt; (3,):
                    sys.stderr.write(line.encode('utf-8') + "\n")
                else:
                    sys.stderr.write(line + "\n")

        if self.cache_file:
            try:
                with open(self.cache_file, 'wb') as f:
                    pickle.dump(tlds, f)
            except IOError as e:
                LOG.warn("unable to cache TLDs in file %s: %s", self.cache_file, e)

        self._extractor = _PublicSuffixListTLDExtractor(tlds)
        return self._extractor

TLD_EXTRACTOR = TLDExtract()

@wraps(TLD_EXTRACTOR.__call__)
def extract(url):
    return TLD_EXTRACTOR(url)

@wraps(TLD_EXTRACTOR.update)
def update(*args, **kwargs):
    return TLD_EXTRACTOR.update(*args, **kwargs)

def get_tlds_from_raw_suffix_list_data(suffix_list_source):
    tld_finder = re.compile(r'^(?P[.*!]*\w[\S]*)', re.UNICODE | re.MULTILINE)
    tld_iter = (m.group('tld') for m in tld_finder.finditer(suffix_list_source))
    return frozenset(tld_iter)

def fetch_file(urls):
    """ Decode the first successfully fetched URL, from UTF-8 encoding to

john-kurkowski / tldextract / tldextract / tldextract.py View on Github

LOG.debug('computed TLD diff:\n' + '\n'.join(difflib.unified_diff(
                snapshot,
                new,
                fromfile=".tld_set_snapshot",
                tofile=self.cache_file
            )))

        if self.cache_file:
            try:
                with open(self.cache_file, 'w') as cache_file:
                    json.dump(tlds, cache_file)
            except IOError as ioe:
                LOG.warning("unable to cache TLDs in file %s: %s", self.cache_file, ioe)


TLD_EXTRACTOR = TLDExtract()


@wraps(TLD_EXTRACTOR.__call__)
def extract(url):
    return TLD_EXTRACTOR(url)


@wraps(TLD_EXTRACTOR.update)
def update(*args, **kwargs):
    return TLD_EXTRACTOR.update(*args, **kwargs)


def get_tlds_from_raw_suffix_list_data(suffix_list_source, include_psl_private_domains=False):
    if include_psl_private_domains:
        text = suffix_list_source
    else:

How to use the tldextract.tldextract.TLDExtract function in tldextract

To help you get started, we’ve selected a few tldextract examples, based on popular ways it is used in public projects.

tldextract

Package Health Score

Popular tldextract functions

Similar packages