How to use serpextract - 10 common examples

To help you get started, we’ve selected a few serpextract examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Parsely / serpextract / tests / test_serps.py View on Github external
def test_custom_parser_implicit(self):
        """Registering a parser for a bare domain should make SERPs on that
        domain extract without passing the parser explicitly.

        The URL must be invalid before registration and valid after; the
        module-level ``_engines`` registry is restored afterwards so other
        tests are unaffected.
        """
        # Only _engines is needed here; the unused _get_search_engines
        # import from the original has been dropped.
        from serpextract.serpextract import _engines
        self.assertInvalidSERP(self.custom_serp_url)
        add_custom_parser(u'search.piccshare.com', self.custom_parser)
        try:
            self.assertValidSERP(self.custom_serp_url,
                                 self.custom_parser.engine_name,
                                 u'test')
        finally:
            # Always undo the registry mutation, even if an assertion above
            # fails, so the global parser table stays clean for other tests.
            del _engines[u'search.piccshare.com']
github Parsely / serpextract / tests / test_serps.py View on Github external
def assertInvalidSERP(self, url, **kwargs):
        """Assert that *url* is not recognized as a SERP: ``extract`` must
        return ``None`` and ``is_serp`` must be falsy."""
        result = extract(url, **kwargs)
        self.assertIsNone(result)
        self.assertFalse(is_serp(url, **kwargs))
github Parsely / serpextract / tests / test_serps.py View on Github external
def assertValidSERP(self, url, expected_engine_name, expected_keyword, **kwargs):
        """Assert that *url* extracts as a SERP with the expected engine name
        and keyword.

        Both the raw string and its ``urlparse()``'d form are exercised.
        """
        for candidate in (url, urlparse(url)):
            result = extract(candidate, **kwargs)
            self.assertEqual(result.keyword, expected_keyword)
            self.assertEqual(result.engine_name, expected_engine_name)
            self.assertTrue(is_serp(candidate, **kwargs))
github Parsely / serpextract / tests / test_serps.py View on Github external
def assertInvalidSERP(self, url, **kwargs):
        """Check that *url* does not parse as a SERP.

        ``extract`` must yield ``None``, and ``is_serp`` must report False.
        """
        extracted = extract(url, **kwargs)
        self.assertIsNone(extracted)
        self.assertFalse(is_serp(url, **kwargs))
github Parsely / serpextract / tests / test_serps.py View on Github external
def assertValidSERP(self, url, expected_engine_name, expected_keyword, **kwargs):
        """Verify *url* is a valid SERP for the given engine and keyword.

        The check runs twice: once on the plain URL string and once on the
        pre-parsed result of ``urlparse``.
        """
        variants = (url, urlparse(url))
        for variant in variants:
            extracted = extract(variant, **kwargs)
            self.assertEqual(extracted.keyword, expected_keyword)
            self.assertEqual(extracted.engine_name, expected_engine_name)
            self.assertTrue(is_serp(variant, **kwargs))
github Parsely / serpextract / tests / test_helpers.py View on Github external
def test_serp_query_string(self):
        """The internal query-string builder should merge parameters from
        both the query and the fragment of a URL."""
        parts = urlparse('http://www.something.com/?a=1#b=2')
        actual = serpextract._serp_query_string(parts)
        self.assertEqual(actual, 'a=1&b=2')
github Parsely / serpextract / tests / test_helpers.py View on Github external
def test_is_url_without_path_query_or_fragment(self):
        """Only bare scheme+host URLs (optionally with a trailing slash)
        should be classified as having no path, query, or fragment."""
        check = serpextract._is_url_without_path_query_or_fragment
        cases = (
            ('http://www.something.com', True),
            ('http://www.something.com/', True),
            ('http://www.something.com/path', False),
            ('http://www.something.com/?query=true', False),
            ('http://www.something.com/#fragment', False),
            ('http://www.something.com/path?query=True#fragment', False),
        )

        for candidate, expected in cases:
            self.assertEqual(check(urlparse(candidate)), expected)
github Parsely / serpextract / tests / test_serps.py View on Github external
def setUp(self):
        """Create the shared fixtures: a custom SearchEngineParser and a
        SERP URL on the matching custom domain."""
        self.custom_parser = SearchEngineParser(
            u'PiccShare', u'q', u'/search.php?q={k}', u'utf-8')
        self.custom_serp_url = 'http://search.piccshare.com/search.php?cat=web&channel=main&hl=en&q=test'
github Parsely / serpextract / tests / test_serps.py View on Github external
def test_get_all_query_params(self):
        """``get_all_query_params`` should return a non-empty list."""
        all_params = get_all_query_params()
        self.assertIsInstance(all_params, list)
        self.assertGreater(len(all_params), 0)
github Parsely / serpextract / tests / test_serps.py View on Github external
def test_get_query_params_by_domain(self):
        """Individual (sub)domains should each map to their own sorted list
        of known search query parameters."""
        params_by_domain = get_all_query_params_by_domain()
        # Table of domain -> expected params; checked in insertion order,
        # matching the original per-domain assertions.
        expectations = {
            'google.com': [u'q', u'query'],
            'google.de': [u'q', u'query'],
            'google.co.uk': [u'q', u'query'],
            'baidu.com': [u'kw', u'wd', u'word'],
            'bing.com': [u'Q', u'q'],
            'yahoo.com': [u'p', u'q', u'va'],
            'so-net.ne.jp': [u'kw', u'query'],
            'goo.ne.jp': [u'MT'],
            't-online.de': [u'q'],
        }
        for domain, expected in expectations.items():
            self.assertEqual(params_by_domain[domain], expected)