How to use bs4 - 10 common examples

To help you get started, we’ve selected a few bs4 examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github NikolaiT / GoogleScraper / tests / threadtests.py View on Github external
'start': self.gsearch['n_res_page']*self.gsearch['n_page'], # the offset to the search results. page number = (start / num) + 1
            'pws': '0'      # personalization turned off by default
        }
        headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'close',
            'DNT': '1'
        }
        print("Initiating search with params={}".format(params))
        r = requests.get('http://www.google.com/search', params=params, headers=headers)
        html = r.text
        # Try to parse the google HTML result using lxml
        try:
            doc = UnicodeDammit(html, is_html=True)
            parser = lxml.html.HTMLParser(encoding=doc.declared_html_encoding)
            dom = lxml.html.document_fromstring(html, parser=parser)
            dom.resolve_base_href()
        except Exception as e:
            print('Some error occurred while lxml tried to parse: {}'.format(e.msg))
            return False

        try:
            res = dom.xpath(HTMLTranslator().css_to_xpath('div#resultStats'))[0].text_content()
            print("Got number of results: `{}` for query {}".format(res, self.gsearch['query']))
        except Exception as e:
            print(e.msg)
github CloudBotIRC / CloudBot / lib / bs4 / testing.py View on Github external
def test_xml_declaration_becomes_comment(self):
        """An XML declaration at the top of a document is parsed as a Comment.

        The declaration must be the first node of the tree, carry the
        declaration text without its angle brackets, and be immediately
        followed by the <html> element.
        """
        # The markup has to contain both the XML declaration and an <html>
        # element; with an empty string there is no first node to inspect
        # and none of the assertions below can hold.
        markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
        soup = self.soup(markup)
        declaration = soup.contents[0]
        self.assertTrue(isinstance(declaration, Comment))
        self.assertEqual(declaration, '?xml version="1.0" encoding="utf-8"?')
        self.assertEqual("html", declaration.next_element.name)
github shobrook / overcast / testing / env / lib / python3.5 / site-packages / bs4 / element.py View on Github external
if val is None:
                    decoded = key
                else:
                    if isinstance(val, list) or isinstance(val, tuple):
                        val = ' '.join(val)
                    elif not isinstance(val, str):
                        val = str(val)
                    elif (
                        isinstance(val, AttributeValueWithCharsetSubstitution)
                        and eventual_encoding is not None):
                        val = val.encode(eventual_encoding)

                    text = self.format_string(val, formatter)
                    decoded = (
                        str(key) + '='
                        + EntitySubstitution.quoted_attribute_value(text))
                attrs.append(decoded)
        close = ''
        closeTag = ''

        prefix = ''
        if self.prefix:
            prefix = self.prefix + ":"

        if self.is_empty_element:
            close = '/'
        else:
            closeTag = '' % (prefix, self.name)

        pretty_print = self._should_pretty_print(indent_level)
        space = ''
        indent_space = ''
github linkedin / qark / qark / lib / bs4 / testing.py View on Github external
def assertDoctypeHandled(self, doctype_fragment):
        """Check that a doctype declaration survives a parse round trip.

        Builds a document via ``self._document_with_doctype`` and verifies
        that the tree starts with a ``Doctype`` node equal to
        ``doctype_fragment``, that the rendered tree begins with the
        returned doctype string, and that the <p> element's first child
        is the string 'foo'.
        """
        expected_prefix, tree = self._document_with_doctype(doctype_fragment)

        # The very first node of the tree must be the Doctype itself.
        first_node = tree.contents[0]
        self.assertEqual(first_node.__class__, Doctype)
        self.assertEqual(first_node, doctype_fragment)

        # The serialized document has to open with the doctype string.
        rendered = str(tree)
        self.assertEqual(rendered[:len(expected_prefix)], expected_prefix)

        # The doctype must not have swallowed the rest of the document:
        # the paragraph content still has to be reachable.
        paragraph_children = tree.p.contents
        self.assertEqual(paragraph_children[0], 'foo')
github evilhero / mylar / lib / bs4 / testing.py View on Github external
def assertDoctypeHandled(self, doctype_fragment):
        """Verify that the parser turns a doctype string into a Doctype node.

        ``self._document_with_doctype`` supplies a doctype string and the
        parsed tree. The checks cover the node's class and value, the
        start of the serialized output, and the content of the <p>
        element, whose first child must be 'foo'.
        """
        doctype_text, parsed = self._document_with_doctype(doctype_fragment)

        # A Doctype object must sit at the head of the parse tree.
        leading = parsed.contents[0]
        self.assertEqual(leading.__class__, Doctype)
        self.assertEqual(leading, doctype_fragment)

        # Serializing the tree must reproduce the doctype string up front.
        prefix_length = len(doctype_text)
        self.assertEqual(str(parsed)[:prefix_length], doctype_text)

        # The remainder of the document must have been parsed as well.
        first_in_paragraph = parsed.p.contents[0]
        self.assertEqual(first_in_paragraph, 'foo')
github jtackaberry / stagehand / external / bs4 / testing.py View on Github external
def test_soupstrainer(self):
        """A parser must honor a SoupStrainer passed as ``parse_only``.

        The markup is parsed with a strainer for "b"; the decoded result
        is expected to be only the <b> element.
        """
        only_bold = SoupStrainer("b")
        markup = "A <b>bold</b>  <i>statement</i>"
        filtered = self.soup(markup, parse_only=only_bold)
        self.assertEqual(filtered.decode(), "<b>bold</b>")
github openembedded / bitbake / lib / bs4 / testing.py View on Github external
def test_soupstrainer(self):
        """Parsing with ``parse_only`` set to a SoupStrainer must work.

        A strainer for "b" is applied to markup that also holds plain
        text and an <i> element; the decoded tree must equal the <b>
        element alone.
        """
        source = "A <b>bold</b>  <i>statement</i>"
        bold_filter = SoupStrainer("b")
        tree = self.soup(source, parse_only=bold_filter)
        rendered = tree.decode()
        self.assertEqual(rendered, "<b>bold</b>")
github pde / tosback2-data / web-frontend / beautifulsoup4 / bs4 / testing.py View on Github external
def test_soupstrainer(self):
        """Ensure the parser under test cooperates with a SoupStrainer.

        The document is parsed with ``parse_only`` restricted to "b",
        and the decoded output is compared against the bare <b> element.
        """
        expected = "<b>bold</b>"
        keep_b = SoupStrainer("b")
        result = self.soup(
            "A <b>bold</b>  <i>statement</i>",
            parse_only=keep_b,
        )
        self.assertEqual(result.decode(), expected)