How to use the extruct.tool.metadata_from_url function in extruct

To help you get started, we’ve selected a few extruct examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapinghub / extruct / tests / test_tool.py View on Github external
def test_metadata_from_url_all_types(self, mock_get):
        """Check metadata_from_url when no syntax flags are passed.

        ``mock_get`` is made to return a mock response built from the stored
        songkick page. The result must equal the ``self.expected`` fixture
        extended with the ``url`` and ``status`` keys.
        """
        # Build a fresh dict rather than aliasing self.expected: adding the
        # 'url' and 'status' keys must not mutate the fixture itself.
        expected = dict(self.expected, url=self.url, status='200 OK')
        mock_get.return_value = build_mock_response(
            url=self.url,
            content=get_testdata('songkick', 'tovestyrke.html'),
        )

        data = metadata_from_url(self.url)
        self.assertEqual(data, expected)
github scrapinghub / extruct / tests / test_tool.py View on Github external
def test_metadata_from_url_jsonld_only(self, mock_get):
        """Check metadata_from_url with only the ``jsonld`` flag enabled.

        The result must hold the fixture's ``'json-ld'`` entry together with
        the ``url`` and ``status`` keys, and nothing else.
        """
        wanted = {'url': self.url, 'status': '200 OK'}
        wanted['json-ld'] = self.expected['json-ld']

        # Hand the stored songkick page back through the mocked getter.
        mock_get.return_value = build_mock_response(
            url=self.url,
            content=get_testdata('songkick', 'tovestyrke.html'),
        )

        extracted = metadata_from_url(
            self.url,
            microdata=False,
            jsonld=True,
            rdfa=False,
        )
        self.assertEqual(extracted, wanted)
github scrapinghub / extruct / tests / test_tool.py View on Github external
def test_metadata_from_url_microdata_only(self, mock_get):
        """Check metadata_from_url with only the ``microdata`` flag enabled.

        The result must hold the fixture's ``'microdata'`` entry together
        with the ``url`` and ``status`` keys, and nothing else.
        """
        wanted = {'url': self.url, 'status': '200 OK'}
        wanted['microdata'] = self.expected['microdata']

        # Hand the stored songkick page back through the mocked getter.
        mock_get.return_value = build_mock_response(
            url=self.url,
            content=get_testdata('songkick', 'tovestyrke.html'),
        )

        extracted = metadata_from_url(
            self.url,
            microdata=True,
            jsonld=False,
            rdfa=False,
        )
        self.assertEqual(extracted, wanted)
github scrapinghub / extruct / tests / test_tool.py View on Github external
def test_metadata_from_url_rdfa_only(self, mock_get):
        """Check metadata_from_url with only the ``rdfa`` flag enabled.

        The result must hold the fixture's ``'rdfa'`` entry together with
        the ``url`` and ``status`` keys, and nothing else.
        """
        wanted = {'url': self.url, 'status': '200 OK'}
        wanted['rdfa'] = self.expected['rdfa']

        # Hand the stored songkick page back through the mocked getter.
        mock_get.return_value = build_mock_response(
            url=self.url,
            content=get_testdata('songkick', 'tovestyrke.html'),
        )

        extracted = metadata_from_url(
            self.url,
            microdata=False,
            jsonld=False,
            rdfa=True,
        )
        self.assertEqual(extracted, wanted)
github scrapinghub / extruct / tests / test_tool.py View on Github external
def test_metadata_from_url_unauthorized_page(self, mock_get):
        """Check the result of metadata_from_url for a 401 response.

        The mock response carries status 401 / reason 'Unauthorized' and its
        ``raise_for_status`` is wired to ``http_error``. The result must
        contain only the ``url`` and ``status`` keys.
        """
        url = 'http://example.com/unauthorized'

        denied = build_mock_response(url, reason='Unauthorized', status=401)
        denied.raise_for_status.side_effect = http_error
        mock_get.return_value = denied

        data = metadata_from_url(url)
        self.assertEqual(data, {'url': url, 'status': '401 Unauthorized'})
github scrapinghub / extruct / extruct / service.py View on Github external
def async_extruct(url, microdata=True, jsonld=True, rdfa=True):
    """Return the metadata extracted from ``url`` and mark the reply as JSON.

    Sets ``content_type`` on the module-level ``response`` object to
    ``'application/json'``, then forwards ``url`` and the three syntax flags
    positionally to ``metadata_from_url`` and returns its result unchanged.
    """
    response.content_type = 'application/json'
    return metadata_from_url(url, microdata, jsonld, rdfa)