Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
it('should return an object or array and get correct data', () => {
    // Properties every Organization entry is expected to expose.
    const requiredKeys = ['@context', '@type', 'url', 'logo'];

    // Download the fixture page, load it into cheerio and extract its JSON-LD.
    return preq.get(test)
    .then((callRes) => meta.parseJsonLd(cheerio.load(callRes.body)))
    .then((res) => {
        // typeof null is 'object' as well, so null has to be excluded explicitly.
        assert.ok(res !== null && typeof res === 'object', 'JSON-LD result is neither an object nor an array');

        // The title allows a lone object; wrap it so both shapes share one code path
        // instead of throwing on the missing Array#filter.
        const entries = Array.isArray(res) ? res : [res];

        // Check the first organisation for the correct properties
        const result = entries.find((entry) => entry['@type'] === 'Organization');
        // Fail with a readable assertion rather than a TypeError on undefined.
        assert.ok(result, 'No Organization entry found in the JSON-LD result');

        requiredKeys.forEach((key) => {
            // Go through Object.prototype: the entry is parsed external data and
            // may not have (or may shadow) its own hasOwnProperty.
            assert.ok(
                Object.prototype.hasOwnProperty.call(result, key),
                `Organization entry is missing the ${key} property`
            );
        });
    });
});
});
it('should get all general enwiki site info', () => {
    const request = { uri };

    return preq.get(request).then((response) => {
        // the request must have succeeded
        assert.status(response, 200);
        // and must announce a JSON payload
        assert.contentType(response, 'application/json');
        // the payload has to exist and carry the server field
        const { body } = response;
        assert.notDeepEqual(body, undefined, 'No body returned!');
        assert.notDeepEqual(body.server, undefined, 'No server field returned!');
    });
});
it('should get the whole page body', () => {
    // An opening h1 tag followed, later on the same line, by "Mulholland".
    const expectedTitle = /<\s*?h1.+Mulholland/;

    return preq.get({ uri }).then((response) => {
        // the request must have succeeded
        assert.status(response, 200);
        // and must announce an HTML payload
        assert.contentType(response, 'text/html');
        // there has to be a body at all
        assert.notDeepEqual(response.body, undefined, 'No body returned!');
        // and it has to be the page we asked for
        const isExpectedPage = expectedTitle.test(response.body);
        if (!isExpectedPage) {
            throw new Error('Not the title I was expecting!');
        }
    });
});
/**
 * Requests `infoUri + fieldName` and asserts that the response is a
 * 200 JSON reply whose body contains `fieldName`.
 *
 * @param {string} fieldName suffix appended to infoUri; also the key looked up in the body
 * @return {Promise} the request promise with the assertions chained onto it
 */
function checkRet(fieldName) {
    const target = infoUri + fieldName;

    return preq.get({ uri: target }).then((response) => {
        // Content-Type is verified first
        assert.contentType(response, 'application/json');
        // then the status code
        assert.status(response, 200);
        // last, the body must exist and expose the requested field
        const { body } = response;
        assert.notDeepEqual(body, undefined, 'No body returned!');
        assert.notDeepEqual(body[fieldName], undefined, `No ${fieldName} field returned!`);
    });
}
.then(() => {
return P.all([
preq.get(`${testRestBASEUri}/page/html/${encodeURIComponent(title)}/${revision}/${tid}`)
.catch((err) => {
// eslint-disable-next-line no-console
console.log(`${new Date()} Failed to fetch HTML ${title}/${revision}/${tid} from test RB: ${err}`);
}),
preq.get(`${testRestBASEUri}/page/html/${encodeURIComponent(title)}/${revision}/${tid}`)
.catch((err) => {
// eslint-disable-next-line no-console
console.log(`${new Date()} Failed to fetch Data-Parsoid ${title}/${revision}/${tid} from test RB: ${err}`);
})
]);
});
/**
 * Requests /w/index.php on the given domain and parses the response body
 * into a domino document.
 *
 * @param {string} domain host name placed right after 'http://'
 * @param {string} title value sent as the `title` query parameter
 * @return {Promise} resolves with the document built by domino.createDocument()
 */
function getBody(domain, title) {
    const request = {
        uri: `http://${domain}/w/index.php`,
        query: { title }
    };

    // download the page first ...
    return preq.get(request).then((callRes) => {
        // ... then turn the returned markup into a document
        const doc = domino.createDocument(callRes.body);
        return BBPromise.resolve(doc);
    });
}
/**
 * Loads `http://<domain>/w/index.php?title=<title>` and hands the response
 * body to domino.
 *
 * NOTE(review): another getBody(domain, title) is declared earlier in this
 * file - confirm whether both declarations are meant to exist.
 *
 * @param {string} domain host the request is sent to
 * @param {string} title page title passed in the query string
 * @return {Promise} resolves with the parsed domino document
 */
function getBody(domain, title) {
    // parse step: build a document from the fetched markup
    const toDocument = (callRes) => BBPromise.resolve(domino.createDocument(callRes.body));

    // fetch step: index.php with the title as query parameter
    const pageUri = `http://${domain}/w/index.php`;
    return preq.get({ uri: pageUri, query: { title } }).then(toDocument);
}
it('should not find dublin core metadata, reject promise', () => {
    const url = 'http://www.laprovence.com/article/actualites/3411272/marseille-un-proche-du-milieu-corse-abattu-par-balles-en-plein-jour.html';

    // Fetch the article, load it into cheerio and pass the Dublin Core
    // parser's promise to assert.fails.
    return preq.get(url).then((callRes) => {
        const chtml = cheerio.load(callRes.body);
        return assert.fails(meta.parseDublinCore(chtml));
    });
});
it('should get html lang parameter', () => {
    const expected = "fr";
    // The request is sent with an explicit User-Agent header.
    const options = {
        url: "http://www.lemonde.fr",
        headers: { 'User-Agent': 'webscraper' }
    };

    return preq.get(options)
    // parse the general metadata out of the downloaded markup
    .then((callRes) => meta.parseGeneral(cheerio.load(callRes.body)))
    // and compare the detected language with the expected one
    .then((results) => {
        assert.deepEqual(results.lang, expected);
    });
});
it('should get robots.txt', () => {
    // robots.txt sits directly under the configured server URI
    const robotsUri = `${server.config.uri}robots.txt`;

    return preq.get({ uri: robotsUri }).then(({ status, headers }) => {
        // the request succeeds ...
        assert.deepEqual(status, 200);
        // ... and the response carries a `disallow` header equal to '/'
        assert.deepEqual(headers.disallow, '/');
    });
});