Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
/**
 * Converts an HTML description into plain text while keeping its line breaks.
 *
 * Every `<br>` tag (any letter case, self-closing or not, with or without
 * attributes) is replaced by `\r\n` before the markup is parsed, so the break
 * survives the text extraction instead of disappearing with the tag.
 *
 * @param description HTML fragment to convert; `null`/`undefined` is treated as an empty fragment.
 * @returns The text content of the fragment, with a line break wherever a `<br>` tag was.
 */
function descriptionText (description) {
  const markup = description == null ? '' : String(description);
  // preserve the line breaks when converting to text
  const html = cheerio.load('<div>' + markup.replace(/<br\b[^>]*>/gi, '\r\n') + '</div>');
  // Read the wrapper only: `html('div')` also matches any <div> nested inside
  // the description, and extracting text from every match would repeat the
  // nested text in the result.
  return cheerio.text(html('div').first());
}
/**
 * Handles one page item: extracts its article infos, crawls the detail of each
 * one in order, and passes every successfully crawled article to `next`.
 *
 * @param item Page item handed to `parseArticleInfo`.
 * @param next Callback invoked (and awaited) with each crawled article.
 */
private async parsePageItem(item: PageItem, next: (article: WechatArticle) => void) {
// Drop falsy entries returned by parseArticleInfo.
const infos = this.parseArticleInfo(item).filter(info => !!info);
if (infos.length === 0) {
// Only warns; with an empty list the loop below simply does not run.
// NOTE(review): `$` is not defined anywhere in the visible code — confirm it is in scope here.
console.warn(`解析文章数据失败: ${$['text']()}`);
}
// Consume the list front to back; each info is fully processed before the next one starts.
while (infos.length > 0) {
const info = infos.shift();
// The stored record is looked up only to choose between the "updating" and "parsing" log message.
const dbArticle = await db.wechat.get(this.name).get(info.id);
if (dbArticle) {
console.log(`正在更新文章数据:${JSON.stringify({ id: info.id, title: info.title })}`);
} else {
console.log(`正在解析文章数据:${JSON.stringify({ id: info.id, title: info.title })}`);
}
const article = await this.crawlDetail(info);
// A falsy result from crawlDetail is skipped without calling `next`.
if (article) {
await next(article);
}
/**
 * Extracts one record per `<div>` that carries an `id` attribute.
 *
 * Records are returned in the order the selection yields them. Each record has:
 * - `id`: the element's `id` attribute,
 * - `text`: the element's text as produced by `cheerio.text`,
 * - `type`: the element's `stype` attribute, or `TEXT_TYPE` when that
 *   attribute is missing or empty.
 *
 * @param html Markup to parse.
 * @returns Array of `{ id, text, type }` records (empty when nothing matches).
 */
function parseHtmlForText(html) {
  const $ = cheerio.load(html);
  const matches = $('div[id]');
  return Array.from(matches, (node) => {
    const { id, stype } = node.attribs;
    return {
      id,
      text: cheerio.text($(node)),
      type: stype ? stype : TEXT_TYPE
    };
  });
}
return new Promise( resolve => {
const result:any = {
title: null,
og_title: null,
og_desc: null,
og_image: null,
og_url: null,
}
try{
const document = cheerio.load(html)
result.title = cheerio.text(document('title'))
if (result.title===undefined){
result.title = null
}
result.og_title = document("meta[property='og:title']").attr('content')
if (result.og_title===undefined){
result.og_title = null
}
result.og_desc = document("meta[property='og:description']").attr('content')
if (result.og_desc===undefined){
result.og_desc = null
}
result.og_image = document("meta[property='og:image']").attr('content')
if (result.og_image===undefined){
result.og_image = null
}
result.og_url = document("meta[property='og:url']").attr('content')