How to use the jschardet.detect function in jschardet

To help you get started, we’ve selected a few jschardet examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Ireoo / spider.npm / lib / crawler.js View on Github external
return self.emit('pool:release', options);
    }

    if (!response.body) { response.body=''; }

    if (options.debug) {
        console.log('Got '+(options.uri||'html')+' ('+response.body.length+' bytes)...');
        console.timeEnd("Got " + options.uri + " use time");
    }

    if (options.forceUTF8) {
        //TODO check http header or meta equiv?
        var iconvObj;

        if (!options.incomingEncoding) {
            var detected = jschardet.detect(response.body);

            if (detected && detected.encoding) {
                if (options.debug) {
                    console.log(
                        'Detected charset ' + detected.encoding +
                        ' (' + Math.floor(detected.confidence * 100) + '% confidence)'
                    );
                }
                if (detected.encoding !== 'utf-8' && detected.encoding !== 'ascii') {

                    if (iconv) {
                        iconvObj = new iconv(detected.encoding, 'UTF-8//TRANSLIT//IGNORE');
                        response.body = iconvObj.convert(response.body).toString();

                        // iconv-lite doesn't support Big5 (yet)
                    } else if (detected.encoding !== 'Big5') {
github ktty1220 / cheerio-httpcli / lib / encoding.js View on Github external
detectByBuffer: function (buffer) {
    var enc = jschardet.detect(buffer);
    // 高精度で判定できた場合のみ
    if (enc && enc.encoding && (enc.confidence || 0) >= 0.99) {
      return enc.encoding;
    }
    return null;
  },
github k1LoW / utsusemi / src / lib / crawler.js View on Github external
.then((data) => {
                        let results = ['', []];
                        const detected = jschardet.detect(data.Body);
                        const decoded = iconv.decode(data.Body, detected.encoding);

                        if (contentType.match(/html/)) {
                            results = scraper.scrapeHTML(decoded, path, targetHost);
                        } else if (contentType.match(/css/)) {
                            depth = 3; // !!!!
                            results = scraper.scrapeCSS(decoded, path, targetHost);
                        }
                        const filtered = results[1];

                        const queueParams = {
                            QueueName: queueName
                        };

                        return Promise.all([
                            filtered,
github kof / kiipost / api / extractor / convertCharset.js View on Github external
/**
 * Work out which charset name should be used to decode a response body.
 *
 * The name produced by `charset(res.headers, data)` is used when it is a key
 * of `iconvCharsets`. Otherwise the body is sniffed with jschardet, whose
 * guess is accepted only when its confidence is above 0.5; below that the
 * result is 'utf-8'. The spelling 'utf8' is always returned as 'utf-8'.
 *
 * @param {Object} res  Response object; only `res.headers` is read.
 * @param {*} data      Response body handed to `charset` and `jschardet.detect`.
 * @returns {*} The chosen charset name.
 */
function detect(res, data) {
    var name = charset(res.headers, data)

    // The header/meta derived name is unusable when iconvCharsets has no entry for it.
    if (!iconvCharsets[name]) {
        var guess = jschardet.detect(data)

        if (guess.confidence > 0.5) {
            name = guess.encoding
        } else {
            name = 'utf-8'
        }
    }

    // Normalise the hyphen-less spelling.
    return name == 'utf8' ? 'utf-8' : name
}
github popcorn-official / popcorn-desktop-legacy / src / app / lib / subtitle / generic.js View on Github external
decode: function (dataBuff, language, callback) {
            var targetEncodingCharset = 'utf8';

            var charset = charsetDetect.detect(dataBuff);
            var detectedEncoding = charset.encoding;
            win.debug('SUB charset detected: ', detectedEncoding);
            // Do we need decoding?
            if (detectedEncoding.toLowerCase().replace('-', '') === targetEncodingCharset) {
                callback(dataBuff.toString('utf8'));
                // We do
            } else {
                var langInfo = App.Localization.langcodes[language] || {};
                win.debug('SUB charset expected for \'%s\': ', language, langInfo.encoding);
                if (langInfo.encoding !== undefined && langInfo.encoding.indexOf(detectedEncoding) < 0) {
                    // The detected encoding was unexpected for the language, so we'll use the most common
                    // encoding for that language instead.
                    detectedEncoding = langInfo.encoding[0];
                }
                win.debug('SUB charset used: ', detectedEncoding);
                dataBuff = iconv.encode(iconv.decode(dataBuff, detectedEncoding), targetEncodingCharset);
github AntSwordProject / antSword / modules / request.js View on Github external
function detectEncoding(buffer, options) {

  options = options || {};
  buffer = buffer || Buffer('');

  var DEFAULT_ENCODING = 'GBK',
    MIN_CONFIDENCE = 0.96;
  var verbose = options.verbose;
  var defaultEncoding = options.defaultEncoding || DEFAULT_ENCODING;
  var minConfidence = options.minConfidence || MIN_CONFIDENCE;
  var ret = jschardet.detect(buffer),
    encoding = ret.encoding === 'ascii' ?
    'utf-8' :
    ret.encoding,
    confidence = ret.confidence;
  // var VALID_ENCODINGS = ['gb2312', 'gbk', 'utf-8', 'big5', 'euc-kr','euc-jp'];

  if (encoding === null || !iconv.encodingExists(encoding) || confidence < minConfidence) {
    return verbose ? {
        encoding: defaultEncoding,
        oriEncoding: encoding,
        confidence: confidence
      } :
      defaultEncoding;
  } else {
    encoding = encoding.toUpperCase();
    return verbose ? {
github wuchangming / spy-debugger / src / proxy / spyProxy.js View on Github external
function chunkReplace(chunk, injectScriptTag, proxyRes) {
    var _charset;
    try {
        _charset =  charset(proxyRes, chunk) || jschardet.detect(chunk).encoding.toLowerCase();
    } catch (e) {
        console.error(e);
    }
    var chunkString;
    if (_charset != null && _charset != 'utf-8') {
        try {
            chunkString = iconv.decode(chunk, _charset);
        } catch (e) {
            console.error(e);
            chunkString = iconv.decode(chunk, 'utf-8');
        }
    } else {
        chunkString = chunk.toString();
    }

    var newChunkString = htmlUtil.injectScriptIntoHtml(chunkString, injectScriptTag);
github SabakiHQ / Sabaki / src / modules / fileformats / gib.js View on Github external
exports.parseFile = function(filename) {
    let iconv = require('iconv-lite')
    let jschardet = require('jschardet')

    let buffer = fs.readFileSync(filename)
    let encoding = 'utf8'
    let detected = jschardet.detect(buffer)
    if (detected.confidence > 0.2) encoding = detected.encoding

    content = iconv.decode(buffer, encoding)

    return exports.parse(content)
}
github oaprograms / lingo-player / app / js / node / vl_util.js View on Github external
/**
 * Read a file and decode it to a string.
 *
 * When `explicit_encoding` is given it is used as-is. Otherwise the encoding
 * is detected with jschardet; if the detected name does not start with
 * 'utf-' and a `langCode` is supplied, the first entry of
 * `encodings.getEncodings(langCode)` is used instead. If no encoding name
 * results from these steps, 'utf-8' is used.
 *
 * @param {string} path                 File path passed to `fs.readFileSync`.
 * @param {string} [langCode]           Language code passed to `encodings.getEncodings`.
 * @param {string} [explicit_encoding]  Encoding that bypasses detection.
 * @returns {*} Result of `iconv.decode` for the file content.
 */
var loadFile = function(path, langCode, explicit_encoding){
    var content = fs.readFileSync(path);
    var encoding = explicit_encoding;
    if (! encoding) {
        // The detector may report `encoding: null` when it cannot classify the
        // input (e.g. an empty file); calling toLowerCase() on that would throw.
        var detected = jschardet.detect(content);
        encoding = ((detected && detected.encoding) || '').toLowerCase();
        if (!encoding.startsWith('utf-') && langCode) {
            encoding = encodings.getEncodings(langCode)[0];
        }
        // Nothing detected and no usable language fallback: decode as UTF-8.
        if (!encoding) {
            encoding = 'utf-8';
        }
    }
    return iconv.decode(content, encoding);
};
github 1000ch / rog / index.js View on Github external
/**
 * Decode a response body to a string, choosing the charset in this order:
 *   1. the `charset=` parameter of the Content-Type header,
 *   2. jschardet's guess, when its confidence is at least 0.99,
 *   3. a `<meta ... charset=...>` declaration inside the document's <head>,
 *   4. UTF-8.
 *
 * @param {Object} response  Object with optional `headers` and `body`.
 * @returns {string} The decoded body.
 */
const getBody = response => {
  const headers = response.headers || {};
  const body = response.body || '';

  // 1. Charset declared in the Content-Type header.
  const contentType = headers['content-type'] || '';
  const matches = contentType.match(/charset=(?<charset>.+)/);
  if (matches !== null) {
    return iconv.decode(body, matches[1]);
  }

  // 2. Statistical detection, accepted only when it is near-certain.
  const result = jschardet.detect(body);
  if (result && result.encoding && (result.confidence || 0) >= 0.99) {
    return iconv.decode(body, result.encoding);
  }

  // 3. Look for a charset declaration in the <head>. The bytes are read as
  //    ASCII only to locate the markup, not to produce the final text.
  const head = body.toString('ascii').match(/<head[\s>](?<head>[\s\S]*?)<\/head>/i);
  if (!head) {
    return body.toString('utf8');
  }

  const charset = head[1].match(/<meta[^>]*[\s;]+charset\s*=\s*["']?(?<charset>[\w\-_]+)["']?/i);
  if (charset) {
    return iconv.decode(body, charset[1].trim());
  }

  // 4. Nothing declared anywhere: assume UTF-8.
  return body.toString('utf8');
};

jschardet

Character encoding auto-detection in JavaScript (a port of Python's chardet)

LGPL-2.1
Latest version published 1 month ago

Package Health Score

80 / 100
Full package analysis