How to use jschardet - 10 common examples

To help you get started, we’ve selected a few jschardet examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Ireoo / spider.npm / lib / crawler.js View on Github external
return self.emit('pool:release', options);
    }

    if (!response.body) { response.body=''; }

    if (options.debug) {
        console.log('Got '+(options.uri||'html')+' ('+response.body.length+' bytes)...');
        console.timeEnd("Got " + options.uri + " use time");
    }

    if (options.forceUTF8) {
        //TODO check http header or meta equiv?
        var iconvObj;

        if (!options.incomingEncoding) {
            var detected = jschardet.detect(response.body);

            if (detected && detected.encoding) {
                if (options.debug) {
                    console.log(
                        'Detected charset ' + detected.encoding +
                        ' (' + Math.floor(detected.confidence * 100) + '% confidence)'
                    );
                }
                if (detected.encoding !== 'utf-8' && detected.encoding !== 'ascii') {

                    if (iconv) {
                        iconvObj = new iconv(detected.encoding, 'UTF-8//TRANSLIT//IGNORE');
                        response.body = iconvObj.convert(response.body).toString();

                        // iconv-lite doesn't support Big5 (yet)
                    } else if (detected.encoding !== 'Big5') {
github ktty1220 / cheerio-httpcli / lib / encoding.js View on Github external
detectByBuffer: function (buffer) {
    var enc = jschardet.detect(buffer);
    // 高精度で判定できた場合のみ
    if (enc && enc.encoding && (enc.confidence || 0) >= 0.99) {
      return enc.encoding;
    }
    return null;
  },
github k1LoW / utsusemi / src / lib / crawler.js View on Github external
.then((data) => {
                        let results = ['', []];
                        const detected = jschardet.detect(data.Body);
                        const decoded = iconv.decode(data.Body, detected.encoding);

                        if (contentType.match(/html/)) {
                            results = scraper.scrapeHTML(decoded, path, targetHost);
                        } else if (contentType.match(/css/)) {
                            depth = 3; // !!!!
                            results = scraper.scrapeCSS(decoded, path, targetHost);
                        }
                        const filtered = results[1];

                        const queueParams = {
                            QueueName: queueName
                        };

                        return Promise.all([
                            filtered,
github kof / kiipost / api / extractor / convertCharset.js View on Github external
/**
 * Determine the charset name to use for a response body.
 *
 * @param {Object} res - Response object; only `res.headers` is read here.
 * @param {*} data - Response body, passed to `charset` and `jschardet.detect`.
 * @returns {string} The charset name, with 'utf8' normalized to 'utf-8'.
 */
function detect(res, data) {
    // First guess comes from the headers/body sniffing helper.
    var encoding = charset(res.headers, data)

    // If that guess is not a key of iconvCharsets, fall back to detecting
    // from the content itself, trusting it only above 50% confidence.
    if (!iconvCharsets[encoding]) {
        var guess = jschardet.detect(data)
        if (guess.confidence > 0.5) {
            encoding = guess.encoding
        } else {
            encoding = 'utf-8'
        }
    }

    // Normalize the dash-less spelling.
    return encoding == 'utf8' ? 'utf-8' : encoding
}
github popcorn-official / popcorn-desktop-legacy / src / app / lib / subtitle / generic.js View on Github external
decode: function (dataBuff, language, callback) {
            var targetEncodingCharset = 'utf8';

            var charset = charsetDetect.detect(dataBuff);
            var detectedEncoding = charset.encoding;
            win.debug('SUB charset detected: ', detectedEncoding);
            // Do we need decoding?
            if (detectedEncoding.toLowerCase().replace('-', '') === targetEncodingCharset) {
                callback(dataBuff.toString('utf8'));
                // We do
            } else {
                var langInfo = App.Localization.langcodes[language] || {};
                win.debug('SUB charset expected for \'%s\': ', language, langInfo.encoding);
                if (langInfo.encoding !== undefined && langInfo.encoding.indexOf(detectedEncoding) < 0) {
                    // The detected encoding was unexpected for the language, so we'll use the most common
                    // encoding for that language instead.
                    detectedEncoding = langInfo.encoding[0];
                }
                win.debug('SUB charset used: ', detectedEncoding);
                dataBuff = iconv.encode(iconv.decode(dataBuff, detectedEncoding), targetEncodingCharset);
github AntSwordProject / antSword / modules / request.js View on Github external
function detectEncoding(buffer, options) {

  options = options || {};
  buffer = buffer || Buffer('');

  var DEFAULT_ENCODING = 'GBK',
    MIN_CONFIDENCE = 0.96;
  var verbose = options.verbose;
  var defaultEncoding = options.defaultEncoding || DEFAULT_ENCODING;
  var minConfidence = options.minConfidence || MIN_CONFIDENCE;
  var ret = jschardet.detect(buffer),
    encoding = ret.encoding === 'ascii' ?
    'utf-8' :
    ret.encoding,
    confidence = ret.confidence;
  // var VALID_ENCODINGS = ['gb2312', 'gbk', 'utf-8', 'big5', 'euc-kr','euc-jp'];

  if (encoding === null || !iconv.encodingExists(encoding) || confidence < minConfidence) {
    return verbose ? {
        encoding: defaultEncoding,
        oriEncoding: encoding,
        confidence: confidence
      } :
      defaultEncoding;
  } else {
    encoding = encoding.toUpperCase();
    return verbose ? {
github redhat-developer / vscode-openshift-tools / src / encoding.ts View on Github external
copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.
 *--------------------------------------------------------------------------------------------*/

'use strict';

import * as jschardet from 'jschardet';

// Assign 0.2 to jschardet's MINIMUM_THRESHOLD constant at module load.
// NOTE(review): presumably this is the confidence cutoff below which
// jschardet reports no encoding — confirm against the jschardet docs.
jschardet.Constants.MINIMUM_THRESHOLD = 0.2;

function detectEncodingByBOM(buffer: Buffer): string | null {
	if (!buffer || buffer.length < 2) {
		return null;
	}

	const b0 = buffer.readUInt8(0);
	const b1 = buffer.readUInt8(1);

	// UTF-16 BE
	if (b0 === 0xFE && b1 === 0xFF) {
		return 'utf16be';
	}

	// UTF-16 LE
	if (b0 === 0xFF && b1 === 0xFE) {
github ezpaarse-project / ezpaarse / lib / pkbvalidator.js View on Github external
exports.validate = function (file, callback) {
  var titleIDs      = {}; // List of title IDs
  var titleIdColumn = false;
  var syntaxError   = false;
  var currentLine   = 1;
  var nbErrors      = 0;
  var nbWarnings    = 0;

  var fileStream    = fs.createReadStream(file);
  var emitter       = new EventEmitter();
  var detector      = new jschardet.UniversalDetector();
  var data;
  detector.reset();

  // Store errors if a callback is provided
  if (typeof callback === 'function') {
    var pkbErrors    = [];
    var pkbWarnings  = [];

    emitter
    .on('error', function (error)       { callback(error); })
    .on('syntaxError', function (error) { callback(error); })
    .on('pkbError', function (msg, line)     { pkbErrors.push({ message: msg, line: line }); })
    .on('pkbWarning', function (msg, line)   { pkbWarnings.push({ message: msg, line: line }); })
    .on('end', function () { callback(null, pkbErrors, pkbWarnings); });
  }
github CapacitorSet / box-js / analyze.js View on Github external
// Log which commit is running: when git_path exists and is a directory, print
// the contents of .git/refs/heads/master (with the first newline removed).
// NOTE(review): this reads the master ref specifically — confirm that is the
// intended branch for every checkout.
if (fs.existsSync(git_path) && fs.lstatSync(git_path).isDirectory()) {
	lib.verbose("Commit: " + fs.readFileSync(path.join(__dirname, ".git/refs/heads/master"), "utf8").replace(/\n/, ""));
} else {
	lib.verbose("No git folder found.");
}
lib.verbose(`Analyzing ${filename}`, false);
// Read the sample without an encoding argument so the raw bytes are available
// for charset detection.
const sampleBuffer = fs.readFileSync(filename);
// Pick the encoding for the sample: an explicit argv.encoding takes precedence
// over detection.
let encoding;
if (argv.encoding) {
	lib.debug("Using argv encoding");
	encoding = argv.encoding;
} else {
	lib.debug("Using detected encoding");
	encoding = require("jschardet").detect(sampleBuffer).encoding;
	if (encoding === null) {
		// Detection reported a null encoding; fall back to UTF-8 and warn,
		// including the jschardet version in the message.
		lib.warning("jschardet (v" + require("jschardet/package.json").version + ") couldn't detect encoding, using UTF-8");
		encoding = "utf8";
	} else {
		lib.debug("jschardet (v" + require("jschardet/package.json").version + ") detected encoding " + encoding);
	}
}

let code = iconv.decode(sampleBuffer, encoding);

if (code.match(".
	lib.debug("Sample seems to be WSF");
	code = code.replace(/<\??\/?\w+( [\w=\"\']*)*\??>/g, ""); // XML tags
	code = code.replace(//g, "");
}

function lacksBinary(name) {
github wuchangming / spy-debugger / src / proxy / spyProxy.js View on Github external
function chunkReplace(chunk, injectScriptTag, proxyRes) {
    var _charset;
    try {
        _charset =  charset(proxyRes, chunk) || jschardet.detect(chunk).encoding.toLowerCase();
    } catch (e) {
        console.error(e);
    }
    var chunkString;
    if (_charset != null && _charset != 'utf-8') {
        try {
            chunkString = iconv.decode(chunk, _charset);
        } catch (e) {
            console.error(e);
            chunkString = iconv.decode(chunk, 'utf-8');
        }
    } else {
        chunkString = chunk.toString();
    }

    var newChunkString = htmlUtil.injectScriptIntoHtml(chunkString, injectScriptTag);

jschardet

Character encoding auto-detection in JavaScript (port of Python's chardet)

LGPL-2.1
Latest version published 2 months ago

Package Health Score

80 / 100
Full package analysis