Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
/*!
* Crawl - node.io crawl job
* Copyright(c) 2012 Mike Moulton
* MIT Licensed
*/
var nodeio = require('node.io'),
first = require('first'),
urlUtil = require('url'),
hash = require('node_hash'),
util = require('util'),
_ = require('underscore'),
ct = require('../content-type');
var crawl = exports.job = new nodeio.Job({max: 50, retries: 3, auto_retry: true, timeout: 30}, {
init: function() {
this.options.cache = {};
this.options.baseUrl = undefined;
},
run: function (url) {
this.options.crawler.emit("crawl", url);
var urlParts = urlUtil.parse(url, true);
// Remember the first URL crawled so we can restrict all mined links to the same host
if (!this.options.baseUrl) {
this.options.baseUrl = urlParts;
}
if (!~data.indexOf('Rank_1:1:')) {
self.emit(input+',');
} else {
self.emit(input+','+data.substr(9));
}
});
},
fail: function(input) {
this.emit(input+',');
}
};
//Export the job
exports.job = new Job(options, methods);
//-----------------------------------------------
// CODE FOR GENERATING GOOGLE PAGERANK CHECKSUMS
//-----------------------------------------------
// Right-shift helper used by the checksum code: shifts `a` right by `b` bits,
// filling with zeros instead of copies of the sign bit.
//   a - 32-bit integer to shift
//   b - number of bits to shift by
// Returns the shifted value. For 1 <= b <= 31 the result equals `a >>> b`;
// NOTE(review): when bit 31 of `a` is set and b is 0 the final shift count is
// -1 (treated as 31 by JavaScript), so the result is 0 - kept as in the
// original algorithm.
function zF(a, b) {
  // Mask for bit 31 (the sign bit of a 32-bit integer).
  var z = 0x80000000;
  if (z & a) {
    // Sign bit set: shift one place, clear the copied sign bit, set bit 30
    // (where the old sign bit landed), then shift the remaining b-1 places.
    a = a >> 1;
    a &= ~z;
    a |= 0x40000000;
    a = a >> (b - 1);
  } else {
    // Sign bit clear: a plain shift already fills with zeros.
    a = a >> b;
  }
  return a;
}
var nodeio = require('node.io');
var _ = require('underscore');
// Settings object passed as the first argument to nodeio.Job below.
var options = {timeout: 10, max: 20, retries: 3};
exports.job = new nodeio.Job(options, {
input: function(start, num, next) {
if (this.options.args.length < 1) {
console.log('format : node.io test sitename [start=1] [total=1000] [incr=50]');
this.exit(false);
}
var opts = _(this.options.args).map(function(n, i) {
if (i === 0) { return n; }
return parseInt(n, 10);
});
this.options._args = _(['name', 'start', 'total', 'incr']).object(opts);
_.defaults(this.options._args, { start: 1, total:1000, incr: 10});
if (this.options._args.incr > 50) {
this.options._args.incr = 50;
}
var nodeio = require('node.io');
var jsdom = require("jsdom");
var jquery = require("jquery");
// The integers 0 through 39, in order; used as the job's input list.
var numbers = [];
var i = 0;
while (i < 40) {
  numbers.push(i);
  i += 1;
}
exports.job = new nodeio.Job({
input: numbers,
run: function (id) {
var that = this;
var url = 'https://calnet.berkeley.edu/directory/details.pl?uid=' + id;
/*this.getHtml(url, function(err, $) {
console.log($);
var name = $('#content p').innerHTML;
//var name = $('#content > p span:nth-child(2)').innerHTML;
this.emit(name);
});*/
jsdom.env(
'https://calnet.berkeley.edu/directory/details.pl?uid=' + id,
['http://code.jquery.com/jquery.js'],
function (errors, window) {
var name = window.$('#content > p span:nth-child(2)').html();
if (name) {
//Only compile .coffee files
this.exec('coffee -c "' + file + '"', function(err) {
if (err) {
self.exit(err);
} else {
self.finish();
}
});
} else {
this.skip();
}
}
}
//Export the job
exports.job = new Job(options, methods);
var getVideoSourceUrl = function(youtubeLink, id, callback) {
var result = false;
var videoId = youtubeLink.substr(youtubeLink.lastIndexOf('=')+1);
var infoUrl = 'http://youtube.com/get_video_info?video_id=' + videoId;
var job = new nodeio.Job({
input: false,
run: function () {
var url = this.options.args[0];
this.get(url, function(err, data) {
if (err) {
this.exit(err);
} else {
try {
var vInfoResponse = querystring.parse(data);
if(vInfoResponse['status'] === "fail") {
throw 'The video seems to be unavaiable in your country. Please choose another one.';
}
//Remove duplicate lines (default)
if (!~seen_lines.indexOf(line)) {
emit.push(line);
seen_lines.push(line);
}
}
});
this.emit(emit);
}
};
//Export the job
exports.job = new Job({}, methods);
verbose('## Adding Servings');
var servings = $('.entry span[itemprop="recipeYield"]');
if (servings) {
obj.servings = servings.striptags;
}
} catch(e) {
verbose(e);
}
this.emit(obj);
});
}
};
// Build the job from `methods` with retry/timeout settings.
var job = new nodeio.Job({
  auto_retry: true,
  timeout: 20,
  retries: 3,
  silent: true
}, methods);
// Run the job and report the outcome through `callback` exactly once.
// NOTE(review): the trailing `true` presumably asks node.io to capture the
// job's output and hand it to this function as `data` - confirm against the
// node.io API.
nodeio.start(job, {}, function (err, data) {
  if (err) {
    // Stop here: falling through would invoke callback a second time with
    // (null, data) after the error was already reported.
    callback(err);
    return;
  }
  callback(null, data);
}, true);
};
// Starts a node.io job built from modelMethods for one id.
//   id       - passed to the job as its single entry in `args`
//   callback - handed unchanged to nodeio.start
var fetchModel = function (id, callback) {
  var jobOptions = {timeout: 10};
  var startOptions = {args: [id]};
  nodeio.start(new nodeio.Job(jobOptions, modelMethods), startOptions, callback, true);
};
};
},
complete: function() {
var out = [];
for (var word in word_count) {
out.push(word_count[word] + ' ' + word);
}
//Now that we have the full list of words, output
this.output(out);
return true;
}
};
exports.job = new Job(options, methods);