Skip to content

Commit 23e63af

Browse files
committed Jul 5, 2022
Organized files
1 parent d07ee2b commit 23e63af

18 files changed

+109
-121
lines changed
 
File renamed without changes.
File renamed without changes.

‎src/components/config.ts

+23-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import _ from "lodash"
2-
import {ConfigOptions} from "../middleware/ConfigOptions";
32
import Article from "./article";
43

54
export type ConfigType = {
@@ -54,6 +53,29 @@ export type ConfigType = {
5453
};
5554
}
5655

56+
export enum ConfigOptions {
57+
SOURCES_PATH = 'sources.path',
58+
SOURCES_INCLUDE_ONLY = 'sources.includeOnly',
59+
SOURCES_EXCLUDE = 'sources.exlude',
60+
SAFFRON_MODE = 'mode',
61+
WORKER_NODES = 'worker.nodes',
62+
REQUEST_TIMEOUT = 'worker.request.timeout',
63+
ARTICLE_AMOUNT = 'worker.article.amount',
64+
SCHEDULER_JOB_INT = 'scheduler.job.interval',
65+
SCHEDULER_JOB_HEAVY_INT = 'scheduler.job.heavyInterval',
66+
SCHEDULER_CHECKS_INT = 'scheduler.job.checkInterval',
67+
GRID_DISTRIBUTED = 'grid.distributed',
68+
GRID_SERVER_ADDRESS = 'grid.server.address',
69+
GRID_SERVER_PORT = 'grid.server.port',
70+
GRID_AUTH = 'grid.auth',
71+
GRID_USE_HTTP = 'grid.use_http',
72+
GRID_HTTPS_KEY = 'grid.https.key',
73+
GRID_HTTPS_CERT = 'grid.https.cert',
74+
MISC_LOG_LEVEL = 'misc.log',
75+
DB_PUSH_ARTICLES = 'db.articles.push',
76+
DB_GET_ARTICLES = 'db.articles.get'
77+
}
78+
5779
export default class Config {
5880
private static instance: Config
5981
_config: ConfigType = {

‎src/components/instructions.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import Source from "./source";
2-
import {ParserType} from "../middleware/ParserType";
2+
import {ParserType} from "./ParserType";
33

44
export type InstructionUrl = {
55
url: string;

‎src/components/source.ts

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import Job from "../components/job";
22
import Instructions from "./instructions";
3-
import {ParserType} from "../middleware/ParserType";
3+
import {ParserType} from "./ParserType";
44
import Article from "./article";
5-
import Config from "./config";
5+
import Config, {ConfigOptions} from "./config";
66
import ParserLoader from "../modules/parsers/ParserLoader";
7-
import {ConfigOptions} from "../middleware/ConfigOptions";
87

98

109
export default class Source {

‎src/index.ts

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import Config, {ConfigType} from "./components/config"
1+
import Config, {ConfigType, ConfigOptions} from "./components/config"
22
import Scheduler from "./modules/scheduler";
33
import Grid from "./modules/grid";
44
import Events from "./modules/events";
@@ -8,7 +8,6 @@ import Utils from "./modules/parsers/Utils";
88
import Job from "./components/job"
99
import Source from "./components/source"
1010
import Instructions from "./components/instructions";
11-
import {ConfigOptions} from "./middleware/ConfigOptions";
1211
import Extensions from "./modules/extensions";
1312
import {ParserResult} from "./components/types";
1413

@@ -62,9 +61,10 @@ export default class Saffron {
6261
}
6362
})
6463

65-
Events.on("stop", (force: boolean) => {
64+
Events.on("stop", () => {
65+
this.scheduler.stop();
6666
for (let worker of this.workers)
67-
worker.stop(force);
67+
worker.stop();
6868
})
6969
}
7070

@@ -83,7 +83,7 @@ export default class Saffron {
8383
* else if mode equals 'worker' then the worker will stop getting future jobs and disconnect from the main saffron instance.
8484
*/
8585
async stop() {
86-
Events.emit("stop")
86+
Events.emit("stop");
8787
}
8888

8989
/**
@@ -92,7 +92,7 @@ export default class Saffron {
9292
* @param cb The callback that will send the data
9393
*/
9494
async on(event: string, cb: (...args: any[]) => void) {
95-
Events.on(event, cb)
95+
Events.on(event, cb);
9696
}
9797

9898
/**
@@ -101,7 +101,7 @@ export default class Saffron {
101101
* @param callback The callback function that will be called.
102102
*/
103103
use(event: string, callback: (...args: any[]) => any): void {
104-
Extensions.getInstance().push({event, callback})
104+
Extensions.getInstance().push({event, callback});
105105
}
106106

107107
/**

‎src/middleware/ConfigOptions.ts

-22
This file was deleted.

‎src/middleware/hashCode.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Return the hash code of a string
33
* @param str
44
*/
5-
export default (str: String) => {
5+
export default function hashCode(str: String): number {
66
let hash = 0, i, chr;
77
if (str.length === 0) return hash;
88
for (i = 0; i < str.length; i++) {

‎src/middleware/logger.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
import {DateTime} from "luxon";
22
import Events from "../modules/events";
3-
import {LoggerTypes} from "./LoggerTypes"
3+
import {LoggerTypes} from "../components/LoggerTypes"
44
import chalk from 'chalk'
55

66
/**
77
* Logs information to the console
88
* @param type The type of log based on LoggerTypes
99
* @param data The message that will be logged
1010
*/
11-
export default (type: LoggerTypes, data: any) => {
11+
export default function logger(type: LoggerTypes, data: any): void {
1212
Events.getAntennae().emit("log", {type, log: data})
1313
let time = chalk.bold(`${DateTime.now().toLocaleString({
1414
day: '2-digit',

‎src/middleware/randomId.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import {customAlphabet} from "nanoid";
44
* Generates a random id and adds a prefix at the start
55
* @param prefix The prefix (Optional)
66
*/
7-
export default (prefix: string = "") => {
7+
export default function randomId(prefix: string = ""): string {
88
let string = customAlphabet("1234567890qwertyuiopasdfghjklzxcvbnm", 40)
99
let number = customAlphabet("123456789", 8)
1010
return `${prefix}_${string()}${((Date.now() * parseInt(number())) / 100000).toString().substring(0, 6)}`

‎src/modules/events.ts

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import Logger from "../middleware/logger";
2-
import {LoggerTypes} from "../middleware/LoggerTypes";
2+
import {LoggerTypes} from "../components/LoggerTypes";
33
import Job from "../components/job";
44
import Article from "../components/article";
55
import chalk from "chalk";
66
import Grid from "./grid/index";
7-
import Config from "../components/config";
8-
import {ConfigOptions} from "../middleware/ConfigOptions";
7+
import Config, {ConfigOptions} from "../components/config";
98
import Source from "../components/source";
109
import {CallbackVoid} from "../components/types";
1110

‎src/modules/extensions.ts

+13-13
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
interface pair {
2-
event: string,
3-
callback: (...args: any[]) => any
4-
}
1+
type Pair = {
2+
event: string;
3+
callback: (...args: any[]) => any;
4+
};
55

66
export default class Extensions {
77

88
private static instance: Extensions;
9-
private declare readonly pairs: pair[];
9+
private declare readonly pairs: Pair[];
1010

1111
private constructor() {
1212
this.pairs = [];
@@ -19,19 +19,19 @@ export default class Extensions {
1919
return Extensions.instance;
2020
}
2121

22-
push(p: pair): void {
23-
if (this.pairs.filter(pr => pr.event == p.event).length > 0)
24-
throw new Error(`Cannot register an extension event twice. Event '${p.event}' already exists.`);
25-
22+
push(p: Pair): void {
23+
if (!['articles', 'article.format'].includes(p.event))
24+
throw new Error(`Event ${p.event} is not valid.`);
2625
this.pairs.push(p);
2726
}
2827

29-
startCount(): (() => pair | null) {
28+
startPairCount(): (() => Pair | null) {
3029
let i = 0;
31-
return (): pair | null => {
32-
if (i >= this.pairs.length)
30+
const self = this;
31+
return function getNextPair(): Pair | null {
32+
if (i >= self.pairs.length)
3333
return null;
34-
return this.pairs[i++];
34+
return self.pairs[i++];
3535
};
3636
}
3737
}

‎src/modules/grid/index.ts

+14-14
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@ import Job from "../../components/job";
22
import Events from "../events";
33
import {JobStatus} from "../../components/JobStatus";
44
import Worker from "../workers";
5-
import Config from "../../components/config";
5+
import Config, {ConfigOptions} from "../../components/config";
66
import Article from "../../components/article";
7-
import {ConfigOptions} from "../../middleware/ConfigOptions";
87
import Extensions from "../extensions";
98
import Source from "../../components/source";
109
import Scheduler from "../scheduler";
@@ -175,32 +174,33 @@ export default class Grid {
175174
*/
176175
destroyWorker(worker: Worker): void {
177176
let index = this.workersIds.findIndex(id => id == worker.id)
178-
this.workersIds.splice(index, 1)
179-
Events.emit("grid.worker.destroyed", worker.id)
177+
this.workersIds.splice(index, 1);
178+
Events.emit("grid.worker.destroyed", worker.id);
180179
}
181180

182181
/**
183182
* Forcefully remove a worker from the grid
183+
* @param sourceId
184184
* @param workerId
185185
*/
186-
fireWorker(workerId: string): void {
187-
if (!this.isMain) return
186+
fireWorker(sourceId: string, workerId: string): void {
187+
if (!this.isMain) return;
188188

189189
let index = this.workersClients.findIndex(js => {
190-
let index = js.workersIds.findIndex(id => id == workerId)
190+
let index = js.workersIds.findIndex(id => id == workerId);
191191
return index !== -1;
192192
})
193193

194194
if (index != -1) {
195-
let j = this.workersIds.findIndex((obj: string) => obj === workerId)
196-
if (j != -1) this.workersIds.splice(j, 1)
195+
let j = this.workersIds.findIndex((obj: string) => obj === workerId);
196+
if (j != -1) this.workersIds.splice(j, 1);
197197

198-
let k = this.workersClients[index].workersIds.findIndex(id => workerId == id)
199-
if (k != -1) this.workersClients[index].workersIds.splice(k, 1)
198+
let k = this.workersClients[index].workersIds.findIndex(id => workerId == id);
199+
if (k != -1) this.workersClients[index].workersIds.splice(k, 1);
200200
}
201201

202-
let k = this.workersIds.findIndex(id => workerId == id)
203-
if (k != -1) this.workersIds.splice(k, 1)
202+
let k = this.workersIds.findIndex(id => workerId == id);
203+
if (k != -1) this.workersIds.splice(k, 1);
204204
}
205205

206206
/**
@@ -246,7 +246,7 @@ export default class Grid {
246246

247247
Events.emit("middleware.before", articles);
248248

249-
let getExtPair = Extensions.getInstance().startCount();
249+
let getExtPair = Extensions.getInstance().startPairCount();
250250
let pair: any = {};
251251

252252
while ((pair = getExtPair()) != null) {

‎src/modules/parsers/ParserLoader.ts

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
import {ParserType} from "../../middleware/ParserType";
1+
import {ParserType} from "../../components/ParserType";
22
import {HTMLParser} from "./drivers/HTMLParser";
33
import {RSSParser} from "./drivers/RSSParser";
4-
import {WordpressV2Parser} from "./drivers/wordpress/WordpressV2Parser";
4+
import {WordpressV2Parser} from "./drivers/WordpressV2Parser";
55
import {DynamicParser} from "./drivers/DynamicParser";
66
import {ParserClass} from "./ParserClass";
77
import Instructions from "../../components/instructions";
8-
import {WordpressV1Parser} from "./drivers/wordpress/WordpressV1Parser";
8+
import {WordpressV1Parser} from "./drivers/WordpressV1Parser";
99

1010
export default class ParserLoader {
1111

‎src/modules/parsers/drivers/wordpress/WordpressV1Parser.ts ‎src/modules/parsers/drivers/WordpressV1Parser.ts

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
import {ParserClass} from "../../ParserClass";
2-
import Instructions from "../../../../components/instructions";
3-
import Job from "../../../../components/job";
4-
import Article from "../../../../components/article";
5-
import Utils from "../../Utils";
1+
import {ParserClass} from "../ParserClass";
2+
import Instructions from "../../../components/instructions";
3+
import Job from "../../../components/job";
4+
import Article from "../../../components/article";
5+
import Utils from "../Utils";
66

77
export class WordpressV1Parser extends ParserClass {
88
validateScrape(scrape: object): void {}

‎src/modules/parsers/drivers/wordpress/WordpressV2Parser.ts ‎src/modules/parsers/drivers/WordpressV2Parser.ts

+5-9
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
1-
import {ParserClass} from "../../ParserClass";
2-
import Instructions from "../../../../components/instructions";
3-
import Job from "../../../../components/job";
4-
import Article from "../../../../components/article";
5-
import axios, {AxiosRequestConfig} from "axios";
6-
import Utils from "../../Utils";
7-
import https from "https";
8-
9-
const httpsAgent = new https.Agent({rejectUnauthorized: false})
1+
import {ParserClass} from "../ParserClass";
2+
import Instructions from "../../../components/instructions";
3+
import Job from "../../../components/job";
4+
import Article from "../../../components/article";
5+
import Utils from "../Utils";
106

117
export class WordpressV2Parser extends ParserClass {
128
validateScrape(scrape: object): void {

‎src/modules/scheduler/index.ts

+14-16
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,31 @@
1-
import Config from "../../components/config";
1+
import Config, {ConfigOptions} from "../../components/config";
22
import Events from "../events";
33
import Job from "../../components/job";
44
import Source from "../../components/source";
55
import Grid from "../grid/index";
66
import {JobStatus} from "../../components/JobStatus";
77
import Worker from "../workers";
8-
import {ConfigOptions} from "../../middleware/ConfigOptions";
98
import glob from "glob";
109
import * as path from "path";
1110

12-
const pathCwd = process.cwd();
13-
1411
export default class Scheduler {
1512

1613
private static instance: Scheduler | null = null;
17-
static getInstance(): Scheduler {
18-
if(this.instance === null)
19-
this.instance = new Scheduler();
20-
return this.instance!!;
21-
}
22-
2314
private declare isRunning: boolean;
2415
private declare jobsStorage: Job[];
2516

26-
constructor() {
17+
private constructor() {
2718
Events.on("start", (keepPreviousSession) => this.start(keepPreviousSession));
2819
Events.on("stop", () => this.stop());
2920
this.jobsStorage = [];
3021
}
3122

23+
static getInstance(): Scheduler {
24+
if (this.instance === null)
25+
this.instance = new Scheduler();
26+
return this.instance!!;
27+
}
28+
3229
/**
3330
* Issue a new job for a specific source
3431
* @param source The source
@@ -51,7 +48,7 @@ export default class Scheduler {
5148
const checkInterval = Config.getOption(ConfigOptions.SCHEDULER_CHECKS_INT);
5249

5350
this.isRunning = true;
54-
if(!keepPreviousSession) {
51+
if (!keepPreviousSession) {
5552
const sources = await this.resetSources();
5653
this.resetJobs(sources);
5754
}
@@ -96,10 +93,11 @@ export default class Scheduler {
9693
// Pending jobs
9794
case JobStatus.PENDING:
9895
if (job.untilRetry <= 0) {
99-
// If the worker did not complete the job after 5 times elect new worker
96+
// If the worker did not change the job status after 5 attempts (in total: 5 * checkInterval ms),
97+
// expect it to have crashed, so we elect a new worker to take its place.
10098
if (job.emitAttempts > 5) {
10199
let oldWorker = job.worker.id;
102-
Grid.getInstance().fireWorker(job.worker.id);
100+
Grid.getInstance().fireWorker(job.source.id, oldWorker);
103101

104102
job.worker.id = Worker.electWorker(job.worker.id);
105103
Events.emit("scheduler.job.worker.replace", oldWorker, job);
@@ -175,7 +173,7 @@ export default class Scheduler {
175173

176174
changeJobStatus(id: string, status: JobStatus) {
177175
let job = this.jobsStorage.find((obj: Job) => obj.id === id);
178-
if(job) job.status = status;
176+
if (job) job.status = status;
179177
}
180178

181179
/**
@@ -184,7 +182,7 @@ export default class Scheduler {
184182
private scanSourceFiles(): Promise<void> {
185183
return new Promise((resolve, reject) => {
186184
let sourcesPath = Config.getOption(ConfigOptions.SOURCES_PATH);
187-
glob(`${path.join(pathCwd, sourcesPath)}/**`, {}, (error: any, files: string[]) => {
185+
glob(`${path.join(process.cwd(), sourcesPath)}/**`, {}, (error: any, files: string[]) => {
188186
if (error) {
189187
Events.emit('scheduler.path.error', error);
190188
return reject(error);

‎src/modules/workers/index.ts

+16-20
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import Utils from "../parsers/Utils";
1212
export default class Worker {
1313

1414
declare readonly id: string;
15-
private declare isForcedStopped: boolean;
1615
private declare isRunning: boolean;
1716

1817
constructor() {
@@ -49,29 +48,28 @@ export default class Worker {
4948
* @param lastWorkerId The job's previous worker id. It will be excluded from the election only if there is more than one worker
5049
*/
5150
static electWorker(lastWorkerId: string): string {
51+
// Make a copy of the array
5252
let workers = Grid.getInstance().getWorkers().slice();
5353

54-
// If only one worker, return the same
55-
if (workers.length === 1 && workers[0] === lastWorkerId)
56-
return lastWorkerId;
54+
// This should never happen: with no workers to elect from, keep the previous worker id
55+
if(workers.length === 0) return lastWorkerId;
5756

58-
// If more than one worker, delete the last one
59-
if (workers.length > 1) {
60-
let index = workers.findIndex((id: string) => id === lastWorkerId)
61-
if (index != -1)
62-
workers.splice(index, 1)
63-
}
57+
// If only one worker return that worker
58+
if (workers.length === 1) return workers[0];
59+
60+
// If more than one worker, delete the last used worker (if in array)
61+
let index = workers.findIndex((id: string) => id === lastWorkerId);
62+
if (index != -1) workers.splice(index, 1);
6463

6564
// From the remaining workers select one
66-
return workers[Math.abs(hashCode(lastWorkerId)) % workers.length]
65+
return workers[Math.abs(hashCode(lastWorkerId)) % workers.length];
6766
}
6867

6968
/**
7069
* Worker will start accepting jobs
7170
*/
7271
async start(): Promise<void> {
7372
Grid.getInstance().announceWorker(this);
74-
this.isForcedStopped = false;
7573
this.isRunning = true;
7674

7775
// start listening for new jobs
@@ -88,8 +86,6 @@ export default class Worker {
8886
return;
8987
}
9088

91-
if (this.isForcedStopped) return;
92-
9389
const source = job.getSource();
9490
result.forEach(r => {
9591
r.articles.forEach((article: Article) => {
@@ -102,18 +98,18 @@ export default class Worker {
10298

10399
const tableName = source.tableName || source.name;
104100

101+
if (!this.isRunning) return;
102+
105103
await Grid.getInstance().mergeArticles(source, tableName, result);
106104
await Grid.getInstance().finishedJob(job);
107105
})
108106
}
109107

110108
/**
111-
* Worker will stop accepting jobs
112-
* @param force if true the it will abandon the current job
109+
* Worker will stop accepting jobs and abort existing ones.
113110
*/
114-
stop(force: boolean): void {
115-
this.isForcedStopped = force
116-
this.isRunning = false
117-
Grid.getInstance().destroyWorker(this)
111+
stop(): void {
112+
this.isRunning = false;
113+
Grid.getInstance().destroyWorker(this);
118114
}
119115
}

0 commit comments

Comments
 (0)
Please sign in to comment.