From ac07e9ba3271bfb94092722d237638b87a78d59c Mon Sep 17 00:00:00 2001
From: Paul Millet
Date: Sun, 15 May 2022 12:02:05 +0200
Subject: [PATCH] remote jobs support

---
 .galaxiat.json    |  2 +-
 src/crawl.ts      | 14 ++++++++++----
 src/getBrowser.ts | 14 ++++++++++++++
 src/index.ts      | 19 ++-----------------
 4 files changed, 27 insertions(+), 22 deletions(-)
 create mode 100644 src/getBrowser.ts

diff --git a/.galaxiat.json b/.galaxiat.json
index 908dc7c..41218b9 100644
--- a/.galaxiat.json
+++ b/.galaxiat.json
@@ -1,10 +1,10 @@
 {
     "hostname" : "galaxiatapp.com",
     "port" : 3000,
-    "type" : "local",
+    "type" : "remote",
     "args" : ["--no-sandbox", "--disable-setuid-sandbox"],
 
     "remote" : "wss://chrome.shared.svc.galaxiat.fr/playwright?token=XXXXXX",
     "target" : "http://localhost:3000",
     "public" : "./public",
     "crawl" : [
diff --git a/src/crawl.ts b/src/crawl.ts
index e1390ea..aa425fe 100644
--- a/src/crawl.ts
+++ b/src/crawl.ts
@@ -1,9 +1,13 @@
 import { config_type, crawl } from "./types";
 import { writeFileSync, mkdirSync } from 'fs';
 import { BrowserContext, ChromiumBrowser } from "playwright";
+import { GetBrowser } from "./getBrowser";
 
-export async function Crawl(browser: BrowserContext, crawl_infos: crawl, config: config_type) {
-    const page = await browser.newPage();
+export async function Crawl(crawl_infos: crawl, config: config_type) {
+    const browser = await GetBrowser(config)
+    // same HTTPS-error policy that index.ts applied to its shared context before this change
+    const context = await browser.newContext({ignoreHTTPSErrors : !config.errors.https})
+    const page = await context.newPage();
     try {
         // log cron start
         console.log(`CAPTURE : ${crawl_infos.url} -> ${crawl_infos.file}`)
@@ -27,6 +31,8 @@ export async function Crawl(browser: BrowserContext, crawl_infos: crawl, config:
     if (!page.isClosed()) {
         await page.close()
     }
-    console.log(page.isClosed())
-
+    // isConnected is a method and must be called; close the browser opened for this crawl
+    if (browser.isConnected()) {
+        await browser.close()
+    }
 }
diff --git a/src/getBrowser.ts b/src/getBrowser.ts
new file mode 100644
index 0000000..04c867c
--- /dev/null
+++ b/src/getBrowser.ts
@@ -0,0 +1,14 @@
+import { config_type } from "./types";
+import playwright from "playwright"
+
+/**
+ * Provides the Chromium browser used by a crawl.
+ *
+ * When `config.type` is "remote" it connects to the Playwright endpoint in
+ * `config.remote`; otherwise it launches a local headless Chromium with `config.args`.
+ */
+export function GetBrowser(config : config_type) : Promise<playwright.Browser> {
+    return (config.type === "remote") ?
+        playwright.chromium.connect(config.remote)
+        : playwright.chromium.launch({ headless: true, args: config.args });
+}
\ No newline at end of file
diff --git a/src/index.ts b/src/index.ts
index 3ca2d35..ceae217 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,7 +1,5 @@
 #! /usr/bin/env node
 
-// imports
-
 import handler from 'serve-handler';
 import http from 'http';
 import { readFileSync } from 'fs';
@@ -12,6 +10,7 @@ import { NewCronRemote } from './cron_remote';
 import Cron from 'croner';
 import { Crawl } from './crawl';
 
+//
 const galaxiat_env = process.env.GALAXIAT_SERVE_ENV
 
 const config_location = galaxiat_env ? `./.galaxiat.${galaxiat_env}.json` : `./.galaxiat.json`
@@ -26,17 +25,6 @@ const config: config_type = JSON.parse(readFileSync(config_location).toString())
         });
     })
 
-    let browser : playwright.Browser
-
-    //browser = await playwright.chromium.launch({ headless: true, args: config.args });
-    if (config.type == "remote") {
-        browser = await playwright.chromium.connect(config.remote)
-    } else {
-        console.log("WARNING : you are using the local mode")
-        browser = await playwright.chromium.launch({ headless: true, args: config.args });
-    }
-    const context = await browser.newContext({ignoreHTTPSErrors : !config.errors.https})
-
     let queue = new Stack()
 
 
@@ -50,7 +38,7 @@ const config: config_type = JSON.parse(readFileSync(config_location).toString())
         if ((curr_crawl_num < config.crawl_max_num)) {
             curr_crawl_num++
             for (const entry of queue.get(config.crawl_queue_num)) {
-                await Crawl(context, entry, config)
+                await Crawl(entry, config)
             }
             curr_crawl_num--
         }
@@ -72,9 +60,6 @@ const config: config_type = JSON.parse(readFileSync(config_location).toString())
         console.log(`Running at http://localhost:${config.port}`);
     });
 
-    httpserv.on("close", async () => {
-        await browser.close()
-    })
 })();
 
 export class Stack {