Skip to content

Commit

Permalink
Add the ability to specify a specific file to write results to as well
Browse files Browse the repository at this point in the history
  • Loading branch information
pes10k committed Apr 5, 2024
1 parent 0758ba9 commit cd935a5
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 9 deletions.
12 changes: 7 additions & 5 deletions src/brave/crawl.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'use strict'

import fsLib from 'node:fs/promises'
import * as osLib from 'os'

import fsExtraLib from 'fs-extra'
Expand All @@ -8,6 +9,7 @@ import Xvbf from 'xvfb'

import { getLogger } from './debug.js'
import { puppeteerConfigForArgs, launchWithRetry } from './puppeteer.js'
import { isDir } from './validate.js'

const xvfbPlatforms = new Set(['linux', 'openbsd'])

Expand Down Expand Up @@ -55,11 +57,11 @@ function createFilename (url: Url) : FilePath {
return `page_graph_${url?.replace(/[^\w]/g, '_')}_${Math.floor(Date.now() / 1000)}.graphml`
}

function writeToFile (args: CrawlArgs, url: Url, response: any, logger: Logger) {
const outputFilename = pathLib.join(
args.outputPath,
createFilename(url)
)
async function writeToFile (args: CrawlArgs, url: Url, response: any, logger: Logger) {
const outputFilename = isDir(args.outputPath)
? pathLib.join(args.outputPath, createFilename(url))
: args.outputPath

fsExtraLib.writeFile(outputFilename, response.data).catch((err: Error) => {
logger.debug('ERROR saving Page.generatePageGraph output:', err)
})
Expand Down
11 changes: 8 additions & 3 deletions src/brave/validate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ const isUrl = (possibleUrl: string): boolean => {
}
}

const isFile = (path: string): boolean => {
/**
 * Reports whether `path` refers to a regular file.
 *
 * Uses `lstat`, so a symbolic link is never reported as a file, even when
 * it points at one.
 *
 * @param path - Filesystem path to inspect.
 * @returns `true` only when the path exists and is a regular file; `false`
 *   when it is missing, is some other kind of entry, or cannot be inspected.
 */
export const isFile = (path: string): boolean => {
  // A single lstat call replaces the earlier exists-then-lstat sequence,
  // which could throw if the entry was removed between the two calls.
  try {
    return fsLib.lstatSync(path).isFile()
  } catch {
    // Missing path, unreadable parent directory, or invalid argument:
    // all were reported as "not a file" before, so keep doing so.
    return false
  }
}

const isDir = (path: string): boolean => {
export const isDir = (path: string): boolean => {
if (!fsLib.existsSync(path)) {
return false
}
Expand Down Expand Up @@ -83,8 +83,13 @@ export const validate = (rawArgs: any): ValidationResult => {
executablePath = rawArgs.binary
}

// The output path either needs to be a directory, or a filename in
// an existing directory.
if (!isDir(rawArgs.output)) {
return [false, `Invalid path to write results to: ${rawArgs.output}`]
const outputPathParts = pathLib.parse(rawArgs.output)
if (!isDir(outputPathParts.dir)) {
return [false, `Invalid path to write results to: ${rawArgs.output}`]
}
}
const outputPath: FilePath = rawArgs.output

Expand Down
4 changes: 3 additions & 1 deletion src/run.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ parser.addArgument(['-r', '--recursive-depth'], {
help: 'If provided, choose a link at random on page and do another crawl to this depth. Default: 1 (no recursion).'
})
parser.addArgument(['-o', '--output'], {
help: 'Path (directory) to write graphs to.',
help: 'Path to write to. If a directory is provided, then results are ' +
'written to a file in that directory. If a full path is given, ' +
'then results are written to that path.',
required: true
})
parser.addArgument(['-u', '--url'], {
Expand Down

0 comments on commit cd935a5

Please sign in to comment.