diff --git a/__tests__/result.test.ts b/__tests__/result.test.ts index 77d1ee1..c746446 100644 --- a/__tests__/result.test.ts +++ b/__tests__/result.test.ts @@ -14,7 +14,7 @@ Deno.test('cheerio selector lazy loads and caches itself', async () => { Deno.test('cheerio selector loads with case sensitive headers', async () => { - const response = JSON.parse(await Deno.readTextFile('__tests__/data/response_html_case_sensitive_headers.json')); + const response = JSON.parse(await Deno.readTextFile('__tests__/data/response_html_success.json')); const result = new ScrapeResult(response); assertEquals(result.selector('h1').text(), 'Herman Melville - Moby-Dick'); }); diff --git a/deno.json b/deno.json index e61c727..c87c1bf 100644 --- a/deno.json +++ b/deno.json @@ -4,7 +4,7 @@ }, "name": "@scrapfly/scrapfly-sdk", "exports": "./src/main.ts", - "version": "0.6.6", + "version": "0.6.7", "description": "SDK for Scrapfly.io API for web scraping, screenshotting and data extraction", "tasks": { "start": "deno run --allow-net --allow-read src/main.ts", diff --git a/src/extractionconfig.ts b/src/extractionconfig.ts index d7c59be..1d56d75 100644 --- a/src/extractionconfig.ts +++ b/src/extractionconfig.ts @@ -26,7 +26,7 @@ type ExtractionConfigOptions = { extraction_prompt?: string; extraction_model?: string; is_document_compressed?: boolean; - document_compression_format?: CompressionFormat; + document_compression_format?: 'gzip' | 'zstd' | 'deflate' | CompressionFormat; webhook?: string; }; @@ -40,11 +40,14 @@ export class ExtractionConfig { extraction_prompt?: string; extraction_model?: string; is_document_compressed?: boolean; - document_compression_format?: CompressionFormat; + document_compression_format?: 'gzip' | 'zstd' | 'deflate' | CompressionFormat; webhook?: string; constructor(options: ExtractionConfigOptions) { this.validateOptions(options); + if (options.document_compression_format && !Object.values(CompressionFormat).includes(options.document_compression_format as CompressionFormat)) { + throw new errors.ExtractionConfigError(`Invalid CompressionFormat param value: ${options.document_compression_format}`); + } this.body = options.body; this.content_type = options.content_type; this.url = options.url ?? this.url; diff --git a/src/scrapeconfig.ts b/src/scrapeconfig.ts index f75353d..91017c6 100644 --- a/src/scrapeconfig.ts +++ b/src/scrapeconfig.ts @@ -38,6 +38,7 @@ export enum Format { export enum FormatOption { NO_LINKS = 'no_links', NO_IMAGES = 'no_images', + ONLY_CONTENT = 'only_content' } type ScrapeConfigOptions = { @@ -58,8 +59,8 @@ type ScrapeConfigOptions = { proxy_pool?: string; session?: string; tags?: string[]; - format?: Format; - format_options?: FormatOption[]; + format?: 'json' | 'text' | 'markdown' | 'clean_html' | 'raw' | Format; + format_options?: ('no_links' | 'no_images' | 'only_content' | FormatOption)[]; correlation_id?: string; cookies?: Rec; body?: string; @@ -69,7 +70,7 @@ type ScrapeConfigOptions = { rendering_wait?: number; wait_for_selector?: string; screenshots?: Rec; - screenshot_flags?: ScreenshotFlags[]; + screenshot_flags?: ('load_images' | 'dark_mode' | 'block_banners' | 'print_media_format' | 'high_quality' | ScreenshotFlags)[]; session_sticky_proxy?: boolean; webhook?: string; timeout?: number; @@ -100,8 +101,8 @@ export class ScrapeConfig { proxy_pool?: string; session?: string; tags: Set = new Set(); - format?: Format; // raw(unchanged) - format_options?: FormatOption[]; + format?: 'json' | 'text' | 'markdown' | 'clean_html' | 'raw' | Format; + format_options?: ('no_links' | 'no_images' | 'only_content' | FormatOption)[]; correlation_id?: string; cookies?: Rec; body?: string; @@ -112,7 +113,7 @@ export class ScrapeConfig { wait_for_selector?: string; session_sticky_proxy = false; screenshots?: Rec; - screenshot_flags?: ScreenshotFlags[]; + screenshot_flags?: ('load_images' | 'dark_mode' | 'block_banners' | 'print_media_format' | 'high_quality' | ScreenshotFlags)[]; webhook?: string; timeout?: number; // in milliseconds js_scenario?: Rec; @@ -122,14 +123,21 @@ export class ScrapeConfig { constructor(options: ScrapeConfigOptions) { this.validateOptions(options); - if (options.format && !Object.values(Format).includes(options.format)) { - throw new ScrapeConfigError(`Invalid format param value: ${options.format}`); + if (options.format && !Object.values(Format).includes(options.format as Format)) { + throw new ScrapeConfigError(`Invalid Format param value: ${options.format}`); } this.format = options.format ?? this.format; + if (options.format_options) { + options.format_options.forEach((flag) => { + if (!Object.values(FormatOption).includes(flag as FormatOption)) { + throw new ScrapeConfigError(`Invalid FormatOption param value: ${flag}`); + } + }); + } if (options.screenshot_flags) { options.screenshot_flags.forEach((flag) => { - if (!Object.values(ScreenshotFlags).includes(flag)) { - throw new ScrapeConfigError(`Invalid screenshot_flags param value: ${flag}`); + if (!Object.values(ScreenshotFlags).includes(flag as ScreenshotFlags)) { + throw new ScrapeConfigError(`Invalid ScreenshotFlags param value: ${flag}`); } }); } diff --git a/src/screenshotconfig.ts b/src/screenshotconfig.ts index df9d90d..eff96b4 100644 --- a/src/screenshotconfig.ts +++ b/src/screenshotconfig.ts @@ -34,14 +34,14 @@ export enum Format { type ScreenshotConfigOptions = { url: string; - format?: Format; + format?: 'jpg'| 'png' | 'webp'| 'gif' | Format; capture?: string; resolution?: string; country?: string; timeout?: number; rendering_wait?: number; wait_for_selector?: string; - options?: Options[]; + options?: ('load_images' | 'dark_mode' | 'block_banners' | 'print_media_format' | Options)[]; auto_scroll?: boolean; js?: string; cache?: boolean; @@ -52,14 +52,14 @@ type ScreenshotConfigOptions = { export class ScreenshotConfig { url: string; - format?: Format; + format?: 'jpg'| 'png' | 'webp'| 'gif' | Format; capture?: string; resolution?: string; country?: string = undefined; timeout?: number; rendering_wait?: number; wait_for_selector?: string; - options?: Options[]; + options?: ('load_images' | 'dark_mode' | 'block_banners' | 'print_media_format' | Options)[]; auto_scroll?: boolean; js?: string; cache?: boolean; @@ -69,18 +69,17 @@ export class ScreenshotConfig { constructor(options: ScreenshotConfigOptions) { this.validateOptions(options); - if (options.format && !Object.values(Format).includes(options.format)) { - throw new ScreenshotConfigError(`Invalid format param value: ${options.format}`); + if (options.format && !Object.values(Format).includes(options.format as Format)) { + throw new ScreenshotConfigError(`Invalid Format param value: ${options.format}`); } this.format = options.format ?? this.format; - // Validate options against the enum if (options.options) { options.options.forEach((opt) => { - if (!Object.values(Options).includes(opt)) { - throw new ScreenshotConfigError(`Invalid options param value: ${opt}`); + if (!Object.values(Options).includes(opt as Options)) { + throw new ScreenshotConfigError(`Invalid Options param value: ${opt}`); } }); - } + } this.url = options.url; this.format = options.format ?? this.format; this.capture = options.capture ?? this.capture;