Skip to content

Commit

Permalink
Merge pull request #10 from scrapfly/enum--params-string-support
Browse files Browse the repository at this point in the history
support plain text on enum validated params
  • Loading branch information
Granitosaurus authored Oct 4, 2024
2 parents b5dd654 + 6743cb2 commit cdc4b71
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 24 deletions.
2 changes: 1 addition & 1 deletion __tests__/result.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Deno.test('cheerio selector lazy loads and caches itself', async () => {


Deno.test('cheerio selector loads with case sensitive headers', async () => {
const response = JSON.parse(await Deno.readTextFile('__tests__/data/response_html_case_sensitive_headers.json'));
const response = JSON.parse(await Deno.readTextFile('__tests__/data/response_html_success.json'));
const result = new ScrapeResult(response);
assertEquals(result.selector('h1').text(), 'Herman Melville - Moby-Dick');
});
Expand Down
2 changes: 1 addition & 1 deletion deno.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
},
"name": "@scrapfly/scrapfly-sdk",
"exports": "./src/main.ts",
"version": "0.6.6",
"version": "0.6.7",
"description": "SDK for Scrapfly.io API for web scraping, screenshotting and data extraction",
"tasks": {
"start": "deno run --allow-net --allow-read src/main.ts",
Expand Down
7 changes: 5 additions & 2 deletions src/extractionconfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ type ExtractionConfigOptions = {
extraction_prompt?: string;
extraction_model?: string;
is_document_compressed?: boolean;
document_compression_format?: CompressionFormat;
document_compression_format?: 'gzip' | 'zstd' | 'deflate' | CompressionFormat;
webhook?: string;
};

Expand All @@ -40,11 +40,14 @@ export class ExtractionConfig {
extraction_prompt?: string;
extraction_model?: string;
is_document_compressed?: boolean;
document_compression_format?: CompressionFormat;
document_compression_format?: 'gzip' | 'zstd' | 'deflate' | CompressionFormat;
webhook?: string;

constructor(options: ExtractionConfigOptions) {
this.validateOptions(options);
if (options.document_compression_format && !Object.values(CompressionFormat).includes(options.document_compression_format as CompressionFormat)) {
throw new errors.ExtractionConfigError(`Invalid CompressionFormat param value: ${options.document_compression_format}`);
}
this.body = options.body;
this.content_type = options.content_type;
this.url = options.url ?? this.url;
Expand Down
28 changes: 18 additions & 10 deletions src/scrapeconfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export enum Format {
/**
 * Allowed entries for the `format_options` scrape parameter.
 *
 * The ScrapeConfig constructor checks every `format_options` entry against
 * the string values of this enum and throws a ScrapeConfigError for any
 * value that is not listed here. Callers may pass either the enum member or
 * the equivalent plain string literal.
 */
export enum FormatOption {
// NOTE(review): presumably omits links from the formatted output — confirm against the API docs.
NO_LINKS = 'no_links',
// NOTE(review): presumably omits images from the formatted output — confirm against the API docs.
NO_IMAGES = 'no_images',
// NOTE(review): presumably keeps only the main page content — confirm against the API docs.
ONLY_CONTENT = 'only_content'
}

type ScrapeConfigOptions = {
Expand All @@ -58,8 +59,8 @@ type ScrapeConfigOptions = {
proxy_pool?: string;
session?: string;
tags?: string[];
format?: Format;
format_options?: FormatOption[];
format?: 'json' | 'text' | 'markdown' | 'clean_html' | 'raw' | Format;
format_options?: ('no_links' | 'no_images' | 'only_content' | FormatOption)[];
correlation_id?: string;
cookies?: Rec<string>;
body?: string;
Expand All @@ -69,7 +70,7 @@ type ScrapeConfigOptions = {
rendering_wait?: number;
wait_for_selector?: string;
screenshots?: Rec<any>;
screenshot_flags?: ScreenshotFlags[];
screenshot_flags?: ('load_images' | 'dark_mode' | 'block_banners' | 'print_media_format' | 'high_quality' | ScreenshotFlags)[];
session_sticky_proxy?: boolean;
webhook?: string;
timeout?: number;
Expand Down Expand Up @@ -100,8 +101,8 @@ export class ScrapeConfig {
proxy_pool?: string;
session?: string;
tags: Set<string> = new Set<string>();
format?: Format; // raw(unchanged)
format_options?: FormatOption[];
format?: 'json' | 'text' | 'markdown' | 'clean_html' | 'raw' | Format;
format_options?: ('no_links' | 'no_images' | 'only_content' | FormatOption)[];
correlation_id?: string;
cookies?: Rec<string>;
body?: string;
Expand All @@ -112,7 +113,7 @@ export class ScrapeConfig {
wait_for_selector?: string;
session_sticky_proxy = false;
screenshots?: Rec<any>;
screenshot_flags?: ScreenshotFlags[];
screenshot_flags?: ('load_images' | 'dark_mode' | 'block_banners' | 'print_media_format' | 'high_quality' | ScreenshotFlags)[];
webhook?: string;
timeout?: number; // in milliseconds
js_scenario?: Rec<any>;
Expand All @@ -122,14 +123,21 @@ export class ScrapeConfig {

constructor(options: ScrapeConfigOptions) {
this.validateOptions(options);
if (options.format && !Object.values(Format).includes(options.format)) {
throw new ScrapeConfigError(`Invalid format param value: ${options.format}`);
if (options.format && !Object.values(Format).includes(options.format as Format)) {
throw new ScrapeConfigError(`Invalid Format param value: ${options.format}`);
}
this.format = options.format ?? this.format;
if (options.format_options) {
options.format_options.forEach((flag) => {
if (!Object.values(FormatOption).includes(flag as FormatOption)) {
throw new ScrapeConfigError(`Invalid FormatOption param value: ${flag}`);
}
});
}
if (options.screenshot_flags) {
options.screenshot_flags.forEach((flag) => {
if (!Object.values(ScreenshotFlags).includes(flag)) {
throw new ScrapeConfigError(`Invalid screenshot_flags param value: ${flag}`);
if (!Object.values(ScreenshotFlags).includes(flag as ScreenshotFlags)) {
throw new ScrapeConfigError(`Invalid ScreenshotFlags param value: ${flag}`);
}
});
}
Expand Down
19 changes: 9 additions & 10 deletions src/screenshotconfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ export enum Format {

type ScreenshotConfigOptions = {
url: string;
format?: Format;
format?: 'jpg'| 'png' | 'webp'| 'gif' | Format;
capture?: string;
resolution?: string;
country?: string;
timeout?: number;
rendering_wait?: number;
wait_for_selector?: string;
options?: Options[];
options?: ('load_images' | 'dark_mode' | 'block_banners' | 'print_media_format' | Options)[];
auto_scroll?: boolean;
js?: string;
cache?: boolean;
Expand All @@ -52,14 +52,14 @@ type ScreenshotConfigOptions = {

export class ScreenshotConfig {
url: string;
format?: Format;
format?: 'jpg'| 'png' | 'webp'| 'gif' | Format;
capture?: string;
resolution?: string;
country?: string = undefined;
timeout?: number;
rendering_wait?: number;
wait_for_selector?: string;
options?: Options[];
options?: ('load_images' | 'dark_mode' | 'block_banners' | 'print_media_format' | Options)[];
auto_scroll?: boolean;
js?: string;
cache?: boolean;
Expand All @@ -69,18 +69,17 @@ export class ScreenshotConfig {

constructor(options: ScreenshotConfigOptions) {
this.validateOptions(options);
if (options.format && !Object.values(Format).includes(options.format)) {
throw new ScreenshotConfigError(`Invalid format param value: ${options.format}`);
if (options.format && !Object.values(Format).includes(options.format as Format)) {
throw new ScreenshotConfigError(`Invalid Format param value: ${options.format}`);
}
this.format = options.format ?? this.format;
// Validate options against the enum
if (options.options) {
options.options.forEach((opt) => {
if (!Object.values(Options).includes(opt)) {
throw new ScreenshotConfigError(`Invalid options param value: ${opt}`);
if (!Object.values(Options).includes(opt as Options)) {
throw new ScreenshotConfigError(`Invalid Options param value: ${opt}`);
}
});
}
}
this.url = options.url;
this.format = options.format ?? this.format;
this.capture = options.capture ?? this.capture;
Expand Down

0 comments on commit cdc4b71

Please sign in to comment.