-
Notifications
You must be signed in to change notification settings - Fork 76
/
Copy pathWeb.FetchSequentially.pq
58 lines (52 loc) · 1.61 KB
/
Web.FetchSequentially.pq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/*
//Sequentially scrape a given list of URLs with a given minimum delay between fetches
//Usage:
let
Web.FetchSequentially = Load("Web.FetchSequentially"),
BaseUrl = "http://example.com/?p=",
Pages = List.Numbers(1, 5),
Urls = List.Transform(Pages, each BaseUrl & Number.ToText(_))
in
Web.FetchSequentially(Urls)
//Result: [a list of decoded contents for each of the input URLs]
*/
(
    Urls as list,                   // URLs to fetch, in order; each item is passed to a function typed `uri as text`
    optional Delay as number,       // seconds to wait before each fetch; 1 when omitted
    optional Encoding as number,    // encoding number handed to Text.FromBinary; TextEncoding.Utf8 when omitted
                                    // (code page list: https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756(v=vs.85).aspx)
    optional Options                // forwarded unchanged to Web.Scrape; see the default record below
) as list =>
let
    // Project helper resolved by name through Load; its result is decoded with Text.FromBinary below.
    Scraper = Load("Web.Scrape"),

    // Fill in defaults for the optional arguments.
    WaitSeconds = if Delay = null then 1 else Delay,
    Codepage = if Encoding = null then TextEncoding.Utf8 else Encoding,
    RequestOptions =
        if Options = null then
            [
                // Other keys the original author listed but left disabled: ApiKeyName, Content
                Query = [],
                Headers = []
            ]
        else
            Options,

    UrlCount = List.Count(Urls),
    Pause = #duration(0, 0, 0, WaitSeconds),

    // Download one URL and decode the returned binary to text.
    // (A disabled alternative in the original was Binary.Buffer(Web.Contents(uri, Options)).)
    FetchText = (uri as text) => Text.FromBinary(Scraper(uri, RequestOptions), Codepage),

    // Generator state: Position is the index of the NEXT url to fetch,
    // Content is the page fetched for the PREVIOUS index (null for the seed).
    Seed = [Position = 0, Content = null],

    // Position = UrlCount is still a valid state: it carries the content of the last URL.
    HasMore = (state) => state[Position] <= UrlCount,

    // Content is a lazy field, so the delayed fetch only runs when the selector reads it.
    // The state produced after the last valid one is rejected by HasMore without touching Content.
    Advance = (state) => [
        Position = state[Position] + 1,
        Content = Function.InvokeAfter(() => FetchText(Urls{state[Position]}), Pause)
    ],

    States = List.Generate(() => Seed, HasMore, Advance, (state) => state[Content]),

    // Drop the seed's placeholder null so the result lines up one-to-one with Urls.
    Pages = List.Skip(States, 1)
in
    // Buffer so the generated list (and every delayed fetch in it) is materialised once.
    List.Buffer(Pages)