-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbatch-downloader.js
87 lines (80 loc) · 2.86 KB
/
batch-downloader.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import { promises, existsSync, mkdirSync, createWriteStream, unlink } from "fs";
import http from 'http';
import https from 'https';
import ThreadPool from "./threadpool-es.js";
// Command-line arguments: argv[2] is the URL list file, argv[3] the output
// directory. The output directory falls back to "output" when the argument is
// missing or empty.
const [, , targetUrlTextFile, requestedOutputPath] = process.argv;
const outputPath = requestedOutputPath ? requestedOutputPath : "output";
// Pool that main() queues the download tasks on. It is constructed with 4 —
// presumably the number of tasks allowed to run at once; confirm against
// threadpool-es.js.
const poolImage = new ThreadPool(4);
/**
 * Reads the URL list file, queues one download task per listed item on
 * `poolImage`, and starts the pool.
 *
 * List file layout: line 1 is the URL prefix, line 2 is the URL suffix, and
 * every following non-blank line is an item. Each item is fetched from
 * `${prefix}${item}${suffix}` and saved as `${outputPath}/${item}`.
 *
 * @returns {Promise<void>} Resolves after `poolImage.run()` has been called
 *   (the return value of `run()` is not awaited). Rejects when no list file
 *   was given, or when the list file or output directory cannot be accessed.
 */
async function main() {
  if (!targetUrlTextFile) {
    throw new Error('usage: node batch-downloader.js <url-list-file> [output-dir]');
  }

  if (!existsSync(outputPath)) {
    // The directory to create is `outputPath`; the previous code passed an
    // undefined identifier here and crashed whenever the directory was missing.
    mkdirSync(outputPath, { recursive: true });
  }

  const text = await promises.readFile(targetUrlTextFile, { encoding: 'utf-8' });
  // Accept both CRLF and LF line endings so list files written on any
  // platform are parsed the same way.
  const lines = text.split(/\r?\n/);
  const prefix = lines[0] ?? '';
  const suffix = lines[1] ?? '';
  // Skip blank lines (typically a trailing newline); a blank item would
  // otherwise produce a download whose destination is the directory itself.
  const targetUrls = lines.slice(2).filter((item) => item.trim() !== '');

  for (const item of targetUrls) {
    poolImage.add(() => {
      const url = `${prefix}${item}${suffix}`;
      console.log(`running: ${url}`);
      return download(url, `${outputPath}/${item}`);
    });
  }

  poolImage.step = () => console.log(`Running: ${ poolImage.running }, downloaded: ${ poolImage.counter } / ${ poolImage.sum }, ${ poolImage.status() }`);
  poolImage.finish(() => {
    console.log('okayed');
  });
  poolImage.run();
}

// Report start-up failures explicitly instead of leaving the promise floating.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Downloads `url` into the file `dest`.
 *
 * Nothing is downloaded when `dest` already exists. The whole transfer must
 * complete within 10 seconds. Only HTTP 403 and 404 are treated as HTTP
 * failures; any other status has its body written to `dest` as-is.
 *
 * @param {string} url - Address to fetch. URLs starting with `https:` use the
 *   `https` module, everything else uses `http`.
 * @param {string} dest - Path of the file to write.
 * @param {boolean} [reserveEmpty] - When truthy, a zero-byte result is kept on
 *   disk; otherwise the empty file is deleted (the promise still resolves).
 * @returns {Promise<string>} Resolves with `'existed'` when `dest` was already
 *   present, or with `succeed: <url>` after the transfer finished. Rejects
 *   with a descriptive string (kept as strings for existing callers) on
 *   timeout, HTTP 403/404, request failure, or file write failure.
 */
function download(url, dest, reserveEmpty) {
  const TIMEOUT_MS = 10000;

  return new Promise((resolve, reject) => {
    if (existsSync(dest)) {
      resolve('existed');
      return;
    }

    const client = url.startsWith('https:') ? https : http;
    let request = null;
    let fileStream = null;
    // Guards against settling twice: aborting the request or the stream can
    // trigger further 'error' events after the outcome is already decided.
    let settled = false;

    // Single failure path: stop the timer, abort the transfer and remove any
    // partial file so a later run does not mistake it for a finished download.
    const fail = (reason) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(rejectTimer);
      if (request) {
        request.destroy();
      }
      if (fileStream) {
        fileStream.destroy();
        // Best-effort cleanup; a failed unlink is deliberately ignored.
        unlink(dest, () => {});
      }
      reject(reason);
    };

    const succeed = (value) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(rejectTimer);
      resolve(value);
    };

    const onTransferError = (err) => {
      if (settled) {
        return;
      }
      console.log(`failed on ${url}: ${err}`);
      fail(`error occurred: ${err}`);
    };

    const onResponse = (response) => {
      if (settled) {
        return;
      }
      const { statusCode } = response;
      // Check the status before touching the disk so an error page is never
      // written to `dest`.
      if (statusCode === 404 || statusCode === 403) {
        console.log(`failed: ${url}`);
        fail(`failed: ${statusCode}, url: ${url}`);
        return;
      }

      fileStream = createWriteStream(dest);
      fileStream.on('error', (err) => {
        if (settled) {
          return;
        }
        console.log(`error occurred: ${err}, ${url} to ${dest}`);
        fail(`failed to write file on ${dest}, url: ${url}`);
      });
      fileStream.on('finish', () => {
        if (settled) {
          return;
        }
        if (fileStream.bytesWritten === 0 && !reserveEmpty) {
          unlink(dest, () => {});
          console.log(`got empty file at ${dest}, deleted`);
        } else {
          console.log(`downloaded: ${url}`);
        }
        succeed(`succeed: ${url}`);
      });
      response.on('error', onTransferError);
      response.pipe(fileStream);
    };

    const rejectTimer = setTimeout(() => {
      fail(`Time out: ${url}`);
    }, TIMEOUT_MS);

    console.log(`downloading: ${url} to ${dest}`);
    try {
      request = client.get(url, onResponse);
    } catch (err) {
      // get() throws synchronously for malformed URLs or unsupported protocols.
      onTransferError(err);
      return;
    }
    request.on('error', onTransferError);
  });
}