-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdoi-pipeline
executable file
·42 lines (30 loc) · 1.12 KB
/
doi-pipeline
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env bash
#
# doi-pipeline: regenerate the DOI-derived metadata files.
#
# Usage: run from the directory that contains bin/, raw/ and doi/. Every path
# used below is relative to the current working directory.
#
# Stages, in order: fetch upstream data (CRISTIN, ORCID, OpenAlex), extract
# DOIs from raw/*.txt, fetch Crossref "works" metadata for all known DOIs,
# derive the slim records, look up PDF URLs via Unpaywall, then join and
# partition the slim metadata and print summary counts.
#
# NOTE(review): there is deliberately no `set -e` / `pipefail` here. The
# Crossref step logs failures to crossref/errors.txt, so individual stages
# may be expected to exit non-zero; confirm before enabling strict mode.

# The cleanup below removes files through relative globs, and every stage is
# invoked as ./bin/<tool>. Bail out before touching anything when bin/ is not
# present in the current directory.
if [[ ! -d bin ]]; then
  printf '%s: no bin/ directory in %s; run from the directory containing bin/\n' \
    "${0##*/}" "${PWD}" >&2
  exit 1
fi

# Output directories. doi/misc is created as well because the DOI-extraction
# stage redirects its output into it.
mkdir -p slim crossref unpaywall doi/misc

# Drop previously generated Crossref and slim files; unpaywall/ is left as is.
rm -rf -- crossref/* slim/*

# Fetch CRISTIN data
./bin/cristin

# Fetch/refresh ORCID data
./bin/orcid_fetch_apn_authors

# Fetch fresh OpenAlex data
./bin/openalex_pipeline

# Extract DOIs from raw publication references, pass them through
# crossref-works, and store the de-duplicated d.DOI values.
cat raw/*.txt \
  | ./bin/raw2doi \
  | ./bin/crossref-works \
  | nd-map d.DOI \
  | sort -u \
  > doi/misc/_raw.ndjson

# Get Crossref "works" metadata for each DOI found under doi/*/ (this
# includes doi/misc/_raw.ndjson written above). stderr of crossref-works is
# captured in crossref/errors.txt.
# nd-group + nd-map 'd[1][0]' keeps a single element per d.DOI group;
# presumably the first record for each DOI (confirm against nd-group's
# output shape).
cat doi/*/*.ndjson \
  | sort -u \
  | ./bin/crossref-works 2> crossref/errors.txt \
  | nd-group d.DOI \
  | nd-map 'd[1][0]' \
  > crossref/akvaplan-works.ndjson

# Create Crossref slim
./bin/crossref-slim \
  < crossref/akvaplan-works.ndjson \
  > crossref/akvaplan-slim.ndjson

# Get pdf url from Unpaywall
./bin/unpaywall_pdfs \
  < crossref/akvaplan-slim.ndjson \
  > unpaywall/akvaplan-pdfs.ndjson

# Citation counts from OpenAlex
# NOTE(review): no dedicated command follows this heading; presumably the
# counts come from openalex_pipeline above or are merged by
# slim-join-pipeline below. Confirm, or remove the heading.

# Create slim metadata
./bin/slim-join-pipeline \
  | ./bin/ignore \
  | ./bin/partition-slim-by-year

# Show summary counts
./bin/slim-counts

# Verify SHA checksums (currently disabled)
# ./bin/verify-checksums

# Author collaboration counts (currently disabled)
#./bin/count-author-collab | nd-filter 'd.count>=3' | nd-map 'values(d)'> collab/author-collab-gt3.ndjson