-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathlncdwiki_weekly_summary
executable file
·100 lines (89 loc) · 3.49 KB
/
lncdwiki_weekly_summary
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env bash
#
# fetch and format recent wiki changes for a slack message
# as promotion for wiki
#
# set the TIMEDIFF and/or PASS environment variables to change behavior
# TIMEDIFF="1 week ago - 1 day" # passed on to 'date -d' for decoding
# PASS="$(pass wiki)" # looks like 'user:pass'
#
# when PASS is set, uses curl to create cookie
#
# used in cron, piped to slack-msg
# 00 12 * * 5 PASS=$(pass wiki) lncdwiki_weekly_summary | slack-msg random
# 20231013WF - added code header; password; links in top of message
# Look-back window: any expression 'date -d' understands. An unset or empty
# TIMEDIFF falls back to the default given here.
: "${TIMEDIFF:=1 week ago - 1 day}"
# Exported because the perl one-liner in wiki_revisions reads it via %ENV.
export EDITS_STARTING_ON
EDITS_STARTING_ON="$(date -d "$TIMEDIFF" +%F)"
# Credentials are optional; an empty PASS means no login is attempted.
: "${PASS:=}"
# curl cookie-jar arguments; stays empty until auth_cookie fills it in.
COOKIEARGS=()
# originally thought creds in url like https://${PASS}@lncd.pitt.edu...
# would work. but need cookie login
# NB. cookie file hanging out in /tmp/ is maybe not great
#######################################
# Log in to the wiki once and remember the cookie-jar arguments for curl.
# Globals:
#   PASS        (read)    credentials as 'user:password'; empty means no login
#   COOKIEARGS  (written) set to the --cookie/--cookie-jar arguments for curl
# Arguments:
#   $1 - cookie jar path (default: /tmp/cjar)
# Returns:
#   0 always; a failed login is not detected here.
#######################################
auth_cookie() {
  declare -g COOKIEARGS
  local jar=${1:-/tmp/cjar}
  local user pass

  # Nothing to do without credentials, or when COOKIEARGS is already populated.
  if [[ -z "${PASS:-}" || -n "${COOKIEARGS[*]}" ]]; then
    return 0
  fi

  # Split on the FIRST colon only, so a password may itself contain ':'.
  user=${PASS%%:*}
  pass=${PASS#*:}

  # --data-urlencode lets curl escape '&', '=', '%', '+' ... in the credentials
  # instead of pasting them raw into the form body.
  # NOTE(review): the password is still visible in curl's argv (ps) while the
  # request runs.
  curl -Ls \
    --data-urlencode "u=$user" \
    --data-urlencode "p=$pass" \
    --cookie-jar "$jar" \
    https://lncd.pitt.edu/wiki/doku.php >/dev/null

  COOKIEARGS=(--cookie "$jar" --cookie-jar "$jar")
}
# curl wrapper for wiki requests: runs auth_cookie first, then calls curl with
# any cookie arguments placed ahead of '-Ls' and the caller's own arguments.
wiki_curl() {
  declare -g COOKIEARGS
  auth_cookie /tmp/cjar # fills COOKIEARGS when PASS is set
  local -a request=("${COOKIEARGS[@]}" -Ls "$@")
  curl "${request[@]}"
}
# "xq" is yq's jq-for-XML front end; debian ships it as "xq-python".
# Use plain xq when it is on PATH, otherwise fall back to the debian name.
if command -v xq >/dev/null; then
  xq=xq
else
  xq="xq-python"
fi
#######################################
# Fetch the wiki feed and print one "creator<TAB>page" row per recent edit.
# Globals:
#   EDITS_STARTING_ON (read) only items whose dc:date sorts after this are kept
#   xq                (read) name of the xq executable (defaults to "xq")
# Arguments:
#   $1 - file that receives the unfiltered rows (default /tmp/wiki-results.tsv)
# Outputs:
#   stdout: the rows, minus any containing "%3A";
#   /tmp/wiki-results.html: raw copy of the feed as downloaded.
#######################################
get_wiki_updates() {
  #echo "cookies: ${COOKIEARGS[*]}" >&2
  local saveto=${1:-/tmp/wiki-results.tsv}

  # The filter drops "Page moved from" items, cuts the creator at the first
  # space and strips the link down to what sits between the first '=' and '&'.
  # NOTE(review): in jq ',' binds tighter than '|', so the last sub() looks
  # like it is applied to both array elements -- confirm that is intended.
  wiki_curl "https://lncd.pitt.edu/wiki/feed.php?num=500" |
    tee /tmp/wiki-results.html |
    "${xq:-xq}" --arg DATE "$EDITS_STARTING_ON" -r \
'."rdf:RDF".item[]|
select(."dc:date" > $DATE )|
select((if .title == null then "no title" else .title end) |test("Page moved from")|not)|
[(."dc:creator"|sub(" .*";"")),
.link |sub("http.*?=|&.*";"";"g")]|
@tsv' |
    tee "$saveto" |
    grep -v %3A # dont show images
}
wiki_revisions(){
local id="$1"
wiki_curl "https://lncd.pitt.edu/wiki/doku.php?id=${id}&do=revisions" |
grep 'div.*no.*li>'|sed 's:/li>:/li>\n:g'|
perl -lne \
'$date=$1=~s:/:-:gr if m:class="date">([0-9/]+) [^<]+</span:;
$user=$1 if m:<bdi>(.*)</bdi:;
$url=$1 if m/doku.php\?id=(.*?)&rev/;
print "$user\t$url" if m:/li: and $user and $url and $date gt $ENV{EDITS_STARTING_ON}'
}
#######################################
# List every (user, page) pair edited in the reporting window, de-duplicated.
# Each feed row is echoed as-is and then expanded with the users returned by
# wiki_revisions for that page.
# Globals:
#   COOKIEARGS (written, via auth_cookie)
# Outputs:
#   stdout: unique "user<TAB>page" rows, sorted;
#   /tmp/wiki-results.tsv: rows as saved by get_wiki_updates;
#   /tmp/wiki-results_revs.tsv: rows before de-duplication.
#######################################
wiki_user_path_edits() {
  # Log in once, up front. Every stage below runs in a pipeline subshell, so a
  # login performed inside wiki_curl is forgotten as soon as that stage ends;
  # without this call each page fetch would log in again and the concurrent
  # stages would rewrite the same cookie jar.
  auth_cookie /tmp/cjar

  get_wiki_updates /tmp/wiki-results.tsv |
    while read -r user path; do
      # A blank or single-column row has no page id to look up.
      [[ -n "$path" ]] || continue
      # printf, not 'echo -e': backslashes in the data must stay literal.
      printf '%s\t%s\n' "$user" "$path"
      # </dev/null keeps the fetch from consuming the loop's remaining input;
      # '|| :' keeps one failed page from affecting the loop status.
      wiki_revisions "$path" </dev/null || :
    done |
    tee /tmp/wiki-results_revs.tsv |
    sort -u
}
wiki_main(){
# message top with very visible links to wiki
echo "<https://lncd.pitt.edu/wiki/doku.php|Wiki> Edits <https://lncd.pitt.edu/wiki/doku.php?id=start&do=recent|this week> (since $EDITS_STARTING_ON) below."
#echo "Other entry points: <https://lncd.pitt.edu/wiki/doku.php?id=admin:website:wiki:missing|missing>, <https://lncd.pitt.edu/wiki/doku.php?id=notebooks|notebook>, <https://lncd.pitt.edu/wiki/doku.php?do=randompage|random article>"
# getting a large amount(500) of b/c
wiki_user_path_edits | # dont care about repeat edits
perl -slane \
'push @{$a{$F[0]}}, $F[1];
END{print " :star: ", $#{$a{$_}}+1,
" $_ ",
join(",", map {$_=" <https://lncd.pitt.edu/wiki/doku.php?id=$_|`:$_`>"} @{$a{$_}}) for(keys %a)}'|
tee /tmp/wiki-results-counts.tsv |
grep -v " Anonymous " | # where did anonymous come from?
sort -t' ' -nrk3,3
}
# iffmain from lncdtools
# NOTE(review): iffmain is not defined in this file. Presumably it prints shell
# code that calls wiki_main only when this script is executed rather than
# sourced -- confirm against lncdtools. Whatever it prints is eval'ed here.
eval "$(iffmain wiki_main)"