-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathuserList.sh
executable file
·71 lines (60 loc) · 1.47 KB
/
userList.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env bash
# Script to download the list of wiki users having at least one edit, using
# the MediaWiki Allusers API.
# Prime Jyothi ([email protected]), 20131110
# License GPLv3

# Base Allusers query: skip accounts in the "bot" group, return edit count and
# groups for each user, list only users with edits, 500 records per request,
# XML output. Must stay quoted wherever it is used ('&' and '|' are shell
# metacharacters).
readonly api="http://ml.wikipedia.org/w/api.php?action=query&list=allusers&auexcludegroup=bot&auprop=editcount|groups&auwitheditsonly&aulimit=500&format=xml"
# Sentinel printed by lastUname when there is nothing more to fetch.
readonly stopSign="ALongStringThatIndicateItIsTimeToStop"
# Number of the response file currently being processed (1.xml, 2.xml, ...).
count=1
# Safety cap on how many response files are processed.
readonly max=100
#######################################
# Print a progress message prefixed with "Log: ".
# Arguments: any number of words; they are joined with single spaces.
# Outputs:   one line on stdout.
#######################################
function log ()
{
    # Join with a space regardless of the caller's IFS, matching what echo
    # does with separate arguments.
    local IFS=' '
    printf '%s\n' "Log: $*"
}
#######################################
# Print the last user name contained in an Allusers XML response.
# Globals:   stopSign (read)
# Arguments: $1 - path of the XML file to inspect
# Outputs:   on stdout, the name attribute of the last
#            /api/query/allusers/u element, or $stopSign when the file
#            yields no name or exactly one name.
#######################################
function lastUname ()
{
    local names

    # One name per line; a failed or empty extraction leaves names empty.
    names=$(xmlstarlet sel -T -t -m /api/query/allusers/u -v "@name" -n "$1") || true

    # No newline left after the trailing ones are stripped means zero or one
    # record. A follow-up request made with aufrom starts at the name it was
    # given, so a single record is taken as the end of the data.
    if [[ -z "$names" || "$names" != *$'\n'* ]]
    then
        printf '%s\n' "$stopSign"
        return 0
    fi

    # Keep only the text after the final newline, i.e. the last name, and
    # print it verbatim (no word splitting or glob expansion).
    printf '%s\n' "${names##*$'\n'}"
}
#######################################
# Percent-encode a string so it can be used as a URL query value.
# Arguments: $1 - raw text (any bytes)
# Outputs:   encoded text on stdout; ASCII letters, digits and "-._~" are
#            kept, every other byte becomes %xx.
#######################################
function urlEncode ()
{
    local IFS=$' \t\n'
    local hex ch out=""

    # Work on the hex dump so multi-byte characters are handled byte by byte
    # independent of the locale. Word splitting of the od output is intended.
    for hex in $(printf '%s' "$1" | od -An -v -tx1)
    do
        case "$hex" in
            3[0-9]|4[1-9a-f]|5[0-9a]|6[1-9a-f]|7[0-9a]|2d|2e|5f|7e)
                printf -v ch "\\x$hex"
                out+=$ch
                ;;
            *)
                out+="%$hex"
                ;;
        esac
    done
    printf '%s\n' "$out"
}

# First set of data
# -q: progress output is noise; failure is detected through the exit status.
if ! wget -q "$api" -O 1.xml
then
    log "Download failed: $api"
    exit 1
fi

# Page through the user list: the last name of each response becomes the
# aufrom value of the next request, stored in the next numbered file.
while (( count <= max ))
do
    last=$(lastUname "${count}.xml")
    if [[ "$last" == "$stopSign" ]]
    then
        # End of data or some error, stop the process
        log "End of data"
        break
    fi
    log "count = $count, last $last"

    # Encode the name: characters such as '&', '+' or '#' would otherwise
    # change the meaning of the query string.
    startUser="&aufrom=$(urlEncode "$last")"
    log "startUser $startUser"

    count=$(( count + 1 ))
    url="${api}${startUser}"
    log "url $url"
    if ! wget -q "$url" -O "${count}.xml"
    then
        log "Download failed: $url"
        exit 1
    fi

    # Be nice, give the server a break before next request
    sleep 2
done