-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfsck-batch
executable file
·284 lines (236 loc) · 6.83 KB
/
fsck-batch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#!/usr/bin/env bash
# Prints the usage text for this script.
# Globals:
#   EXEC_NAME  the name of this executable, shown in the title and usage lines
#   VERSION    the version of this script, shown in the title line
# Output:
#   To stdout, it writes the help text.
show_help() {
  cat <<EOF
$EXEC_NAME version $VERSION
Usage:
$EXEC_NAME [options] CLASS-BASE
checks data object replicas for correct sizes and checksums
Parameters:
CLASS-BASE the common basename of the files that hold the data objects with
incorrect checksums or sizes
Options:
-h, --help display help text and exit
-H, --host HOST connect to the ICAT's DBMS on the host HOST instead of the
PostgreSQL default
-J, --jobs N perform N checks simultaneously, default is the number of
CPUs
-R, --resource RESC only check replicas on the storage resource RESC
-U, --user USER authorize the DBMS connection as user USER instead of the
default
-v, --version display version and exit
Summary:
The script reads a NUL-delimited list of iRODS data object paths from standard
in and checks to see if any of the replicas have an incorrect checksum or size,
or are missing from storage. The ones with incorrect sizes are appended to the
file \`CLASS-BASE.bad_size\`. The ones with incorrect checksums are appended to
the file \`CLASS-BASE.bad_chksum\`. The ones whose files could not be found on
the storage host are appended to the file \`CLASS-BASE.missing\`. If an error
occurs while checking a replica, the error is written to
\`CLASS-BASE.errors\`. In any case, at most one log entry will be logged per
line.
\`CLASS-BASE.bad_chksum\`, \`CLASS-BASE.bad_size\`, and \`CLASS-BASE.missing\`
file entries have the form '<resource hierarchy> <data object path>'.
Environment Variables:
PGHOST the default PostgreSQL host
PGUSER the default PostgreSQL user for authorizing the ICAT DB connection
Prerequisites:
1) iRODS 4.2.8 or later
2) The user must be initialized with iRODS as an admin user.
3) The user must be able to connect to the ICAT DB without providing a
password.
© 2024, The Arizona Board of Regents on behalf of The University of Arizona. For
license information, see https://cyverse.org/license.
EOF
}
# The version reported by --version and in the help text.
declare -r VERSION=5

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Mark the PostgreSQL connection variables for export so that child processes
# see whatever values they hold.
export PGHOST PGUSER

# The name of this executable, derived from the resolved path of $0.
EXEC_NAME="$(basename "$(realpath --canonicalize-missing "$0")")"
declare -r EXEC_NAME
# Parses the command line and starts the check.
# Arguments:
#   The raw command line arguments
# Globals:
#   PGHOST   assigned when -H/--host is given
#   PGUSER   assigned when -U/--user is given
#   VERSION  printed for -v/--version
# Output:
#   To stdout, the help text for --help or the version for --version. To
#   stderr, the help text and a message when the command line is invalid.
# Returns:
#   1 when the command line is invalid, otherwise the status of the last
#   command run
main() {
  local opts
  if ! opts="$(format_opts "$@")"; then
    printf '\n'
    show_help
    return 1
  fi >&2

  # Replace the positional parameters with the normalized option list.
  eval set -- "$opts"

  local jobs resc
  while true; do
    case "$1" in
      -h | --help)
        show_help
        return 0
        ;;
      -H | --host)
        PGHOST="$2"
        shift 2
        ;;
      -J | --jobs)
        jobs="$2"
        shift 2
        ;;
      -R | --resource)
        resc="$2"
        shift 2
        ;;
      -U | --user)
        PGUSER="$2"
        shift 2
        ;;
      -v | --version)
        printf '%s\n' "$VERSION"
        return 0
        ;;
      --)
        shift
        break
        ;;
      *)
        show_help >&2
        return 1
        ;;
    esac
  done

  if (( $# < 1 )); then
    show_help >&2
    printf '\n' >&2
    printf 'The base name for the error files needs to be provided.\n' >&2
    return 1
  fi

  local classBase="$1"

  # When no resource was requested, fsck is handed the literal two-character
  # string '' rather than an empty string. fsck puts this value on the command
  # line it gives to `parallel`; NOTE(review): presumably the quotes are what
  # let CHECK_OBJ receive an empty first argument once that command line is
  # re-parsed -- confirm before changing.
  local noResc="''"

  # The job count is tested by fsck with `[[ -n ... ]]` and copied verbatim into
  # `--jobs=`, so its default has to be a truly empty string.
  fsck "$classBase" "${resc-$noResc}" "${jobs-}"
}
# Runs the raw command line through getopt so that it can be parsed in getopt
# style.
# Arguments:
#   The raw command line arguments
# Globals:
#   EXEC_NAME  the name getopt reports in its error messages
# Output:
#   The formatted arguments
format_opts() {
  local -a getoptArgs=(
    --name "$EXEC_NAME"
    --options hH:J:R:U:v
    --longoptions help,host:,jobs:,resource:,user:,version
  )

  getopt "${getoptArgs[@]}" -- "$@"
}
# Runs CHECK_OBJ through `parallel`, once per NUL-delimited input item, and
# pipes everything it produces into log.
# Arguments:
#   classBase  the common basename of the report files, handed to log
#   resc       the resource argument placed after CHECK_OBJ on the command line
#              given to `parallel`
#   jobs       the number of simultaneous checks; when empty, no --jobs option
#              is passed
fsck() {
  local reportBase="$1"
  local rescArg="$2"
  local jobCount="$3"

  local -a runnerOpts=(--eta --no-notice --null --max-args=1)
  [[ -z "$jobCount" ]] || runnerOpts+=(--jobs="$jobCount")

  parallel "${runnerOpts[@]}" CHECK_OBJ "$rescArg" | log "$reportBase"
}
# Sorts the lines read from standard in into the report files, keyed on the
# first word of each line.
# Arguments:
#   classBase  the common basename of the report files
# Output:
#   Lines starting with `Completed` are echoed to stdout. Lines starting with
#   `checksum`, `missing`, or `size` have the rest of the line appended to
#   `classBase.bad_chksum`, `classBase.missing`, or `classBase.bad_size`,
#   respectively. Any other line is appended to `classBase.errors`.
log() {
  local reportBase="$1"

  local tag rest
  while read -r tag rest; do
    case "$tag" in
      Completed)
        printf 'Completed %s\n' "$rest"
        ;;
      checksum)
        printf '%s\n' "$rest" >> "$reportBase".bad_chksum
        ;;
      missing)
        printf '%s\n' "$rest" >> "$reportBase".missing
        ;;
      size)
        printf '%s\n' "$rest" >> "$reportBase".bad_size
        ;;
      *)
        printf '%s %s\n' "$tag" "$rest" >> "$reportBase".errors
        ;;
    esac
  done
}
# For a given data object, this function retrieves the size, checksum, resource,
# and file path for each of its replicas.
# Arguments:
#   dataObj   the absolute path to the data object
#   rescName  if not empty, restricts the replicas to those whose resource has
#             this name
# Output:
#   To stdout, it writes one record per replica. Each record has the form
#   `<size>\t<checksum>\t<storage resource name>\t<file path>`. Each record is
#   NUL terminated.
GET_CAT_INFO() {
  local dataObj="$1"
  local rescName="${2-}"

  local collName dataName
  collName="$(dirname "$dataObj")"
  dataName="$(basename "$dataObj")"

  # The values are handed to psql as variables and referenced with :'name', so
  # psql quotes them as SQL literals. Splicing them into the query text directly
  # breaks the query for any path containing a single quote. The heredoc is
  # quoted to keep the shell from expanding anything inside the query.
  psql --no-align --quiet --record-separator-zero --tuples-only --field-separator=$'\t' \
    --set=collName="$collName" \
    --set=dataName="$dataName" \
    --set=rescName="$rescName" \
    ICAT \
<<'EOSQL'
SELECT d.data_size, d.data_checksum, r.resc_name, d.data_path
FROM r_data_main AS d JOIN r_resc_main AS r ON r.resc_id = d.resc_id
WHERE d.data_name = :'dataName'
AND d.coll_id = (SELECT coll_id FROM r_coll_main WHERE coll_name = :'collName')
AND (:'rescName' = '' OR r.resc_name = :'rescName')
EOSQL
}
export -f GET_CAT_INFO
# For a given file on a given host, this function retrieves the file's size and
# checksum.
# Arguments:
#   storeHost  The FQDN or IP address of the host
#   filePath   The absolute path to the file on storeHost
# Output:
#   To stdout, it writes one line with the form `<size>\t<checksum>`. The size
#   is -1 when `stat` fails on the file, and the checksum is -1 when `md5sum`
#   fails on it.
GET_STORE_INFO() {
  local storeHost="$1"
  local filePath="$2"

  # Quote the path for the remote shell: wrap it in single quotes and rewrite
  # each embedded single quote as '\'' so that the path cannot end the quoting
  # early.
  local escSq="'\\''"
  local escapedPath
  escapedPath=${filePath//\'/"$escSq"}
  local quotedPath="'${escapedPath}'"

  # NOTE: The fd redirection gymnastics is to silence TACC's greeting message.
  # The remote script reports on its stderr, which is what ends up on this
  # function's stdout; the remote stdout is discarded.
  #shellcheck disable=SC2087
  ssh -q -t "$storeHost" 2>&1 > /dev/null <<EOSSH
if ! size="\$(sudo --user=irods stat --format=%s $quotedPath 2> /dev/null)"; then
size=-1
fi
if ! chksum="\$(sudo --user=irods md5sum $quotedPath 2> /dev/null)"; then
chksum=-1
else
chksum="\${chksum%% *}"
fi
printf '%s\t%s\n' "\$size" "\$chksum" >&2
EOSSH
}
export -f GET_STORE_INFO
# For a given data object, this function compares the catalog's size and
# checksum for each replica against the replica's file on its storage host.
# Arguments:
#   resc     if not empty, only replicas on this resource are checked
#   objPath  the absolute path to the data object
# Output:
#   To stdout, it writes one line of the form
#   `<reason>\t<resource name>\t<data object path>` for each replica that fails
#   a check, where <reason> is `missing`, `size`, or `checksum`. After all
#   replicas have been checked, it writes `Completed <data object path>`. Error
#   messages from the commands it runs are also written to stdout.
CHECK_OBJ() {
  local resc="$1"
  local objPath="$2"

  local catInfo
  {
    readarray -d '' catInfo < <(GET_CAT_INFO "$objPath" "$resc")
  } 2>&1

  local tab=$'\t'
  local replInfo remainder
  local replSize replChksum rescName filePath
  local storeHost fileSize fileChksum reason

  for replInfo in "${catInfo[@]}"; do
    # Split the record one tab at a time. `IFS=$'\t' read` treats a run of tabs
    # as a single separator, so an empty checksum would shift the resource name
    # and file path into the wrong variables.
    replSize=${replInfo%%"$tab"*}
    remainder=${replInfo#*"$tab"}
    replChksum=${remainder%%"$tab"*}
    remainder=${remainder#*"$tab"}
    rescName=${remainder%%"$tab"*}
    filePath=${remainder#*"$tab"}

    storeHost="$(iquest '%s' "select RESC_LOC where RESC_NAME = '$rescName'")"

    read -r fileSize fileChksum < <(GET_STORE_INFO "$storeHost" "$filePath")

    # Start every replica with no failure so that a failure found on an earlier
    # replica is not reported again for a later one that passes.
    reason=''
    if [[ "$fileSize" == '-1' ]] && [[ "$fileChksum" == '-1' ]]; then
      reason=missing
    elif [[ "$replSize" != "$fileSize" ]]; then
      reason=size
    elif [[ "$replChksum" != "$fileChksum" ]]; then
      reason=checksum
    fi

    if [[ -n "$reason" ]]; then
      printf '%s\t%s\t%q\n' "$reason" "$rescName" "$objPath"
    fi
  done 2>&1

  printf 'Completed %q\n' "$objPath"
}
export -f CHECK_OBJ
# Run the script, handing main the command line arguments unchanged.
main "$@"