-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathapprove_hits.py
297 lines (247 loc) · 11.8 KB
/
approve_hits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
import sys
import boto3
import psycopg2
import json
import time
import datetime
import logging
import pandas as pd
from models import *
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
# Heroku Database URL
# Placeholder value; it is handed to sqlalchemy.create_engine() further down,
# so it must be replaced with a real connection URL before running.
DATABASE_URL = "replace_with_database_url_on_heroku"
# AWS credentials passed to boto3.client('mturk', ...) in get_connection().
# The values below are placeholders, not usable keys.
AWS_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID"
AWS_SECRET_ACCESS_KEY = "AWS_SECRET_ACCESS_KEY"
# When True, get_connection() targets the MTurk requester *sandbox* endpoint;
# when False it targets the production requester endpoint.
DEV_ENVIROMENT_BOOLEAN = False
def get_connection():
    """Create the boto3 MTurk client used by this script.

    The sandbox requester endpoint is used when DEV_ENVIROMENT_BOOLEAN is
    truthy, otherwise the production requester endpoint. Credentials and
    region are the same in both cases.
    """
    if DEV_ENVIROMENT_BOOLEAN:
        endpoint = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'
    else:
        endpoint = 'https://mturk-requester.us-east-1.amazonaws.com'

    client_options = {
        'aws_access_key_id': AWS_ACCESS_KEY_ID,
        'aws_secret_access_key': AWS_SECRET_ACCESS_KEY,
        'region_name': 'us-east-1',
        'endpoint_url': endpoint,
    }
    return boto3.client('mturk', **client_options)
# Set up the AMT connection
connection = get_connection()

# The answers to the questions.
# Loaded once at import time; get_score_and_time() indexes this mapping by
# the question number as a string ("1", "2", ...). The file is opened in a
# `with` block so the handle is closed deterministically instead of being
# left for the garbage collector.
with open("static/questions/answers.json") as answers_file:
    answers = json.load(answers_file)

# Database setup
engine = create_engine(DATABASE_URL)
Session = sessionmaker(bind=engine)
session = Session()

# Setup logging: everything at INFO and above is appended to the log file
# with a timestamp prefix ...
log_filename = "logs/approve_hits_full_study_january_2020.log"
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(message)s',
                    datefmt='%m-%d-%y %H:%M:%S',
                    filename=log_filename,
                    filemode='a')
# ... and mirrored to the console (StreamHandler defaults to sys.stderr)
console = logging.StreamHandler()
console.setLevel(logging.INFO)
# set a format which is simpler for console use (message only, no timestamp)
formatter = logging.Formatter('%(message)s')
# tell the handler to use this format
console.setFormatter(formatter)
# add the handler to the root logger
logging.getLogger('').addHandler(console)
# Returns the list of assignments with certain status for a given HITId
def get_assignment_hits(connection, hit_id, status):
    """Fetch every assignment of a HIT that is in the given status.

    Args:
        connection: MTurk client exposing ``list_assignments_for_hit``.
        hit_id: The HITId whose assignments are listed.
        status: A single assignment status (e.g. "Submitted").

    Returns:
        The response dict of the first ``list_assignments_for_hit`` call.
        When the service reports further pages via ``NextToken``, those
        pages are fetched as well and their assignments are merged into
        ``response['Assignments']`` (``NumResults`` is updated to match and
        the consumed ``NextToken`` is dropped). A single request returns at
        most 100 assignments, so without following the token larger HITs
        would be silently truncated.
    """
    request = {
        'HITId': hit_id,
        'MaxResults': 100,
        'AssignmentStatuses': [status],
    }
    response = connection.list_assignments_for_hit(**request)

    next_token = response.pop('NextToken', None)
    if not next_token:
        # Single page: hand back the response exactly as received.
        return response

    assignments = list(response.get('Assignments', []))
    while next_token:
        page = connection.list_assignments_for_hit(NextToken=next_token, **request)
        assignments.extend(page.get('Assignments', []))
        next_token = page.get('NextToken')

    response['Assignments'] = assignments
    response['NumResults'] = len(assignments)
    return response
# Returns the score and time (in milliseconds) given a worker_id
def get_score_and_time(worker_id):
    """Grade one worker's stored answers and sum their per-question times.

    Looks up the first User row whose worker_id matches, then for questions
    1..12 compares the ``q<N>`` column with ``answers["<N>"]`` and adds up
    the ``q<N>_time`` columns.

    Returns:
        A ``(num_correct, total_time)`` tuple.
    """
    record = session.query(User).filter_by(worker_id=worker_id).first()
    question_count = 12
    correct_total = 0
    elapsed_total = 0

    for question_key in map(str, range(1, question_count + 1)):
        answer_column = "q" + question_key
        given_answer = getattr(record, answer_column)
        time_spent = getattr(record, answer_column + "_time")

        if given_answer == answers[question_key]:
            correct_total += 1
            logging.info("Correct answer for question " + question_key)

        elapsed_total += time_spent

    return correct_total, elapsed_total
def approve_hits(connection, assignment_id_list, worker_id_list, check_completion_times = True):
    """Grade a batch of assignments and optionally approve/reject them.

    For every assignment the worker's score and total completion time are
    fetched with get_score_and_time() and logged.

    Args:
        connection: MTurk client used for approve_assignment/reject_assignment.
        assignment_id_list: Assignment IDs to evaluate.
        worker_id_list: Worker IDs, parallel to ``assignment_id_list``.
        check_completion_times: If set to true (the default) only the score
            and time are logged and nothing is accepted or rejected. Only
            when it compares equal to False are assignments approved or
            rejected, and a bonus sent for the approved ones.

    An assignment is rejected when it has fewer than 5 correct answers or
    took longer than 50 minutes; the rejection feedback lists every reason
    that applied.
    """
    min_allowed_correct = 5
    max_allowed_time = 50*60 #In seconds

    # Same comparison as before: any value other than False/0 keeps the
    # safe "log only" mode.
    log_only = check_completion_times != False  # noqa: E712

    assignment_count = len(assignment_id_list)
    for index, assignment_id in enumerate(assignment_id_list):
        # Pause between workers (presumably to avoid hammering the API/DB).
        time.sleep(2)
        worker_id = worker_id_list[index]
        logging.info("Worker " + str(index+1) + "/" + str(assignment_count))
        logging.info("--------------------- Evaluating " + assignment_id + " of worker " + worker_id + " ---------------------")
        num_correct, total_time = get_score_and_time(worker_id)
        logging.info("Assignment " + assignment_id + " had " + str(num_correct)
                     + " correct and was completed in " + str(total_time/1000) + " seconds")

        if log_only:
            continue

        # Check if the user failed; collect every reason so none is lost.
        failure_reasons = []
        if num_correct < min_allowed_correct:
            failure_reasons.append("low correctness")
        # total_time is in milliseconds, the limit is in seconds.
        if total_time > max_allowed_time*1000:
            failure_reasons.append("high completion time")
        for reason in failure_reasons:
            logging.info("User failed the assignment with ID: " + assignment_id + " due to " + reason)

        accept = not failure_reasons

        # Approve or reject assignment accordingly
        if accept:
            logging.info("Assignment " + assignment_id + " approved")
            connection.approve_assignment(
                AssignmentId = assignment_id,
                RequesterFeedback = "Congratulations! Your HIT has been approved. Thank you for your time! :)",
                OverrideRejection = True
            )
        else:
            logging.info("Assignment " + assignment_id + " rejected")
            connection.reject_assignment(
                AssignmentId = assignment_id,
                # Note the trailing space after "due to": it was missing before,
                # producing "due tolow correctness".
                RequesterFeedback = "We are sorry to inform you that your HIT has been rejected due to "
                                    + " and ".join(failure_reasons)
            )

        # Send appropriate bonus if the assignment is accepted
        time.sleep(2)
        if accept:
            logging.info("Calculating bonus for WorkerId: " + worker_id)
            send_bonus(assignment_id, num_correct, total_time)
# Completion-time bonus tiers as (exclusive upper bound in minutes, fraction).
# The fraction is applied to base pay + correctness bonus. The last tier's
# bound was previously a duplicate of the 18-minute one, which made the 0.12
# tier unreachable; 19 continues the one-minute / 0.04 progression.
_TIME_BONUS_TIERS = (
    (14, 0.32),
    (15, 0.28),
    (16, 0.24),
    (17, 0.20),
    (18, 0.16),
    (19, 0.12),
)


def _time_bonus_rate(total_time):
    """Return the time-bonus fraction for a completion time in milliseconds (0 if none)."""
    for limit_minutes, rate in _TIME_BONUS_TIERS:
        if total_time < limit_minutes * 60 * 1000:
            return rate
    return 0


def send_bonus(assignment_id, num_correct, total_time):
    """Compute and pay the bonus for an approved assignment.

    The bonus has two parts:
      * correctness: $1.04 for every correct answer above 5;
      * time: a fraction of (base pay $5.20 + correctness bonus) that depends
        on how fast the HIT was completed (see _TIME_BONUS_TIERS).

    Args:
        assignment_id: Assignment to attach the bonus to; also used to look
            up the worker's User row.
        num_correct: Number of correctly answered questions.
        total_time: Total completion time in milliseconds.

    Nothing is sent when the total bonus is 0; the outcome is logged either
    way. An AttributeError is raised if no User row matches the assignment
    (``.first()`` returns None).
    """
    base_pay = 5.20
    min_allowed_correct = 5
    correctness_per_question_bonus = 1.04

    extra_correct = num_correct - min_allowed_correct
    bonus_correctness = round((extra_correct * correctness_per_question_bonus if extra_correct > 0 else 0), 2)

    rate = _time_bonus_rate(total_time)
    # Keep a plain 0 (not 0.0) when there is no time bonus so the amounts
    # in the messages below render exactly as before.
    bonus_time = round(rate * (base_pay + bonus_correctness), 2) if rate else 0
    total_bonus = round(bonus_correctness + bonus_time, 2)

    worker_id = session.query(User).filter_by(assignment_id=assignment_id).first().worker_id

    if total_bonus > 0:
        reason_str = "You received a total bonus of $" + str(total_bonus) + ". $" + str(bonus_correctness) + " due to your correctness and $" + str(bonus_time) + " due to your completion time."
        logging.info("In addition Worker ID: " + worker_id + " received a total bonus of $" + str(total_bonus) + ". $" + str(bonus_correctness) +
                     " due to your correctness and $" + str(bonus_time) + " due to your completion time.")
        connection.send_bonus(
            WorkerId = worker_id,
            BonusAmount = str(total_bonus),
            AssignmentId = assignment_id,
            Reason = reason_str
        )
    else:
        # A space was missing before "did", gluing the worker ID to the text.
        logging.info("In addition Worker ID: " + worker_id + " did not receive any bonus.")
def send_bonus_error(worker_id, assignment_id):
    """Pay a fixed $4.5 bonus to a worker whose HIT submission had issues.

    Sends the bonus through the module-level MTurk ``connection`` with an
    explanatory reason, then logs the payment.
    """
    bonus_request = {
        'WorkerId': worker_id,
        'BonusAmount': "4.5",
        'AssignmentId': assignment_id,
        'Reason': "Due to our issues with your HIT submission you have been compensated with the base pay. Your HIT is neither accepted nor rejected.",
    }
    connection.send_bonus(**bonus_request)

    log_message = "Worker ID: " + worker_id
    log_message += " with Assignment ID: " + assignment_id
    log_message += " is payed as bonus the base pay due to issues with their HIT submission."
    logging.info(log_message)
# Evaluate the workers that failed the HIT initially and accept their hit if they had at least 1 right answer
def re_evaluate_workers(connection, filename):
    """Re-grade previously failed assignments with a relaxed threshold.

    Args:
        connection: MTurk client used for get_assignment/approve_assignment.
        filename: CSV file with ``worker_id`` and ``assignment_id`` columns.

    Each assignment submitted less than 30 days ago is re-scored; it is
    approved (overriding an earlier rejection) when the worker answered at
    least one question correctly. Older assignments are skipped.
    """
    data = pd.read_csv(filename)
    worker_id_list = data['worker_id'].values
    assignment_id_list = data['assignment_id'].values
    current_date = datetime.datetime.now().date()
    min_allowed_correct = 1

    for assignment_id, worker_id in zip(assignment_id_list, worker_id_list):
        # Check if the assignment was submitted less than a month ago
        assignment = connection.get_assignment(AssignmentId = assignment_id)['Assignment']
        submit_date = assignment['SubmitTime'].date()
        days_diff = (current_date - submit_date).days
        print("Days Difference:", days_diff)
        if days_diff >= 30:
            continue

        time.sleep(2)
        logging.info("--------------------- Evaluating " + assignment_id + " of worker " + worker_id + " ---------------------")
        # get_score_and_time() looks users up by worker_id; it was previously
        # handed the assignment_id, which can never match a worker row.
        num_correct, total_time = get_score_and_time(worker_id)
        logging.info("Assignment " + assignment_id + " had " + str(num_correct)
                     + " correct and was completed in " + str(total_time/1000) + " seconds")

        # Check if the user answered 0 questions correctly
        if num_correct < min_allowed_correct:
            logging.info("worker's " + worker_id + " HIT is REJECTED")
            continue

        # Approve the assignment, overriding the earlier rejection
        connection.approve_assignment(
            AssignmentId = assignment_id,
            RequesterFeedback = "Congratulations! We have decided to accept your HIT after re-evaluating our acceptance criteria for the HIT. Thank you for your time! :)",
            OverrideRejection = True
        )
        logging.info("worker's " + worker_id + " HIT is ACCEPTED")
def approve_specific_assignment(assignment_id):
    """Approve a single assignment through the module-level MTurk connection.

    OverrideRejection is set, so the call is also made for an assignment
    that was rejected earlier.
    """
    feedback = "Congratulations! Your HIT has been approved. Thank you for your time! :)"
    connection.approve_assignment(
        AssignmentId=assignment_id,
        RequesterFeedback=feedback,
        OverrideRejection=True,
    )
def reject_specific_assignment(assignment_id):
    """Reject a single assignment through the module-level MTurk connection.

    Args:
        assignment_id: ID of the assignment to reject.

    This previously called ``approve_assignment``, which approved the
    assignment it was meant to reject; it now calls ``reject_assignment``.
    """
    connection.reject_assignment(
        AssignmentId = assignment_id,
        RequesterFeedback = "Rejected! Speeding detecting. The HIT cannot be completed in less than 5 minutes!",
    )
def grade_specific_assignment(assignment_id, worker_id):
    """Grade one assignment for real.

    Delegates to approve_hits() with single-element lists and
    check_completion_times=False, i.e. the approve/reject branch is taken
    rather than the log-only mode.
    """
    single_assignment = [assignment_id]
    single_worker = [worker_id]
    approve_hits(
        connection,
        single_assignment,
        single_worker,
        check_completion_times=False,
    )
if __name__ == "__main__":
    # Command-line entry point. Supported commands:
    #   batch_grade <hit_id>               log score/time of all submitted assignments of a HIT
    #   reject <assignment_id>             reject one assignment
    #   approve <assignment_id>            approve one assignment
    #   grade <assignment_id> <worker_id>  grade one assignment (approve/reject + bonus)
    usage = ("Usage: approve_hits.py batch_grade <hit_id> | reject <assignment_id> | "
             "approve <assignment_id> | grade <assignment_id> <worker_id>")
    # Number of positional arguments each command needs after its name.
    required_args = {'batch_grade': 1, 'reject': 1, 'approve': 1, 'grade': 2}

    arg_arr = sys.argv[1:]
    # Fail with a usage message instead of an IndexError (missing arguments)
    # or a silent no-op (unknown command).
    if not arg_arr or arg_arr[0] not in required_args:
        sys.exit(usage)
    arg1 = arg_arr[0]
    if len(arg_arr) - 1 < required_args[arg1]:
        sys.exit(usage)

    if arg1 == 'batch_grade':
        hit_id = arg_arr[1]
        assignments = get_assignment_hits(connection, hit_id, "Submitted")['Assignments']
        assignment_id_list = [assignment['AssignmentId'] for assignment in assignments]
        worker_id_list = [assignment['WorkerId'] for assignment in assignments]
        logging.info(
            "\n"
            "---------------------------------------------------------------\n"
            "--------------------- Running approve_hits.py -----------------\n"
            "---------------------------------------------------------------\n"
        )
        for worker_id, assignment_id in zip(worker_id_list, assignment_id_list):
            logging.info("worker_id: " + worker_id + " with assignment: " + assignment_id)
        logging.info("There are " + str(len(assignment_id_list)) + " assignments submitted waiting approval")
        # check_completion_times=True: only log scores and times, do not
        # approve or reject anything.
        approve_hits(connection, assignment_id_list, worker_id_list, check_completion_times=True)
    elif arg1 == 'reject':
        assignment_id = arg_arr[1]
        reject_specific_assignment(assignment_id)
    elif arg1 == 'approve':
        assignment_id = arg_arr[1]
        approve_specific_assignment(assignment_id)
    elif arg1 == 'grade':
        assignment_id = arg_arr[1]
        worker_id = arg_arr[2]
        grade_specific_assignment(assignment_id, worker_id)