
Commit

Merge pull request #82 from JeffersonLab/DEV
Dev
T-Britton authored May 31, 2022
2 parents e8a5633 + 7b8988d commit 160440a
Showing 6 changed files with 150 additions and 40 deletions.
4 changes: 2 additions & 2 deletions Generators/bggen/pythia-geant.map
@@ -29,9 +29,9 @@
24 3334 ! Omega-
25 -2112 ! antineutron
26 -3122 ! antiLambda0
27 -3112 ! antiSigma-
27 -3222 ! antiSigma-
28 -3212 ! antiSigma0
29 -3222 ! antiSigma+
29 -3112 ! antiSigma+
30 -3322 ! antiXi0
31 -3312 ! antiXi+
32 -3334 ! antiOmega+
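The map associates GEANT particle IDs with PDG codes, one "GEANT_ID PDG_ID ! name" entry per line; the edit swaps the codes for IDs 27 and 29 so that antiSigma- carries -3222 (the antiparticle of Sigma+, charge -1) and antiSigma+ carries -3112 (the antiparticle of Sigma-, charge +1). The same two-line fix is applied to all three copies of pythia-geant.map below (bggen, bggen_jpsi, bggen_phi_ee). A minimal parsing sketch, assuming the whitespace-and-comment layout shown above (the helper name is illustrative, not part of the repository):

def load_pythia_geant_map(path):
    """Parse 'GEANT_ID PDG_ID ! name' lines into {geant_id: (pdg_id, name)}."""
    mapping = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("!"):
                continue  # skip blank lines and full-line comments
            fields, _, comment = line.partition("!")
            parts = fields.split()
            if len(parts) < 2:
                continue
            mapping[int(parts[0])] = (int(parts[1]), comment.strip())
    return mapping

# After this change: mapping[27] == (-3222, "antiSigma-")
#                    mapping[29] == (-3112, "antiSigma+")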
4 changes: 2 additions & 2 deletions Generators/bggen_jpsi/pythia-geant.map
@@ -29,9 +29,9 @@
24 3334 ! Omega-
25 -2112 ! antineutron
26 -3122 ! antiLambda0
27 -3112 ! antiSigma-
27 -3222 ! antiSigma-
28 -3212 ! antiSigma0
29 -3222 ! antiSigma+
29 -3112 ! antiSigma+
30 -3322 ! antiXi0
31 -3312 ! antiXi+
32 -3334 ! antiOmega+
4 changes: 2 additions & 2 deletions Generators/bggen_phi_ee/pythia-geant.map
@@ -29,9 +29,9 @@
24 3334 ! Omega-
25 -2112 ! antineutron
26 -3122 ! antiLambda0
27 -3112 ! antiSigma-
27 -3222 ! antiSigma-
28 -3212 ! antiSigma0
29 -3222 ! antiSigma+
29 -3112 ! antiSigma+
30 -3322 ! antiXi0
31 -3312 ! antiXi+
32 -3334 ! antiOmega+
90 changes: 64 additions & 26 deletions Utilities/MCDispatcher.py
@@ -4,6 +4,7 @@
except Exception as e:
print("IMPORT ERROR:",e)
pass
from telnetlib import STATUS
import MySQLdb
import sys
import datetime
@@ -65,6 +66,7 @@ def RecallAll():
if("not found" in str(err,"utf-8")):
print("clear "+str(row["BatchJobID"]))
updatequery="UPDATE Attempts SET Status=\"3\" where BatchJobID=\""+str(row["BatchJobID"])+"\""+" && SubmitHost=\""+MCWRAPPER_BOT_HOST_NAME+"\""
print(updatequery)
curs.execute(updatequery)
conn.commit()
if("Failed to end classad" in str(err,"utf-8")):
@@ -206,7 +208,10 @@ def RetryAllJobs(rlim=False):
for row in rows:
if(row["Notified"] != 1):
print("Retrying Project "+str(row["ID"]))
RetryJobsFromProject(row["ID"],not rlim)
if 1: #row["ID"] != 2509 and row["ID"] != 2510:
RetryJobsFromProject(row["ID"],not rlim)
else:
continue
else:
print("Attempting to clean up Completed_Time")
getFinalCompleteTime="SELECT MAX(Completed_Time) FROM Attempts WHERE Job_ID IN (SELECT ID FROM Jobs WHERE Project_ID="+str(row['ID'])+");"
@@ -234,7 +239,8 @@ def RemoveAllJobs():
def RetryJobsFromProject(ID, countLim):
AllOSG=True
#query= "SELECT * FROM Attempts WHERE ID IN (SELECT Max(ID) FROM Attempts WHERE SubmitHost=\""+MCWRAPPER_BOT_HOST_NAME+"\" GROUP BY Job_ID) && Job_ID IN (SELECT ID FROM Jobs WHERE IsActive=1 && Project_ID="+str(ID)+");"
query= "SELECT * FROM Attempts WHERE ID IN (SELECT Max(ID) FROM Attempts WHERE SubmitHost is not NULL GROUP BY Job_ID) && Job_ID IN (SELECT ID FROM Jobs WHERE IsActive=1 && Project_ID="+str(ID)+");"
#query= "SELECT * FROM Attempts WHERE ID IN (SELECT Max(ID) FROM Attempts WHERE SubmitHost is not NULL GROUP BY Job_ID) && Job_ID IN (SELECT ID FROM Jobs WHERE IsActive=1 && Project_ID="+str(ID)+");"
query="select Attempts.* from Jobs inner join Attempts on Attempts.Job_ID = Jobs.id and Attempts.id = (select max(id) from Attempts latest_attempts where latest_attempts.job_id = Jobs.id) where Project_ID = " +str(ID)
curs.execute(query)
rows=curs.fetchall()
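The rewritten query keeps only the newest Attempts row per job: the correlated subquery pins Attempts.id to MAX(id) for that job, so exactly one attempt per job in the project survives. A parameterized restatement of the same query (an alternative to the concatenation used in the script, reusing its curs):

query = (
    "SELECT Attempts.* FROM Jobs "
    "INNER JOIN Attempts ON Attempts.Job_ID = Jobs.id "
    "AND Attempts.id = (SELECT MAX(id) FROM Attempts latest_attempts "
    "WHERE latest_attempts.job_id = Jobs.id) "
    "WHERE Project_ID = %s"
)
curs.execute(query, (ID,))
rows = curs.fetchall()   # exactly one (newest) attempt per job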

@@ -243,10 +249,11 @@ def RetryJobsFromProject(ID, countLim):
proj=curs.fetchall()[0]
i=0
j=0
k=0
SWIF_retry_IDs=[]

print(len(rows),"Jobs to retry")
for row in rows:

print("Project",ID," ",k+1,"/",len(rows),":",row["Job_ID"],":",row["BatchSystem"])
if (row["BatchSystem"]=="SWIF"):
if((row["Status"] == "succeeded" and row["ExitCode"] != 0) or (row["Status"]=="problem" and row["ExitCode"]!="232") or (proj['Tested']==1 and row["Status"]=="canceled" ) or (proj['Tested']==1 and row["Status"]=="failed" )):
#if(row["Status"] != "succeeded"):
@@ -291,7 +298,7 @@ def RetryJobsFromProject(ID, countLim):

RetryJob(row["Job_ID"],AllOSG)
i=i+1

k=k+1

print(SWIF_retry_IDs)
if(len(SWIF_retry_IDs)!=0 and AllOSG == False):
@@ -487,6 +494,16 @@ def ParallelTestProject(results_q,index,row,ID,versionSet,commands_to_call=""):
rows=curs.fetchall()
order=rows[0]

already_passed_q="SELECT COUNT(*) from Jobs where Project_ID="+str(ID)
curs.execute(already_passed_q)
already_passed=curs.fetchall()

if(already_passed[0]["COUNT(*)"]>0):
updatequery="UPDATE Project SET Tested=1"+" WHERE ID="+str(ID)+";"
curs.execute(updatequery)
conn.commit()
STATUS="Success"
return STATUS

output="Dispatch_Failure"
errors="Dispatch_Failure"
@@ -570,8 +587,11 @@ def ParallelTestProject(results_q,index,row,ID,versionSet,commands_to_call=""):
if order["SaveReconstruction"]==1:
cleanrecon=0


command=MCWRAPPER_BOT_HOME+"/gluex_MC.py "+pwd+"/"+"MCDispatched_"+str(ID)+".config "+str(RunNumber)+" "+str(500)+" per_file=250000 base_file_number=0"+" generate="+str(order["RunGeneration"])+" cleangenerate="+str(cleangen)+" geant="+str(order["RunGeant"])+" cleangeant="+str(cleangeant)+" mcsmear="+str(order["RunSmear"])+" cleanmcsmear="+str(cleansmear)+" recon="+str(order["RunReconstruction"])+" cleanrecon="+str(cleanrecon)+" projid="+str(ID)+" batch=0 tobundle=0"
test_number=500

if(order["ID"]==2531):
test_number=50
command=MCWRAPPER_BOT_HOME+"/gluex_MC.py "+pwd+"/"+"MCDispatched_"+str(ID)+".config "+str(RunNumber)+" "+str(test_number)+" per_file=250000 base_file_number=0"+" generate="+str(order["RunGeneration"])+" cleangenerate="+str(cleangen)+" geant="+str(order["RunGeant"])+" cleangeant="+str(cleangeant)+" mcsmear="+str(order["RunSmear"])+" cleanmcsmear="+str(cleansmear)+" recon="+str(order["RunReconstruction"])+" cleanrecon="+str(cleanrecon)+" projid="+str(ID)+" batch=0 tobundle=0"
print(command)

# print (command+command2).split(" ")
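The test invocation now takes its event count from test_number (500, or 50 for order 2531) instead of a hard-coded value. A sketch that assembles the same command from a dictionary of options, which keeps the long concatenation readable; the option names are taken from the command above, the dict layout is illustrative, and MCWRAPPER_BOT_HOME, pwd, ID, RunNumber, test_number, and the clean* flags come from the surrounding function:

opts = {
    "per_file": 250000,
    "base_file_number": 0,
    "generate": order["RunGeneration"],
    "cleangenerate": cleangen,
    "geant": order["RunGeant"],
    "cleangeant": cleangeant,
    "mcsmear": order["RunSmear"],
    "cleanmcsmear": cleansmear,
    "recon": order["RunReconstruction"],
    "cleanrecon": cleanrecon,
    "projid": ID,
    "batch": 0,
    "tobundle": 0,
}
command = " ".join(
    [MCWRAPPER_BOT_HOME + "/gluex_MC.py",
     pwd + "/MCDispatched_" + str(ID) + ".config",
     str(RunNumber), str(test_number)]
    + [k + "=" + str(v) for k, v in opts.items()]
)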
@@ -605,9 +625,10 @@ def ParallelTestProject(results_q,index,row,ID,versionSet,commands_to_call=""):

output="Error in rcdb query"
errors="Error in rcdb query:"+str(query_to_do)
print("singularity exec --cleanenv --bind "+pwd+":"+pwd+" --bind /osgpool/halld/tbritton:/osgpool/halld/tbritton --bind /group/halld/:/group/halld/ --bind /scigroup/mcwrapper/gluex_MCwrapper:/scigroup/mcwrapper/gluex_MCwrapper /cvmfs/singularity.opensciencegrid.org/markito3/gluex_docker_prod:latest /bin/sh "+pwd+"/TestProject_runscript_"+str(ID)+".sh")
sing_img="/cvmfs/singularity.opensciencegrid.org/jeffersonlab/gluex_prod:v1"
print("singularity exec --cleanenv --bind "+pwd+":"+pwd+" --bind /osgpool/halld/tbritton:/osgpool/halld/tbritton --bind /group/halld/:/group/halld/ --bind /scigroup/mcwrapper/gluex_MCwrapper:/scigroup/mcwrapper/gluex_MCwrapper "+sing_img+" /bin/sh "+pwd+"/TestProject_runscript_"+str(ID)+".sh")
if RunNumber != -1:
p = Popen("singularity exec --cleanenv --bind "+pwd+":"+pwd+" --bind /osgpool/halld/tbritton:/osgpool/halld/tbritton --bind /group/halld/:/group/halld/ --bind /scigroup/mcwrapper/gluex_MCwrapper:/scigroup/mcwrapper/gluex_MCwrapper /cvmfs/singularity.opensciencegrid.org/markito3/gluex_docker_prod:latest /bin/sh "+pwd+"/TestProject_runscript_"+str(ID)+".sh", env=my_env ,stdin=PIPE,stdout=PIPE, stderr=PIPE,bufsize=-1,shell=True)
p = Popen("singularity exec --cleanenv --bind "+pwd+":"+pwd+" --bind /osgpool/halld/tbritton:/osgpool/halld/tbritton --bind /group/halld/:/group/halld/ --bind /scigroup/mcwrapper/gluex_MCwrapper:/scigroup/mcwrapper/gluex_MCwrapper "+ sing_img +" /bin/sh "+pwd+"/TestProject_runscript_"+str(ID)+".sh", env=my_env ,stdin=PIPE,stdout=PIPE, stderr=PIPE,bufsize=-1,shell=True)
output, errors = p.communicate()
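The container image is now held in sing_img (jeffersonlab/gluex_prod:v1) rather than hard-coded in both the print and the Popen call. A sketch that builds the same singularity invocation as an argument list instead of a shell=True string, which sidesteps quoting issues; binds and image path are copied from above, while pwd, ID, RunNumber, and my_env come from the surrounding function:

from subprocess import Popen, PIPE

sing_img = "/cvmfs/singularity.opensciencegrid.org/jeffersonlab/gluex_prod:v1"
binds = [
    pwd + ":" + pwd,
    "/osgpool/halld/tbritton:/osgpool/halld/tbritton",
    "/group/halld/:/group/halld/",
    "/scigroup/mcwrapper/gluex_MCwrapper:/scigroup/mcwrapper/gluex_MCwrapper",
]
cmd = ["singularity", "exec", "--cleanenv"]
for b in binds:
    cmd += ["--bind", b]
cmd += [sing_img, "/bin/sh", pwd + "/TestProject_runscript_" + str(ID) + ".sh"]
print(" ".join(cmd))
if RunNumber != -1:
    p = Popen(cmd, env=my_env, stdin=PIPE, stdout=PIPE, stderr=PIPE, bufsize=-1)
    output, errors = p.communicate()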


@@ -619,22 +640,32 @@ def ParallelTestProject(results_q,index,row,ID,versionSet,commands_to_call=""):


if(STATUS!=-1):
updatequery="UPDATE Project SET Tested=1"+" WHERE ID="+str(ID)+";"
curs.execute(updatequery)
conn.commit()
if(newLoc != "True"):
updateOrderquery="UPDATE Project SET Generator_Config=\""+newLoc+"\" WHERE ID="+str(ID)+";"
print(updateOrderquery)
curs.execute(updateOrderquery)
try:
updatequery="UPDATE Project SET Tested=1"+" WHERE ID="+str(ID)+";"
curs.execute(updatequery)
conn.commit()
if(newLoc != "True"):
updateOrderquery="UPDATE Project SET Generator_Config=\""+newLoc+"\" WHERE ID="+str(ID)+";"
print(updateOrderquery)
curs.execute(updateOrderquery)
conn.commit()
except Exception as e:
print(e)
pass
#conn=MySQLdb.connect(host=dbhost, user=dbuser, db=dbname)
#curs=conn.cursor(MySQLdb.cursors.DictCursor)

print(bcolors.OKGREEN+"TEST SUCCEEDED"+bcolors.ENDC)
print("rm -rf "+order["OutputLocation"])
#status = subprocess.call("rm -rf "+order["OutputLocation"],shell=True)
else:
updatequery="UPDATE Project SET Tested=-1"+" WHERE ID="+str(ID)+";"
curs.execute(updatequery)
conn.commit()
try:
updatequery="UPDATE Project SET Tested=-1"+" WHERE ID="+str(ID)+";"
curs.execute(updatequery)
conn.commit()
except Exception as e:
print(e)
pass

print(bcolors.FAIL+"TEST FAILED"+bcolors.ENDC)
print("rm -rf "+order["OutputLocation"])
@@ -679,10 +710,15 @@ def ParallelTestProject(results_q,index,row,ID,versionSet,commands_to_call=""):
#print(msg)
print("SENDING")
s.send_message(msg)
s.quit()
copy=open("/osgpool/halld/tbritton/REQUESTED_FAIL_MAILS/email_"+str(row['ID'])+".log", "w+")
copy.write('The log information is reproduced below:\n\n\n'+str(status[0])+'\n\n\nErrors:\n\n\n'+str(status[2]))
copy.close()
print("SENT")
s.quit()
try:
copy=open("/osgpool/halld/tbritton/REQUESTED_FAIL_MAILS/email_"+str(row['ID'])+".log", "w+")
copy.write('The log information is reproduced below:\n\n\n'+str(status[0])+'\n\n\nErrors:\n\n\n'+str(status[2]))
copy.close()
except Exception as e:
print(e)
pass
#subprocess.call("echo 'Your Project ID "+str(row['ID'])+" failed the test. Please correct this issue by following the link: "+"https://halldweb.jlab.org/gluex_sim/SubmitSim.html?prefill="+str(row['ID'])+"&mod=1" +" . Do NOT resubmit this request. Write [email protected] for additional assistance\n\n The log information is reproduced below:\n\n\n"+status[0]+"\n\n\n"+status[2]+"' | mail -s 'Project ID #"+str(row['ID'])+" Failed test' "+str(row['Email']),shell=True)
except:
print("UH OH MAILING")
@@ -1028,6 +1064,7 @@ def WriteConfig(ID):
#WritePayloadConfig(rows[0],"True")

def WritePayloadConfig(order,foundConfig,jobID=-1):
#print("writing config file based on\n",order)
if jobID==-1:
MCconfig_file= open("MCDispatched_"+str(order['ID'])+".config","a")
else:
@@ -1095,15 +1132,16 @@ def WritePayloadConfig(order,foundConfig,jobID=-1):
try:
subprocess.call("scp "+"tbritton@ifarm1801-ib:"+parseGenPostProcessing[i]+" "+"/osgpool/halld/tbritton/REQUESTEDMC_CONFIGS/"+str(order["ID"])+"_genpost_"+str(i)+".config", shell=True)
except Exception as e:
print("Error:",e)
print("Error in copying gen post processing file:",e)
pass

print("checking for config file:","/osgpool/halld/tbritton/REQUESTEDMC_CONFIGS/"+str(order["ID"])+"_genpost_"+str(i)+".config")
if( os.path.exists("/osgpool/halld/tbritton/REQUESTEDMC_CONFIGS/"+str(order["ID"])+"_genpost_"+str(i)+".config")):
newGenPost_str+=":/osgpool/halld/tbritton/REQUESTEDMC_CONFIGS/"+str(order["ID"])+"_genpost_"+str(i)+".config"
fileError=False

#fileError=False
else:
print("Error:",parseGenPostProcessing[i] ,"not found")
print("Error finding a gen post processing file:",parseGenPostProcessing[i] ,"not found")
fileError=True
else:
newGenPost_str+=":Default"
74 changes: 73 additions & 1 deletion Utilities/MCMover.csh
@@ -14,10 +14,14 @@ if [[ `ps all -u tbritton | grep MCMover.csh | grep -v grep | wc -l` == 2 ]]; then
input_dir=/osgpool/halld/tbritton/REQUESTEDMC_OUTPUT/
transfer_node=tbritton@dtn1902-ib
LOGtransfer_node=tbritton@ifarm1901-ib
CONFIGtransfer_node=tbritton@ifarm1901-ib

output_dir=/lustre19/expphy/cache/halld/gluex_simulations/REQUESTED_MC/
outputLOG_dir=/work/halld3/REQUESTED_MC/
outputCONFIG_dir=/work/halld3/REQUESTED_MC/

outputLOG_file=outputLOG_files_list_$HOST
outputCONFIG_file=outputCONFIG_files_list_$HOST
output_file=outputLOG_files_list_$HOST
# move slag-like files in the input directory out of the way
mkdir -pv $input_dir/slag
@@ -36,6 +40,13 @@ if [[ `ps all -u tbritton | grep MCMover.csh | grep -v grep | wc -l` == 2 ]]; then
LOGtransArray+=("$REPLY")
done < <(find $input_dir/ -mindepth 2 -maxdepth 2 -type d -not -name ".*" -name "log*" -print0)

CONFIGtransArray=()
while IFS= read -r -d $'\0'; do
CONFIGtransArray+=("$REPLY")
done < <(find $input_dir/ -mindepth 2 -maxdepth 2 -type d -not -name ".*" -name "config*" -print0)



for logdir in ${LOGtransArray[@]}
do
echo $logdir
@@ -96,7 +107,68 @@ if [[ `ps all -u tbritton | grep MCMover.csh | grep -v grep | wc -l` == 2 ]]; then
comm -12 /tmp/inputLOG_files_list.txt /tmp/outputLOG_files_list.txt | xargs rm -v
fi


for configdir in ${CONFIGtransArray[@]}
do
echo $configdir
projpath=`echo $configdir | awk '{split($0,arr,"REQUESTEDMC_OUTPUT"); print arr[2]}'`
echo $projpath >> /osgpool/halld/tbritton/MCWrapper_Logs/MCWrapperMover.log

echo ssh $CONFIGtransfer_node mkdir -p $outputCONFIG_dir/$projpath/

ssh $CONFIGtransfer_node mkdir -p $outputCONFIG_dir/$projpath/


configdir2=${configdir}
echo chmod -R g+w $configdir2
ssh $CONFIGtransfer_node chmod -R g+w $outputCONFIG_dir/$projpath/
rsync_command="rsync --progress -pruvt $configdir/ $CONFIGtransfer_node:$outputCONFIG_dir/$projpath/" #--exclude $input_dir/slag"
echo $rsync_command >> /osgpool/halld/tbritton/MCWrapper_Logs/MCWrapperMover.log
status="255"
while [ "$status" -eq "255" ]
do
echo $rsync_command
$rsync_command
status="$?"
echo status = $status
sleep 1
done
((movecount=movecount+1))
echo $movecount

done
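The config directories are pushed with the same rsync retry loop used for the log directories: an exit status of 255 (an ssh transport failure) triggers a one-second pause and another attempt. A Python sketch of that retry pattern, written in the language of MCDispatcher.py rather than the shell script (the function name is illustrative; subprocess.call returns the rsync exit status):

import subprocess, time

def rsync_with_retry(src_dir, dest_dir, transfer_node):
    """Repeat rsync while the exit status is 255 (ssh/connection failure)."""
    cmd = ["rsync", "--progress", "-pruvt",
           src_dir + "/", transfer_node + ":" + dest_dir + "/"]
    status = 255
    while status == 255:
        print(" ".join(cmd))
        status = subprocess.call(cmd)
        print("status =", status)
        time.sleep(1)
    return status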

echo "============================================================================================"
echo "ssh" $CONFIGtransfer_node "cd $outputCONFIG_dir;rm /tmp/"$outputCONFIG_file".txt; find . -type f | sort > /tmp/"$outputCONFIG_file".txt;"
ssh $CONFIGtransfer_node "cd $outputCONFIG_dir;rm /tmp/"$outputCONFIG_file".txt; find . -type f | sort > /tmp/"$outputCONFIG_file".txt;"

echo "OUTPUT FILE WRITTEN"

echo scp $CONFIGtransfer_node:/tmp/$outputCONFIG_file".txt" /tmp/outputCONFIG_files_list.txt
scp $CONFIGtransfer_node:/tmp/$outputCONFIG_file".txt" /tmp/outputCONFIG_files_list.txt
#echo "pwd" $PWD >> /osgpool/halld/tbritton/MCWrapper_Logs/MCWrapperMover.log
#find . -type f | sort > /tmp/output_files_list.txt
echo "cd $input_dir" >> /osgpool/halld/tbritton/MCWrapper_Logs/MCWrapperMover.log
cd $input_dir

echo "moving to delete" >> /osgpool/halld/tbritton/MCWrapper_Logs/MCWrapperMover.log
echo $PWD >> /osgpool/halld/tbritton/MCWrapper_Logs/MCWrapperMover.log

if [[ $PWD == *"/lustre19/expphy/cache/halld/gluex_simulations/REQUESTED_MC"* ]]; then
echo "oh no! did not move to input directory?!"
exit 1
fi

# make list of files in the input directory
find . -type f -mmin +600 | sort > /tmp/inputCONFIG_files_list.txt
echo "DELETING" >> /osgpool/halld/tbritton/MCWrapper_Logs/MCWrapperMover.log
echo `comm -12 /tmp/inputCONFIG_files_list.txt /tmp/outputCONFIG_files_list.txt` >> /osgpool/halld/tbritton/MCWrapper_Logs/MCWrapperMover.log
echo `comm -12 /tmp/inputCONFIG_files_list.txt /tmp/outputCONFIG_files_list.txt`

if [[ `comm -12 /tmp/inputCONFIG_files_list.txt /tmp/outputCONFIG_files_list.txt | wc -l` != 0 ]]; then
comm -12 /tmp/inputCONFIG_files_list.txt /tmp/outputCONFIG_files_list.txt | xargs rm -v
fi
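Local config files are removed only when they also appear in the listing pulled back from the transfer node, the same comm -12 intersection used for the other file classes; the 600-minute age filter is applied earlier, when inputCONFIG_files_list.txt is built. A Python sketch of the intersection-and-delete step (the list paths are the ones used above; the function name is illustrative):

import os

def delete_transferred(local_list_path, remote_list_path):
    """Remove local files that appear in both the local and remote listings."""
    with open(local_list_path) as f:
        local_files = set(line.strip() for line in f if line.strip())
    with open(remote_list_path) as f:
        remote_files = set(line.strip() for line in f if line.strip())
    for path in sorted(local_files & remote_files):
        print("removing", path)
        os.remove(path)

# e.g. delete_transferred("/tmp/inputCONFIG_files_list.txt",
#                         "/tmp/outputCONFIG_files_list.txt")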


#echo ${transArray[*]} >> /osgpool/halld/tbritton/MCWrapper_Logs/MCWrapperMover.log
for dir in ${transArray[@]}
do