Updated script #799

Merged
merged 1 commit on Nov 7, 2024
51 changes: 48 additions & 3 deletions bv-evaluation/collect-data-llvm.py
@@ -28,6 +28,10 @@

inconsistencies = 0

both_failed = 0
ls_only_failed = 0
bw_only_failed = 0

thmTot = 0

errTot = 0
@@ -37,7 +41,8 @@

err_loc_tot = []
err_msg_tot = []

bitwuzla_failed_locations = []
leanSAT_failed_locations = []

for file in os.listdir(benchmark_dir):

@@ -68,6 +73,7 @@
err_locations = []
err_msg = []


res_file = open(res_dir+file.split(".")[0]+"_r"+str(r)+".txt")
# print(res_dir+file.split(".")[0]+"_r"+str(r)+".txt")
ls = 0
@@ -79,7 +85,12 @@
while l:
if "Bitwuzla " in l:
cegb = False
if "counter" in l :
bw_failed = False
if "failed" in l :
bw_failed = True
if (r == 0):
bitwuzla_failed_locations.append(file)
elif "counter" in l :
cegb = True
tot = float(l.split("after ")[1].split("ms")[0])
if r == 0:
@@ -99,8 +110,13 @@
l = res_file.readline()
# if testing went right the next line should contain
if "LeanSAT " in l:
ls_failed = False
cegl = False
if "counter example" in l:
if "failed" in l :
ls_failed = True
if (r == 0):
leanSAT_failed_locations.append(file)
elif "counter example" in l:
tot = float(l.split("ms")[0].split("after ")[1])
if r == 0:
counter_leanSAT_tot_times_average.append([tot])
@@ -131,6 +147,30 @@
leanSAT_lrat_t_times_average[ls].append(float(l.split("ms")[4].split("g ")[1]))
leanSAT_lrat_c_times_average[ls].append(float(l.split("ms")[5].split("g ")[1]))
ls = ls + 1

if bw_failed and ls_failed and r ==0:
both_failed+=1
elif ls_failed and r ==0:
ls_only_failed +=1
# delete latest entry from bw results for consistency
if cegb:
del counter_bitwuzla_times_average[-1]
else :
del bitwuzla_times_average[-1]
elif bw_failed and r ==0:
bw_only_failed +=1
if cegl :
del counter_leanSAT_tot_times_average[-1]
del counter_leanSAT_rw_times_average[-1]
del counter_leanSAT_sat_times_average[-1]
else :
del leanSAT_tot_times_average[-1]
del leanSAT_rw_times_average[-1]
del leanSAT_bb_times_average[-1]
del leanSAT_sat_times_average[-1]
del leanSAT_lrat_t_times_average[-1]
del leanSAT_lrat_c_times_average[-1]

if cegb and not cegl:
print("bitwuzla found a counterexample, leanSAT proved a theorem in file "+file)
inconsistencies+=1
@@ -180,9 +220,14 @@

print("leanSAT and Bitwuzla solved: "+str(len(leanSAT)))
print("leanSAT and Bitwuzla provided "+str(len(counter_leanSAT))+" counterexamples")
print("leanSAT and Bitwuzla both failed on "+str(both_failed)+" theorems")
print("leanSAT failed and Bitwuzla succeeded on "+str(ls_only_failed)+" theorems")
print("leanSAT succeeded and Bitwuzla failed on "+str(bw_only_failed)+" theorems")
print("There were "+str(inconsistencies)+" inconsistencies")
print("Errors raised: "+str(errTot))



err_a = np.array(err_msg_tot)

unique_elements, counts = np.unique(err_a, return_counts=True)
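
For readers skimming the diff, the gist of the new bookkeeping is: flag whether Bitwuzla and/or LeanSAT failed on each benchmark, count the both_failed / ls_only_failed / bw_only_failed cases, and, when only one solver failed, drop the timing entry the successful solver just appended so the per-solver lists stay aligned. Below is a minimal, self-contained Python sketch of that idea. It is not the script's code: the toy result-line format, the tally_failures name, and the returned labels are assumptions made purely for illustration.

# Illustrative sketch only: simplified failure bookkeeping in the spirit of
# this PR. The result-line format and all names are assumptions, not the
# exact format parsed by collect-data-llvm.py.

def tally_failures(result_lines, bw_times, ls_times):
    """Scan one benchmark's result lines, flag solver failures, and keep the
    per-solver timing lists aligned by dropping the entry appended by the
    solver that succeeded while the other one failed."""
    bw_failed = False
    ls_failed = False
    for line in result_lines:
        if "Bitwuzla" in line and "failed" in line:
            bw_failed = True
        elif "Bitwuzla" in line:
            # e.g. "Bitwuzla proved goal after 12.5ms" (toy format)
            bw_times.append(float(line.split("after ")[1].split("ms")[0]))
        if "LeanSAT" in line and "failed" in line:
            ls_failed = True
        elif "LeanSAT" in line:
            ls_times.append(float(line.split("after ")[1].split("ms")[0]))

    if bw_failed and ls_failed:
        return "both_failed"
    if ls_failed:
        if bw_times:
            bw_times.pop()  # Bitwuzla succeeded and appended a time; drop it for consistency
        return "ls_only_failed"
    if bw_failed:
        if ls_times:
            ls_times.pop()  # LeanSAT succeeded and appended a time; drop it for consistency
        return "bw_only_failed"
    return "both_solved"

# Toy usage with the made-up line format above:
bw_times, ls_times = [], []
lines = ["Bitwuzla proved goal after 12.5ms", "LeanSAT failed"]
print(tally_failures(lines, bw_times, ls_times))  # prints "ls_only_failed"
print(bw_times, ls_times)                         # prints [] []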