Skip to content

Commit

Permalink
updated data gathering script to newer changes (still TODO for figures)
Browse files Browse the repository at this point in the history
  • Loading branch information
afoix committed Jul 21, 2024
1 parent 62704af commit 6e9ffcf
Showing 1 changed file with 116 additions and 104 deletions.
220 changes: 116 additions & 104 deletions scripts/shapeembed/gather_run_results.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#! /usr/bin/env python3

import pandas as pd
import os
import shutil
import logging
import argparse
import shutil
import os
import datetime
import functools

import pandas as pd

from common_helpers import *

# define a Custom aggregation
# function for finding total
def keep_first_fname(series):
Expand All @@ -17,128 +20,136 @@ def get_run_info(run):
return f'{x[0]}_{x[1]}', x[2], x[4]

def main_process(clargs, logger=logging.getLogger(__name__)):
print(clargs)

params = []
for f in clargs.run_folders:
ps = find_existing_run_scores(f)
for p in ps: p.folder = f
params.append(ps)
params = [x for ps in params for x in ps]
logger.debug(params)

os.makedirs(clargs.output_dir, exist_ok=True)
dfs = []
for d in clargs.run_folder:
csv = f'{d}/scores_df.csv'
#csv = f'{d}/scores_df_mean.csv'
if not os.path.isfile(csv):
print(f'WARNING: no {csv} found, skipping')
continue

run_name = os.path.basename(d)
model, latent_space_sz, dataset = get_run_info(run_name)
df = pd.read_csv(csv)
df['model'] = model
df['latent_space_sz'] = latent_space_sz
df['dataset'] = dataset

for trial in ['efd','regionprops','shapeembed', 'combined_all']:

conf_mat = f'{trial}_confusion_matrix.png'
if os.path.isfile(f'{d}/{conf_mat}'):
shutil.copy(f'{d}/{conf_mat}',f'{clargs.output_dir}/{run_name}_{conf_mat}')
df.loc[df['trial'] == trial, 'conf_mat'] = f'./{run_name}_{conf_mat}'
else:
df.loc[df['trial'] == trial, 'conf_mat'] = f'nofile'

umap = f'umap_{trial}.pdf'
if os.path.isfile(f'{d}/{umap}'):
shutil.copy(f'{d}/{umap}',f'{clargs.output_dir}/{run_name}_{umap}')
df.loc[df['trial'] == trial, 'umap'] = f'./{run_name}_{umap}'
else:
df.loc[df['trial'] == trial, 'umap'] = f'nofile'

barplot = f'scores_barplot.pdf'
if os.path.isfile(f'{d}/{barplot}'):
shutil.copy(f'{d}/{barplot}',f'{clargs.output_dir}/{run_name}_{barplot}')
df.loc[df['trial'] == trial, 'barplot'] = f'./{run_name}_{barplot}'
else:
df.loc[df['trial'] == trial, 'barplot'] = f'nofile'

dfs = []
for p in params:

# open scores dataframe
df = pd.read_csv(p.csv_file, index_col=0)

# pair up with confusion matrix
conf_mat_file = f'{job_str(p)}-shapeembed-confusion_matrix.png'
print(f'{p.folder}/{conf_mat_file}')
if os.path.isfile(f'{p.folder}/{conf_mat_file}'):
shutil.copy(f'{p.folder}/{conf_mat_file}',f'{clargs.output_dir}/{conf_mat_file}')
df['conf_mat'] = f'./{conf_mat_file}'
else:
df['conf_mat'] = f'nofile'

# pair up with umap
umap_file = f'{job_str(p)}-shapeembed-umap.pdf'
if os.path.isfile(f'{p.folder}/{umap_file}'):
shutil.copy(f'{p.folder}/{umap_file}',f'{clargs.output_dir}/{umap_file}')
df['umap'] = f'./{umap_file}'
else:
df['umap'] = f'nofile'

## pair up with barplot
#barplot = f'scores_barplot.pdf'
#if os.path.isfile(f'{d}/{barplot}'):
# shutil.copy(f'{d}/{barplot}',f'{clargs.output_dir}/{run_name}_{barplot}')
# df.loc[df['trial'] == trial, 'barplot'] = f'./{run_name}_{barplot}'
#else:
# df.loc[df['trial'] == trial, 'barplot'] = f'nofile'

# add dataframe to list for future concatenation
dfs.append(df.convert_dtypes())

# gather all dataframes together
df = pd.concat(dfs)
df = df.iloc[:, 1:] # drop first column 'unnamed' for non-mean df
df.set_index(['dataset', 'trial', 'model', 'latent_space_sz'], inplace=True)
logger.debug(df)
df.to_csv(f'{clargs.output_dir}/all_scores_df.csv', index=False)

#df = df.iloc[:, 1:] # drop first column 'unnamed' for non-mean df
df.set_index(['dataset', 'trial', 'model', 'compression_factor', 'latent_dim', 'batch_size'], inplace=True)
df.sort_index(inplace=True)
df = df.groupby(level=['dataset', 'trial', 'model', 'latent_space_sz']).agg({
df = df.groupby(level=['dataset', 'trial', 'model', 'compression_factor', 'latent_dim', 'batch_size']).agg({
'test_accuracy': 'mean'
, 'test_precision': 'mean'
, 'test_recall': 'mean'
, 'test_f1': 'mean'
, 'conf_mat': keep_first_fname
, 'umap': keep_first_fname
, 'barplot': keep_first_fname
#, 'barplot': keep_first_fname
})

print('-'*80)
print(df)
print('-'*80)


cell_hover = { # for row hover use <tr> instead of <td>
'selector': 'td:hover',
'props': [('background-color', '#ffffb3')]
}
index_names = {
'selector': '.index_name',
'props': 'font-style: italic; color: darkgrey; font-weight:normal;'
}
headers = {
'selector': 'th:not(.index_name)',
'props': 'background-color: #eeeeee; color: #333333;'
}

def html_img(path):
if os.path.splitext(path)[1][1:] == 'png':
return f'<a href="{path}"><img class="zoom" src="{path}" width="50"></a>'
if os.path.splitext(path)[1][1:] == 'pdf':
return f'<a href="{path}"><object class="zoom" data="{path}" width="50" height="50"></a>'
return '<div style="width: 50px">:(</div>'
df['conf_mat'] = df['conf_mat'].apply(html_img)
df['umap'] = df['umap'].apply(html_img)
df['barplot'] = df['barplot'].apply(html_img)

def render_html(fname, d):
with open(fname, 'w') as f:
f.write('''<head>
<style>
.df tbody tr:nth-child(even) { background-color: lightblue; }
.zoom {transition: transform .2s;}
.zoom:hover{transform: scale(10);}
</style>
</head>
<body>
''')
s = d.style
s.set_table_styles([cell_hover, index_names, headers])
s.to_html(f, classes='df')
f.write('</body>')

with open(f'{clargs.output_dir}/gathered_table.tex', 'w') as f:
f.write('\\documentclass[12pt]{article}\n\\usepackage{booktabs}\n\\usepackage{underscore}\n\\usepackage{multirow}\n\\begin{document}\n')
df.to_latex(f)
    f.write('\\end{document}')
render_html(f'{clargs.output_dir}/gathered_table.html', df)

dft = df.transpose()
with open(f'{clargs.output_dir}/gathered_table_transpose.tex', 'w') as f:
f.write('\\documentclass[12pt]{article}\n\\usepackage{booktabs}\n\\usepackage{underscore}\n\\usepackage{multirow}\n\\begin{document}\n')
dft.to_latex(f)
    f.write('\\end{document}')
render_html(f'{clargs.output_dir}/gathered_table_transpose.html', dft)
df.to_csv(f'{clargs.output_dir}/all_scores_agg_df.csv')


#cell_hover = { # for row hover use <tr> instead of <td>
# 'selector': 'td:hover',
# 'props': [('background-color', '#ffffb3')]
# }
#index_names = {
# 'selector': '.index_name',
# 'props': 'font-style: italic; color: darkgrey; font-weight:normal;'
# }
#headers = {
# 'selector': 'th:not(.index_name)',
# 'props': 'background-color: #eeeeee; color: #333333;'
# }

#def html_img(path):
# if os.path.splitext(path)[1][1:] == 'png':
# return f'<a href="{path}"><img class="zoom" src="{path}" width="50"></a>'
# if os.path.splitext(path)[1][1:] == 'pdf':
# return f'<a href="{path}"><object class="zoom" data="{path}" width="50" height="50"></a>'
# return '<div style="width: 50px">:(</div>'
#df['conf_mat'] = df['conf_mat'].apply(html_img)
#df['umap'] = df['umap'].apply(html_img)
#df['barplot'] = df['barplot'].apply(html_img)

#def render_html(fname, d):
# with open(fname, 'w') as f:
# f.write('''<head>
# <style>
# .df tbody tr:nth-child(even) { background-color: lightblue; }
# .zoom {transition: transform .2s;}
# .zoom:hover{transform: scale(10);}
# </style>
# </head>
# <body>
# ''')
# s = d.style
# s.set_table_styles([cell_hover, index_names, headers])
# s.to_html(f, classes='df')
# f.write('</body>')

#with open(f'{clargs.output_dir}/gathered_table.tex', 'w') as f:
# f.write('\\documentclass[12pt]{article}\n\\usepackage{booktabs}\n\\usepackage{underscore}\n\\usepackage{multirow}\n\\begin{document}\n')
# df.to_latex(f)
  #  f.write('\\end{document}')
#render_html(f'{clargs.output_dir}/gathered_table.html', df)

#dft = df.transpose()
#with open(f'{clargs.output_dir}/gathered_table_transpose.tex', 'w') as f:
# f.write('\\documentclass[12pt]{article}\n\\usepackage{booktabs}\n\\usepackage{underscore}\n\\usepackage{multirow}\n\\begin{document}\n')
# dft.to_latex(f)
  #  f.write('\\end{document}')
#render_html(f'{clargs.output_dir}/gathered_table_transpose.html', dft)


if __name__ == "__main__":

parser = argparse.ArgumentParser(description='Run the shape embed pipeline')

parser.add_argument( 'run_folder', nargs="+", type=str
parser.add_argument( 'run_folders', metavar='run_folder', nargs="+", type=str
                     , help=f"The run folders to gather results from")
parser.add_argument( '-o', '--output-dir', metavar='OUTPUT_DIR'
, default=f'{os.getcwd()}/gathered_results'
, default=f'{os.getcwd()}/gathered_results_{datetime.datetime.now().strftime("%Y%m%d_%H%M%S")}'
, help=f"The OUTPUT_DIR path to use to gather results")
parser.add_argument('-v', '--verbose', action='count', default=0
, help="Increase verbosity level by adding more \"v\".")
Expand All @@ -147,10 +158,11 @@ def render_html(fname, d):
clargs=parser.parse_args()

# set verbosity level
logging.basicConfig()
logger = logging.getLogger(__name__)
if clargs.verbose > 2:
logger.setLevel(logging.DEBUG)
if clargs.verbose > 1:
logger.setLevel('DEBUG')
elif clargs.verbose > 0:
logger.setLevel(logging.INFO)
logger.setLevel('INFO')

main_process(clargs, logger)

0 comments on commit 6e9ffcf

Please sign in to comment.