
Commit a3e513e

cosmetic and functional fixes
1 parent dc6f189 commit a3e513e

2 files changed: 89 additions & 30 deletions

scoring/performance_profile.py

Lines changed: 60 additions & 19 deletions
@@ -31,9 +31,14 @@
 import re
 
 from absl import logging
+import matplotlib as mpl
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from tabulate import tabulate
+import re
+
+import logging
 
 from algorithmic_efficiency.workloads.workloads import get_base_workload_name
 import algorithmic_efficiency.workloads.workloads as workloads_registry
@@ -63,6 +68,30 @@
 
 MAX_EVAL_METRICS = ['mean_average_precision', 'ssim', 'accuracy', 'bleu']
 
+# MPL params
+mpl.rcParams['figure.figsize'] = (16, 10)  # Width, height in inches
+mpl.rcParams['font.family'] = 'serif'
+mpl.rcParams['font.serif'] = ['Times New Roman'] + mpl.rcParams['font.serif']  # Add Times New Roman as first choice
+mpl.rcParams['font.size'] = 22
+mpl.rcParams['savefig.dpi'] = 300  # Set resolution for saved figures
+
+# Plot Elements
+mpl.rcParams['lines.linewidth'] = 3  # Adjust line thickness if needed
+mpl.rcParams['lines.markersize'] = 6  # Adjust marker size if needed
+mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"])  # Example color cycle (consider ColorBrewer or viridis)
+mpl.rcParams['axes.labelsize'] = 22  # Axis label font size
+mpl.rcParams['xtick.labelsize'] = 20  # Tick label font size
+mpl.rcParams['ytick.labelsize'] = 20
+
+# Legends and Gridlines
+mpl.rcParams['legend.fontsize'] = 20  # Legend font size
+mpl.rcParams['legend.loc'] = 'best'  # Let matplotlib decide the best legend location
+mpl.rcParams['axes.grid'] = True  # Enable grid
+mpl.rcParams['grid.alpha'] = 0.4  # Gridline transparency
+
+def print_dataframe(df):
+  tabulated_df = tabulate(df.T, headers='keys', tablefmt='psql')
+  logging.info(tabulated_df)
 
 def generate_eval_cols(metrics):
   splits = ['train', 'validation']
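
The new print_dataframe helper transposes the frame and renders it with tabulate before handing it to the absl logger. A minimal standalone sketch of the same pattern (the submission and workload names below are invented for illustration):

    import pandas as pd
    from tabulate import tabulate

    # Rows are submissions, columns are workloads; values are hypothetical runtimes.
    df = pd.DataFrame({'ogbg': [1200.0, 1500.0], 'wmt': [3400.0, 2900.0]},
                      index=['submission_a', 'submission_b'])

    # Transpose so workloads become rows, then render a psql-style ASCII table.
    print(tabulate(df.T, headers='keys', tablefmt='psql'))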
@@ -177,10 +206,10 @@ def get_workloads_time_to_target(submission,
     num_trials = len(group)
     if num_trials != NUM_TRIALS and not self_tuning_ruleset:
       if strict:
-        raise ValueError(f'Expecting {NUM_TRIALS} trials for workload '
+        raise ValueError(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload '
                          f'{workload} but found {num_trials} trials.')
       else:
-        logging.warning(f'Expecting {NUM_TRIALS} trials for workload '
+        logging.warning(f'In Study {study}: Expecting {NUM_TRIALS} trials for workload '
                         f'{workload} but found {num_trials} trials.')
 
     # Get trial and time index that reaches target
@@ -194,13 +223,14 @@ def get_workloads_time_to_target(submission,
 
     workloads.append({
         'submission': submission_name,
-        'workload': workload,
+        'workload': re.sub(r'_(jax|pytorch)$', '', workload),
         time_col: np.median(time_vals_per_study),
     })
 
   df = pd.DataFrame.from_records(workloads)
   df = df.pivot(index='submission', columns='workload', values=time_col)
-
+  logging.info('TIME TO TARGET PER WORKLOAD')
+  print_dataframe(df)
   return df
 
 
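The 'workload' entry is now normalized with re.sub so that jax and pytorch runs of the same workload collapse into a single column after the pivot. A quick illustration of that regex on hypothetical workload names:

    import re

    for name in ['wmt_jax', 'ogbg_pytorch', 'criteo1tb']:
      print(re.sub(r'_(jax|pytorch)$', '', name))
    # -> wmt, ogbg, criteo1tb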

@@ -269,26 +299,30 @@ def compute_performance_profiles(submissions,
             strict))
   df = pd.concat(dfs)
 
+  logging.info("TIME TO TARGET")
+  print_dataframe(df)
+
   # Set score to inf if not within 4x of fastest submission
   best_scores = df.min(axis=0)
   df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf
 
+  logging.info("4X of budget")
+  print_dataframe(df)
+
   # For each held-out workload if variant target was not hit set submission to inf
   framework = None
   for workload in df.keys():
-    # Check if this is a variant
-    framework = workload.split('_')[-1]
-    workload_ = workload.split(f'_{framework}')[0]
-    if workload_ not in BASE_WORKLOADS:
+    if workload not in BASE_WORKLOADS:
       # If variants do not have finite score set base_workload score to inf
-      base_workload = get_base_workload_name(workload_)
+      base_workload = get_base_workload_name(workload)
       df[base_workload] = df.apply(
-          variant_criteria_filter(base_workload + f'_{framework}', workload),
+          variant_criteria_filter(base_workload, workload),
           axis=1)
+
+  logging.info("HELDOUT_WORKLOAD FILTER")
+  print_dataframe(df)
 
-  base_workloads = [w + f'_{framework}' for w in BASE_WORKLOADS]
-  df = df[base_workloads]
-  print(df)
+  df = df[BASE_WORKLOADS]
 
   if verbosity > 0:
     logging.info('\n`{time_col}` to reach target:')
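
For context, the 4x filter above takes the column-wise minimum (the fastest submission per workload) and masks any entry slower than four times that value. A toy sketch of the masking with invented numbers:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'wmt': [100.0, 450.0], 'ogbg': [200.0, 300.0]},
                      index=['submission_a', 'submission_b'])
    best_scores = df.min(axis=0)  # fastest time per workload
    df[df.apply(lambda x: x > 4 * best_scores, axis=1)] = np.inf
    print(df)  # submission_b on wmt becomes inf (450 > 4 * 100); ogbg survives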
@@ -316,11 +350,17 @@ def compute_performance_profiles(submissions,
                              1000):
       logging.info(df)
 
+  logging.info('DIVIDE BY FASTEST')
+  print_dataframe(df)
+
   # If no max_tau is supplied, choose the value of tau that would plot all non
   # inf or nan data.
   if max_tau is None:
     max_tau = df.replace(float('inf'), -1).replace(np.nan, -1).values.max()
 
+  logging.info('AFTER MAYBE SETTING MAX TAU')
+  print_dataframe(df)
+
   if scale == 'linear':
     points = np.linspace(min_tau, max_tau, num=num_points)
   elif scale == 'log':
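
The tau grid built here feeds the usual performance-profile curve: for each tau, the proportion of workloads on which a submission is within a factor tau of the fastest. A compact sketch of that last step, assuming per-workload time ratios have already been computed (the values are made up):

    import numpy as np

    min_tau, max_tau, num_points = 1.0, 4.0, 100
    points = np.linspace(min_tau, max_tau, num=num_points)  # scale == 'linear'

    ratios = np.array([1.0, 1.5, np.inf])  # hypothetical ratios to the best submission
    profile = [(ratios <= tau).mean() for tau in points]  # fraction solved within tau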
@@ -375,8 +415,8 @@ def plot_performance_profiles(perf_df,
                               df_col,
                               scale='linear',
                               save_dir=None,
-                              figsize=(30, 10),
-                              font_size=18):
+                              figsize=(30, 10)
+                              ):
   """Plot performance profiles.
 
   Args:
@@ -396,12 +436,13 @@ def plot_performance_profiles(perf_df,
   Returns:
     None. If a valid save_dir is provided, save both the plot and perf_df.
   """
-  fig = perf_df.T.plot(figsize=figsize)
+  fig = perf_df.T.plot(figsize=figsize, alpha=0.7)
   df_col_display = f'log10({df_col})' if scale == 'log' else df_col
   fig.set_xlabel(
-      f'Ratio of `{df_col_display}` to best submission', size=font_size)
-  fig.set_ylabel('Proportion of workloads', size=font_size)
-  fig.legend(prop={'size': font_size}, bbox_to_anchor=(1.0, 1.0))
+      f'Ratio of `{df_col_display}` to best submission')
+  fig.set_ylabel('Proportion of workloads')
+  fig.legend(bbox_to_anchor=(1.0, 1.0))
+  plt.tight_layout()
   maybe_save_figure(save_dir, f'performance_profile_by_{df_col_display}')
   maybe_save_df_to_csv(save_dir,
                        perf_df,
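
With font_size dropped from the signature, text sizing now comes entirely from the module-level rcParams set at the top of the file. A minimal sketch of the same plotting flow on a made-up single-submission profile:

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    taus = np.linspace(1, 4, 50)
    perf_df = pd.DataFrame([np.clip((taus - 1) / 3, 0, 1)],
                           index=['submission_a'], columns=taus)

    ax = perf_df.T.plot(alpha=0.7)  # font sizes and grid come from mpl.rcParams
    ax.set_xlabel('Ratio of `time` to best submission')
    ax.set_ylabel('Proportion of workloads')
    ax.legend(bbox_to_anchor=(1.0, 1.0))
    plt.tight_layout()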

scoring/score_submissions.py

Lines changed: 29 additions & 11 deletions
@@ -22,8 +22,10 @@
 import pandas as pd
 import scoring_utils
 from tabulate import tabulate
+import json
+import pickle
 
-from scoring import performance_profile
+import performance_profile
 
 flags.DEFINE_string(
     'submission_directory',
@@ -101,8 +103,13 @@ def get_summary_df(workload, workload_df, include_test_split=False):
   return summary_df
 
 
-def print_submission_summary(df, include_test_split=True):
+def get_submission_summary(df, include_test_split=True):
+  """Summarizes the submission results into metric and time tables
+  organized by workload.
+  """
+
   dfs = []
+  print(df)
   for workload, group in df.groupby('workload'):
     summary_df = get_summary_df(
         workload, group, include_test_split=include_test_split)
115122

116123
def main(_):
117124
results = {}
118-
119-
for submission in os.listdir(FLAGS.submission_directory):
120-
experiment_path = os.path.join(FLAGS.submission_directory, submission)
121-
df = scoring_utils.get_experiment_df(experiment_path)
122-
results[submission] = df
123-
summary_df = print_submission_summary(df)
124-
with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'),
125-
'w') as fout:
126-
summary_df.to_csv(fout)
125+
os.makedirs(FLAGS.output_dir, exist_ok=True)
126+
127+
# for team in os.listdir(FLAGS.submission_directory):
128+
# for submission in os.listdir(os.path.join(FLAGS.submission_directory, team)):
129+
# print(submission)
130+
# experiment_path = os.path.join(FLAGS.submission_directory, team, submission)
131+
# df = scoring_utils.get_experiment_df(experiment_path)
132+
# results[submission] = df
133+
# summary_df = get_submission_summary(df)
134+
# with open(os.path.join(FLAGS.output_dir, f'{submission}_summary.csv'),
135+
# 'w') as fout:
136+
# summary_df.to_csv(fout)
137+
138+
# # Save results
139+
# with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'wb') as f:
140+
# pickle.dump(results, f)
141+
142+
# Read results
143+
with open(os.path.join(FLAGS.output_dir, 'results.pkl'), 'rb') as f:
144+
results = pickle.load(f)
127145

128146
if not FLAGS.strict:
129147
logging.warning(
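
The commented-out directory walk plus the unconditional pickle.load make results.pkl a manual cache: as committed, the script assumes the file already exists under output_dir. A hedged sketch of the compute-or-load pattern this appears to be heading toward (load_or_compute_results and compute_fn are hypothetical stand-ins, not part of the diff):

    import os
    import pickle

    def load_or_compute_results(output_dir, compute_fn):
      """Reuse a cached results.pkl when present; otherwise compute and cache it."""
      results_path = os.path.join(output_dir, 'results.pkl')
      if os.path.exists(results_path):
        with open(results_path, 'rb') as f:
          return pickle.load(f)
      results = compute_fn()  # e.g. walk the submission directory and build DataFrames
      with open(results_path, 'wb') as f:
        pickle.dump(results, f)
      return results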
