diff --git a/results/result_generation.ipynb b/results/result_generation.ipynb
index fe4e16d604eb3d3ffcc2f1bc3f8c731019b6b797..99d060f41afcbb58ef62d2b387e84a383b517741 100644
--- a/results/result_generation.ipynb
+++ b/results/result_generation.ipynb
@@ -19,11 +19,11 @@
    "source": [
     "df_base_dir = \"./dataframes\"\n",
     "projects_with_model = [\n",
-    "    (\"5_devices_unlimited_new\", \"resnet110\"),\n",
-    "    (\"50_devices_unlimited_new\", \"resnet110\"),\n",
-    "    (\"controller_comparison\", \"resnet110\"),\n",
-    "    (\"controller_comparison_het_bat\", \"resnet110\"),\n",
-    "    (\"controller_comparison_homogeneous\", \"resnet110\")\n",
+    "    (\"journal_50_devices\", \"resnet110\"),\n",
+    "    (\"journal_5_devices\", \"resnet110\"),\n",
+    "    (\"journal_controller_comparison_all_heterogeneous\", \"resnet110\"),\n",
+    "    (\"journal_controller_comparison_heterogeneous_battery\", \"resnet110\"),\n",
+    "    (\"journal_controller_comparison_homogeneous\", \"resnet110\")\n",
     "]"
    ],
    "metadata": {
@@ -43,6 +43,7 @@
     "    \"swarm_sequential__\": False,\n",
     "    \"swarm_max_battery__\": False,\n",
     "    \"swarm_smart__\": False,\n",
+    "    \"swarm_sequential_static_at_resnet_decoderpth\": True,\n",
     "    \"psl_sequential_static_at_resnet_decoderpth\": True,\n",
     "    \"psl_sequential__resnet_decoderpth\": True,\n",
     "    \"psl_sequential_static_at_\": False,\n",
diff --git a/results/result_generation.py b/results/result_generation.py
index f22bca3d8558982250e4fa8d3376ee41b70d7539..26ea0d79ef13e614af15daf7753597e559edb544 100644
--- a/results/result_generation.py
+++ b/results/result_generation.py
@@ -21,6 +21,7 @@ STRATEGY_MAPPING = {
     "swarm_smart__": "Swarm SL (Smart)",
     "swarm_rand__": "Swarm SL (Rand)",
     "swarm_max_battery__": "Swarm SL (Greedy)",
+    "swarm_sequential_static_at_resnet_decoderpth": "Swarm SL (Seq) AE Static",
     "split___": "Vanilla SL",
     "psl_sequential_static_at_resnet_decoderpth": "PSSL (Seq) AE Static",
     "psl_sequential__resnet_decoderpth": "PSSL (Seq) AE",
@@ -478,25 +479,74 @@ def accuracy_over_epoch(history_groups, phase="train"):
     return results
 
 
-def accuracy_over_time(history_groups, phase="train"):
+def accuracy_over_time(
+    history_groups, phase="train", num_intervals=1000, rounds_fixed=True
+):
     """
-    Returns the accuracy over time for each group. No averaging implemented yet if there are multiple runs per group!
+    Returns the average accuracy over time for each group.
     Args:
         history_groups: The runs of one project, according to the structure of the wandb project
         phase: (str) the phase to get the accuracy for, either "train" or "val"
+        num_intervals: (int) the number of time intervals to average over
+        rounds_fixed: (bool) whether the number of rounds was fixed
     Returns:
-        results: (dict) the accuracy (list(float)) per round (list(int)) for each group
+        results: (dict) a series of average accuracy indexed by time for each group
     """
-    results = {}
+    avg_acc_per_strategy = {}
     for (strategy, job), group in history_groups.items():
         if job == "train":
-            run_df = group["controller"][0]  # no averaging
-            time_acc = run_df[[f"{phase}_accuracy.value", "_runtime"]].dropna()
-            results[(strategy, job)] = (
-                time_acc["_runtime"],
-                time_acc[f"{phase}_accuracy.value"],
-            )
-    return results
+            # determine the upper bound for the intervals
+            end_times = []
+            for i, run_df in enumerate(group["controller"]):
+                end_times.append(
+                    max(
+                        run_df[[f"{phase}_accuracy.value", "_runtime"]].dropna()[
+                            "_runtime"
+                        ]
+                    )
+                )
+            if rounds_fixed:  # if the number of rounds is fixed, average the end times
+                interval_end_time = sum(end_times) / len(end_times)
+            else:  # otherwise use the longest runtime
+                interval_end_time = max(end_times)
+            avg_acc_per_interval = []
+            for i, run_df in enumerate(group["controller"]):
+                if rounds_fixed:
+                    run_end_time = max(
+                        run_df[[f"{phase}_accuracy.value", "_runtime"]].dropna()[
+                            "_runtime"
+                        ]
+                    )
+                    run_df = scale_parallel_time(
+                        run_df.copy(), scale_factor=interval_end_time / run_end_time
+                    )
+                time_acc = run_df[[f"{phase}_accuracy.value", "_runtime"]].dropna()
+                intervals = pd.interval_range(
+                    start=0,
+                    end=interval_end_time,
+                    periods=num_intervals,
+                )
+                time_acc = time_acc.set_index("_runtime")
+                time_acc["time"] = pd.cut(
+                    time_acc.index,
+                    bins=intervals,
+                    labels=np.arange(num_intervals),
+                    include_lowest=True,
+                )
+                # average per interval; forward-fill gaps with the last value, zero-fill leading gaps
+                df_resampled = (
+                    time_acc.groupby("time")[f"{phase}_accuracy.value"]
+                    .mean()
+                    .ffill()
+                    .fillna(0)
+                )
+                avg_acc_per_interval.append(df_resampled)
+            avg_acc_per_strategy[(strategy, job)] = (
+                pd.concat(avg_acc_per_interval, axis=1)
+                .mean(axis=1)
+                .rename(lambda x: x.right)
+            )  # replace each interval by its upper bound
+    return avg_acc_per_strategy
 
 
 def plot_accuracies(accuracies_per_round, save_path=None, phase="train"):
@@ -532,8 +582,8 @@ def plot_accuracies_over_time(accuracies_per_time, save_path=None, phase="train"
         save_path: (str) the path to save the plot to
     """
     plt.figure(dpi=DPI)
-    for (strategy, job), (time, accs) in accuracies_per_time.items():
-        plt.plot(time, accs, label=f"{STRATEGY_MAPPING[strategy]}")
+    for (strategy, job), df in accuracies_per_time.items():
+        plt.plot(df, label=f"{STRATEGY_MAPPING[strategy]}")
     plt.xlabel(LABEL_MAPPING["runtime"])
     plt.ylabel(LABEL_MAPPING[f"{phase} accuracy"])
     plt.legend()
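
For reference (not part of the patch): a minimal, self-contained sketch of the interval-averaging scheme that the new accuracy_over_time implements, run on synthetic data. The toy runs below only mimic the "_runtime"/accuracy columns of the wandb run DataFrames; the fixed-rounds rescaling via scale_parallel_time is omitted, and all names in the snippet are illustrative.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)

# Two toy "runs": accuracy logged at irregular wall-clock times (seconds).
toy_runs = []
for _ in range(2):
    t = np.sort(rng.uniform(0, 100, size=20))
    acc = np.clip(np.linspace(0.1, 0.9, 20) + rng.normal(0, 0.02, 20), 0, 1)
    toy_runs.append(pd.DataFrame({"_runtime": t, "acc": acc}))

num_intervals = 10
# Common time grid; taking the longest run corresponds to rounds_fixed=False.
interval_end_time = max(df["_runtime"].max() for df in toy_runs)
intervals = pd.interval_range(start=0, end=interval_end_time, periods=num_intervals)

per_run = []
for df in toy_runs:
    binned = pd.cut(df["_runtime"], bins=intervals)  # map each sample to an interval
    resampled = (
        df.groupby(binned, observed=False)["acc"]
        .mean()     # average all samples that fall into the same interval
        .ffill()    # carry the last seen accuracy into empty intervals
        .fillna(0)  # intervals before the first measurement get 0
    )
    per_run.append(resampled)

# Average the runs interval-wise and index each interval by its upper bound,
# mirroring the .rename(lambda x: x.right) step in the patch.
avg = pd.concat(per_run, axis=1).mean(axis=1).rename(lambda iv: iv.right)
print(avg)

Resampling every run onto the same fixed grid is what makes the final interval-wise mean well defined even though runs log accuracy at different wall-clock times.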
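
In the same spirit, a sketch of how the reshaped return value feeds the updated plotting loop. The accuracies_per_time dict below is fabricated for illustration; the real module resolves labels via STRATEGY_MAPPING and LABEL_MAPPING and sets dpi=DPI.

import matplotlib.pyplot as plt
import pandas as pd

# Fabricated stand-in for the accuracy_over_time(...) return value:
# (strategy, job) -> Series of average accuracy indexed by interval upper bounds.
accuracies_per_time = {
    ("psl_sequential_static_at_resnet_decoderpth", "train"): pd.Series(
        [0.0, 0.42, 0.61, 0.70], index=[25.0, 50.0, 75.0, 100.0]
    ),
    ("swarm_sequential_static_at_resnet_decoderpth", "train"): pd.Series(
        [0.0, 0.35, 0.52, 0.66], index=[25.0, 50.0, 75.0, 100.0]
    ),
}

plt.figure()
for (strategy, job), series in accuracies_per_time.items():
    # plt.plot(series) takes x from the Series index and y from its values,
    # which is why the patch can drop the explicit (time, accs) tuple.
    plt.plot(series, label=strategy)
plt.xlabel("runtime [s]")  # the module uses LABEL_MAPPING["runtime"] here
plt.ylabel("train accuracy")
plt.legend()
plt.show()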