Commit 1e7d7d5e authored by Tim Tobias Bauerle

Merge branch 'feat/adaptive-threshold-mechanism' into 'wip'

chore: experiment files for adaptive threshold mechanism

See merge request !11
parents 5fede3b6 7dcf3631
Related merge requests: !18 (Merge in main), !11 (chore: experiment files for adaptive threshold mechanism)
Showing 154 additions and 9 deletions
name: psl
_target_: edml.controllers.parallel_split_controller.ParallelSplitController
_partial_: true
scheduler:
  _target_: edml.controllers.scheduler.sequential.SequentialNextServerScheduler
_target_: edml.controllers.parallel_split_controller.ParallelSplitController
_partial_: true
scheduler:
  _target_: edml.controllers.scheduler.sequential.SequentialNextServerScheduler
adaptive_threshold_fn:
  _target_: edml.controllers.adaptive_threshold_mechanism.static.StaticAdaptiveThresholdFn
  threshold: 1.65
@@ -4,12 +4,12 @@ defaults:
- dataset: mnist
- battery: flops_and_communication
- loss_fn: !!null
- experiment: default_experiment
- model_provider: mnist
- optimizer: !!null
- scheduler: !!null
- seed: default
- topology: equal_batteries
- experiment: default_experiment
- grpc: default
- wandb: default
- _self_
@@ -17,6 +17,12 @@ defaults:
own_device_id: "d0"
num_devices: ${len:${topology.devices}}
# define config attributes for the group:
group_by:
- controller: [ name, scheduler: name, adaptive_threshold_fn: name ]
# group attribute determined by resolver with the given attributes
group: ${group_name:${group_by}}
# This registers the framework-provided configuration files with hydra.
hydra:
  searchpath:
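The ${len:...} and ${group_name:...} interpolations above rely on custom OmegaConf resolvers that the framework registers elsewhere. A minimal sketch of how such resolvers could be defined; the traversal of the group_by spec is an assumption, not the framework's actual logic:

from omegaconf import OmegaConf

# ${len:${topology.devices}} -> the number of configured devices.
OmegaConf.register_new_resolver("len", len)

# ${group_name:${group_by}} -> a label such as "psl-sequential-static_at",
# joined from the attribute values listed under group_by. Illustrative only.
def group_name(spec, *, _root_):
    def collect(node, prefix):
        if isinstance(node, str):  # leaf: an attribute name to look up
            yield str(OmegaConf.select(_root_, f"{prefix}.{node}", default="none"))
        elif OmegaConf.is_dict(node):  # mapping: descend into the named section
            for key, sub in node.items():
                yield from collect(sub, f"{prefix}.{key}" if prefix else key)
        else:  # list: flatten its entries
            for item in node:
                yield from collect(item, prefix)

    return "-".join(collect(spec, ""))

OmegaConf.register_new_resolver("group_name", group_name)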
@@ -19,8 +19,8 @@ early_stopping_metric: accuracy
# Dataset partitioning.
partition: True
fractions: [ 0.1, 0.1, 0.1, 0.1, 0.1 ] # set to !!null if the dataset should not be partitioned or should be partitioned equally
latency: [ 0.0, 1.0, 0.0, 0.0, 0.0 ] # set to !!null for no latency
fractions: !!null # set to !!null if the dataset should not be partitioned or should be partitioned equally
latency: !!null # set to !!null for no latency
# Debug.
load_single_batch_for_debugging: False
# Base properties for the experiment.
project: inda-ml-comparisons
name: cifar100-effectiveness-adaptive-threshold-mechanism-none
job: train
# Training parameters.
batch_size: 64
max_epochs: 1
max_rounds: 200
metrics: [ accuracy ]
# Checkpoint saving and early stopping.
save_weights: True
server_model_save_path: "edml/models/weights/"
client_model_save_path: "edml/models/weights/"
early_stopping: True
early_stopping_patience: 200
early_stopping_metric: accuracy
# Dataset partitioning.
partition: True
fractions: !!null
latency: !!null
# Debug.
load_single_batch_for_debugging: False
# Base properties for the experiment.
project: inda-ml-comparisons
name: cifar100-effectiveness-adaptive-threshold-mechanism
job: train
# Training parameters.
batch_size: 64
max_epochs: 1
max_rounds: 200
metrics: [ accuracy ]
# Checkpoint saving and early stopping.
save_weights: True
server_model_save_path: "edml/models/weights/"
client_model_save_path: "edml/models/weights/"
early_stopping: True
early_stopping_patience: 200
early_stopping_metric: accuracy
# Dataset partitioning.
partition: True
fractions: !!null
latency: !!null
# Debug.
load_single_batch_for_debugging: False
_target_: torch.optim.SGD
lr: 0.1
momentum: 0.9
weight_decay: 0.0001
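This SGD config carries no params argument, so it cannot be instantiated on its own; presumably the framework injects the model parameters when the optimizer is built. A sketch of that pattern using Hydra's call-time keyword arguments, with a hypothetical stand-in model:

import torch.nn as nn
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {"_target_": "torch.optim.SGD", "lr": 0.1, "momentum": 0.9, "weight_decay": 0.0001}
)
model = nn.Linear(4, 2)  # hypothetical stand-in for the real model
# Call-time kwargs are merged into the config before the target is called.
optimizer = instantiate(cfg, params=model.parameters())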
# @package _global_
defaults:
- override /battery: unlimited
- override /dataset: cifar100
- override /experiment: cifar100-effectiveness-adaptive-threshold-mechanism
- override /loss_fn: cross_entropy
- override /model_provider: resnet20
- override /optimizer: sdg_with_momentum
- override /scheduler: multistep
- override /topology: equal_batteries
- _self_
hydra:
  mode: MULTIRUN
  sweeper:
    params:
      +controller: parallel_swarm #parallel_swarm_ash_1.65
      # +controller/scheduler: max_battery
      # controller.adaptive_learning_threshold: 1.65
@@ -11,5 +11,5 @@ hydra:
  mode: MULTIRUN
  sweeper:
    params:
      +controller: fed,swarm,parallel_swarm
      +controller/scheduler: max_battery,sequential,rand
      +controller: swarm,parallel_swarm
      controller/scheduler: max_battery,sequential,rand
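Since Hydra's sweeper takes the cartesian product of these lists, the two controllers crossed with the three schedulers expand into six runs (swarm and parallel_swarm, each paired with max_battery, sequential, and rand).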
_target_: edml.controllers.adaptive_threshold_mechanism.dynamic.LogarithmicDecayAdaptiveThresholdFn
name: log_decay_at
starting_value: 4
approach_value: 1
decay_rate: 0.05
name: static_at
_target_: edml.controllers.adaptive_threshold_mechanism.static.StaticAdaptiveThresholdFn
threshold: 1.65
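At runtime, Hydra turns such _target_ configs into live objects. A minimal sketch of instantiating the static variant above; the name key is omitted here, since instantiate would otherwise forward it to the constructor:

from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "_target_": "edml.controllers.adaptive_threshold_mechanism.static.StaticAdaptiveThresholdFn",
        "threshold": 1.65,
    }
)
threshold_fn = instantiate(cfg)
# A static threshold function presumably returns 1.65 regardless of the round.
print(threshold_fn.invoke(round_no=0))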
name: fed
_target_: edml.controllers.fed_controller.FedController
_partial_: true
name: psl
_target_: edml.controllers.parallel_split_controller.ParallelSplitController
_partial_: true
scheduler:
  _target_: edml.controllers.scheduler.sequential.SequentialNextServerScheduler
defaults:
- scheduler: sequential
- adaptive_threshold_fn: !!null
name: max_battery
_target_: edml.controllers.scheduler.max_battery.MaxBatteryNextServerScheduler
name: rand
_target_: edml.controllers.scheduler.random.RandomNextServerScheduler
name: sequential
_target_: edml.controllers.scheduler.sequential.SequentialNextServerScheduler
name: swarm
_target_: edml.controllers.swarm_controller.SwarmController
_partial_: true
scheduler:
  _target_: edml.controllers.scheduler.sequential.SequentialNextServerScheduler
defaults:
- scheduler: sequential
_target_: edml.models.provider.cut_layer.CutLayerModelProvider
model:
  _target_: edml.models.resnet_models.ResNet
  block:
    _target_: hydra.utils.get_class
    path: edml.models.resnet_models.BasicBlock
  num_blocks: [ 3, 3, 3 ]
  num_classes: 100
cut_layer: 4
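For context, cut_layer: 4 marks where the network is split between client and server in split learning. A rough sketch of the idea, assuming a sequential stack of layers; the actual CutLayerModelProvider may slice the ResNet differently:

import torch.nn as nn

def split_at_cut_layer(model: nn.Sequential, cut_layer: int):
    # The client runs layers [0, cut_layer) on-device and sends the smashed
    # activations to the server, which runs layers [cut_layer, end).
    layers = list(model.children())
    client_part = nn.Sequential(*layers[:cut_layer])
    server_part = nn.Sequential(*layers[cut_layer:])
    return client_part, server_part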
from abc import ABC, abstractmethod


class AdaptiveThresholdFn(ABC):
    """A function that returns the adaptive threshold value based on the current round."""

    @abstractmethod
    def invoke(self, round_no: int) -> float:
        """Return the adaptive threshold value for the given round number."""
import numpy as np

from edml.controllers.adaptive_threshold_mechanism import AdaptiveThresholdFn


class LogarithmicDecayAdaptiveThresholdFn(AdaptiveThresholdFn):
    def __init__(
        self, starting_value: float, approach_value: float, decay_rate: float = 1.0
    ):
        super().__init__()
        self._start = starting_value
        self._end = approach_value
        self._decay_rate = decay_rate

    def invoke(self, round_no: int) -> float:
        # Exponential decay from starting_value toward approach_value.
        return self._end + (self._start - self._end) * np.exp(
            -self._decay_rate * round_no
        )
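Plugging in the values from the log_decay_at config above (starting_value 4, approach_value 1, decay_rate 0.05), the threshold starts at 4 and decays smoothly toward 1 as training progresses:

from edml.controllers.adaptive_threshold_mechanism.dynamic import (
    LogarithmicDecayAdaptiveThresholdFn,
)

# threshold(round_no) = 1 + 3 * exp(-0.05 * round_no)
fn = LogarithmicDecayAdaptiveThresholdFn(
    starting_value=4, approach_value=1, decay_rate=0.05
)
for round_no in (0, 10, 50, 200):
    print(round_no, round(fn.invoke(round_no), 3))
# -> 4.0, 2.82, 1.246, 1.0 (approximately)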