import numpy as np


# Custom LR schedule from "Attention Is All You Need" (Vaswani et al., 2017):
#   lr = d_model**-0.5 * min(step**-0.5, step * warmup_step**-1.5)
class CustomLR:
    def __init__(self, d_model: int, warmup_step: int) -> None:
        self.__d_model = d_model
        self.__warmup_step = warmup_step
        self.__epoch = 0  # number of schedule steps taken so far

    def step(self) -> float:
        # Advance the schedule by one step and return the new LR.
        self.__epoch += 1
        return (self.__d_model ** -0.5) * min(
            self.__epoch ** -0.5, self.__epoch * (self.__warmup_step ** -1.5)
        )
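

# Minimal usage sketch (an illustrative addition, not part of the original):
# with the paper's values d_model=512 and warmup_step=4000 (assumed here),
# the LR rises linearly over the first 4000 steps, then decays in proportion
# to step**-0.5.
def warmup_lr_curve(n_steps=10, d_model=512, warmup_step=4000):
    sched = CustomLR(d_model=d_model, warmup_step=warmup_step)
    return [sched.step() for _ in range(n_steps)]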
2025-10-08 12:13:02 +02:00
|
|
|
|
|
|
|
|
# OTHER LR
|
|
|
|
|
|
|
|
|
|
# Learning rate schedules (matching visualization parameters)
|
|
|
|
|
def step_lr(epoch, lr):
    # StepLR: step_size=20, gamma=0.5 (from visualization); halve the LR
    # every 20 epochs.
    return lr * 0.5 if epoch % 20 == 0 and epoch > 0 else lr


def exp_lr(epoch, lr):
    # ExponentialLR: gamma=0.95 (from visualization); multiply the current
    # LR by gamma on every call.
    return lr * 0.95
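

# Equivalence note (an added sketch, assuming exp_lr is applied once per
# epoch): repeated application from an initial LR lr0 matches the closed
# form lr0 * 0.95 ** epoch.
def exp_lr_closed_form(epoch, lr0):
    return lr0 * 0.95 ** epoch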


def cosine_lr(epoch, lr):
    # CosineAnnealingLR: lr_min=0.001, lr_max=0.1, max_epochs=100 (from
    # visualization); anneals from lr_max at epoch 0 to lr_min at max_epochs
    # along half a cosine period.
    lr_min, lr_max = 0.001, 0.1
    max_epochs = 100
    return lr_min + 0.5 * (lr_max - lr_min) * (1 + np.cos(epoch * np.pi / max_epochs))


def cyclical_lr(epoch, lr):
    # CyclicalLR (triangular policy): base_lr=0.001, max_lr=0.1,
    # step_size=20 (from visualization).
    base_lr = 0.001
    max_lr = 0.1
    step_size = 20

    # cycle indexes the current triangle; x is the normalized distance from
    # its peak (0 at the peak, 1 at the base), so the LR ramps linearly
    # between base_lr and max_lr over each 2 * step_size epochs.
    cycle = np.floor(1 + epoch / (2 * step_size))
    x = np.abs(epoch / step_size - 2 * cycle + 1)
    return base_lr + (max_lr - base_lr) * max(0, (1 - x))
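

# Minimal driver sketch (an illustrative addition, not from the original):
# step_lr and exp_lr update the LR carried over from the previous epoch,
# while cosine_lr and cyclical_lr recompute it from the epoch alone and
# ignore the lr argument.
if __name__ == "__main__":
    step, exp = 0.1, 0.1  # assumed initial LRs (lr_max in the visualization)
    for epoch in range(1, 6):
        step, exp = step_lr(epoch, step), exp_lr(epoch, exp)
        print(epoch, step, exp, cosine_lr(epoch, 0.0), cyclical_lr(epoch, 0.0))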