# Source code for olympus.optimizers.amsgrad

import torch.optim

from olympus.optimizers.base import OptimizerAdapter


class AMSGrad(OptimizerAdapter):
    """AMSGrad variant of Adam.

    Hands ``torch.optim.Adam`` and the hyper-parameters below to
    :class:`OptimizerAdapter`, always with ``amsgrad=True``.

    See also :class:`.Adam`

    Parameters
    ----------
    model_parameters: List[Tensor]
        Parameters to optimize; forwarded unchanged to the adapter.

    weight_decay: float
        Add L2 penalty to the cost (encourage smaller weights).
        No signature default; :meth:`defaults` suggests 0.001.

    lr: float
        Learning rate.
        No signature default; :meth:`defaults` suggests 0.001.

    beta1: float ∈ [0, 1)
        Exponential decay rate for the first moment estimate.
        No signature default; :meth:`defaults` suggests 0.9.

    beta2: float ∈ [0, 1)
        Exponential decay rate for the second moment estimate.
        No signature default; :meth:`defaults` suggests 0.999.

    eps: float = 1e-8
        Term added to the denominator to improve numerical stability.

    References
    ----------
    .. [1] Tran Thi Phuong, Le Trieu Phong. "On the Convergence Proof of AMSGrad and a New Version", 7 Apr 2019
    """

    def __init__(self, model_parameters, weight_decay, lr, beta1, beta2, eps=1e-8):
        super().__init__(
            torch.optim.Adam,
            model_parameters,
            lr=lr,
            # torch.optim.Adam takes both decay rates as a single pair.
            betas=[beta1, beta2],
            weight_decay=weight_decay,
            eps=eps,
            # This flag is what distinguishes AMSGrad from plain Adam.
            amsgrad=True
        )

    @staticmethod
    def get_space():
        """Return the hyper-parameter search space.

        Returns
        -------
        dict
            Maps each tunable constructor argument (``lr``, ``beta1``,
            ``beta2``, ``weight_decay``) to a string naming a log-uniform
            prior and its bounds. ``eps`` is not part of the space.
        """
        # NOTE(review): the strings are presumably parsed by the hyper-parameter
        # search backend, which is not visible here. The beta ranges are written
        # with an upper bound of 1 while the class docstring states [0, 1) --
        # confirm the backend treats the upper bound as exclusive.
        return {
            'lr': 'loguniform(1e-5, 1)',
            'beta1': 'loguniform(0.9, 1)',
            'beta2': 'loguniform(0.99, 1)',
            'weight_decay': 'loguniform(1e-10, 1e-3)'
        }

    @staticmethod
    def defaults():
        """Return default values for the arguments that have no signature default.

        Returns
        -------
        dict
            Values for ``weight_decay``, ``lr``, ``beta1`` and ``beta2``.
            ``eps`` is omitted; it keeps the constructor default.
        """
        return {
            'weight_decay': 0.001,
            'lr': 0.001,
            'beta1': 0.9,
            'beta2': 0.999
        }


# Name -> optimizer class mapping exported by this module
# (presumably collected by an optimizer registry elsewhere -- not visible here).
builders = dict(amsgrad=AMSGrad)