-
Notifications
You must be signed in to change notification settings - Fork 512
/
adamw.py
80 lines (72 loc) · 2.43 KB
/
adamw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#
# For licensing see accompanying LICENSE file.
# Copyright (C) 2024 Apple Inc. All Rights Reserved.
#
import argparse
from typing import Dict, Iterable, Union
from torch import Tensor
from torch.optim import AdamW
from corenet.optims import OPTIM_REGISTRY
from corenet.optims.base_optim import BaseOptim
@OPTIM_REGISTRY.register(name="adamw")
class AdamWOptimizer(BaseOptim, AdamW):
    """
    `AdamW <https://arxiv.org/abs/1711.05101>`_ optimizer

    Registered in ``OPTIM_REGISTRY`` under the name ``adamw``. The AdamW-specific
    hyper-parameters are read from the ``optim.adamw.*`` entries of ``opts``
    (see :meth:`add_arguments`).

    Args:
        opts: Command-line arguments
        model_params: Model parameters
    """

    def __init__(
        self,
        opts: argparse.Namespace,
        model_params: Iterable[Union[Tensor, Dict]],
        *args,
        **kwargs
    ) -> None:
        """Initialize the base optimizer state, then the underlying ``torch.optim.AdamW``.

        Args:
            opts: Command-line arguments. ``optim.adamw.beta1``, ``optim.adamw.beta2``
                and ``optim.adamw.amsgrad`` must be present; ``optim.adamw.eps`` is optional.
            model_params: Model parameters (tensors or parameter-group dicts).
            *args: Accepted for signature compatibility; not used here.
            **kwargs: Accepted for signature compatibility; not used here.
        """
        # Must run first: `self.lr`, `self.eps` and `self.weight_decay` are read below.
        # NOTE(review): presumably these are set by `BaseOptim.__init__` from `opts` — confirm.
        BaseOptim.__init__(self, opts=opts)

        # Option names contain dots, so they can only be read via `getattr`.
        beta1 = getattr(opts, "optim.adamw.beta1")
        beta2 = getattr(opts, "optim.adamw.beta2")
        ams_grad = getattr(opts, "optim.adamw.amsgrad")
        # `eps` is optional: a missing or `None` value falls back to `self.eps`.
        eps = getattr(opts, "optim.adamw.eps", None)

        AdamW.__init__(
            self,
            params=model_params,
            lr=self.lr,
            betas=(beta1, beta2),
            eps=self.eps if eps is None else eps,
            weight_decay=self.weight_decay,
            amsgrad=ams_grad,
        )

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
        """Add arguments for AdamW optimizer"""
        if cls != AdamWOptimizer:
            # Don't re-register arguments in subclasses that don't override `add_arguments()`.
            return parser

        group = parser.add_argument_group(cls.__name__)
        group.add_argument(
            "--optim.adamw.beta1",
            type=float,
            default=0.9,
            help="Value of Beta1 in AdamW optimizer. Defaults to 0.9.",
        )
        group.add_argument(
            "--optim.adamw.beta2",
            type=float,
            default=0.98,
            help="Value of Beta2 in AdamW optimizer. Defaults to 0.98.",
        )
        group.add_argument(
            "--optim.adamw.amsgrad",
            action="store_true",
            default=False,
            help="Use AMSGrad in AdamW. Defaults to False.",
        )
        group.add_argument(
            "--optim.adamw.eps",
            type=float,
            default=None,
            # The trailing space keeps the two concatenated sentences from running together.
            help="Value of epsilon in AdamW optimizer. Defaults to None. "
            "When this value is None, the default value in base optimizer is used.",
        )
        return parser