-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel_config.py
executable file
·72 lines (68 loc) · 1.78 KB
/
model_config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import torch
from dataclasses import dataclass, field
@dataclass
class DeMansia_2_tiny_config:
    """Hyperparameters for the DeMansia-2 *tiny* model (192-dim, 24 layers).

    Mirrors ``DeMansia_2_small_config``; only ``d_model``, the SSM head
    dimension, and the attention head count differ between the two presets.
    """

    # Optimizer / schedule settings (intended batch size: 1024).
    learning_rate: float = 1e-4
    weight_decay: float = 0.5
    warmup_epochs: int = 10

    # Input geometry: 224x224 RGB images split into 16x16 patches.
    img_size: tuple[int, int] = (224, 224)
    patch_size: tuple[int, int] = (16, 16)
    token_label_size: int = 14  # side length of the token-label grid (224 / 16)
    channels: int = 3

    # Backbone dimensions.
    depth: int = 24
    d_model: int = 192
    d_intermediate: int = 0  # 0 disables the intermediate MLP expansion
    num_classes: int = 1000

    # SSM (Mamba-style) layer config; default_factory so each instance
    # gets its own dict rather than a shared class-level mutable default.
    ssm_cfg: dict = field(
        default_factory=lambda: {
            "headdim": 48,
        }
    )

    # Indices of layers that use attention instead of the SSM mixer.
    attn_layer_idx: tuple[int, ...] = (10, 20)
    attn_cfg: dict = field(
        default_factory=lambda: {
            "causal": False,
            "d_conv": 4,
            "head_dim": 96,
            "num_heads": 3,
            "out_proj_bias": True,
            "qkv_proj_bias": True,
            "rotary_emb_dim": 48,
        }
    )

    device: str = "cuda"
    dtype: torch.dtype = torch.float32
@dataclass
class DeMansia_2_small_config:
    """Hyperparameters for the DeMansia-2 *small* model (384-dim, 24 layers).

    Mirrors ``DeMansia_2_tiny_config``; only ``d_model``, the SSM head
    dimension, and the attention head count differ between the two presets.
    """

    # Optimizer / schedule settings (intended batch size: 1024).
    learning_rate: float = 1e-4
    weight_decay: float = 0.5
    warmup_epochs: int = 10

    # Input geometry: 224x224 RGB images split into 16x16 patches.
    img_size: tuple[int, int] = (224, 224)
    patch_size: tuple[int, int] = (16, 16)
    token_label_size: int = 14  # side length of the token-label grid (224 / 16)
    channels: int = 3

    # Backbone dimensions.
    depth: int = 24
    d_model: int = 384
    d_intermediate: int = 0  # 0 disables the intermediate MLP expansion
    num_classes: int = 1000

    # SSM (Mamba-style) layer config; default_factory so each instance
    # gets its own dict rather than a shared class-level mutable default.
    ssm_cfg: dict = field(
        default_factory=lambda: {
            "headdim": 96,
        }
    )

    # Indices of layers that use attention instead of the SSM mixer.
    attn_layer_idx: tuple[int, ...] = (10, 20)
    attn_cfg: dict = field(
        default_factory=lambda: {
            "causal": False,
            "d_conv": 4,
            "head_dim": 96,
            "num_heads": 6,
            "out_proj_bias": True,
            "qkv_proj_bias": True,
            "rotary_emb_dim": 48,
        }
    )

    device: str = "cuda"
    dtype: torch.dtype = torch.float32