forked from valeoai/rangevit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
config_kitti.yaml
104 lines (89 loc) · 2.1 KB
/
config_kitti.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# General config
# NOTE(review): presumably the number of data-loading workers — confirm
# against the code that reads this file.
num_workers: 4
# NOTE(review): presumably the experiment/run identifier — confirm usage.
id: 'exp_kitti'
# Data config
dataset: 'SemanticKitti'
# 19 classes + 1 ignored class.
n_classes: 20
# Set to true if you want to train on the train-val split of SemanticKITTI.
use_trainval: false
# Train config
has_label: true
# NOTE(review): unit is not stated here (presumably epochs) — confirm.
val_frequency: 1
n_epochs: 60
warmup_epochs: 10
# Training batch size; validation uses its own value (batch_size_val).
batch_size: 4
batch_size_val: 1
lr: 0.0004
# Printing frequency of the train results.
# NOTE(review): unit (iterations vs. epochs) is not stated here — confirm.
train_result_frequency: 100
# Model config
# NOTE(review): the two-element lists below are presumably [height, width];
# 64 and 2048 match proj_h / proj_w in the sensor section — confirm.
vit_backbone: 'vit_small_patch16_384'
in_channels: 5
# Patch size = patch stride if a convolutional stem (ConvStem) is used.
patch_size: [2, 8]
patch_stride: [2, 8]
# Size of the random crop taken at train time.
image_size: [64, 384]
# Sliding window size.
window_size: [64, 384]
# Sliding window stride.
window_stride: [64, 256]
original_image_size: [64, 2048]
# Stem
# Allowed values: 'none' or 'ConvStem'.
conv_stem: 'ConvStem'
stem_base_channels: 32
# Hidden dimension of the stem.
# Note that the test results that we provide in the paper for SemanticKITTI
# were obtained with D_h = 256.
# However, on the validation set D_h = 128 gave us better mIoU results.
D_h: 128
# Decoder
# Allowed values: 'linear' or 'up_conv'.
decoder: 'up_conv'
# Has to be 0 (no skip) or equal to D_h (the hidden dimension of the stem).
skip_filters: 128
# 3D refiner
# NOTE(review): presumably toggles a KPConv-based refinement stage — confirm
# in the model code.
use_kpconv: true
# Checkpoint model
# null: no checkpoint path is set.
checkpoint: null
# Placeholder path — replace with the location of the pre-trained model file.
pretrained_model: '/path_to_pretrained_model/model.pth'
# Loading pre-trained patch and positional embeddings
reuse_pos_emb: true
# false: no patch embedding is reused, as a convolutional stem (ConvStem) is used.
reuse_patch_emb: false
# Data augmentation config
# All parameters are nested under `augmentation` so the key holds a mapping
# instead of an empty (null) value.
# NOTE(review): p_* values look like probabilities and *min / *max like the
# bounds of a sampling range; units (e.g. metres / degrees) are not stated
# here — confirm against the augmentation code.
augmentation:
  # flip
  p_flipx: 0.0
  p_flipy: 0.5

  # translation
  p_transx: 0.5
  trans_xmin: -5
  trans_xmax: 5
  p_transy: 0.5
  trans_ymin: -3
  trans_ymax: 3
  p_transz: 0.5
  trans_zmin: -1
  trans_zmax: 0.0

  # rotation
  p_rot_roll: 0.5
  rot_rollmin: -5
  rot_rollmax: 5
  p_rot_pitch: 0.5
  rot_pitchmin: -5
  rot_pitchmax: 5
  p_rot_yaw: 0.5
  # Same symmetric range as roll and pitch, ordered min <= max so that
  # samplers which require low <= high accept it.
  rot_yawmin: -5
  rot_yawmax: 5
# Sensor / projection settings. All parameters are nested under `sensor` so
# the key holds a mapping instead of an empty (null) value.
sensor:
  name: 'HDL64'
  type: 'spherical'
  scan_proj: true
  # Projection height / width; same values as original_image_size: [64, 2048].
  proj_h: 64
  proj_w: 2048
  # NOTE(review): field-of-view bounds, presumably in degrees — confirm.
  fov_up: 3.0
  fov_down: -25.0
  fov_left: -180
  fov_right: 180
  # NOTE(review): img_mean / img_stds are placed under `sensor` on the
  # assumption that the loader reads them from there — confirm. Each list has
  # five entries, matching in_channels: 5; presumably per-channel statistics.
  img_mean:
    - 12.12
    - 10.88
    - 0.23
    - -1.04
    - 0.21
  img_stds:
    - 12.32
    - 11.47
    - 6.91
    - 0.86
    - 0.16