drnas_defaults.yaml
# options: cifar10, cifar100, ImageNet16-120; test accuracy is reported for each
dataset: ImageNet16-120
# in the code base the default value for the seed is 2
# random seeds are logged at run time, but the log files are not provided,
# and the paper does not state which seeds were used
seed: 99
# options: darts (or nb301), nb201
search_space: nasbench301
out_dir: run
optimizer: drnas
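# example (values taken from the option lists above): to search on nb201 with
# cifar10 instead, set
#   dataset: cifar10
#   search_space: nb201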
search:
  checkpoint_freq: 5
  # default batch size in the code is 64
  batch_size: 64
  # learning rate for progressive and original: 0.025
  learning_rate: 0.025
  # minimum learning rate for progressive and original: 0.001
  learning_rate_min: 0.001
  momentum: 0.9
  # weight_decay for progressive and original: 0.0003
  weight_decay: 0.0003
  # for cifar10 the search runs in 2 stages of 25 epochs each;
  # in the code the default number of training epochs for nb201 is 100
  epochs: 100
  warm_start_epochs: 0
  grad_clip: 5
  # for cifar10 the training data (50k) is split equally between weight
  # training and architecture optimization
  train_portion: 0.5
  # for cifar10 the training data (50k) is split equally, so 25k is used here
  data_size: 25000
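  # worked example: cifar10 provides 50,000 training images; with
  # train_portion 0.5, 25,000 of them update the network weights and the
  # other 25,000 update the architecture parameters, hence data_size: 25000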
  # for the four args below the values are the same for ordinary and
  # progressive mode on nb201
  cutout: False
  cutout_length: 16
  cutout_prob: 1.0
  drop_path_prob: 0.0
  # for nb201 this value is false
  unrolled: False
  arch_learning_rate: 0.0003
  # not mentioned for progressive mode, but for ordinary mode it is 1e-3 in nb201
  arch_weight_decay: 0.001
  output_weights: True
  fidelity: 200
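  # note: the sections below (GDAS, RE, LS, GSparsity, BANANAS, BasePredictor)
  # hold hyperparameters for other optimizers that share this config format;
  # presumably they are read only when `optimizer` above is set to the
  # matching algorithm and are ignored for drnas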
  # GDAS
  tau_max: 10
  tau_min: 0.1
  # RE
  sample_size: 10
  population_size: 100
  # LS
  num_init: 10
  # GSparsity -> uncomment the lines below for GSparsity
  # seed: 50
  # grad_clip: 0
  # threshold: 0.000001
  # weight_decay: 120
  # learning_rate: 0.01
  # momentum: 0.8
  # normalization: div
  # normalization_exponent: 0.5
  # batch_size: 256
  # learning_rate_min: 0.0001
  # epochs: 100
  # warm_start_epochs: 0
  # train_portion: 0.9
  # data_size: 25000
  # BANANAS
  k: 10
  num_ensemble: 3
  acq_fn_type: its
  acq_fn_optimization: mutation
  encoding_type: path
  num_arches_to_mutate: 2
  max_mutations: 1
  num_candidates: 100
  # BasePredictor
  predictor_type: var_sparse_gp
  debug_predictor: False
evaluation:
  checkpoint_freq: 30
  # neither the paper nor the code base specifies the batch size; the default value is 64
  batch_size: 64
  learning_rate: 0.025
  learning_rate_min: 0.00
  # momentum is 0.9
  momentum: 0.9
  # for cifar weight_decay is 3e-4
  weight_decay: 0.0003
  # cifar evaluation runs for 600 epochs; for imagenet it is 250
  epochs: 250
  # for imagenet there are 5 warm-start epochs
  warm_start_epochs: 5
  grad_clip: 5
  # uses the whole cifar10 training set (50k) to train from scratch for 600 epochs
  train_portion: 1.
  data_size: 50000
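  # note: unlike the search phase, no validation split is held out here; the
  # architecture is already fixed, so the full 50k images go to weight training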
  # for cifar10 cutout is applied for a fair comparison with previous work
  cutout: True
  # cifar10 cutout length is 16
  cutout_length: 16
  # for cifar10 cutout is applied for a fair comparison with previous work
  cutout_prob: 1.0
  # for cifar the drop path probability is 0.3
  drop_path_prob: 0.2
  # for cifar the auxiliary weight is 0.4
  auxiliary_weight: 0.4
  # there is a partial-channel variable whose default is 1 in ordinary mode and 4 in progressive mode
  # the nb201 code also mentions regularization scales for l2 and kl (used for the dirichlet distribution)
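# loading sketch (a minimal example using PyYAML directly; NASLib ships its
# own config loader, whose exact API may differ between versions):
#   import yaml
#   with open("drnas_defaults.yaml") as f:
#       cfg = yaml.safe_load(f)
#   print(cfg["search"]["epochs"])      # 100
#   print(cfg["evaluation"]["epochs"])  # 250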