-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
46 lines (38 loc) · 1.43 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Output directory in which the result files are stored.
# The bare value below parses as null; write the path after the colon.
out_dir:
# Name of the column in the input file that holds the protein names/IDs.
index_col: 'protein_ID'
## Preprocessing params
# Upper bound on the share of missing values tolerated per protein.
# NOTE(review): the value is presumably a fraction (0.3 = 30 %) - confirm.
max_allowed_NAs_per_protein: 0.3
# Transformation function applied to the data before model fitting.
# Options: log10, log2, log.
log_func_name: 'log'
# Sample annotation columns handed to the autoencoder as covariates.
# Leaving the list empty results in no covariates being passed to the model.
# The key is intentionally left bare so that entries below can simply be
# uncommented to enable them.
cov_used:
#- 'gender'
#- 'PROTEOMICS_BATCH'
#- 'BATCH_RUN'
#- 'INSTRUMENT'
## Grid search outlier injection params
# Settings for the outliers injected during the grid search.
# NOTE(review): judging by the names, these are the injection frequency and
# the mean / standard deviation of the injected values - confirm with the
# consuming code.
#
# Exponent-notation floats are written with an explicit decimal point
# (1.0e-3, not 1e-3): YAML 1.1 loaders such as PyYAML only resolve a scalar
# as a float when it contains a ".", and would otherwise load "1e-3" as a
# string.
inj_freq: 1.0e-3
inj_mean: 3
inj_sd: 1.6
# Random seed. The bare value parses as null.
# NOTE(review): how a null seed is treated is not shown in this file - confirm.
seed:
## Model params
# When set to false, the values obtained after (PCA) initialization are
# returned without any training.
autoencoder_training: true
# Number of layers of the autoencoder model.
n_layers: 1
# Number of training epochs.
n_epochs: 100
# Learning rate. Written with an explicit decimal point (1.0e-4, not 1e-4):
# YAML 1.1 loaders such as PyYAML only resolve a scalar as a float when it
# contains a ".", and would otherwise load "1e-4" as a string.
lr: 1.0e-4
# Batch size. The bare value parses as null.
# NOTE(review): how a null batch size is treated is not shown in this file -
# confirm.
batch_size:
# Method used to find the latent space dimension. Options: OHT, gs.
find_q_method: 'OHT'
# PCA initialization.
init_pca: true
# Hidden dimension; only used for n_layers>1. The bare value parses as null.
h_dim:
## Stat params
# Distribution used for the P-value computation. Options: gaussian, t.
pval_dist: 't'
# Multiple testing correction. Options: by, bh.
# NOTE(review): presumably Benjamini-Yekutieli / Benjamini-Hochberg - confirm.
pval_adj: 'by'
# Sidedness of the test.
# NOTE(review): the accepted alternatives are not listed in this file.
pval_sided: 'two-sided'
# Pseudocount used in the FC computation.
pseudocount: 0.01
## Reporting params
# Threshold used when reporting outliers.
# NOTE(review): the quantity it is compared against is not stated in this
# file - presumably the adjusted P-value; confirm.
outlier_threshold: 0.1
# NOTE(review): presumably reports every result rather than only the
# outliers - confirm with the consuming code.
report_all: true