## params.yaml: this file holds all parameters to be passed into PyCoGAPS.
## To modify the default parameters, replace the values below with your own and save the file.
# RELATIVE path to data -- make sure to move your data into the created data/ folder
path: data/ModSimData.txt
# result output file name (output saved as a .h5ad file)
result_file: ModSimResult.h5ad
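# Example (hypothetical file names, shown for illustration only): to run on your own
# dataset, replace the two values above, e.g.
#   path: data/MyCounts.csv
#   result_file: MyCountsResult.h5ad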
standard_params:
# number of patterns CoGAPS will learn
nPatterns: 3
# number of iterations for each phase of the algorithm
nIterations: 1000
# random number generator seed
seed: 0
# speeds up performance with sparse data (roughly >80% of the data is zero); note this can only be used with the default uncertainty
useSparseOptimization: False
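# Illustrative override (values are assumptions, not tuned recommendations): for a large,
# mostly-zero matrix you might edit the block above to read
#   nPatterns: 8
#   nIterations: 5000
#   useSparseOptimization: True
# keeping in mind that sparse optimization only works with the default uncertainty.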
run_params:
# maximum number of threads to run on
nThreads: 1
# T/F for displaying output
messages: True
# number of iterations between each output (set to 0 to disable status updates)
outputFrequency: 500
# uncertainty matrix - either a matrix or a supported file type
uncertainty: null
# name of the checkpoint file to create
checkpointOutFile: gaps_checkpoint.out
# number of iterations between each checkpoint (set to 0 to disable checkpoints)
checkpointInterval: 250
# if this is provided, CoGAPS runs from the checkpoint contained in this file (see the resume sketch at the end of run_params)
checkpointInFile: null
# T/F for transposing the data while reading it in - useful for data stored as samples x genes, since CoGAPS requires genes x samples
transposeData: False
# if calling CoGAPS in parallel, the worker ID can be specified
workerID: 1
# enable asynchronous updating which allows for multi-threaded runs
asynchronousUpdates: True
# how many snapshots to take in each phase (set to 0 to disable snapshots)
nSnapshots: 0
# which phase to take snapshots in, e.g. "equilibration", "sampling", or "all"
snapshotPhase: sampling
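# Sketch of a resumed run (assumes a checkpoint file from an earlier run exists):
#   checkpointInFile: gaps_checkpoint.out
# with checkpointOutFile left set so the resumed run continues writing checkpoints.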
sparsity_params:
# sparsity parameter for feature matrix
alphaA: 0.01
# sparsity parameter for sample matrix
alphaP: 0.01
# atomic mass restriction for feature matrix
maxGibbsMassA: 100
# atomic mass restriction for sample matrix
maxGibbsMassP: 100
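# Illustrative override (assumed values, not tuned recommendations): these four priors are
# usually left at their defaults; to change them you would edit the values in place, e.g.
#   alphaA: 0.05
#   alphaP: 0.05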
distributed_params:
# either null or genome-wide
distributed: null
# number of sets to break data into
nSets: 4
# number of branches at which to cut dendrogram used in pattern matching
# default: nPatterns
cut: null
# minimum number of individual set contributions a cluster must contain
# default: math.ceil(cut / 2)
minNS: null
# maximum number of individual set contributions a cluster can contain
# default: minNS + nSets
maxNS: null
# specify subsets by index or name
explicitSets: null
# specify categories along the rows (cols) to use for weighted sampling
samplingAnnotation: null
# weights associated with samplingAnnotation
samplingWeight: null
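# Example (illustrative values): a genome-wide distributed run over 8 sets, leaving
# cut/minNS/maxNS null so their stated defaults (cut = nPatterns, minNS = ceil(cut / 2),
# maxNS = minNS + nSets) apply:
#   distributed: genome-wide
#   nSets: 8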
additional_params:
# set of indices to use from the data
subsetIndices: null
# which dimension (0=rows, 1=cols) to subset
subsetDim: 0
# vector of names of genes in data
geneNames: null
# vector of names of samples in data
sampleNames: null
# fix either the 'A' or 'P' matrix to these values; in distributed CoGAPS, the first phase is skipped
# and fixedPatterns is used for all sets, allowing manual pattern matching as well as fixed runs of standard CoGAPS
fixedPatterns: null
# either 'A' or 'P', indicating which matrix is fixed
whichMatrixFixed: null
# whether or not to take PUMP samples
takePumpSamples: False
# key of the dataset when reading .h5 files
hdfKey: null
# key of the row names when reading .h5 files
hdfRowKey: null
# key of the column names when reading .h5 files
hdfColKey: null
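# Example (hypothetical file and key names): to read a matrix stored in an HDF5 file,
# point path at the .h5 file and name its keys, e.g.
#   path: data/MyData.h5
#   hdfKey: counts
#   hdfRowKey: genes
#   hdfColKey: samples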
aws_params:
# whether or not to use an AWS S3 bucket for file transfer
useAWS: False
# name of bucket to download from
downloadBucket: null
# name of key to download from
downloadKey: null
# name of bucket to upload to
uploadBucket: null
# name of key to upload to
uploadKey: null
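# Example (hypothetical bucket and key names): to pull input from S3 and push the result
# back when the run finishes, you might set
#   useAWS: True
#   downloadBucket: my-cogaps-data
#   downloadKey: ModSimData.txt
#   uploadBucket: my-cogaps-results
#   uploadKey: results/ModSimResult.h5ad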