comparison mixomics_blocksplsda.R @ 2:c8533e9298e5 draft

"planemo upload for repository https://gitlab.com/bilille/galaxy-viscorvar commit 8cb5630238352459037b3647eebfabb5554566f6-dirty"
author ppericard
date Fri, 23 Oct 2020 10:15:56 +0000
parents
children 88c1fd2ac110
comparison
equal deleted inserted replaced
1:e93350dc99f1 2:c8533e9298e5
1 #!/usr/bin/env Rscript
2
3 # Setup R error handling to go to stderr
4 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
5
6 # we need that to not crash galaxy with an UTF8 error on German LC settings.
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
8
9 ## Get parameters ##
10 suppressPackageStartupMessages(require(argparse))
11
12 parser <- ArgumentParser(description='Run the mixOmics block.splsda function')
13
14 parser$add_argument('--block', dest='blocks_list', nargs=4, action="append", required=TRUE,
15 help="Block name + nb variables to select + data matrix file + variables metadata file")
16 parser$add_argument('--sample_metadata_in', dest='sample_metadata_in', required=TRUE,
17 help="Samples metadata file")
18 parser$add_argument('--sample_description_col', dest='sample_description_col', type='integer',
19 default=0, help="Sample description column number")
20 parser$add_argument('--ncomp', dest='ncomp', type='integer', default=2,
21 help="Number of components to include in the model")
22 parser$add_argument('--correlation', dest='correlation', action="store_true",
23 help="Add correlation between all blocks")
24 parser$add_argument('--scheme', dest='scheme', default="horst", help="Scheme")
25 parser$add_argument('--mode', dest='mode', default="regression", help="Mode")
26 parser$add_argument('--maxiter', dest='maxiter', type='integer', default=100,
27 help="Maximum number of iterations")
28 parser$add_argument('--scale', dest='scale', action="store_true",
29 help="Each block is standardized to zero means and unit variances")
30 parser$add_argument('--check_missing_values', dest='check_missing_values', action="store_true",
31 help="Check for missing values and raise an error")
32 parser$add_argument('--init', dest='init', default="svd",
33 help="Init (svd or svd.single)")
34 parser$add_argument('--tol', dest='tol', type='double', default=1e-06,
35 help="Convergence stopping value")
36 parser$add_argument('--nearzerovar', dest='nearzerovar', action="store_true",
37 help="Should be set in particular for data with many zero values")
38 parser$add_argument('--rdata_out', dest='rdata_out', required=TRUE, help="Output Rdata file")
39 # parser$add_argument('--sample_metadata_out', dest='sample_metadata_out', required=TRUE, help="Output sample metadata file")
40 parser$add_argument('--variable_metadata_outdir', dest='variable_metadata_outdir', required=TRUE, help="Output variable metadata directory")
41
42 args <- parser$parse_args()
43
44 ## Print parameters
45 print("Blocks:")
46 print(args$blocks_list)
47 print("Sample metadata file:")
48 print(args$sample_metadata_in)
49 print("Sample description column number:")
50 print(args$sample_description_col)
51 print("Number of components:")
52 print(args$ncomp)
53 print("Compute correlation between all blocks:")
54 print(args$correlation)
55 print("Scheme:")
56 print(args$scheme)
57 print("Mode:")
58 print(args$mode)
59 print("Max nb of iterations:")
60 print(args$maxiter)
61 print("Scale:")
62 print(args$scale)
63 print("Check for missing values:")
64 print(args$check_missing_values)
65 print("Tol:")
66 print(args$tol)
67 print("near.zero.var:")
68 print(args$nearzerovar)
69 print("Output Rdata file:")
70 print(args$rdata_out)
71 # print("Output sample metadata file:")
72 # print(args$sample_metadata_out)
73 print("Output variable metadata directory:")
74 print(args$variable_metadata_outdir)
75
76 ## Loading libraries
77 suppressPackageStartupMessages(require(mixOmics))
78
79 ## Read sample metadata file and set description factor matrix
80 sample_metadata <- read.table(args$sample_metadata_in, sep='\t', header=TRUE, row.names=1)
81 sample_metadata_names <- row.names(sample_metadata)
82 # print(sample_metadata_names)
83
84 # print("Sample metadata matrix:")
85 # print(head(sample_metadata))
86
87 description_column <- ncol(sample_metadata)
88 if(args$sample_description_col > 0)
89 {
90 description_column <- args$sample_description_col
91 }
92
93 Y <- factor(sample_metadata[[description_column]])
94
95 print("Y factor matrix:")
96 print(Y)
97
98 ## Read and prepare block datasets
99 list_X <- c()
100 keepX <- c()
101
102 for(i in 1:nrow(args$blocks_list))
103 {
104 # Read block input parameters
105 block_name <- args$blocks_list[i,1]
106 block_keep <- strtoi(args$blocks_list[i,2])
107 block_data_matrix_filename <- args$blocks_list[i,3]
108 # block_meta_var <- args$blocks_list[i,4]
109
110 print(sprintf("Processing block %s", block_name))
111
112 # Store block data matrices
113 block_data_matrix <- t(read.table(block_data_matrix_filename, sep='\t', header=TRUE, row.names=1)) # transpose the matrix so that the samples become rows and the variables become columns
114 block_data_matrix_names <- row.names(block_data_matrix)
115 # print(block_data_matrix_names)
116
117 if(!identical(sample_metadata_names, block_data_matrix_names))
118 {
119 stop("Sample names must be the same and in the same order in the sample metadata matrix and the block data matrix")
120 }
121
122 if(any(is.na(block_data_matrix)))
123 {
124 stop(sprintf("Block %s contains missing values. We recommend not to perform data integration with missing values. If you want to force run, change the advanced parameter 'Check for missing values' to 'No'.", block_name))
125 }
126
127 list_X[[block_name]] <- block_data_matrix
128
129 # Set the nb of variables to keep
130 nb_variables = ncol(list_X[[block_name]])
131 if(block_keep > 0)
132 {
133 keepX[[block_name]] <- rep(block_keep, args$ncomp)
134 }
135 else
136 {
137 keepX[[block_name]] <- rep(nb_variables, args$ncomp)
138 }
139 print(sprintf("Block %s contains %d variables and %d will be selected", block_name, nb_variables, block_keep))
140 }
141
142 # print(list_X)
143
144 ## Generate design matrix
145 block_nb <- nrow(args$blocks_list)
146
147 design <- matrix(0, nrow = block_nb, ncol = block_nb)
148
149 if(args$correlation)
150 {
151 design <- matrix(1, nrow = block_nb, ncol = block_nb)
152 diag(design) <- 0
153 }
154
155 # print("Design matrix:")
156 # print(design)
157
158 ###################
159 ## Main function ##
160 ###################
161
162 mixomics_result <- block.splsda(X = list_X,
163 Y = Y,
164 ncomp = args$ncomp,
165 keepX = keepX,
166 design = design,
167 scheme = args$scheme,
168 mode = args$mode,
169 scale = args$scale,
170 init = args$init,
171 tol = args$tol,
172 max.iter = args$maxiter,
173 near.zero.var = args$nearzerovar,
174 all.outputs = TRUE)
175
176 print("Block.splsda object:")
177 print(mixomics_result)
178 print(attributes(mixomics_result))
179
180 ## Save output Rdata file
181 save(mixomics_result, file=args$rdata_out)
182
183 # R script call
184 source_local <- function(fname)
185 {
186 argv <- commandArgs(trailingOnly = FALSE)
187 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
188 source(paste(base_dir, fname, sep="/"))
189 }
190
191 # Load library
192 source_local("mixomics_blocksplsda_additional_funct.R")
193
194
195 list_union_selected_block_variables = unionSelectBlockVariables(mixomics_result)
196
197 ## Save output sample metadata file
198 # print("Block.splsda variates:")
199 # print(mixomics_result$variates)
200
201 # for(block_name in names(mixomics_result$variates))
202 # {
203 # # print(block_name)
204 # # print(mixomics_result$variates[[block_name]])
205
206 # # Format the column names to add the block name and replace spaces
207 # colnames(mixomics_result$variates[[block_name]]) <- paste("block.splsda_variates", block_name, gsub(" ", "_", colnames(mixomics_result$variates[[block_name]])), sep = "_")
208 # # print(mixomics_result$variates[[block_name]])
209
210 # # Append the new columns to the sample metadata matrix
211 # sample_metadata <- cbind2(sample_metadata, mixomics_result$variates[[block_name]])
212 # }
213
214 # print(sample_metadata)
215
216 # write.table(sample_metadata, file = args$sample_metadata_out, quote = TRUE, sep = "\t", row.names = TRUE, col.names = NA)
217
218 ## Save output variable metadata files in output directory
219 # print("Block.splsda loadings:")
220 # print(mixomics_result$loadings)
221
222 for(i in 1:nrow(args$blocks_list))
223 {
224 # Read again block input parameters
225 block_name <- args$blocks_list[i,1]
226 # block_keep <- strtoi(args$blocks_list[i,2])
227 # block_data_matrix_filename <- args$blocks_list[i,3]
228 block_meta_var <- args$blocks_list[i,4]
229
230 print(sprintf("Saving block %s output metavar", block_name))
231
232
233 meta_variable <- list_union_selected_block_variables[[i]]
234 colnames(meta_variable) <- "block.splsda_var_select"
235
236 # meta_variable <- mixomics_result$loadings[[block_name]]
237 # print(head(meta_variable))
238
239 # Format the column names to add the block name and replace spaces
240 # colnames(meta_variable) <- paste("block.splsda_loadings", gsub(" ", "_", colnames(meta_variable)), sep = "_")
241
242 # Read input block variable metadata files if provided (optional)
243 if(block_meta_var != "None")
244 {
245 input_meta_variable <- read.table(block_meta_var, sep='\t', header=TRUE, row.names=1)
246 # print(head(input_meta_variable))
247
248 # Append the new columns to the variable metadata matrix
249 meta_variable <- cbind2(input_meta_variable, meta_variable)
250 }
251
252 # print(head(meta_variable))
253
254 block_meta_var_output_filename <- paste("mixomics_blocksplsda_block_", block_name, "_variable_metadata.tsv", sep="")
255 write.table(meta_variable, file = paste(args$variable_metadata_outdir,block_meta_var_output_filename, sep='/'), quote = TRUE, sep = "\t", row.names = TRUE, col.names = NA)
256 }