Mercurial > repos > ppericard > viscorvar
comparison mixomics_blocksplsda.R @ 2:c8533e9298e5 draft
"planemo upload for repository https://gitlab.com/bilille/galaxy-viscorvar commit 8cb5630238352459037b3647eebfabb5554566f6-dirty"
author | ppericard |
---|---|
date | Fri, 23 Oct 2020 10:15:56 +0000 |
parents | |
children | 88c1fd2ac110 |
comparison
equal
deleted
inserted
replaced
1:e93350dc99f1 | 2:c8533e9298e5 |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 # Setup R error handling to go to stderr | |
4 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
5 | |
6 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
8 | |
9 ## Get parameters ## | |
10 suppressPackageStartupMessages(require(argparse)) | |
11 | |
12 parser <- ArgumentParser(description='Run the mixOmics block.splsda function') | |
13 | |
14 parser$add_argument('--block', dest='blocks_list', nargs=4, action="append", required=TRUE, | |
15 help="Block name + nb variables to select + data matrix file + variables metadata file") | |
16 parser$add_argument('--sample_metadata_in', dest='sample_metadata_in', required=TRUE, | |
17 help="Samples metadata file") | |
18 parser$add_argument('--sample_description_col', dest='sample_description_col', type='integer', | |
19 default=0, help="Sample description column number") | |
20 parser$add_argument('--ncomp', dest='ncomp', type='integer', default=2, | |
21 help="Number of components to include in the model") | |
22 parser$add_argument('--correlation', dest='correlation', action="store_true", | |
23 help="Add correlation between all blocks") | |
24 parser$add_argument('--scheme', dest='scheme', default="horst", help="Scheme") | |
25 parser$add_argument('--mode', dest='mode', default="regression", help="Mode") | |
26 parser$add_argument('--maxiter', dest='maxiter', type='integer', default=100, | |
27 help="Maximum number of iterations") | |
28 parser$add_argument('--scale', dest='scale', action="store_true", | |
29 help="Each block is standardized to zero means and unit variances") | |
30 parser$add_argument('--check_missing_values', dest='check_missing_values', action="store_true", | |
31 help="Check for missing values and raise an error") | |
32 parser$add_argument('--init', dest='init', default="svd", | |
33 help="Init (svd or svd.single)") | |
34 parser$add_argument('--tol', dest='tol', type='double', default=1e-06, | |
35 help="Convergence stopping value") | |
36 parser$add_argument('--nearzerovar', dest='nearzerovar', action="store_true", | |
37 help="Should be set in particular for data with many zero values") | |
38 parser$add_argument('--rdata_out', dest='rdata_out', required=TRUE, help="Output Rdata file") | |
39 # parser$add_argument('--sample_metadata_out', dest='sample_metadata_out', required=TRUE, help="Output sample metadata file") | |
40 parser$add_argument('--variable_metadata_outdir', dest='variable_metadata_outdir', required=TRUE, help="Output variable metadata directory") | |
41 | |
42 args <- parser$parse_args() | |
43 | |
44 ## Print parameters | |
45 print("Blocks:") | |
46 print(args$blocks_list) | |
47 print("Sample metadata file:") | |
48 print(args$sample_metadata_in) | |
49 print("Sample description column number:") | |
50 print(args$sample_description_col) | |
51 print("Number of components:") | |
52 print(args$ncomp) | |
53 print("Compute correlation between all blocks:") | |
54 print(args$correlation) | |
55 print("Scheme:") | |
56 print(args$scheme) | |
57 print("Mode:") | |
58 print(args$mode) | |
59 print("Max nb of iterations:") | |
60 print(args$maxiter) | |
61 print("Scale:") | |
62 print(args$scale) | |
63 print("Check for missing values:") | |
64 print(args$check_missing_values) | |
65 print("Tol:") | |
66 print(args$tol) | |
67 print("near.zero.var:") | |
68 print(args$nearzerovar) | |
69 print("Output Rdata file:") | |
70 print(args$rdata_out) | |
71 # print("Output sample metadata file:") | |
72 # print(args$sample_metadata_out) | |
73 print("Output variable metadata directory:") | |
74 print(args$variable_metadata_outdir) | |
75 | |
76 ## Loading libraries | |
77 suppressPackageStartupMessages(require(mixOmics)) | |
78 | |
79 ## Read sample metadata file and set description factor matrix | |
80 sample_metadata <- read.table(args$sample_metadata_in, sep='\t', header=TRUE, row.names=1) | |
81 sample_metadata_names <- row.names(sample_metadata) | |
82 # print(sample_metadata_names) | |
83 | |
84 # print("Sample metadata matrix:") | |
85 # print(head(sample_metadata)) | |
86 | |
87 description_column <- ncol(sample_metadata) | |
88 if(args$sample_description_col > 0) | |
89 { | |
90 description_column <- args$sample_description_col | |
91 } | |
92 | |
93 Y <- factor(sample_metadata[[description_column]]) | |
94 | |
95 print("Y factor matrix:") | |
96 print(Y) | |
97 | |
98 ## Read and prepare block datasets | |
99 list_X <- c() | |
100 keepX <- c() | |
101 | |
102 for(i in 1:nrow(args$blocks_list)) | |
103 { | |
104 # Read block input parameters | |
105 block_name <- args$blocks_list[i,1] | |
106 block_keep <- strtoi(args$blocks_list[i,2]) | |
107 block_data_matrix_filename <- args$blocks_list[i,3] | |
108 # block_meta_var <- args$blocks_list[i,4] | |
109 | |
110 print(sprintf("Processing block %s", block_name)) | |
111 | |
112 # Store block data matrices | |
113 block_data_matrix <- t(read.table(block_data_matrix_filename, sep='\t', header=TRUE, row.names=1)) # transpose the matrix so that the samples become rows and the variables become columns | |
114 block_data_matrix_names <- row.names(block_data_matrix) | |
115 # print(block_data_matrix_names) | |
116 | |
117 if(!identical(sample_metadata_names, block_data_matrix_names)) | |
118 { | |
119 stop("Sample names must be the same and in the same order in the sample metadata matrix and the block data matrix") | |
120 } | |
121 | |
122 if(any(is.na(block_data_matrix))) | |
123 { | |
124 stop(sprintf("Block %s contains missing values. We recommend not to perform data integration with missing values. If you want to force run, change the advanced parameter 'Check for missing values' to 'No'.", block_name)) | |
125 } | |
126 | |
127 list_X[[block_name]] <- block_data_matrix | |
128 | |
129 # Set the nb of variables to keep | |
130 nb_variables = ncol(list_X[[block_name]]) | |
131 if(block_keep > 0) | |
132 { | |
133 keepX[[block_name]] <- rep(block_keep, args$ncomp) | |
134 } | |
135 else | |
136 { | |
137 keepX[[block_name]] <- rep(nb_variables, args$ncomp) | |
138 } | |
139 print(sprintf("Block %s contains %d variables and %d will be selected", block_name, nb_variables, block_keep)) | |
140 } | |
141 | |
142 # print(list_X) | |
143 | |
144 ## Generate design matrix | |
145 block_nb <- nrow(args$blocks_list) | |
146 | |
147 design <- matrix(0, nrow = block_nb, ncol = block_nb) | |
148 | |
149 if(args$correlation) | |
150 { | |
151 design <- matrix(1, nrow = block_nb, ncol = block_nb) | |
152 diag(design) <- 0 | |
153 } | |
154 | |
155 # print("Design matrix:") | |
156 # print(design) | |
157 | |
158 ################### | |
159 ## Main function ## | |
160 ################### | |
161 | |
162 mixomics_result <- block.splsda(X = list_X, | |
163 Y = Y, | |
164 ncomp = args$ncomp, | |
165 keepX = keepX, | |
166 design = design, | |
167 scheme = args$scheme, | |
168 mode = args$mode, | |
169 scale = args$scale, | |
170 init = args$init, | |
171 tol = args$tol, | |
172 max.iter = args$maxiter, | |
173 near.zero.var = args$nearzerovar, | |
174 all.outputs = TRUE) | |
175 | |
176 print("Block.splsda object:") | |
177 print(mixomics_result) | |
178 print(attributes(mixomics_result)) | |
179 | |
180 ## Save output Rdata file | |
181 save(mixomics_result, file=args$rdata_out) | |
182 | |
183 # R script call | |
184 source_local <- function(fname) | |
185 { | |
186 argv <- commandArgs(trailingOnly = FALSE) | |
187 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) | |
188 source(paste(base_dir, fname, sep="/")) | |
189 } | |
190 | |
191 # Load library | |
192 source_local("mixomics_blocksplsda_additional_funct.R") | |
193 | |
194 | |
195 list_union_selected_block_variables = unionSelectBlockVariables(mixomics_result) | |
196 | |
197 ## Save output sample metadata file | |
198 # print("Block.splsda variates:") | |
199 # print(mixomics_result$variates) | |
200 | |
201 # for(block_name in names(mixomics_result$variates)) | |
202 # { | |
203 # # print(block_name) | |
204 # # print(mixomics_result$variates[[block_name]]) | |
205 | |
206 # # Format the column names to add the block name and replace spaces | |
207 # colnames(mixomics_result$variates[[block_name]]) <- paste("block.splsda_variates", block_name, gsub(" ", "_", colnames(mixomics_result$variates[[block_name]])), sep = "_") | |
208 # # print(mixomics_result$variates[[block_name]]) | |
209 | |
210 # # Append the new columns to the sample metadata matrix | |
211 # sample_metadata <- cbind2(sample_metadata, mixomics_result$variates[[block_name]]) | |
212 # } | |
213 | |
214 # print(sample_metadata) | |
215 | |
216 # write.table(sample_metadata, file = args$sample_metadata_out, quote = TRUE, sep = "\t", row.names = TRUE, col.names = NA) | |
217 | |
218 ## Save output variable metadata files in output directory | |
219 # print("Block.splsda loadings:") | |
220 # print(mixomics_result$loadings) | |
221 | |
222 for(i in 1:nrow(args$blocks_list)) | |
223 { | |
224 # Read again block input parameters | |
225 block_name <- args$blocks_list[i,1] | |
226 # block_keep <- strtoi(args$blocks_list[i,2]) | |
227 # block_data_matrix_filename <- args$blocks_list[i,3] | |
228 block_meta_var <- args$blocks_list[i,4] | |
229 | |
230 print(sprintf("Saving block %s output metavar", block_name)) | |
231 | |
232 | |
233 meta_variable <- list_union_selected_block_variables[[i]] | |
234 colnames(meta_variable) <- "block.splsda_var_select" | |
235 | |
236 # meta_variable <- mixomics_result$loadings[[block_name]] | |
237 # print(head(meta_variable)) | |
238 | |
239 # Format the column names to add the block name and replace spaces | |
240 # colnames(meta_variable) <- paste("block.splsda_loadings", gsub(" ", "_", colnames(meta_variable)), sep = "_") | |
241 | |
242 # Read input block variable metadata files if provided (optional) | |
243 if(block_meta_var != "None") | |
244 { | |
245 input_meta_variable <- read.table(block_meta_var, sep='\t', header=TRUE, row.names=1) | |
246 # print(head(input_meta_variable)) | |
247 | |
248 # Append the new columns to the variable metadata matrix | |
249 meta_variable <- cbind2(input_meta_variable, meta_variable) | |
250 } | |
251 | |
252 # print(head(meta_variable)) | |
253 | |
254 block_meta_var_output_filename <- paste("mixomics_blocksplsda_block_", block_name, "_variable_metadata.tsv", sep="") | |
255 write.table(meta_variable, file = paste(args$variable_metadata_outdir,block_meta_var_output_filename, sep='/'), quote = TRUE, sep = "\t", row.names = TRUE, col.names = NA) | |
256 } |