comparison mixomics_blocksplsda_script.R @ 0:bea08702ed51 draft

planemo upload for repository https://github.com/bilille/galaxy-mixomics-blocksplsda commit 7595141b2b760d3c9781f350abd2aa76a0644b1a
author ppericard
date Fri, 17 May 2019 05:00:22 -0400
parents
children 6595c17673cb
comparison
equal deleted inserted replaced
-1:000000000000 0:bea08702ed51
1 #!/usr/bin/env Rscript
2
3 # Setup R error handling to go to stderr
4 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
5
6 # we need that to not crash galaxy with an UTF8 error on German LC settings.
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
8
9 ## Get parameters ##
10 suppressPackageStartupMessages(require(argparse))
11
12 parser <- ArgumentParser(description='Run the mixOmics block.splsda function')
13
14 parser$add_argument('--block', dest='blocks_list', nargs=4, action="append", required=TRUE, help="Block name + nb variables to select + data matrix file + variables metadata file")
15 parser$add_argument('--sample_metadata_in', dest='sample_metadata_in', required=TRUE, help="Samples metadata file")
16 parser$add_argument('--sample_description_col', dest='sample_description_col', type='integer', required=TRUE, help="Sample description column number")
17 parser$add_argument('--ncomp', dest='ncomp', type='integer', required=TRUE, help="Number of components to include in the model")
18 parser$add_argument('--correlation', dest='correlation', action="store_true", help="Add correlation between all blocks")
19 parser$add_argument('--scheme', dest='scheme', required=TRUE, help="Scheme")
20 parser$add_argument('--mode', dest='mode', required=TRUE, help="Mode")
21 parser$add_argument('--maxiter', dest='maxiter', type='integer', required=TRUE, help="Maximum number of iterations")
22 parser$add_argument('--scale', dest='scale', action="store_true", help="Each block is standardized to zero means and unit variances")
23 parser$add_argument('--init', dest='init', required=TRUE, help="Init (svd or svd.single)")
24 parser$add_argument('--tol', dest='tol', type='double', required=TRUE, help="Convergence stopping value")
25 parser$add_argument('--nearzerovar', dest='nearzerovar', action="store_true", help="Should be set in particular for data with many zero values")
26 parser$add_argument('--rdata_out', dest='rdata_out', required=TRUE, help="Output Rdata file")
27 parser$add_argument('--sample_metadata_out', dest='sample_metadata_out', required=TRUE, help="Output sample metadata file")
28 parser$add_argument('--variable_metadata_outdir', dest='variable_metadata_outdir', required=TRUE, help="Output variable metadata directory")
29
30 args <- parser$parse_args()
31
32 ## Print parameters
33 print("Blocks:")
34 print(args$blocks_list)
35 print("Sample metadata file:")
36 print(args$sample_metadata_in)
37 print("Sample description column number:")
38 print(args$sample_description_col)
39 print("Number of components:")
40 print(args$ncomp)
41 print("Compute correlation between all blocks:")
42 print(args$correlation)
43 print("Scheme:")
44 print(args$scheme)
45 print("Mode:")
46 print(args$mode)
47 print("Max nb of iterations:")
48 print(args$maxiter)
49 print("Scale:")
50 print(args$scale)
51 print("Tol:")
52 print(args$tol)
53 print("near.zero.var:")
54 print(args$nearzerovar)
55 print("Output Rdata file:")
56 print(args$rdata_out)
57 print("Output sample metadata file:")
58 print(args$sample_metadata_out)
59 print("Output variable metadata directory:")
60 print(args$variable_metadata_outdir)
61
62 ## Loading libraries
63 suppressPackageStartupMessages(require(mixOmics))
64
65 ## Read sample metadata file and set description factor matrix
66 sample_metadata <- read.table(args$sample_metadata_in, sep='\t', header=TRUE, row.names=1)
67 sample_metadata_names <- row.names(sample_metadata)
68 # print(sample_metadata_names)
69
70 # print("Sample metadata matrix:")
71 # print(head(sample_metadata))
72
73 description_column <- ncol(sample_metadata)
74 if(args$sample_description_col > 0)
75 {
76 description_column <- args$sample_description_col
77 }
78
79 Y <- factor(sample_metadata[[description_column]])
80
81 print("Y factor matrix:")
82 print(Y)
83
84 ## Read and prepare block datasets
85 list_X <- c()
86 keepX <- c()
87
88 for(i in 1:nrow(args$blocks_list))
89 {
90 # Read block input parameters
91 block_name <- args$blocks_list[i,1]
92 block_keep <- strtoi(args$blocks_list[i,2])
93 block_data_matrix_filename <- args$blocks_list[i,3]
94 # block_meta_var <- args$blocks_list[i,4]
95
96 print(sprintf("Processing block %s", block_name))
97
98 # Store block data matrices
99 block_data_matrix <- t(read.table(block_data_matrix_filename, sep='\t', header=TRUE, row.names=1)) # transpose the matrix so that the samples become rows and the variables become columns
100 block_data_matrix_names <- row.names(block_data_matrix)
101 # print(block_data_matrix_names)
102
103 if(!identical(sample_metadata_names, block_data_matrix_names))
104 {
105 stop("Sample names must be the same and in the same order in the sample metadata matrix and the block data matrix")
106 }
107
108 list_X[[block_name]] <- block_data_matrix
109
110 # Set the nb of variables to keep
111 nb_variables = ncol(list_X[[block_name]])
112 if(block_keep > 0)
113 {
114 keepX[[block_name]] <- rep(block_keep, args$ncomp)
115 }
116 else
117 {
118 keepX[[block_name]] <- rep(nb_variables, args$ncomp)
119 }
120 print(sprintf("Block %s contains %d variables and %d will be selected", block_name, nb_variables, block_keep))
121 }
122
123 # print(list_X)
124
125 ## Generate design matrix
126 block_nb <- nrow(args$blocks_list)
127
128 design <- matrix(0, nrow = block_nb, ncol = block_nb)
129
130 if(args$correlation)
131 {
132 design <- matrix(1, nrow = block_nb, ncol = block_nb)
133 diag(design) <- 0
134 }
135
136 # print("Design matrix:")
137 # print(design)
138
139 ###################
140 ## Main function ##
141 ###################
142
143 mixomics_result <- block.splsda(X = list_X,
144 Y = Y,
145 ncomp = args$ncomp,
146 keepX = keepX,
147 design = design,
148 scheme = args$scheme,
149 mode = args$mode,
150 scale = args$scale,
151 init = args$init,
152 tol = args$tol,
153 max.iter = args$maxiter,
154 near.zero.var = args$nearzerovar,
155 all.outputs = TRUE)
156
157 print("Block.splsda object:")
158 print(mixomics_result)
159
160 ## Save output Rdata file
161 save(mixomics_result, file=args$rdata_out)
162
163 ## Save output sample metadata file
164 # print("Block.splsda variates:")
165 # print(mixomics_result$variates)
166
167 for(bname in names(mixomics_result$variates))
168 {
169 # print(bname)
170 # print(mixomics_result$variates[[bname]])
171
172 # Format the column names to add the block name and replace spaces
173 colnames(mixomics_result$variates[[bname]]) <- paste("block.splsda", bname, gsub(" ", "_", colnames(mixomics_result$variates[[bname]])), sep = "_")
174 # print(mixomics_result$variates[[bname]])
175
176 # Append the new columns to the sample metadata matrix
177 sample_metadata <- cbind2(sample_metadata, mixomics_result$variates[[bname]])
178 }
179
180 # print(sample_metadata)
181
182 write.table(sample_metadata, file = args$sample_metadata_out, quote = TRUE, sep = "\t", row.names = TRUE, col.names = NA)
183
184 ## Save output variable metadata files in output directory
185 # print("Block.splsda loadings:")
186 # print(mixomics_result$loadings)
187
188 for(i in 1:nrow(args$blocks_list))
189 {
190 # Read again block input parameters
191 block_name <- args$blocks_list[i,1]
192 # block_keep <- strtoi(args$blocks_list[i,2])
193 # block_data_matrix_filename <- args$blocks_list[i,3]
194 block_meta_var <- args$blocks_list[i,4]
195
196 meta_variable <- mixomics_result$loadings[[block_name]]
197 # print(head(meta_variable))
198
199 # Read input block variable metadata files if provided (optional)
200 if(block_meta_var != "None")
201 {
202 input_meta_variable <- read.table(block_meta_var, sep='\t', header=TRUE, row.names=1)
203 # print(head(input_meta_variable))
204
205 # Append the new columns to the variable metadata matrix
206 meta_variable <- cbind2(input_meta_variable, meta_variable)
207 }
208
209 # print(head(meta_variable))
210
211 block_meta_var_output_filename <- paste("mixomics_blocksplsda_block_", block_name, "_variable_metadata.tsv", sep="")
212 write.table(meta_variable, file = paste(args$variable_metadata_outdir,block_meta_var_output_filename, sep='/'), quote = TRUE, sep = "\t", row.names = TRUE, col.names = NA)
213 }