comparison manipulate_eset.xml @ 0:22232092be53 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit d007ae51743e621dc47524f681501e72ef3a2910"
author bgruening
date Mon, 02 May 2022 09:59:18 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:22232092be53
1 <tool id="music_manipulate_eset" name="Manipulate Expression Set Object" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
2 profile="21.09" license="GPL-3.0-or-later" >
3 <description>Manipulate ExpressionSet objects by a variety of attributes</description>
4 <macros>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 cat '$conf' >> /dev/stderr &&
10 Rscript --vanilla '$conf'
11
12 ]]></command>
13 <configfiles>
14 <configfile name="conf" >
15 suppressWarnings(suppressPackageStartupMessages(library(xbioc)))
16 suppressWarnings(suppressPackageStartupMessages(library(MuSiC)))
17
vec_ranges = function(vstr) {
    ## Expand a comma-separated list of indices and colon-delimited ranges
    ## into one flat integer vector, e.g. '3:1,22,12:15' becomes
    ## 3,2,1,22,12,13,14,15 (a descending range counts downwards).
    ## Returns NULL when vstr holds no usable token at all.
    tokens = trimws(unlist(strsplit(vstr, split=",")))
    ## Skip blank tokens (as in '1,,3'): they carry no index and would
    ## otherwise abort the script with a subscript error.
    tokens = tokens[nzchar(tokens)]
    ## lapply always yields a list, so unlist() produces a plain vector even
    ## when every token is a range of the same length.
    unlist(lapply(tokens, function(tok) {
        bounds = as.integer(unlist(strsplit(tok, split=":")))
        if (length(bounds) > 1) {
            seq(bounds[[1]], bounds[[2]])
        } else {
            bounds[[1]]
        }
    }))
}
30
null_str_vec = function(gstr){
    ## Break a comma-delimited string into its pieces.
    ## Gives NULL when splitting leaves nothing (e.g. for ''), otherwise a
    ## character vector holding one element per piece.
    pieces = unlist(strsplit(gstr, split=","))
    n_pieces = length(pieces)
    if (n_pieces == 0) {
        NULL
    } else if (n_pieces == 1) {
        pieces[[1]]
    } else {
        pieces
    }
}
41
get_subs = function(values, by_method, total = NULL){
    ## Translate one raw form value into an index for eset[genes, samples].
    ##   values    - the text entered in the form, as a single string
    ##   by_method - "subsample", "labels" or "range_and_index"
    ##   total     - size of the dimension being indexed. When given,
    ##               "subsample" draws from all of 1..total; when omitted the
    ##               older behaviour (a shuffle of 1..N) is kept.
    ## A blank value, or an unrecognised by_method, returns TRUE: a logical
    ## TRUE index is recycled and therefore keeps the whole dimension.
    keep_all = TRUE
    if (length(values) == 0 || !nzchar(trimws(values[[1]]))) {
        return(keep_all)
    }
    if (by_method == "subsample") {
        n = suppressWarnings(as.integer(values))
        if (is.na(n)) {
            return(keep_all)
        }
        if (is.null(total)) {
            sample(n)
        } else {
            ## sample(n) on its own only shuffles 1..n, so it always picks the
            ## first n entries; draw n distinct positions out of all of them.
            sample.int(total, max(0L, min(n, total)))
        }
    } else if (by_method == "labels") {
        null_str_vec(values)
    } else if (by_method == "range_and_index") {
        vec_ranges(values)
    } else {
        keep_all
    }
}

## Load the primary dataset and, when requested, append the additional ones
## in the order they were supplied.
rds_eset = readRDS('$rds_eset')
#if str($combine_eset.do) == "Yes":
new_eset = combine(rds_eset,
#for $e, $egroup in enumerate($combine_eset.eset_group):
    readRDS('$egroup.eset')
#if $e != len($combine_eset.eset_group)-1
    ,
#end if
#end for
)
#else
new_eset = rds_eset
#end if
sub_eset = new_eset
#if str($subset_eset.do) == "Yes":
#if str($subset_eset.subset_yes.by) == "phenotype":
## Each sample filter narrows the columns using a phenoData column.
#for $s, $sgroup in enumerate($subset_eset.subset_yes.pheno_samples)
sub_eset = sub_eset[ ,sub_eset[['$sgroup.column']] %in% c(null_str_vec('$sgroup.values'))]
#end for
## '[[' on an ExpressionSet reads sample-level phenoData, so gene filters
## look the column up in the feature data to get one value per row.
#for $g, $ggroup in enumerate($subset_eset.subset_yes.pheno_genes)
sub_eset = sub_eset[Biobase::fData(sub_eset)[['$ggroup.column']] %in% c(null_str_vec('$ggroup.values')), ]
#end for
#else
## Pass the dimension sizes so random subsampling covers the whole dataset.
genes = get_subs('$subset_eset.subset_yes.genes', '$subset_eset.subset_yes.by', total = nrow(sub_eset))
samples = get_subs('$subset_eset.subset_yes.samples', '$subset_eset.subset_yes.by', total = ncol(sub_eset))
sub_eset = sub_eset[genes, samples]
#end if
#end if

## print data to stdout
print(sub_eset)
saveRDS(sub_eset, file= '$out_eset')
86
87 </configfile>
88 </configfiles>
89 <inputs>
90 <param name="rds_eset" label="Expression Set Dataset" type="data" format="@RDATATYPE@" />
91 <conditional name="combine_eset" >
92 <param name="do" type="select" label="Concatenate other Expression Set objects?"
93 help="Phenotype data must match between objects, and objects will be concatenated in the order given below." >
94 <option value="No" selected="true" >No</option>
95 <option value="Yes" >Yes</option>
96 </param>
97 <when value="No" ></when>
98 <when value="Yes" >
99 <repeat name="eset_group" title="Additional Dataset" min="1" >
100 <param name="eset" label="Expression Set Dataset" type="data" format="@RDATATYPE@" />
101 </repeat>
102 </when>
103 </conditional>
104 <conditional name="subset_eset" >
105 <param name="do" type="select" label="Subset the dataset?"
106 help="If multiple objects are concatenated as in the above section, the resulting object will be subsetted." >
107 <option value="No" selected="true" >No</option>
108 <option value="Yes" >Yes</option>
109 </param>
110 <when value="No" ></when>
111 <when value="Yes" >
112 <conditional name="subset_yes" >
113 <param name="by" type="select" label="By"
114 help="e.g. random subsampling, index ranges and indices, specific labels, phenotype conditions" >
115 <option value="subsample" selected="true" >Random Subsample</option>
116 <option value="labels" >Specific Labels</option>
117 <option value="range_and_index" >Index Ranges and Specific Indices</option>
118 <option value="phenotype" >Filter Samples and Genes by Phenotype Values</option>
119 </param>
120 <when value="subsample" >
121 <param name="samples" type="integer" label="Select N Samples"
122 value="" optional="true" help="e.g. '10' will select 10 random samples." />
123 <param name="genes" type="integer" label="Select N Genes"
124 value="" optional="true" help="e.g. '123' will select 123 random genes." />
125 </when>
126 <when value="labels" >
127 <param name="samples" type="text" label="List of Sample Labels, comma-delimited"
128 value="" optional="true" help="e.g. 'Control1,ALPOL56,SampleX' would select just those 3 samples." />
129 <param name="genes" type="text" label="List of Gene Labels, comma-delimited"
130 value="" optional="true" help="e.g. 'GeneA,GeneX,Gene123' would select just those 3 genes." />
131 </when>
132 <when value="range_and_index" >
133 <param name="samples" type="text"
134 label="List of Sample Indexes, ranges are colon-delimited, separated by commas."
135 value="" optional="true" help="e.g. '5:3,57:60,27' would yield '5,4,3,57,58,59,60,27' " />
136 <param name="genes" type="text"
137 label="List of Gene Indexes, ranges are colon-delimited, separated by commas."
138 value="" optional="true" help="e.g. '15:18,26,27,3:1' would yield '15,16,17,18,26,27,3,2,1' " />
139 </when>
140 <when value="phenotype" >
141 <repeat name="pheno_samples" title="Filter Samples by Condition" min="0" >
142 <param name="column" type="text" value="" label="Name of phenotype column"
143 help="e.g. 'gender' or 'control' etc"/>
144 <param name="values" type="text" value="" label="List of values in this column to filter for, comma-delimited"
145 help="e.g. 'female,unknown' selects only samples with values in the above phenotype column of 'female' and 'unknown'."/>
146 </repeat>
147 <repeat name="pheno_genes" title="Filter Genes by Condition" min="0" >
148 <param name="column" type="text" value="" label="Name of phenotype column"
149 help="e.g. 'housekeeping' or 'marker' etc"/>
150 <param name="values" type="text" value="" label="List of values in this column to filter for, comma-delimited"
151 help="e.g. 'yes,unknown' selects only genes with values in the above column of 'yes' and 'unknown'."/>
152 </repeat>
153 </when>
154 </conditional>
155 </when>
156 </conditional>
157 </inputs>
158 <outputs>
159 <data name="out_eset" format="@RDATATYPE@" label="${tool.name} on ${on_string}: ExpressionSet Object" />
160 </outputs>
161 <tests>
162 <test expect_num_outputs="1" >
163 <!-- No operation, do nothing -->
164 <param name="rds_eset" value="Control_Bulk.rds" />
165 <output name="out_eset" value="Control_Bulk.rds" compare="sim_size" />
166 </test>
167 <test expect_num_outputs="1" >
168 <!-- No concat, subset by ranges -->
169 <param name="rds_eset" value="Control_Bulk.rds" />
170 <conditional name="combine_eset" >
171 <param name="do" value="No" />
172 </conditional>
173 <conditional name="subset_eset" >
174 <param name="do" value="Yes" />
175 <conditional name="subset_yes" >
176 <param name="by" value="range_and_index" />
177 <param name="samples" value="2:3" />
178 <param name="genes" value="100:20,22,1:5" />
179 </conditional>
180 </conditional>
181 <assert_stdout>
182 <has_text text="assayData: 87 features, 2 samples" />
183 </assert_stdout>
184 </test>
185 <test expect_num_outputs="1" >
186 <!-- Concat and subset by ranges -->
187 <param name="rds_eset" value="Control_Bulk.rds" />
188 <conditional name="combine_eset" >
189 <param name="do" value="Yes" />
190 <repeat name="eset_group" >
191 <param name="eset" value="APOL1_Bulk.rds" />
192 </repeat>
193 <repeat name="eset_group" >
194 <param name="eset" value="Control_Bulk.rds" />
195 </repeat>
196 </conditional>
197 <conditional name="subset_eset" >
198 <param name="do" value="Yes" />
199 <conditional name="subset_yes" >
200 <param name="by" value="range_and_index" />
201 <param name="samples" value="5:7,1" />
202 <param name="genes" value="100:20,22,1:3" />
203 </conditional>
204 </conditional>
205 <assert_stdout>
206 <has_text text="assayData: 85 features, 4 samples" />
207 </assert_stdout>
208 </test>
209 <test expect_num_outputs="1" >
210 <!-- Concat and subset by labels -->
211 <param name="rds_eset" value="Control_Bulk.rds" />
212 <conditional name="combine_eset" >
213 <param name="do" value="Yes" />
214 <repeat name="eset_group" >
215 <param name="eset" value="APOL1_Bulk.rds" />
216 </repeat>
217 </conditional>
218 <conditional name="subset_eset" >
219 <param name="do" value="Yes" />
220 <conditional name="subset_yes" >
221 <param name="by" value="labels" />
222 <param name="samples" value="control.NA.27,control.NA.39" />
223 <param name="genes" value="Nqo1,Card14,Scube2,Nup214" />
224 </conditional>
225 </conditional>
226 <assert_stdout>
227 <has_text text="assayData: 4 features, 2 samples" />
228 <has_text text="sampleNames: control.NA.27 control.NA.39" />
229 </assert_stdout>
230 </test>
231 <test expect_num_outputs="1" >
232 <!-- Concat and subset by filtering phenotype data -->
233 <param name="rds_eset" value="Control_Bulk.rds" />
234 <conditional name="combine_eset" >
235 <param name="do" value="Yes" />
236 <repeat name="eset_group" >
237 <param name="eset" value="APOL1_Bulk.rds" />
238 </repeat>
239 <repeat name="eset_group" >
240 <param name="eset" value="Control_Bulk.rds" />
241 </repeat>
242 </conditional>
243 <conditional name="subset_eset" >
244 <param name="do" value="Yes" />
245 <conditional name="subset_yes" >
246 <param name="by" value="phenotype" />
247 <repeat name="pheno_samples" >
248 <param name="column" value="Control" />
249 <param name="values" value="control" />
250 </repeat>
251 <repeat name="pheno_samples" >
252 <param name="column" value="sampleID" />
253 <param name="values" value="3" />
254 </repeat>
255 </conditional>
256 </conditional>
257 <assert_stdout>
258 <has_text text="assayData: 19033 features, 1 samples" />
259 <has_text text="sampleNames: control.NA.39" />
260 </assert_stdout>
261 </test>
262 <test expect_num_outputs="1" >
263 <!-- Concat and random subsample -->
264 <param name="rds_eset" value="Control_Bulk.rds" />
265 <conditional name="combine_eset" >
266 <param name="do" value="Yes" />
267 <repeat name="eset_group" >
268 <param name="eset" value="APOL1_Bulk.rds" />
269 </repeat>
270 <repeat name="eset_group" >
271 <param name="eset" value="Control_Bulk.rds" />
272 </repeat>
273 </conditional>
274 <conditional name="subset_eset" >
275 <param name="do" value="Yes" />
276 <conditional name="subset_yes" >
277 <param name="by" value="subsample" />
278 <param name="samples" value="3" />
279 <param name="genes" value="25" />
280 </conditional>
281 </conditional>
282 <assert_stdout>
283 <has_text text="assayData: 25 features, 3 samples" />
284 </assert_stdout>
285 </test>
286 </tests>
287 <help><![CDATA[
288 Manipulate an ExpressionSet object by concatenation and/or subsetting.
289
290 For more options and information, consult `the manual <http://www.bioconductor.org/packages/release/bioc/vignettes/Biobase/inst/doc/ExpressionSetIntroduction.pdf>`_ and the `rdocumentation <https://www.rdocumentation.org/packages/Biobase/versions/2.32.0/topics/ExpressionSet>`_
291 .
292 ]]></help>
293 <citations>
294 <citation type="bibtex">
295 @misc{falcon2007introduction,
296 title={An introduction to bioconductor’s expressionset class},
297 author={Falcon, Seth and Morgan, Martin and Gentleman, Robert},
298 year={2007}
299 }
300 </citation>
301 </citations>
302 </tool>