comparison music-deconvolution.xml.orig @ 6:fb36f390cc52 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit d5c7ca22af1d4f0eaa7a607886554bebb95e8c50
author bgruening
date Mon, 28 Oct 2024 17:32:19 +0000
parents
children
comparison
equal deleted inserted replaced
5:2ba99a52bd44 6:fb36f390cc52
1 <tool id="music_deconvolution" name="MuSiC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
2 profile="21.09" license="GPL-3.0-or-later" >
3 <description>estimate cell type proportions in bulk RNA-seq data</description>
4 <macros>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="requirements" />
8 <command detect_errors="exit_code" ><![CDATA[
9 mkdir report_data &&
10 Rscript --vanilla '$__tool_directory__/scripts/${do.method}.R' '$conf'
11 ]]></command>
12 <configfiles>
13 <configfile name="conf" >
14
15 null_str_vec = function(gstr){
16 ## Comma-split gstr: NULL for no tokens, a scalar for one token, else the whole vector.
17 parts = unlist(as.vector(strsplit(gstr, split=",")))
18 n_parts = length(parts)
19 if (n_parts > 1){
20 return(parts)
21 }
22 if (n_parts == 1) return(parts[[1]])
23 return(NULL)
24 }
25
26 bulk_eset = readRDS('$bulk_eset')
27 scrna_eset = readRDS('$scrna_eset')
28 use_disease_factor = FALSE
29 maxyscale = NA
30
31 #if str($do.method) == "estimateprops":
32
33 maxyscale = as.numeric('$do.maxyscale') ## yields "NA" if blank
34 phenotype_factors = null_str_vec('$do.phenotype_factors')
35 phenotype_factors_always_exclude = null_str_vec('$do.phenotype_factors_always_exclude')
36 celltypes_label = null_str_vec('$do.celltypes_label')
37 samples_label = null_str_vec('$do.samples_label')
38 celltypes = null_str_vec('$do.celltypes')
39 methods = c("MuSiC", "NNLS")
40
41 #if str($do.disease_factor.use) == "yes":
42 use_disease_factor = TRUE ## overrides the FALSE default set before the method branches
44 phenotype_scrna_target = null_str_vec('$do.disease_factor.phenotype_scrna_target') ## target scRNA cell type from the form
47 phenotype_target = null_str_vec('$do.disease_factor.phenotype_target')
48 phenotype_target_threshold = as.numeric('$do.disease_factor.phenotype_target_threshold')
49 sample_disease_group = null_str_vec('$do.disease_factor.sample_disease_group')
50 sample_disease_group_scale = as.integer('$do.disease_factor.sample_disease_group_scale')
55 #end if
56
57 outfile_pdf='$out_pdf'
58
59 #elif str($do.method) == "dendrogram":
60
61 celltypes_label = null_str_vec('$do.celltypes_label')
62 clustertype_label = null_str_vec('$do.clustertype_label')
63 samples_label = null_str_vec('$do.samples_label')
64 celltypes = null_str_vec('$do.celltypes')
65
66 data.to.use = list(
67 #for $i, $repeat in enumerate( $do.cluster_groups )
68 #if $i == 0:
69 $repeat.cluster_id = list(cell.types = null_str_vec('$repeat.celltypes'),
70 marker.names = null_str_vec('$repeat.marker_name'),
71 marker.list = read_list('$repeat.marker_list'))
72 #else
73 , $repeat.cluster_id = list(cell.types = null_str_vec('$repeat.celltypes'),
74 marker.names = null_str_vec('$repeat.marker_name'),
75 marker.list = read_list('$repeat.marker_list'))
76 #end if
77 #end for
78 )
79
80 outfile_pdf='$out_pdf'
81 outfile_tab='$out_tab'
82
83 #else
84 stop("No such option")
85 #end if
86
87 </configfile>
88 </configfiles>
89 <inputs>
90 <param name="scrna_eset" label="scRNA Dataset" type="data" format="@RDATATYPE@" />
91 <param name="bulk_eset" label="Bulk RNA Dataset" type="data" format="@RDATATYPE@" />
92 <conditional name="do" >
93 <param name="method" type="select" label="Purpose" >
94 <!-- The values here correspond to script names in the scripts folder
95 and must remain so -->
96 <option value="estimateprops">Estimate Proportions</option>
97 <option value="dendrogram">Compute Dendrogram</option>
98 </param>
99 <when value="estimateprops" >
100 <param name="celltypes_label" type="text" value="cellType"
101 label="Cell Types Label from scRNA dataset" >
102 <expand macro="validator_text" />
103 </param>
104 <param name="samples_label" type="text" value="sampleID"
105 label="Samples Identifier from scRNA dataset" >
106 <expand macro="validator_text" />
107 </param>
108 <expand macro="celltypes_macro" />
109 <param name="phenotype_factors" type="text"
110 label="Phenotype factors"
111 help="List of phenotypes factors to be used in the linear regression. Please make sure that each factor has more than one unique value. Names correspond to column names in the bulk RNA dataset phenotype table. If blank, then treat all bulk phenotype columns as factors." >
112 <expand macro="validator_index_identifiers" />
113 </param>
114 <param name="phenotype_factors_always_exclude" type="text"
115 label="Excluded phenotype factors"
116 help="List of phenotype factors to always exclude in the analysis"
117 value="sampleID,SubjectName" >
118 <expand macro="validator_index_identifiers" />
119 </param>
120 <conditional name="disease_factor" >
121 <param name="use" type="select" label="Show proportions of a disease factor?" >
122 <option value="no" selected="true" >No</option>
123 <option value="yes" >Yes</option>
124 </param>
125 <when value="no" ></when>
126 <when value="yes" > <!-- shown only when a disease-factor comparison is requested -->
128 <param name="phenotype_scrna_target" type="text" label="scRNA Phenotype Cell Target"
129 help="The name of a target scRNA cell type to select in the phenotype comparison." >
130 <expand macro="validator_text" />
131 </param>
132 <param name="phenotype_target" type="text" label="Bulk Phenotype Target"
133 help="MUST exist in the bulk RNA datasets phenotype factors as above." >
134 <expand macro="validator_text" />
135 </param>
136 <param name="phenotype_target_threshold" type="float" label="Bulk Phenotype Target Threshold"
137 value="-99"
138 help="The (%) threshold at which the phenotype target manifests. Leave at -99 to select all." >
139 </param>
140 <param name="sample_disease_group" type="text" label="scRNA Sample Disease Group"
141 help="Name for target disease group, ideally a value from the scRNA phenotype factor data" >
142 <expand macro="validator_text" />
143 </param>
144 <param name="sample_disease_group_scale" type="integer"
145 label="scRNA Sample Disease Group (Scale)" value="5"
146 help="Used to accentuate certain features in the plots. Increase this number to reduce the effect." />
167 </when>
168 </conditional>
169 <param name="maxyscale" type="float" min="0" value="" optional="true"
170 label="Scale all Y-axes to max limit" help="Leave blank to autoscale each plot."/>
171 </when>
172 <when value="dendrogram" >
173 <param name="celltypes_label" type="text" value="cellType"
174 label="Cell Types Label from scRNA dataset" >
175 <expand macro="validator_text" />
176 </param>
177 <param name="clustertype_label" type="text" value="clusterType"
178 label="Cluster Types Label from scRNA dataset" >
179 <expand macro="validator_text" />
180 </param>
181 <param name="samples_label" type="text" value="sampleID"
182 label="Samples Identifier from scRNA dataset" >
183 <expand macro="validator_text" />
184 </param>
185 <expand macro="celltypes_macro" />
186 <repeat name="cluster_groups" title="Cluster Groups" min="0"
187 help="Insert cell cluster groups based on a previous clustering." >
188 <param name="cluster_id" label="Cluster ID" type="text" value=""
189 help="e.g. C1 or Cluster1, etc." />
190 <expand macro="celltypes_macro" />
191 <param name="marker_name" label="Marker Gene Group Name" type="text"
192 optional="true" value=""
193 help="Name of the list of gene markers used to describe the marker list supplied below." >
194 <expand macro="validator_text" />
195 </param>
196 <param name="marker_list" label="List of Gene Markers" type="data" format="txt,tabular"
197 optional="true"
198 help="A single column of marker genes" />
199 </repeat>
200 </when>
201 </conditional>
202 </inputs>
203 <outputs>
204 <data name="out_pdf" format="pdf" label="${tool.name} on ${on_string}: PDF Plots" />
205 <data name="out_tab" format="tabular" label="${tool.name} on ${on_string}: Cell Proportions by Sample" >
206 <filter>do["method"] == "dendrogram" and len(do["cluster_groups"]) >0</filter>
207 </data>
208 <collection name="props" type="list" label="${tool.name} on ${on_string}: Proportion Matrices" >
209 <filter>do["method"] == "estimateprops"</filter>
210 <discover_datasets pattern="prop_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="report_data" />
211 </collection>
212 <collection name="summaries" type="list" label="${tool.name} on ${on_string}: Summaries and Logs">
213 <filter>do["method"] == "estimateprops" and do["disease_factor"]["use"] == "yes"</filter>
214 <discover_datasets pattern="summ_(?P&lt;designation&gt;.+)\.txt" format="txt" directory="report_data" />
215 <discover_datasets pattern="varprop_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="report_data" />
216 <discover_datasets pattern="rsquared_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="report_data" />
217 <discover_datasets pattern="weightgene_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="report_data" />
218 </collection>
219 </outputs>
220 <tests>
221 <test expect_num_outputs="1" >
222 <!-- Dendrogram test 1 -->
223 <param name="bulk_eset" value="Mousebulkeset.rds" />
224 <param name="scrna_eset" value="Mousesubeset.degenesonly2.half.rds" />
225 <conditional name="do" >
226 <param name="method" value="dendrogram" />
227 <param name="celltypes_label" value="cellType" />
228 <param name="samples_label" value="sampleID" />
229 <param name="celltypes" value="Endo,Podo,PT,LOH,DCT,CD-PC,CD-IC,Fib,Macro,Neutro,B lymph,T lymph,NK" />
230 </conditional>
231 <output name="out_pdf" value="dendro_1.pdf" compare="sim_size" />
232 </test>
233 <test expect_num_outputs="2" >
234 <!-- Dendrogram test 2 -->
235 <param name="bulk_eset" value="Mousebulkeset.rds" />
236 <param name="scrna_eset" value="Mousesubeset.degenesonly2.half.rds" />
237 <conditional name="do" >
238 <param name="method" value="dendrogram" />
239 <param name="celltypes_label" value="cellType" />
240 <param name="samples_label" value="sampleID" />
241 <param name="celltypes" value="Endo,Podo,PT,LOH,DCT,CD-PC,CD-IC,Fib,Macro,Neutro,B lymph,T lymph,NK" />
242 <repeat name="cluster_groups" >
243 <param name="cluster_id" value="C1" />
244 <param name="celltypes" value="Neutro" />
245 </repeat>
246 <repeat name="cluster_groups" >
247 <param name="cluster_id" value="C2" />
248 <param name="celltypes" value="Podo" />
249 </repeat>
250 <repeat name="cluster_groups" >
251 <param name="cluster_id" value="C3" />
252 <param name="celltypes" value="Endo,CD-PC,LOH,CD-IC,DCT,PT" />
253 <param name="marker_name" value="Epithelial" />
254 <param name="marker_list" value="epith.markers" />
255 </repeat>
256 <repeat name="cluster_groups" >
257 <param name="cluster_id" value="C4" />
258 <param name="celltypes" value="Macro,Fib,B lymph,NK,T lymph" />
259 <param name="marker_name" value="Immune" />
260 <param name="marker_list" value="immune.markers" />
261 </repeat>
262 </conditional>
263 <output name="out_pdf" value="dendro.pdf" compare="sim_size" />
264 <output name="out_tab">
265 <assert_contents>
266 <has_text_matching expression="^\s+Neutro\s+Podo\s+Endo" />
267 <has_text text="APOL1.GNA78M"/>
268 </assert_contents>
269 </output>
270 </test>
271 <test expect_num_outputs="2" >
272 <!-- Estimate Proportions: no disease factor, no fitting reports -->
273 <param name="bulk_eset" value="GSE50244bulkeset.subset.rds" />
274 <param name="scrna_eset" value="EMTABesethealthy.subset.rds" />
275 <conditional name="do" >
276 <param name="method" value="estimateprops" />
277 <param name="celltypes_label" value="cellType" />
278 <param name="samples_label" value="sampleID" />
279 <param name="disease_factor" value="no" />
280 </conditional>
281 <output name="out_pdf" value="default_output_no_disease.pdf" compare="sim_size" />
282 </test>
283 <test expect_num_outputs="3" >
284 <!-- Estimate Proportions: no disease factor. NOTE(review): the inputs match the preceding test, which expects 2 outputs; this one also expects the "summaries" collection, whose output filter requires disease_factor use == "yes". Confirm this test can pass as written. -->
285 <param name="bulk_eset" value="GSE50244bulkeset.subset.rds" />
286 <param name="scrna_eset" value="EMTABesethealthy.subset.rds" />
287 <conditional name="do" >
288 <param name="method" value="estimateprops" />
289 <param name="celltypes_label" value="cellType" />
290 <param name="samples_label" value="sampleID" />
291 <param name="disease_factor" value="no" />
292 </conditional>
293 <output name="out_pdf" value="default_output_no_disease.pdf" compare="sim_size" />
294 <output_collection name="summaries" count="5">
295 <element name="Log of MuSiC fitting" ftype="txt">
296 <assert_contents>
297 <has_text text="Residual standard error: 0.1734 on 72 degrees of freedom"/>
298 </assert_contents>
299 </element>
300 <element name="Log of NNLS fitting" ftype="txt">
301 <assert_contents>
302 <has_text text="Residual standard error: 0.2687 on 72 degrees of freedom"/>
303 </assert_contents>
304 </element>
305 </output_collection>
306 </test>
307 <test expect_num_outputs="3" >
308 <!-- Estimate Proportions test -->
309 <param name="bulk_eset" value="GSE50244bulkeset.subset.rds" />
310 <param name="scrna_eset" value="EMTABesethealthy.subset.rds" />
311 <conditional name="do" >
312 <param name="method" value="estimateprops" />
313 <param name="celltypes_label" value="cellType" />
314 <param name="samples_label" value="sampleID" />
315 <param name="celltypes" value="alpha,beta,delta,gamma,acinar,ductal" />
316 <conditional name="disease_factor" > <!-- exercises the "yes" branch of the disease_factor conditional -->
317 <param name="use" value="yes" />
319 <param name="phenotype_scrna_target" value="beta" />
322 <param name="phenotype_factors" value="age,bmi,hba1c,gender" /> <!-- NOTE(review): this input is declared under "do", not "disease_factor"; confirm the test framework still binds it here -->
323 <param name="phenotype_target" value="hba1c" />
324 <param name="phenotype_target_threshold" value="6.5" />
325 <param name="sample_disease_group" value="T2D" />
326 <param name="sample_disease_group_scale" value="5" />
331 </conditional>
332 </conditional>
333 <output name="out_pdf" value="default_output.pdf" compare="sim_size" />
334 <output_collection name="summaries" count="5">
335 <element name="Log of MuSiC fitting" ftype="txt">
336 <assert_contents>
337 <has_text text="Residual standard error: 0.1704 on 72 degrees of freedom"/>
338 </assert_contents>
339 </element>
340 <element name="Log of NNLS fitting" ftype="txt">
341 <assert_contents>
342 <has_text text="Residual standard error: 0.0645 on 72 degrees of freedom"/>
343 </assert_contents>
344 </element>
345 </output_collection>
346 </test>
347 </tests>
348 <help><![CDATA[
349 MuSiC utilizes cell-type specific gene expression from single-cell RNA sequencing (RNA-seq) data to characterize cell type compositions from bulk RNA-seq data in complex tissues. By appropriate weighting of genes showing cross-subject and cross-cell consistency, MuSiC enables the transfer of cell type-specific gene expression information from one dataset to another.
350
351 Solid tissues often contain closely related cell types which leads to collinearity. To deal with collinearity, MuSiC employs a tree-guided procedure that recursively zooms in on closely related cell types. Briefly, we first group similar cell types into the same cluster and estimate cluster proportions, then recursively repeat this procedure within each cluster.
352
353 ]]></help>
354 <citations>
355 <citation type="doi">10.1038/s41467-018-08023-x</citation> <!-- bare DOI; a doi-type citation takes the identifier, not a resolver URL -->
356 </citations>
357 </tool>