comparison music-deconvolution.xml @ 0:224721e76869 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit 08c6fd3885bdfbf8b5c3f4dcc2d04729b577e3e1"
author bgruening
date Sun, 12 Sep 2021 19:48:48 +0000
parents
children 3ca0132c182a
comparison
equal deleted inserted replaced
-1:000000000000 0:224721e76869
1 <tool id="music_deconvolution" name="MuSiC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
2 profile="20.05" license="GPL-3.0-or-later" >
3 <description>estimate cell type proportions in bulk RNA-seq data</description>
4 <macros>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="requirements" />
<!-- Creates report_data (the directory scanned by the "summaries" output collection), then runs
     the R script named after the selected method with the rendered "conf" file as its only argument. -->
8 <command detect_errors="exit_code"><![CDATA[
9 mkdir report_data &&
10 Rscript --vanilla '$__tool_directory__/scripts/${do.method}.R' '$conf'
11 ]]></command>
12 <configfiles>
<!-- "conf" is a Cheetah template rendered to an R snippet; the command section passes its
     path to the R script. It defines null_str_vec(), which splits a comma separated string
     and returns NULL when there are no tokens, loads both datasets with readRDS(), and then
     emits one set of variables for the selected method. In the dendrogram branch each
     cluster_groups repeat becomes one named entry of data.to.use, with cluster_id written
     unquoted as the entry name.
     NOTE(review): read_list() is not defined in this template; presumably the R scripts
     define it before evaluating this file. Confirm against the scripts folder. -->
13 <configfile name="conf" >
14
15 null_str_vec = function(gstr){
16 tokens = unlist(as.vector(strsplit(gstr, split=",")))
17 if (length(tokens) == 0){
18 return(NULL)
19 }
20 if (length(tokens) == 1){
21 return(tokens[[1]])
22 }
23 return(tokens)
24 }
25
26 bulk_eset = readRDS('$bulk_eset')
27 scrna_eset = readRDS('$scrna_eset')
28
29 #if str($do.method) == "estimateprops":
30
31 phenotype_factors = null_str_vec('$do.phenotype_factors')
32 celltypes_label = null_str_vec('$do.celltypes_label')
33 samples_label = null_str_vec('$do.samples_label')
34 celltypes = null_str_vec('$do.celltypes')
35 methods = null_str_vec('$do.methods')
36 phenotype_gene = null_str_vec('$do.phenotype_gene')
37 sample_groups = null_str_vec('$do.sample_groups')
38 sample_disease_group = null_str_vec('$do.sample_disease_group')
39 sample_disease_group_scale = as.integer('$do.sample_disease_group_scale')
40 healthy_phenotype = null_str_vec('$do.healthy_phenotype')
41 compare_title = null_str_vec('$do.compare_title')
42 outfile_pdf='$out_pdf'
43
44 #elif str($do.method) == "dendrogram":
45
46 celltypes_label = null_str_vec('$do.celltypes_label')
47 clustertype_label = null_str_vec('$do.clustertype_label')
48 samples_label = null_str_vec('$do.samples_label')
49 celltypes = null_str_vec('$do.celltypes')
50
51 data.to.use = list(
52 #for $i, $repeat in enumerate( $do.cluster_groups )
53 #if $i == 0:
54 $repeat.cluster_id = list(cell.types = null_str_vec('$repeat.celltypes'),
55 marker.names = null_str_vec('$repeat.marker_name'),
56 marker.list = read_list('$repeat.marker_list'))
57 #else
58 , $repeat.cluster_id = list(cell.types = null_str_vec('$repeat.celltypes'),
59 marker.names = null_str_vec('$repeat.marker_name'),
60 marker.list = read_list('$repeat.marker_list'))
61 #end if
62 #end for
63 )
64
65 outfile_pdf='$out_pdf'
66 outfile_tab='$out_tab'
67
68 #else
69 stop("No such option")
70 #end if
71
72 </configfile>
73 </configfiles>
74 <inputs>
<!-- Both datasets are loaded with readRDS() in the "conf" configfile. -->
75 <param name="scrna_eset" label="scRNA Dataset" type="data" format="rdata.eset" />
76 <param name="bulk_eset" label="Bulk RNA Dataset" type="data" format="rdata.eset" />
77 <conditional name="do" >
<!-- The selected value is interpolated into the script path in the command section and is
     also tested in the "conf" configfile to choose which variables are emitted. -->
78 <param name="method" type="select" label="Purpose" >
79 <!-- The values here correspond to script names in the scripts folder
80 and must remain so -->
81 <option value="estimateprops">Estimate Proportions</option>
82 <option value="dendrogram">Compute Dendrogram</option>
83 </param>
84 <when value="estimateprops" >
<!-- Every parameter in this branch is read by the estimateprops section of the "conf" configfile. -->
85 <param name="celltypes_label" type="text" value="cellType"
86 label="Cell Types Label from scRNA dataset" >
87 <expand macro="validator_text" />
88 </param>
89 <param name="samples_label" type="text" value="sampleID"
90 label="Samples Identifier from scRNA dataset" >
91 <expand macro="validator_text" />
92 </param>
93 <expand macro="celltypes_macro" />
94 <param name="methods" multiple="true" type="select" display="checkboxes" label="Cell Proportion Method" >
95 <option value="MuSiC" selected="true" />
96 <option value="NNLS" selected="true" />
97 </param>
98 <param name="phenotype_factors" type="text"
99 label="List of phenotype factors" help="If blank, then use all phenotypes." >
100 <expand macro="validator_index_identifiers" />
101 </param>
102 <param name="phenotype_gene" type="text" label="Causative Gene"
103 help="MUST exist in the phenotype factors above." >
104 <expand macro="validator_text" />
105 </param>
106 <param name="sample_groups" type="text" label="List of Sample Groups" >
107 <expand macro="validator_index_identifiers" />
108 </param>
109 <param name="sample_disease_group" type="text" label="Sample Disease Group"
110 help="MUST exist in the sample_groups above." >
111 <expand macro="validator_text" />
112 </param>
<!-- Passed through as.integer() in the configfile. -->
113 <param name="sample_disease_group_scale" type="integer"
114 label="Sample Disease Group (Scale)" value="5"
115 help="Used to accentuate certain features in the plots. Increase this number to reduce the effect." />
116 <param name="healthy_phenotype" type="text" label="Healthy Phenotype" >
117 <expand macro="validator_text" />
118 </param>
119 <param name="compare_title" type="text" label="Plot Title" >
120 <expand macro="validator_text" />
121 </param>
122 </when>
123 <when value="dendrogram" >
124 <param name="celltypes_label" type="text" value="cellType"
125 label="Cell Types Label from scRNA dataset" >
126 <expand macro="validator_text" />
127 </param>
<!-- Labelled separately from celltypes_label so the two fields can be told apart in the form. -->
128 <param name="clustertype_label" type="text" value="clusterType"
129 label="Cluster Types Label from scRNA dataset" >
130 <expand macro="validator_text" />
131 </param>
132 <param name="samples_label" type="text" value="sampleID"
133 label="Samples Identifier from scRNA dataset" >
134 <expand macro="validator_text" />
135 </param>
136 <expand macro="celltypes_macro" />
137 <repeat name="cluster_groups" title="Cluster Groups" min="2" >
<!-- cluster_id is written unquoted into the generated R code as a list entry name, so it is
     restricted to a plain R identifier; an empty or free-form value would produce invalid R. -->
138 <param name="cluster_id" label="Cluster ID" type="text" value=""
139 help="e.g. C1 or Cluster1, etc." >
<validator type="regex" message="Cluster ID must start with a letter and contain only letters, digits, dots or underscores">^[A-Za-z][A-Za-z0-9._]*$</validator>
</param>
140 <expand macro="celltypes_macro" />
141 <param name="marker_name" label="Marker Gene Group Name" type="text"
142 optional="true" value=""
143 help="Name of the list of gene markers used to describe the marker list supplied below." >
144 <expand macro="validator_text" />
145 </param>
146 <param name="marker_list" label="List of Gene Markers" type="data" format="txt,tabular"
147 optional="true"
148 help="A single column of marker genes" />
149 </repeat>
150 </when>
151 </conditional>
152 </inputs>
153 <outputs>
<!-- No filter: the PDF output is present for both methods. -->
154 <data name="out_pdf" format="pdf" label="${tool.name} on ${on_string}: PDF Plots" />
<!-- Tabular output, kept only when the dendrogram method is selected. -->
155 <data name="out_tab" format="tabular" label="${tool.name} on ${on_string}: Cell Proportions by Sample" >
156 <filter>do["method"] == "dendrogram"</filter>
157 </data>
<!-- estimateprops only: one list element per report_data/summ_<designation>.txt file,
     named after the captured designation. -->
158 <collection name="summaries" type="list" label="${tool.name} on ${on_string}: Method Summaries">
159 <filter>do["method"] == "estimateprops"</filter>
160 <discover_datasets pattern="summ_(?P&lt;designation&gt;.+)\.txt" format="txt"
161 directory="report_data" />
162 </collection>
163 </outputs>
164 <tests>
165 <test expect_num_outputs="2" >
166 <!-- Dendrogram test -->
<!-- Four cluster groups; the first two leave marker_name and marker_list unset.
     The PDF is compared by size only (sim_size). The table must contain a line starting with
     whitespace followed by the Neutro, Podo and Endo cluster columns, and the text APOL1.GNA78M. -->
167 <param name="bulk_eset" value="Mousebulkeset.rds" />
168 <param name="scrna_eset" value="Mousesubeset.degenesonly2.half.rds" />
169 <conditional name="do" >
170 <param name="method" value="dendrogram" />
171 <param name="celltypes_label" value="cellType" />
172 <param name="samples_label" value="sampleID" />
173 <param name="celltypes" value="Endo,Podo,PT,LOH,DCT,CD-PC,CD-IC,Fib,Macro,Neutro,B lymph,T lymph,NK" />
174 <repeat name="cluster_groups" >
175 <param name="cluster_id" value="C1" />
176 <param name="celltypes" value="Neutro" />
177 </repeat>
178 <repeat name="cluster_groups" >
179 <param name="cluster_id" value="C2" />
180 <param name="celltypes" value="Podo" />
181 </repeat>
182 <repeat name="cluster_groups" >
183 <param name="cluster_id" value="C3" />
184 <param name="celltypes" value="Endo,CD-PC,LOH,CD-IC,DCT,PT" />
185 <param name="marker_name" value="Epithelial" />
186 <param name="marker_list" value="epith.markers" />
187 </repeat>
188 <repeat name="cluster_groups" >
189 <param name="cluster_id" value="C4" />
190 <param name="celltypes" value="Macro,Fib,B lymph,NK,T lymph" />
191 <param name="marker_name" value="Immune" />
192 <param name="marker_list" value="immune.markers" />
193 </repeat>
194 </conditional>
195 <output name="out_pdf" value="dendro.pdf" compare="sim_size" />
196 <output name="out_tab">
197 <assert_contents>
198 <has_text_matching expression="^\s+Est\.prop\.weighted\.cluster\.Neutro\s+Est\.prop\.weighted\.cluster\.Podo\s+Est\.prop\.weighted\.cluster\.Endo" />
199 <has_text text="APOL1.GNA78M"/>
200 </assert_contents>
201 </output>
202 </test>
203 <test expect_num_outputs="2" >
204 <!-- Estimate Proportions test -->
<!-- Runs both proportion methods. The PDF is compared by size only (sim_size); the summaries
     collection must hold exactly two elements, MuSiC and NNLS, each containing a fixed
     residual standard error line. -->
205 <param name="bulk_eset" value="GSE50244bulkeset.subset.rds" />
206 <param name="scrna_eset" value="EMTABesethealthy.subset.rds" />
207 <conditional name="do" >
208 <param name="method" value="estimateprops" />
209 <param name="celltypes_label" value="cellType" />
210 <param name="samples_label" value="sampleID" />
211 <param name="celltypes" value="alpha,beta,delta,gamma,acinar,ductal" />
212 <param name="methods" value="MuSiC,NNLS" />
213 <param name="phenotype_factors" value="age,bmi,hba1c,gender" />
214 <param name="phenotype_gene" value="hba1c" />
215 <param name="sample_groups" value="Normal,T2D" />
216 <param name="sample_disease_group" value="T2D" />
217 <param name="sample_disease_group_scale" value="5" />
218 <param name="healthy_phenotype" value="Normal" />
219 <param name="compare_title" value="HbA1c vs Beta Cell Type Proportion" />
220 </conditional>
221 <output name="out_pdf" value="default_output.pdf" compare="sim_size" />
222 <output_collection name="summaries" count="2">
223 <element name="MuSiC" ftype="txt">
224 <assert_contents>
225 <has_text text="Residual standard error: 0.1662 on 72 degrees of freedom"/>
226 </assert_contents>
227 </element>
228 <element name="NNLS" ftype="txt">
229 <assert_contents>
230 <has_text text="Residual standard error: 0.06561 on 72 degrees of freedom"/>
231 </assert_contents>
232 </element>
233 </output_collection>
234 </test>
235 </tests>
236 <help><![CDATA[
237 MuSiC utilizes cell-type specific gene expression from single-cell RNA sequencing (RNA-seq) data to characterize cell type compositions from bulk RNA-seq data in complex tissues. By appropriate weighting of genes showing cross-subject and cross-cell consistency, MuSiC enables the transfer of cell type-specific gene expression information from one dataset to another.
238
239 Solid tissues often contain closely related cell types which leads to collinearity. To deal with collinearity, MuSiC employs a tree-guided procedure that recursively zooms in on closely related cell types. Briefly, we first group similar cell types into the same cluster and estimate cluster proportions, then recursively repeat this procedure within each cluster.
240
241 .. image:: https://xuranw.github.io/MuSiC/articles/images/FigureMethod.jpg
242 ]]></help>
243 <citations>
<!-- A citation of type "doi" holds the bare DOI, not a resolver URL. -->
244 <citation type="doi">10.1038/s41467-018-08023-x</citation>
245 </citations>
246 </tool>