Mercurial > repos > rnateam > graphclust_postprocessing
comparison glob_report.xml.orig @ 17:f93c868203cc draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust/CollectResults commit 4406735e44aba20859c252be39f4e99df28c7a92
author | rnateam |
---|---|
date | Sat, 27 Oct 2018 13:23:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
16:79df97a1bc0f | 17:f93c868203cc |
---|---|
1 <!-- Galaxy wrapper for the GraphClust cluster collection report. The merge-conflict markers that surrounded this tag were resolved in favour of HEAD (version 0.4). --> | |
2 <tool id="glob_report" name="cluster_collection_report" version="0.4" > | |
6 <requirements> <!-- Packages the job environment must provide; every version is pinned. --> | |
7 <requirement type="package" version="0.5.2">graphclust-wrappers</requirement> | |
8 <requirement type="package" version="0.5">perl-array-utils</requirement> | |
9 <requirement type="package" version="0.18.1">scikit-learn</requirement> | |
10 <requirement type="package" version="1.8.10">locarna</requirement> | |
11 <requirement type="package" version="2.1">rnaz</requirement> | |
12 <requirement type="package" version="1.1.2">infernal</requirement> | |
13 <requirement type="package" version="2.2.10">viennarna</requirement> | |
14 <requirement type="package" version="1.3.26">graphicsmagick</requirement> | |
15 <requirement type="package" version="0.6.1">rscape</requirement> | |
16 <requirement type="package" version="6.0">unzip</requirement> | |
17 | |
18 </requirements> | |
19 <stdio> <!-- Exit-status handling for the job; the single rule below covers exit codes 1 and above. --> | |
20 <exit_code range="1:" /> | |
21 </stdio> | |
22 <command> | |
23 <![CDATA[ | |
24 unzip '$FASTA' &> /dev/null && | |
25 | |
26 mkdir ./CMSEARCH && | |
27 mkdir ./MODEL && | |
28 | |
29 ## Link every cmsearch result into ./CMSEARCH under its element identifier (target quoted so odd names stay one argument). | |
30 | |
31 #for $cms_res in $cmsearch_results: | |
33 ln -f -s '$cms_res' './CMSEARCH/$cms_res.element_identifier' && | |
34 #end for | |
36 | |
37 ## Link every model tree file into ./MODEL the same way. | |
38 | |
39 #for $mods in $model_tree_files: | |
41 ln -f -s '$mods' './MODEL/$mods.element_identifier' && | |
42 #end for | |
44 | |
45 | |
46 'glob_res.pl' | |
47 ##'$inputFiles' | |
48 $merge_cluster_ol | |
49 $merge_overlap | |
50 $min_cluster_size | |
51 $cm_min_bitscore | |
52 $cm_max_eval | |
53 $cm_bitscore_sig | |
54 $partition_type '' | |
55 $cut_type | |
56 ##'$inputFilesTrees' | |
57 $results_top_num | |
58 #if $iteration_num.iteration_num_selector: | |
59 $iteration_num.CI | |
60 '$iteration_num.final_partition_soft' | |
61 '$iteration_num.final_partition_used_cmsearch' | |
62 '$iteration_num.combined_cm' | |
63 | |
64 #end if | |
65 | |
66 #if str($advanced_opts.advanced_opts_selector) == "show": | |
67 #if str($advanced_opts.param_type.param_type_selector) == "gclust": | |
68 $advanced_opts.param_type.p | |
69 $advanced_opts.param_type.max_diff_am | |
70 $advanced_opts.param_type.max_diff | |
71 $advanced_opts.param_type.tau | |
72 $advanced_opts.param_type.struct_weight | |
73 $advanced_opts.param_type.indel_opening | |
74 $advanced_opts.param_type.indel | |
75 $advanced_opts.param_type.alifold_consensus_dp | |
76 #end if | |
77 #end if | |
78 | |
79 && | |
80 ## Run the evaluation script shipped in the tool directory on FASTA/ and RESULTS/ (HEAD side of the former merge conflict). | |
81 python '$__tool_directory__/evaluation.py' FASTA/ RESULTS/ | |
85 | |
86 #if $cdhit: | |
87 && | |
88 python '$__tool_directory__/addCdhitseqs.py' '$cdhit' | |
89 #end if | |
90 ]]> | |
91 </command> | |
92 <inputs> <!-- User-facing parameters; the command template reads them by name. --> | |
93 <param type="data" name="FASTA" format="zip" /> | |
94 <param type="data" name="cmsearch_results" format="tabular" multiple="True"/> | |
95 <param type="data" name="model_tree_files" format="txt" multiple="True"/> | |
96 <param name="partition_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Hard partition"/> | |
97 <param name="cut_type" type="boolean" checked="True" truevalue="0" falsevalue="1" label="Use CM score for cutoff" help="otherwise use E-value"/> | |
98 <param type="data" name="cdhit" format="txt" optional="true"/> | |
99 <conditional name="iteration_num"> <!-- The extra inputs below exist only when "Multiple iterations" is switched on. --> | |
100 <param name="iteration_num_selector" type="boolean" checked="false" label="Multiple iterations" help="for single iteration- NO, for multiple-YES"/> | |
101 <when value="true"> | |
102 <param name="CI" type="integer" value="2" size="5" label="Number of current iteration "/> | |
103 <param type="data" name="final_partition_soft" format="txt" /> | |
104 <param type="data" name="final_partition_used_cmsearch" format="txt" /> | |
105 <param type="data" name="combined_cm" format="txt" /> | |
106 </when> | |
107 <when value="false" ></when> | |
108 </conditional> | |
109 <param name="merge_cluster_ol" type="float" value="0.66" size="5" label="merge_cluster_ol" help=""/> | |
110 <param name="merge_overlap" type="float" value="0.51" size="5" label="merge_overlap" help=""/> | |
111 <param name="min_cluster_size" type="integer" value="3" size="5" label="min_cluster_size" help=""/> | |
112 <param name="cm_min_bitscore" type="integer" value="20" size="5" label="cm_min_bitscore" help=""/> | |
113 <param name="cm_max_eval" type="float" value="0.001" size="5" label="cm_max_eval" help=""/> | |
114 <param name="cm_bitscore_sig" type="integer" value="1" size="5" label="cm_bitscore_sig" help=""/> | |
115 <param name="results_top_num" type="integer" value="5" size="5" label="results_top_num" help=""/> | |
116 | |
117 <conditional name="advanced_opts"> <!-- The command template appends these values only for "Show" combined with the "gclust" parameter type. --> | |
118 <param name="advanced_opts_selector" type="select" label="Advanced Options"> | |
119 <option value="hide" selected="True">Hide</option> | |
120 <option value="show">Show</option> | |
121 </param> | |
122 <when value="hide"></when> | |
123 <when value="show"> | |
124 | |
125 <conditional name="param_type"> | |
126 <param name="param_type_selector" type="select" label="Choose the type of parameters"> | |
127 <option value="locarna">LocARNA defaults</option> | |
128 <option value="gclust" selected="True">GraphClust defaults (changeable)</option> | |
129 </param> | |
130 <when value="gclust"> | |
131 | |
132 <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/> | |
133 <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/> | |
134 <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/> | |
135 <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/> | |
136 | |
137 <param name="struct_weight" argument="struct-weight" | |
138 label="Structure weight" type="integer" | |
139 value="180" min="0" max="800" /> | |
140 <param name="indel_opening" argument="indel-opening" | |
141 label="Indel opening score" type="integer" | |
142 value="-400" max="0" min="-1500" /> | |
143 <param argument="indel" label="Indel score" type="integer" | |
144 value="-200" min="-1000" max="0" /> | |
145 | |
146 <param name="alifold_consensus_dp" | |
147 type="boolean" checked="True" | |
148 truevalue="--alifold-consensus-dp" falsevalue=" " | |
149 label="Compute consensus dot plot by alifold" /> | |
150 | |
151 </when> | |
152 <when value="locarna"> | |
153 </when> | |
154 </conditional> | |
155 | |
156 </when> | |
157 </conditional> | |
158 | |
159 </inputs> | |
160 <outputs> <!-- Fixed outputs are picked up via from_work_dir; the collections are discovered by file-name pattern. --> | |
161 <data name="final_stats" format="txt" from_work_dir="RESULTS/cluster.final.stats" label="cluster.final.stats" /> | |
162 <data name="tableForEval" format="tabular" from_work_dir="RESULTS/fullTab.tabular" label="tableForEval" /> | |
163 <data name="final_soft" format="txt" from_work_dir="RESULTS/partitions/final_partition.soft" label="soft_part" /> | |
164 <data name="final_used_cmsearch" format="txt" from_work_dir="RESULTS/partitions/final_partition.used_cmsearch" label="final_partition_used_cmsearch" /> | |
165 <data name="evaluation" format="txt" from_work_dir="RESULTS/evaluation.txt" label="evaluation_of_clusters" /> | |
166 <data name="combined_cm_out" format="txt" from_work_dir="combined_cm_out" label="combined_cmsearch_output" /> | |
167 <collection name="clusters" type="list" label="CLUSTERS"> | |
168 <discover_datasets pattern="(?P&lt;name&gt;^.*\.all$)" directory="RESULTS" /> <!-- named group is entity-escaped: a raw less-than sign is not allowed inside an attribute value --> | |
169 </collection> | |
170 <collection name="partitions" type="list" label="Partitions"> | |
171 <discover_datasets pattern="(?P&lt;name&gt;^.*$)" directory="RESULTS/partitions" /> | |
172 </collection> | |
173 <collection name="topSecondaryStruct" type="list" label="Top $results_top_num alirna.ps"> | |
174 <discover_datasets format="png" pattern="(?P&lt;name&gt;^.*\.alirna\.png$)" /> <!-- dots escaped so they match a literal "." only --> | |
175 </collection> | |
176 <collection name="topDot" type="list" label="Top $results_top_num aln.ps"> | |
177 <discover_datasets format="png" pattern="(?P&lt;name&gt;^.*\.aln\.png$)" /> | |
178 </collection> | |
179 <collection name="rscapePlot" type="list" label="R-scape Plot"> | |
180 <discover_datasets format="pdf" pattern="(?P&lt;name&gt;^.*\.pdf$)" /> | |
181 </collection> | |
182 <data name="RESULTS_zip" format="zip" from_work_dir="RESULTS.zip" label="RESULTS.zip" /> | |
183 </outputs> | |
184 <tests> <!-- One single-iteration run; images, PDFs and the zip are compared by size only. --> | |
185 <test> | |
186 <param name="FASTA" value="FASTA.zip" ftype="searchgui_archive"/> | |
187 <param name="cmsearch_results" value="1.1.tree,1.2.tree"/> | |
188 <param name="model_tree_files" value="1.1.model.tree.fa,1.2.model.tree.fa"/> | |
190 <param name="partition_type" value="0"/> | |
191 <param name="cut_type" value="0"/> | |
192 <conditional name="iteration_num"> | |
193 <param name="iteration_num_selector" value="false"/> | |
194 </conditional> | |
195 <param name="merge_cluster_ol" value="0.66"/> | |
196 <param name="merge_overlap" value="0.51"/> | |
197 <param name="min_cluster_size" value="3"/> | |
198 <param name="cm_min_bitscore" value="20"/> | |
199 <param name="cm_max_eval" value="0.001"/> | |
200 <param name="cm_bitscore_sig" value="0"/> | |
201 <output name="final_stats" file="RESULTS/cluster.final.stats" /> | |
202 <output_collection name="clusters" type="list"> | |
203 <element name="1.cluster.all" file="RESULTS/1.cluster.all" compare="contains"/> | |
204 <element name="2.cluster.all" file="RESULTS/2.cluster.all" compare="contains"/> | |
205 | |
206 </output_collection> | |
207 <output_collection name="partitions"> | |
208 <element name="final_overlap.map" file="RESULTS/partitions/final_overlap.map" compare="contains"> | |
209 <assert_contents> | |
210 <has_text text="1.1 1.1 " /> | |
211 <has_text text="1.2 1.2" /> | |
212 </assert_contents> | |
213 </element> | |
214 <element name="final_overlap.matrix" file="RESULTS/partitions/final_overlap.matrix" compare="contains"> | |
215 <assert_contents> | |
216 <has_text text="MODEL CLASS 0 0" /> | |
217 <!--has_text text="1.2" /> | |
218 <has_text text="1.1" /--> | |
219 </assert_contents> | |
220 </element> | |
221 <element name="final_partition.hard.best" file="RESULTS/partitions/final_partition.hard.best" /> | |
222 <element name="final_partition.hard.merged" file="RESULTS/partitions/final_partition.hard.merged" /> | |
223 <element name="final_partition.soft" file="RESULTS/partitions/final_partition.soft" /> | |
224 <element name="final_partition.used_cmsearch" file="RESULTS/partitions/final_partition.used_cmsearch" compare="contains"/> | |
225 </output_collection> | |
226 <param name="results_top_num" value="5"/> | |
227 <output_collection name="topSecondaryStruct" type="list"> | |
228 <element name="1.cluster.top5.alirna.png" file="1.cluster.top5.alirna.png" ftype="png" compare="sim_size" /> | |
229 <element name="2.cluster.top5.alirna.png" file="2.cluster.top5.alirna.png" ftype="png" compare="sim_size" /> | |
230 </output_collection> | |
231 <output_collection name="topDot" type="list"> | |
232 <element name="1.cluster.top5.aln.png" file="1.cluster.top5.aln.png" ftype="png" compare="sim_size" /> | |
233 <element name="2.cluster.top5.aln.png" file="2.cluster.top5.aln.png" ftype="png" compare="sim_size" /> | |
234 </output_collection> | |
235 | |
236 <output_collection name="rscapePlot" type="list"> | |
237 <element name="1.cluster.top5.result.aln_1.R2R.sto.pdf" file="1.cluster.top5.result.aln_1.R2R.sto.pdf" ftype="pdf" compare="sim_size" /> | |
238 <element name="2.cluster.top5.result.aln_1.R2R.sto.pdf" file="2.cluster.top5.result.aln_1.R2R.sto.pdf" ftype="pdf" compare="sim_size" /> | |
239 </output_collection> | |
240 | |
241 <output name="RESULTS_zip" file="RESULTS.zip" ftype="zip" compare="sim_size" delta="20000"/> | |
242 | |
243 </test> | |
244 </tests> | |
245 <help> | |
246 <![CDATA[ | |
247 | |
248 **What it does** | |
249 | |
250 Post-processing. Redundant clusters are merged and instances that belong to multiple clusters | |
251 are assigned unambiguously. For every pair of clusters, the relative overlap (i.e. the fraction of | |
252 instances that occur in both clusters) is computed and clusters are merged if the overlap exceeds 50%. | |
253 Cluster members are finally ranked by their CM bitscore. | |
254 | |
255 ]]> | |
256 </help> | |
257 <citations> <!-- A single BibTeX entry is cited by this tool. --> | |
258 <citation type="bibtex">@inproceedings{costa2010fast, | |
259 title={Fast neighborhood subgraph pairwise distance kernel}, | |
260 author={Costa, Fabrizio and De Grave, Kurt}, | |
261 booktitle={Proceedings of the 26th International Conference on Machine Learning}, | |
262 pages={255--262}, | |
263 year={2010}, | |
264 organization={Omnipress} | |
265 } | |
266 </citation> | |
267 </citations> | |
268 </tool> |