comparison align_cluster.xml @ 0:c05c83b3ef0f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tools/GraphClust/AlignCluster commit 4406735e44aba20859c252be39f4e99df28c7a92
author rnateam
date Sat, 27 Oct 2018 13:21:39 -0400
parents
children 953353eacec2
comparison
equal deleted inserted replaced
-1:000000000000 0:c05c83b3ef0f
1 <tool id="graphclust_align_cluster" name="Align GraphClust cluster " version="0.1" >
2 <description>structural alignment and conservation analysis of predicted clusters</description>
<!-- Pinned package dependencies of this tool (one requirement per package). -->
<requirements>
    <requirement type="package" version="0.6.0">graphclust-wrappers</requirement>
    <requirement type="package" version="0.5">perl-array-utils</requirement>
    <requirement type="package" version="0.18.1">scikit-learn</requirement>
    <requirement type="package" version="1.8.10">locarna</requirement>
    <requirement type="package" version="2.1">rnaz</requirement>
    <requirement type="package" version="1.1.2">infernal</requirement>
    <requirement type="package" version="2.2.10">viennarna</requirement>
    <requirement type="package" version="1.3.30">graphicsmagick</requirement>
    <requirement type="package" version="0.6.1">rscape</requirement>
    <requirement type="package" version="6.0">unzip</requirement>
    <requirement type="package" version="0.1">evofold2</requirement>
    <requirement type="package" version="1.70">biopython</requirement>
    <requirement type="package" version="0.23.0">pandas</requirement>
</requirements>
<!--
    Shell pipeline built from a Cheetah template; the steps are chained with '&&':
      1. gc_align_clusters.pl is called with the cluster FASTA, ./RESULTS/, C1, the
         number of top hits and ONE double-quoted argument that carries the LocARNA
         options. That argument holds only whitespace unless the advanced options
         are shown and the "gclust" parameter set is selected.
      2. R-scape is run on result.aln.sth and RNAz on cluster.aln.
      3. If a phylogenetic tree is selected, clustal_to_alma.py and EvoFoldV2.sh are
         run; otherwise an empty alignment.evofold.out is created with touch.
      4. extract_conservation_metrics.py is called last, with conservation_metrics.tsv
         as its final argument. The three BED related arguments are passed as empty
         strings when coordinate extraction is switched off.
-->
<command detect_errors="aggressive">
<![CDATA[
    #set $alignment_clustal = 'cluster.aln'
    mkdir ./RESULTS/ &&
    'gc_align_clusters.pl'
        '$cluster_all_fa'
        ./RESULTS/ C1
        $results_top_num

        "
        #if str($advanced_opts.advanced_opts_selector) == "show":
            #if str($advanced_opts.param_type.param_type_selector) == "gclust":
                $advanced_opts.param_type.p
                $advanced_opts.param_type.max_diff_am
                $advanced_opts.param_type.max_diff
                $advanced_opts.param_type.tau
                $advanced_opts.param_type.struct_weight
                $advanced_opts.param_type.indel_opening
                $advanced_opts.param_type.indel
                $advanced_opts.param_type.alifold_consensus_dp
            #end if
        #end if
        "
    &&
    R-scape --outdir ./RESULTS/ result.aln.sth &&
    RNAz --locarnate '$alignment_clustal' > alignment.rnaz.out &&
    #if str($genomic_cons_opts.genomic_cons_opts_selector) != "no":
        clustal_to_alma.py '$alignment_clustal' '$cluster_all' &&
        EvoFoldV2.sh -o alignment.evofold.out '${alignment_clustal}.ama'
        ## The .ama alignment is already given on the line above, so every branch
        ## below appends the tree file only.
        #if str($genomic_cons_opts.genomic_cons_opts_selector) == "hg38_100way":
            '$__tool_directory__/hg38.100way.nh'
        #elif str($genomic_cons_opts.genomic_cons_opts_selector) == "hg38_20way":
            '$__tool_directory__/hg38.20way.nh'
        #else
            '$genomic_cons_opts.phylo_tree'
        #end if
        &&
    #else
        touch alignment.evofold.out &&
    #end if

    extract_conservation_metrics.py
        '$alignment_clustal' alignment.rnaz.out RESULTS/result.aln.sum alignment.evofold.out
        '$cluster_all'
        #if str($bed_opts.bed_opts_selector) == "yes":
            '$bed_opts.transcript_loci_fasta'
            '$bed_opts.transcript_loci_bed'
            '$bed_opts.genome_version'
        #else
            '' '' ''
        #end if
        conservation_metrics.tsv

]]>
</command>
<!--
    Tool inputs: two datasets from the cluster collection step, the number of top
    hits to use, and three conditionals (EvoFold tree selection, optional genomic
    coordinate extraction, optional LocARNA alignment parameters).
-->
<inputs>
    <param type="data" name="cluster_all_fa" label="cluster-sequences-sorted" format="fasta"
           help="Cluster sequences from the collect-result step"/>
    <param type="data" name="cluster_all" label="cmsearch-results" format="txt"
           help="Tabular cmsearch results of the cluster-collection step (CLUSTERS-cmsearch)"/>
    <param name="results_top_num" type="integer" value="5" size="5" label="results_top_num"
           help="Number of top cmsearch hit sequences used for structural alignment and metrics"/>

    <!-- EvoFold is only run when a tree is chosen here (any value other than "no"). -->
    <conditional name="genomic_cons_opts">
        <param type="select" name="genomic_cons_opts_selector" label="Phylo tree for evofold"
               help="For MAF genomic alignments as input data, the corresponding phylogenetic tree is required to perform EvoFold2 structural conservation prediction. For other genomic alignments, please upload the tree file.">
            <option value="no">No</option>
            <option value="hg38_100way">hg38-100way</option>
            <option value="hg38_20way">hg38-20way</option>
            <option value="user_phylo_tree">from history</option>
        </param>
        <when value="no" />
        <when value="hg38_100way" />
        <when value="hg38_20way" />
        <when value="user_phylo_tree">
            <param format="newick" name="phylo_tree" type="data" label="Newick phylo-tree" />
        </when>
    </conditional>

    <!-- When set to "yes", the three values below are passed on to extract_conservation_metrics.py. -->
    <conditional name="bed_opts">
        <param type="select" name="bed_opts_selector" label="Extract genomic coordinates of the clusters">
            <option value="no">No</option>
            <option value="yes">Yes</option>
        </param>
        <when value="no" />
        <when value="yes">
            <param format="fasta" name="transcript_loci_fasta" type="data" label="Loci reference sequence"
                   help="Sequence of reference species (human) for the genomic loci (precursor mRNA/lncRNA) to locate the clusters" />
            <param name="transcript_loci_bed" type="text" value="chr1 0 100000 gene 0 +"
                   label="Transcript loci BED entry"
                   help="bed entry string of the reference transcript loci for bed/ucsc-track output. 'chrom start end name score strand' white-space separated.">
                <validator type="regex" message="one-line bed string must have at least 6 entries as BED6 format (chrom start end name 0 strand) (space and tab allowed)">^\S+\s+[0-9]+\s+[0-9]+\s+\S+\s+\S+\s+[-+]\s*$</validator>
            </param>
            <param name="genome_version" value="hg38" type="text" label="reference genome assembly"
                   help="ucsc reference genome assembly version used in the input MAF alignments (e.g. hg38, hg19, mm10). To identify the reference in clusters">
                <validator type="regex" message="ucsc genome assembly version has an alphabet prefix and a number suffix">^[a-zA-Z]+[0-9]+$</validator>
            </param>
        </when>
    </conditional>

    <!-- Only the "gclust" parameter set contributes values to the command line. -->
    <conditional name="advanced_opts">
        <param name="advanced_opts_selector" type="select" label="Locarna alignment options">
            <option value="hide" selected="True">Hide</option>
            <option value="show">Show</option>
        </param>
        <when value="hide" />
        <when value="show">
            <conditional name="param_type">
                <param name="param_type_selector" type="select" label="Choose the type of parameters">
                    <option value="locarna">LocARNA defaults</option>
                    <option value="gclust" selected="True">GraphClust defaults (changeable)</option>
                </param>
                <when value="gclust">
                    <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/>
                    <param name="max_diff_am" type="integer" value="50" size="5" label="maximal difference for sizes of matched arcs" help="--max-diff-am"/>
                    <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/>
                    <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/>
                    <param name="struct_weight" argument="struct-weight"
                           label="Structure weight" type="integer"
                           value="180" min="0" max="800" />
                    <param name="indel_opening" argument="indel-opening"
                           label="Indel opening score" type="integer"
                           value="-400" max="0" min="-1500" />
                    <param argument="indel" label="Indel score" type="integer"
                           value="-200" min="-1000" max="0" />
                    <param name="alifold_consensus_dp"
                           type="boolean" checked="True"
                           truevalue="--alifold-consensus-dp" falsevalue=" "
                           label="Compute consensus dot plot by alifold" />
                </when>
                <when value="locarna" />
            </conditional>
        </when>
    </conditional>
</inputs>
<!--
    Output datasets, each collected from a file in the job working directory.
    NOTE(review): the command section reads the R-scape summary from
    RESULTS/result.aln.sum and nothing visible in this wrapper creates a file called
    alignment.sum, so rscape_out is collected from RESULTS/result.aln.sum here.
    Please confirm against a real run of the tool.
-->
<outputs>
    <data name="alignment_ps" format="data" from_work_dir="cluster.aln.ps" label="alignment.ps" />
    <data name="alignment_png" format="png" from_work_dir="cluster.aln.png" label="alignment.png" />
    <data name="structure_ps" format="data" from_work_dir="cluster.alirna.ps" label="structure.ps" />
    <data name="structure_png" format="png" from_work_dir="cluster.alirna.png" label="structure.png" />
    <data name="alignment_sth" format="stockholm" from_work_dir="result.aln.sth" label="alignment.sth" />
    <data name="alignment_clustal_out" format="clustal" from_work_dir="cluster.aln" label="alignment.clustal" />
    <data name="Rscape_R2R" format="pdf" from_work_dir="RESULTS/result.aln_1.R2R.sto.pdf" label="Rscape-R2R" />
    <data name="rnaz_out" format="txt" from_work_dir="alignment.rnaz.out" label="rnaz.out" />
    <data name="rscape_out" format="txt" from_work_dir="RESULTS/result.aln.sum" label="Rscape.summary" />
    <data name="evofold_out" format="txt" from_work_dir="alignment.evofold.out" label="evofold.out" />
    <data name="cons_tsv" format="tabular" from_work_dir="conservation_metrics.tsv" label="conservation_metrics.tsv" />
</outputs>
<!--
    Functional tests. Within each test the input parameters and conditionals are
    listed first, followed by the expected outputs.
-->
<tests>
    <!-- Test 1: cluster 1, no EvoFold tree selected. -->
    <test>
        <param name="cluster_all_fa" value="cluster1.all.fa"/>
        <param name="cluster_all" value="cluster1.all"/>
        <param name="results_top_num" value="5"/>
        <conditional name="genomic_cons_opts">
            <param name="genomic_cons_opts_selector" value="no"/>
        </conditional>
        <output name="alignment_ps" file="alignment1.ps" compare="sim_size" />
        <output name="alignment_png" file="alignment1.png" compare="sim_size" />
        <output name="structure_ps" file="structure1.ps" compare="sim_size" />
        <output name="structure_png" file="structure1.png" compare="sim_size" />
        <output name="alignment_sth" file="alignment1.sth" compare="sim_size" delta="100" />
        <output name="alignment_clustal_out" file="alignment1.clustal" />
        <output name="Rscape_R2R" file="alignment1_R2R.sto.pdf" compare="sim_size" />
        <output name="rnaz_out" file="alignment1.rnaz.out" compare="sim_size" delta="10"/>
        <output name="evofold_out" file="alignment1.evofold.out" />
        <output name="cons_tsv" file="alignment1.cons.tsv" compare="sim_size" delta="0"/>
    </test>

    <!-- Test 2: cluster 2 with the bundled hg38 20-way tree. -->
    <test>
        <param name="cluster_all_fa" value="cluster2.all.fa"/>
        <param name="cluster_all" value="cluster2.all"/>
        <conditional name="genomic_cons_opts">
            <param name="genomic_cons_opts_selector" value="hg38_20way"/>
        </conditional>
        <output name="alignment_png" file="alignment2.png" compare="sim_size" />
        <output name="structure_png" file="structure2.png" compare="sim_size" />
        <output name="alignment_sth" file="alignment2.sth" compare="sim_size" delta="100" />
        <output name="alignment_clustal_out" file="alignment2.clustal" />
        <output name="rnaz_out" file="alignment2.rnaz.out" />
        <output name="evofold_out" file="alignment2.evofold.out" />
        <output name="cons_tsv" file="alignment2.cons.tsv" />
    </test>

    <!-- Test 3: cluster 2 with the hg38 20-way tree and genomic coordinate extraction switched on. -->
    <test>
        <param name="cluster_all_fa" value="cluster2.all.fa"/>
        <param name="cluster_all" value="cluster2.all"/>
        <conditional name="genomic_cons_opts">
            <param name="genomic_cons_opts_selector" value="hg38_20way"/>
        </conditional>
        <conditional name="bed_opts">
            <param name="bed_opts_selector" value="yes"/>
            <param name="transcript_loci_fasta" value="alignment2b-transcript.fa"/>
            <param name="transcript_loci_bed" value="chr7 27162434 27166719 Hoxa9-Xtend5UTR 2 -"/>
        </conditional>
        <output name="cons_tsv" file="alignment2b.cons.tsv" />
    </test>
</tests>
<!-- User-facing help text, followed by the tool citation. -->
<help>
<![CDATA[
**What it does**

Aligns the predicted clusters of the glob_report_no_align step with LocARNA, then runs conservation analysis and produces visualizations of the alignment.

**Outputs**

- the alignment in Stockholm and Clustal format, with PostScript/PNG renderings of the alignment and of the structure
- the R-scape R2R drawing and R-scape summary
- the RNAz output
- the EvoFold output (empty when no phylogenetic tree is selected)
- a table of conservation metrics (conservation_metrics.tsv)

]]>
</help>
<citations>
    <citation type="doi">10.5281/zenodo.597695</citation>
</citations>
224 </tool>