comparison metagene_annotator.xml @ 0:b04960a7abf5 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/metagene_annotator commit 6d8b6e0fa2f1b47b337dbf21f5bc320586ccbd4c
author galaxyp
date Wed, 21 Mar 2018 17:15:25 -0400
parents
children 17c7ab82bfbc
comparison
equal deleted inserted replaced
-1:000000000000 0:b04960a7abf5
1 <tool id="metagene_annotator" name="MetaGeneAnnotator" version="1.0.0">
2 <description>gene-finding program for prokaryote and phage (used by sixgill)</description>
3 <requirements>
4 <requirement type="package">metagene_annotator</requirement>
5 <requirement type="package">python</requirement>
6 </requirements>
7 <command detect_errors="exit_code"><![CDATA[
## Run mga once per input dataset, appending every report to mga_output, then
## optionally convert the combined report with convert_mga.py (--tsv / --bed).
8 #set $output_list = str($output_formats).split(',')
9 touch mga_output
10 #for $input in $inputs:
## The dataset path is single-quoted so the shell passes it to mga as one argument.
11 && mga '$input' $multiple_species >> mga_output
12 #end for
13 #if 'tsv' in $output_list or 'bed' in $output_list:
14 && python '$__tool_directory__/convert_mga.py' mga_output -v
15 #if 'tsv' in $output_list
16 --tsv '$mga_tsv'
17 #end if
18 #if 'bed' in $output_list
19 --bed '$mga_bed'
20 #end if
21 #end if
22 ]]></command>
23 <inputs>
24 <param name="inputs" type="data" format="fasta" multiple="true" label="prokaryote DNA sequences"/>
25 <param name="multiple_species" type="boolean" truevalue="-m" falsevalue="-s" checked="true"
26 label="MetaGenomic - Sequences are from multiple organisms" />
27 <param name="output_formats" type="select" multiple="true" display="checkboxes" label="output formats">
28 <option value="txt" selected="true">MetaGeneAnnotator text report</option>
29 <option value="tsv">MetaGeneAnnotator tabular report with sequence columns</option>
30 <option value="bed">MetaGeneAnnotator in BED format</option>
31 </param>
32 </inputs>
33 <outputs>
34 <data name="mga_txt" format="txt" from_work_dir="mga_output" label="${tool.name} on ${on_string} metagenefile">
35 <filter>'txt' in output_formats</filter>
36 </data>
37 <data name="mga_tsv" format="tabular" label="${tool.name} on ${on_string} mga table">
38 <filter>'tsv' in output_formats</filter>
39 <actions>
40 <action name="column_names" type="metadata"
41 default="seq_ID,seq_model,seq_gc,seq_rbs,gene ID,start pos,end pos,strand,frame,complete/partial,gene score,used model,rbs start,rbs end,rbs score"/>
42 </actions>
43 </data>
44 <data name="mga_bed" format="bed" label="${tool.name} on ${on_string} mga bed">
45 <filter>'bed' in output_formats</filter>
46 <actions>
47 <action name="column_names" type="metadata"
48 default="chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts"/>
49 </actions>
50 </data>
51 </outputs>
52 <tests>
53 <test>
54 <param name="inputs" value="metasequences.fasta" ftype="fasta"/>
55 <param name="multiple_species" value="True"/>
56 <param name="output_formats" value="txt"/>
57 <output name="mga_txt">
58 <assert_contents>
59 <has_text_matching expression="# 1/1\s# gc = 0.275862, rbs = -1\s# self: -" />
60 <has_text_matching expression="gene_1\t1812\t1994\t-\t0\t11\t14.10\d+\tb\t2002\t2007\t2.11\d+" />
61 </assert_contents>
62 </output>
63 </test>
64 <test>
65 <param name="inputs" value="metasequences.fasta" ftype="fasta"/>
66 <param name="multiple_species" value="False"/>
67 <param name="output_formats" value="txt"/>
68 <output name="mga_txt">
69 <assert_contents>
70 <has_text_matching expression="# 1/1\s# gc = 0.275862, rbs = 0.428571\s# self: b" />
71 <has_text_matching expression="gene_1\t1812\t1994\t-\t0\t11\t12.48\d+\tb\t2002\t2007\t0.49\d+" />
72 </assert_contents>
73 </output>
74 </test>
75 <!-- Try these later
76 <test>
77 <param name="inputs" value="metasequences1.fasta,metasequences2.fasta" ftype="fasta"/>
78 <param name="multiple_species" value="True"/>
79 <param name="output_formats" value="txt"/>
80 <output name="mga_txt">
81 <assert_contents>
82 <has_text_matching expression="# 1/1.*# 10/1" />
83 <has_text_matching expression="gene_1\t1812\t1994\t-\t0\t11\t14.10\d+\tb\t2002\t2007\t2.11\d+" />
84 </assert_contents>
85 </output>
86 </test>
87 <test>
88 <param name="inputs" value="metasequences.fasta" ftype="fasta"/>
89 <param name="multiple_species" value="True"/>
90 <param name="output_formats" value="txt,tsv,bed"/>
91 <output name="mga_txt">
92 <assert_contents>
93 <has_text_matching expression="# 1/1\s# gc = 0.275862, rbs = -1\s# self: -" />
94 <has_text_matching expression="gene_1\t1812\t1994\t-\t0\t11\t14.10\d+\tb\t2002\t2007\t2.11\d+" />
95 </assert_contents>
96 </output>
97 <output name="mga_tsv">
98 <assert_contents>
99 <has_text_matching expression="#seq_id\tseq_model\tseq_gc\tseq_rbs" />
100 <has_text_matching expression="1/1\t-\t0.27\d+\t-1\tgene_1\t1812\t1994\t-\t0\t11\t14.1035\tb\t2002\t2007\t2.11\d+" />
101 </assert_contents>
102 </output>
103 <output name="mga_bed">
104 <assert_contents>
105 <has_text_matching expression="1/1\t1811\t1994\t1/1:gene_1\t15\t-\t1811\t1994\t0\t1\t183\t0" />
106 </assert_contents>
107 </output>
108 </test>
109 -->
110 </tests>
111 <help><![CDATA[
112 **MetaGeneAnnotator (mga)**
113
114 A gene-finding program for prokaryote and phage.
115
116 The gene annotations can be used by sixgill_ when generating metapeptides from metagenomics shotgun sequencing.
117
118 .. image:: Sixgill_MetaGeneAnnotator_Workflow.png
119 :height: 213
120 :width: 625
121
122 usage:
123 mga [multi-fasta] <-m/-s>
124
125 -m (multiple species sequences are individually treated)
126 -s (single species sequences are treated as a unit)
127
128 **Input:**
129 *A fasta file of metagenomic sequences*
130
131
132 **Outputs:**
133
134 *MetaGeneAnnotator text report*
135 Output from the MetaGeneAnnotator mga application::
136
137 # 1/1
138 # gc = 0.275862, rbs = -1
139 # self: -
140 gene_1 1812 1994 - 0 11 14.1035 b 2002 2007 2.11797
141 # 2/1
142 # gc = 0.338877, rbs = -1
143 # self: -
144 gene_1 1 414 + 0 01 25.748 b . . .
145 gene_2 614 790 + 0 11 0.774142 b . . .
146 gene_3 822 1079 + 0 11 20.6507 b . . .
147
148 output format description::
149
150 # [sequence name]
151 # gc = [gc%], rbs = [rbs%]
152 # self: [(b)acteria/(a)rchaea/(p)hage/unused(-)]
153 [gene ID] [start pos.] [end pos.] [strand] [frame] [complete/partial] [gene score] [used model] [rbs start] [rbs end] [rbs score]
154
155 explanations of output columns:
156 * The value of [frame] (0/1/2) indicates the number of surplus (untranslated) nucleotides at the 5'-end of the predicted ORF.
157 * The value of [score] indicates the estimated score of the predicted gene. All predicted genes have a score greater than 0.
158 * The value of [complete/partial] indicates whether the predicted gene structure is complete (contains both start and stop codons [11]) or partial (lacks the start codon [01], the stop codon [10], or both [00]).
159 * The value of [model] indicates the model selected ((s)elf/(b)acteria/(a)rchaea/(p)hage) for predicting the gene.
160
161
162 *MetaGeneAnnotator tabular report with sequence columns*
163 The mga output reformatted as a tabular file::
164
165 #seq_id seq_model seq_gc seq_rbs gene ID start pos end pos strand frame complete/partial gene score used model rbs start rbs end rbs score
166 1/1 - 0.275862 -1 gene_1 1812 1994 - 0 11 14.1035 b 2002 2007 2.11797
167 2/1 - 0.338877 -1 gene_1 1 414 + 0 01 25.748 b . . .
168 2/1 - 0.338877 -1 gene_2 614 790 + 0 11 0.774142 b . . .
169 2/1 - 0.338877 -1 gene_3 822 1079 + 0 11 20.6507 b . . .
170
171
172 *MetaGeneAnnotator in BED format*
173 The mga output reformatted as a BED file which can be used to extract the DNA sequences for each gene from the fasta file::
174
175 1/1 1811 1994 1/1:gene_1 15 - 1811 1994 0 1 183 0
176 2/1 0 414 2/1:gene_1 26 + 0 414 0 1 414 0
177 2/1 613 790 2/1:gene_2 1 + 613 790 0 1 177 0
178 2/1 821 1079 2/1:gene_3 21 + 821 1079 0 1 258 0
179
180
181 .. _sixgill: https://github.com/dhmay/sixgill
182 ]]></help>
183 <citations>
184 <citation type="doi">10.1093/dnares/dsn027</citation>
185 </citations>
186 </tool>