<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool definition for Glimmer3.

  The tool runs glimmer3_wrapper.py through the Python interpreter, passing
  the input sequences, the ICM model and whichever optional settings the user
  filled in, and collects three text outputs: a log, the "detail" file and
  the "predict" file.
-->
<tool id="glimmer3_wrapper" name="Glimmer3" version="0.2">
  <description></description>
  <requirements>
    <requirement type="package" version="3.02">glimmer</requirement>
  </requirements>
  <!--
    Cheetah template for the wrapper command line.

    * Optional arguments are emitted only when the matching parameter is set.
      Numeric parameters are tested through str() so that an explicit 0 is
      still passed on while an empty field is skipped.
    * Dataset paths and free-text values are single-quoted so that each one
      reaches the wrapper as exactly one argument, even if it contains
      whitespace or shell metacharacters.
    * The entropy option is passed as the literal '#' when the default
      profiles are selected, or as the path of the chosen history dataset.
  -->
  <command interpreter="python"><![CDATA[
glimmer3_wrapper.py --glSequence '$glSequence' --glIcm '$glIcm'
#if $glStartCodons
  --glStartCodons='$glStartCodons'
#end if
#if $glRbsPwm
  --glRbsPwm='$glRbsPwm'
#end if
#if str($glGcPercent)
  --glGcPercent=$glGcPercent
#end if
#if $glEntropy.glEntropy_select == 'default'
  --glEntropy='#'
#else if $glEntropy.glEntropy_select == 'fromfile'
  --glEntropy='${glEntropy.glEntropyFile}'
#end if
#if $glFirstCodon
  --glFirstCodon
#end if
#if str($glGeneLen)
  --glGeneLen=$glGeneLen
#end if
#if $glLinear
  --glLinear
#end if
#if $glOrfCoords
  --glOrfCoords='$glOrfCoords'
#end if
#if $glSeparateGenes
  --glSeparateGenes
#end if
#if str($glMaxOverlap)
  --glMaxOverlap=$glMaxOverlap
#end if
#if $glStartProbs
  --glStartProbs='$glStartProbs'
#end if
#if str($glIgnoreScoreLen)
  --glIgnoreScoreLen=$glIgnoreScoreLen
#end if
#if $glNoIndep
  --glNoIndep
#end if
#if str($glThreshold)
  --glThreshold=$glThreshold
#end if
#if $glExtend
  --glExtend
#end if
#if str($glTransTable)
  --glTransTable=$glTransTable
#end if
#if $glStopCodons
  --glStopCodons='$glStopCodons'
#end if
--glDetail '$glDetail' --glPredict '$glPredict' --logfile '$logfile'
  ]]></command>

  <inputs>
    <!-- Required inputs: the sequences to annotate and the ICM model. -->
    <param name="glSequence" type="data" format="fasta" label="DNA sequences to be analyzed" help="FASTA format" />

    <param name="glIcm" type="data" format="glimmer_icm" label="ICM model produced by Build-ICM" />

    <!-- Everything below is optional; unset values are left out of the command line. -->
    <param name="glStartCodons" type="text" value="" optional="true" label="Specify allowable start codons as a comma-separated list (-A, --start_codons)" help="Sample format: 'atg,gtg'. The default start codons are atg, gtg and ttg. Use the 'Probability of different start codons' (-P, --start_probs) option to specify the relative proportions of use, otherwise the proportions will be equal." />

    <param name="glRbsPwm" type="data" format="glimmer_pwm" optional="true" label="Position weight matrix representing the ribosome binding site for genes (-b, --rbs_pwm)" help="Used to improve the accuracy of start site predictions." />

    <param name="glGcPercent" type="float" value="" optional="true" label="GC percentage of the independent model, e.g. 45.2 (-C, --gc_percent)" help="If this option is not specified, the GC percentage will be counted from the input file." />

    <!-- Entropy profiles: none, the built-in defaults, or a dataset from the history. -->
    <conditional name="glEntropy">
      <param name="glEntropy_select" type="select" label="Use entropy profiles (-E, --entropy)">
        <option value="no">No</option>
        <option value="default">Use default entropy profiles, constructed from a wide range of species (-E #)</option>
        <option value="fromfile">Use entropy profiles from history</option>
      </param>
      <when value="no" />
      <when value="default" />
      <when value="fromfile">
        <param name="glEntropyFile" type="data" format="glimmer_entropy_profiles" label="Entropy profiles" help="As generated by Glimmer entropy-profile" />
      </when>
    </conditional>

    <param name="glFirstCodon" type="boolean" checked="false" label="Use the first possible codon in an ORF as the start codon for initial scoring purposes (-f, --first_codon)" help="Otherwise, the highest-scoring codon will be used. This only affects the start positions in the .detail file. The final start predictions in the .predict file are always based on the scoring functions." />

    <param name="glGeneLen" type="integer" value="" optional="true" label="Minimum gene length in number of nucleotides (-g, --gene_len)" help="It does not include the bases in the stop codon." />

    <param name="glLinear" type="boolean" checked="false" label="Assuming a linear genome (-l, --linear)" help="No 'wrap-around' genes with part at the beginning of the sequence and the rest at the end of the sequence." />

    <param name="glOrfCoords" type="data" format="glimmer_coords" optional="true" label="Coordinates file specifying a list of ORFs that should be scored separately, with no overlap rules (-L, --orf_coords)" help="The output with this option goes both to the .predict file and to the .detail file." />

    <param name="glSeparateGenes" type="boolean" checked="false" label="Separate genes (-M, --separate_genes)" help="" />

    <param name="glMaxOverlap" type="integer" value="" optional="true" label="Maximum overlap length (-o, --max_olap)" help="Overlaps of this many or fewer bases between genes are not regarded as overlaps." />

    <param name="glStartProbs" type="text" value="" optional="true" label="Probability of different start codons (-P, --start_probs)" help="Comma-separated list of values, in the same number and order as the start codons. If no --start_codons option is given, then there should be 3 values: for atg, gtg and ttg, in that order. Sample format: '0.6,0.35,0.05'. If --start_codons is specified without --start_probs, then each start codon is equally likely (which is very unusual)." />

    <param name="glIgnoreScoreLen" type="integer" value="" optional="true" label="Consider any gene n or more bases long as a potential gene, regardless of its in-frame score (-q, --ignore_score_len)" help="Without this option, this value is calculated automatically to be the length such that the expected number of ORFs this long or longer in a random sequence of a million bases is one." />

    <param name="glNoIndep" type="boolean" checked="false" label="Do not use the independent probability score column (-r, --no_indep)" help="Using this option will produce more short gene predictions." />

    <param name="glThreshold" type="integer" value="" optional="true" label="Threshold score for consideration as a gene (-t, --threshold)" help="If the in-frame score ≥ n, then the region is given a number and considered a potential gene. Note this is the integer score in the column labelled 'InFrm' in the .detail file, not the decimal score in the column labelled 'Raw'." />

    <param name="glExtend" type="boolean" checked="false" label="Also score ORFs that extend off the end of the sequence(s) (-X, --extend)" help="This option presumes that the sequence(s) is linear and not circular. Reported positions off the end of the sequence are the nearest positions in the correct reading frame. Note that this ignores any partial codons at the ends of a sequence. Suppose, for example, that a sequence is 998bp long and an ORF in reading frame +1 starts at position 601 and extends off the end of the sequence. Then the end of that gene/ORF will be reported at position 999, as if the stop codon were in positions 997 ... 999. This is true even if the last two characters of the sequence are, say, cc and cannot possibly be part of a stop codon." />

    <param name="glTransTable" type="integer" value="" optional="true" label="Use GenBank translation table number n to specify stop codons (-z, --trans_table)" help="" />

    <param name="glStopCodons" type="text" value="" optional="true" label="Specify allowable stop codons as a comma-separated list (-Z, --stop_codons)" help="Sample format: 'tag,tga,taa'. The default stop codons are tag, tga and taa." />
  </inputs>

  <!-- Three plain-text outputs; each name matches the wrapper argument that receives its path. -->
  <outputs>
    <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" />
    <data name="glDetail" format="txt" label="${tool.name} on ${on_string}: detail" />
    <data name="glPredict" format="txt" label="${tool.name} on ${on_string}: predict" />
  </outputs>

  <tests>
    <!-- TODO: no functional tests are defined for this tool yet. -->
  </tests>
  <help>
**What it does**

Read DNA sequences and predict genes in them using an Interpolated Context Model (ICM). Output details go to file *detail* and predictions go to file *predict*.

**License and citation**

This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.

.. _CRS4 Srl.: http://www.crs4.it/
.. _MIT license: http://opensource.org/licenses/MIT

If you use this tool in Galaxy, please cite |Cuccuru2013|_.

.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
.. _Cuccuru2013: http://orione.crs4.it/

This tool uses `Glimmer`_, which is licensed separately. Please cite |Delcher2007|_.

.. _Glimmer: http://ccb.jhu.edu/software/glimmer/index.shtml
.. |Delcher2007| replace:: Delcher, A. L., Bratke, K. A., Powers, E. C., Salzberg, S. L. (2007) Identifying bacterial genes and endosymbiont DNA with Glimmer. *Bioinformatics* 23(6), 673-679
.. _Delcher2007: http://bioinformatics.oxfordjournals.org/content/23/6/673
  </help>
</tool>