comparison transdecoder.xml @ 4:0db979fead3a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transdecoder commit f0e5f4748034bc0f1faafcec65bffa34b64a4781
author iuc
date Thu, 01 Jun 2017 06:04:12 -0400
parents b408e5a8b137
children c6334cb383ff
comparison
equal deleted inserted replaced
3:b408e5a8b137 4:0db979fead3a
1 <tool id="transdecoder" name="TransDecoder" version="1.1"> 1 <tool id="transdecoder" name="TransDecoder" version="3.0.1">
2 <description>Find coding regions within transcripts</description> 2 <description>Find coding regions within transcripts</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="2.1.0">transdecoder</requirement> 4 <requirement type="package" version="3.0.1">transdecoder</requirement>
5 </requirements> 5 </requirements>
6 6
7 <stdio> 7 <command detect_errors="exit_code"><![CDATA[
8 <exit_code range="1:" level="fatal" description="Error occurred" /> 8 TransDecoder.LongOrfs -t '${input}'
9 </stdio> 9 -m ${min_len}
10 10 ${adv.stranded}
11 <command><![CDATA[ 11 -G ${adv.gen_code}
12 TransDecoder.LongOrfs -t "${input}" 12 #if str($adv.partials)
13 13 -p ${adv.partials}
14 #if ($min_len):
15 -m ${min_len}
16 #end if 14 #end if
17 15 &&
18 ${adv.stranded} 16 TransDecoder.Predict --cpu \${GALAXY_SLOTS:-1} -t '${input}'
19 17 --retain_long_orfs ${adv.retain_long_orfs}
20 #if ($adv.gen_code): 18 ${adv.single_best_orf}
21 -G ${adv.gen_code} 19 #if str( $training_sect.training.training_selector ) == "training_top":
20 -T ${training_sect.training.top_longest}
21 #else
22 --train '${training_sect.training.train}'
22 #end if 23 #end if
23
24 && 24 &&
25 25 mv `basename '${input}'`.transdecoder.pep '$transdecoder_pep' &&
26 TransDecoder.Predict --cpu \${GALAXY_SLOTS:-1} -t "${input}" 26 mv `basename '${input}'`.transdecoder.cds '$transdecoder_cds' &&
27 27 mv `basename '${input}'`.transdecoder.bed '$transdecoder_bed' &&
28 #if ($adv.retain_long_orfs): 28 mv `basename '${input}'`.transdecoder.gff3 '$transdecoder_gff3'
29 --retain_long_orfs ${adv.retain_long_orfs}
30 #end if
31
32 #if str( $training_sect.training.training_selector ) == "training_top":
33 #if ($training_sect.training.top_longest):
34 -T ${training_sect.training.top_longest}
35 #end if
36 #else
37 #if ($training_sect.training.train):
38 --train ${training_sect.training.train}
39 #end if
40 #end if
41
42 &&
43
44 out_prefix=`basename "${input}"`
45
46 mv `basename "${input}"`.transdecoder.pep transcript.transdecoder.pep &&
47 mv `basename "${input}"`.transdecoder.cds transcript.transdecoder.cds &&
48 mv `basename "${input}"`.transdecoder.bed transcript.transdecoder.bed &&
49 mv `basename "${input}"`.transdecoder.gff3 transcript.transdecoder.gff3 &&
50 mv `basename "${input}"`.transdecoder.mRNA transcript.transdecoder.mRNA
51 ]]></command> 29 ]]></command>
52
53 <inputs> 30 <inputs>
54 <param format="fasta" name="input" type="data" label="Transcripts"/> 31 <param name="input" argument="-t" type="data" format="fasta" label="Transcripts" />
55 32 <param name="min_len" argument="-m" type="integer" value="100" label="Minimum protein length" />
56 <param name="min_len" size="5" type="integer" optional='true' value="100" label="Minimum protein length (default: 100aa)"/>
57
58 <section name="adv" title="Advanced Options" expanded="False"> 33 <section name="adv" title="Advanced Options" expanded="False">
59 <param name="stranded" type="boolean" checked="false" truevalue="-S" falsevalue="" label="Strand-specific" help="Only analyzes top strand"/> 34 <param name="stranded" argument="-S" type="boolean" truevalue="-S" falsevalue="" label="Strand-specific" help="Only analyzes top strand" />
60 <param name="gen_code" type="select" label="Genetic code"> 35 <param name="gen_code" argument="-G" type="select" label="Genetic code">
61 <option value="universal" selected="True">universal</option> 36 <option value="universal" selected="True">universal</option>
62 <option value="Euplotes">Euplotes</option> 37 <option value="Euplotes">Euplotes</option>
63 <option value="Tetrahymena">Tetrahymena</option> 38 <option value="Tetrahymena">Tetrahymena</option>
64 <option value="Candida">Candida</option> 39 <option value="Candida">Candida</option>
65 <option value="Acetabularia">Acetabularia</option> 40 <option value="Acetabularia">Acetabularia</option>
73 <option value="Mitochondrial-Platyhelminths">Mitochondrial-Platyhelminths</option> 48 <option value="Mitochondrial-Platyhelminths">Mitochondrial-Platyhelminths</option>
74 <option value="Mitochondrial-Yeasts">Mitochondrial-Yeasts</option> 49 <option value="Mitochondrial-Yeasts">Mitochondrial-Yeasts</option>
75 <option value="Mitochondrial-Euascomycetes">Mitochondrial-Euascomycetes</option> 50 <option value="Mitochondrial-Euascomycetes">Mitochondrial-Euascomycetes</option>
76 <option value="Mitochondrial-Protozoans">Mitochondrial-Protozoans</option> 51 <option value="Mitochondrial-Protozoans">Mitochondrial-Protozoans</option>
77 </param> 52 </param>
78 53 <param name="partials" argument="-p" type="integer" value="" optional="true" label="Shorten potential 5' partials if they are this percentage of the original protein or longer" />
79 <param name="retain_long_orfs" type="integer" optional="true" label="Retain long orfs" help="Retain all ORFs found that are equal or longer than these many nucleotides even if no other evidence marks it as coding (default: 900 bp => 300aa)" /> 54 <param name="retain_long_orfs" argument="--retain_long_orfs" type="integer" value="900" label="Retain long ORFs" help="Retain all ORFs found that are equal or longer than these many nucleotides even if no other evidence marks it as coding (default: 900 bp => 300aa)" />
55 <param argument="--single_best_orf" type="boolean" truevalue="--single_best_orf" falsevalue="" label="Retain only the single best ORF per transcript" help="Best is defined as having (optionally Pfam and/or BLAST support) and longest ORF" />
80 </section> 56 </section>
81
82 <section name="training_sect" title="Training Options" expanded="False"> 57 <section name="training_sect" title="Training Options" expanded="False">
83 <conditional name="training"> 58 <conditional name="training">
84 <param name="training_selector" type="select" label="Select the training method"> 59 <param name="training_selector" type="select" label="Select the training method">
85 <option value="training_top" selected="True">Train with the top longest ORFs</option> 60 <option value="training_top" selected="True">Train with the top longest ORFs</option>
86 <option value="training_set">Train with a set of known ORFs</option> 61 <option value="training_set">Train with a set of known ORFs</option>
87 </param> 62 </param>
88 <when value="training_top"> 63 <when value="training_top">
89 <param name="top_longest" type="integer" optional="true" label="Number of top longest ORFs" help="Number of top longest ORFs to train Markov Model (hexamer stats) (default: 500 sequences)" /> 64 <param name="top_longest" argument="-T" type="integer" value="500" label="Number of top longest ORFs" help="Number of top longest ORFs to train Markov Model (hexamer stats). Note, 10x this value are first selected for use with cd-hit to remove redundancies, and then this value of longest ORFs are selected from the non-redundant set" />
90 </when> 65 </when>
91 <when value="training_set"> 66 <when value="training_set">
92 <param format="fasta" name="train" type="data" label="Training set of transcripts" optional="true" help="FASTA file with ORFs to train Markov Mod for protein identification" /> 67 <param name="train" argument="--train" type="data" format="fasta" label="Training set of transcripts" help="FASTA file with ORFs to train Markov Mod for protein identification" />
93 </when> 68 </when>
94 </conditional> 69 </conditional>
95 </section> 70 </section>
96
97 </inputs> 71 </inputs>
98
99
100 <outputs> 72 <outputs>
101 <data name='transdecoder_pep' format='fasta' label="${tool.name} on ${on_string}: pep" from_work_dir="transcript.transdecoder.pep"/> 73 <data name="transdecoder_pep" format="fasta" label="${tool.name} on ${on_string}: pep" />
102 <data name='transdecoder_cds' format='fasta' label="${tool.name} on ${on_string}: cds" from_work_dir="transcript.transdecoder.cds"/> 74 <data name="transdecoder_cds" format="fasta" label="${tool.name} on ${on_string}: cds" />
103 <data name='transdecoder_bed' format='bed' label="${tool.name} on ${on_string}: bed" from_work_dir="transcript.transdecoder.bed"/> 75 <data name="transdecoder_bed" format="bed" label="${tool.name} on ${on_string}: bed" />
104 <data name='transdecoder_gff3' format='gff3' label="${tool.name} on ${on_string}: gff3" from_work_dir="transcript.transdecoder.gff3"/> 76 <data name="transdecoder_gff3" format="gff3" label="${tool.name} on ${on_string}: gff3" />
105 <data name='transdecoder_mRNA' format='fasta' label="${tool.name} on ${on_string}: mRNA" from_work_dir="transcript.transdecoder.mRNA"/>
106 </outputs> 77 </outputs>
107
108
109 <tests> 78 <tests>
110 <test> 79 <test>
111 <param name="input" value="test.fa"/> 80 <param name="input" value="test.fa"/>
112 <output name="transdecoder_gff3" file="raw/test.fa.transdecoder.gff3" compare="sim_size" /> 81 <output name="transdecoder_gff3" file="raw/test.fa.transdecoder.gff3" compare="sim_size" />
113 <output name="transdecoder_bed" file="raw/test.fa.transdecoder.bed" compare="sim_size" /> 82 <output name="transdecoder_bed" file="raw/test.fa.transdecoder.bed" compare="sim_size" />
114 <output name="transdecoder_cds" file="raw/test.fa.transdecoder.cds" compare="sim_size" /> 83 <output name="transdecoder_cds" file="raw/test.fa.transdecoder.cds" compare="sim_size" />
115 <output name="transdecoder_mRNA" file="raw/test.fa.transdecoder.mRNA" compare="sim_size" />
116 <output name="transdecoder_pep" file="raw/test.fa.transdecoder.pep" compare="sim_size" /> 84 <output name="transdecoder_pep" file="raw/test.fa.transdecoder.pep" compare="sim_size" />
117 </test> 85 </test>
118 <test> 86 <test>
119 <param name="input" value="test.fa"/> 87 <param name="input" value="test.fa"/>
120 <param name="training_selector" value="training_top"/> 88 <param name="training_selector" value="training_top"/>
121 <param name="top_longest" value="10"/> 89 <param name="top_longest" value="10"/>
122 <output name="transdecoder_gff3" file="top/test.fa.transdecoder.gff3" compare="sim_size" /> 90 <output name="transdecoder_gff3" file="top/test.fa.transdecoder.gff3" compare="sim_size" />
123 <output name="transdecoder_bed" file="top/test.fa.transdecoder.bed" compare="sim_size" /> 91 <output name="transdecoder_bed" file="top/test.fa.transdecoder.bed" compare="sim_size" />
124 <output name="transdecoder_cds" file="top/test.fa.transdecoder.cds" compare="sim_size" /> 92 <output name="transdecoder_cds" file="top/test.fa.transdecoder.cds" compare="sim_size" />
125 <output name="transdecoder_mRNA" file="top/test.fa.transdecoder.mRNA" compare="sim_size" />
126 <output name="transdecoder_pep" file="top/test.fa.transdecoder.pep" compare="sim_size" /> 93 <output name="transdecoder_pep" file="top/test.fa.transdecoder.pep" compare="sim_size" />
127 </test> 94 </test>
128 <test> 95 <test>
129 <param name="input" value="test.fa"/> 96 <param name="input" value="test.fa"/>
130 <param name="gen_code" value="Mitochondrial-Arthropods"/> 97 <param name="gen_code" value="Mitochondrial-Arthropods"/>
131 <output name="transdecoder_gff3" file="gencode/test.fa.transdecoder.gff3" compare="sim_size" /> 98 <output name="transdecoder_gff3" file="gencode/test.fa.transdecoder.gff3" compare="sim_size" />
132 <output name="transdecoder_bed" file="gencode/test.fa.transdecoder.bed" compare="sim_size" /> 99 <output name="transdecoder_bed" file="gencode/test.fa.transdecoder.bed" compare="sim_size" />
133 <output name="transdecoder_cds" file="gencode/test.fa.transdecoder.cds" compare="sim_size" /> 100 <output name="transdecoder_cds" file="gencode/test.fa.transdecoder.cds" compare="sim_size" />
134 <output name="transdecoder_mRNA" file="gencode/test.fa.transdecoder.mRNA" compare="sim_size" />
135 <output name="transdecoder_pep" file="gencode/test.fa.transdecoder.pep" compare="sim_size" /> 101 <output name="transdecoder_pep" file="gencode/test.fa.transdecoder.pep" compare="sim_size" />
136 </test> 102 </test>
137 <test> 103 <test>
138 <param name="input" value="test.fa"/> 104 <param name="input" value="test.fa"/>
139 <param name="stranded" value="true"/> 105 <param name="stranded" value="true"/>
140 <output name="transdecoder_gff3" file="strand/test.fa.transdecoder.gff3" compare="sim_size" /> 106 <output name="transdecoder_gff3" file="strand/test.fa.transdecoder.gff3" compare="sim_size" />
141 <output name="transdecoder_bed" file="strand/test.fa.transdecoder.bed" compare="sim_size" /> 107 <output name="transdecoder_bed" file="strand/test.fa.transdecoder.bed" compare="sim_size" />
142 <output name="transdecoder_cds" file="strand/test.fa.transdecoder.cds" compare="sim_size" /> 108 <output name="transdecoder_cds" file="strand/test.fa.transdecoder.cds" compare="sim_size" />
143 <output name="transdecoder_mRNA" file="strand/test.fa.transdecoder.mRNA" compare="sim_size" />
144 <output name="transdecoder_pep" file="strand/test.fa.transdecoder.pep" compare="sim_size" /> 109 <output name="transdecoder_pep" file="strand/test.fa.transdecoder.pep" compare="sim_size" />
145 </test> 110 </test>
146 </tests> 111 </tests>
147 <help> 112 <help>
148
149 **What it does** 113 **What it does**
150 114
151 TransDecoder identifies candidate coding regions within transcript sequences, such as those generated by de novo RNA-Seq transcript assembly using Trinity, or constructed based on RNA-Seq alignments to the genome using Tophat and Cufflinks. 115 TransDecoder identifies candidate coding regions within transcript sequences, such as those generated by de novo RNA-Seq transcript assembly using Trinity, or constructed based on RNA-Seq alignments to the genome using Tophat and Cufflinks.
152 116
153 TransDecoder identifies likely coding sequences based on the following criteria: 117 TransDecoder identifies likely coding sequences based on the following criteria:
161 - if a candidate ORF is found fully encapsulated by the coordinates of another candidate ORF, the longer one is reported. However, a single transcript can report multiple ORFs (allowing for operons, chimeras, etc). 125 - if a candidate ORF is found fully encapsulated by the coordinates of another candidate ORF, the longer one is reported. However, a single transcript can report multiple ORFs (allowing for operons, chimeras, etc).
162 126
163 - optionally, the putative peptide has a match to a Pfam domain above the noise cutoff score. 127 - optionally, the putative peptide has a match to a Pfam domain above the noise cutoff score.
164 128
165 The software is primarily maintained by Brian Haas at the Broad Institute and Alexie Papanicolaou at the Commonwealth Scientific and Industrial Research Organisation (CSIRO). It is integrated into other related software such as Trinity, PASA, EVidenceModeler, and Trinotate. 129 The software is primarily maintained by Brian Haas at the Broad Institute and Alexie Papanicolaou at the Commonwealth Scientific and Industrial Research Organisation (CSIRO). It is integrated into other related software such as Trinity, PASA, EVidenceModeler, and Trinotate.
166
167 </help> 130 </help>
168 <citations> 131 <citations>
169 <citation type="doi">10.1038/nprot.2013.084</citation> 132 <citation type="doi">10.1038/nprot.2013.084</citation>
170 </citations> 133 </citations>
171 </tool> 134 </tool>