Mercurial > repos > bgruening > trna_prediction
changeset 3:b86f3e5626f4 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/trna_prediction commit 1b3f92275bb60e606cd4fdc394fb9df95232a5aa
| author | bgruening | 
|---|---|
| date | Tue, 28 Oct 2025 09:15:42 +0000 | 
| parents | 358f58401cd6 | 
| children | |
| files | aragorn.xml aragorn_out_to_gff3.py tRNAscan.py tRNAscan.xml test-data/aragorn_tansl-table-11_introns.gff3 test-data/aragorn_tansl-table-1_tmRNA_tRNA.fasta test-data/aragorn_tansl-table-1_tmRNA_tRNA.gff3 test-data/aragorn_tansl-table-1_tmRNA_tRNA.txt test-data/tRNAscan_eukaryotic_infernal.fasta test-data/tRNAscan_eukaryotic_infernal.tabular test-data/trna_arabidopsis.fasta | 
| diffstat | 11 files changed, 281 insertions(+), 349 deletions(-) [+] | 
line wrap: on
 line diff
--- a/aragorn.xml Wed Jul 26 10:14:05 2017 -0400 +++ b/aragorn.xml Tue Oct 28 09:15:42 2025 +0000 @@ -1,7 +1,13 @@ -<tool id="aragorn_trna" name="tRNA and tmRNA" version="0.6"> +<tool id="aragorn_trna" name="tRNA and tmRNA" version="@TOOL_VERSION@" profile="24.0"> <description>prediction (Aragorn)</description> + <macros> + <token name="@TOOL_VERSION@">1.2.41</token> + </macros> + <xrefs> + <xref type="bio.tools">aragorn</xref> + </xrefs> <requirements> - <requirement type="package" version="1.2.36">aragorn</requirement> + <requirement type="package" version="@TOOL_VERSION@">aragorn</requirement> <requirement type="package" version="2.7">python</requirement> </requirements> <command><![CDATA[ @@ -18,7 +24,7 @@ $secondary_structure $introns #end if - + #if $gff3_output: aragorn '$input' @@ -60,33 +66,27 @@ <option value="-c">Assume that each sequence has a circular topology</option> <option value="-l">Assume that each sequence has a linear topology</option> </param> - <param name='tmRNA' type='boolean' label='Search for tmRNA genes' - truevalue='-m' falsevalue='' checked="true" help='(-m)' /> - <param name='tRNA' type='boolean' label='Search for tRNA genes' - truevalue='-t' falsevalue='' checked="true" help='(-t)' /> - <param name='mtRNA' type='boolean' label='Search for Metazoan mitochondrial tRNA genes' truevalue='-mt' falsevalue='' checked="false" - help='-i switch will be ignored. Composite Metazoan mitochondrial genetic code used. (-mt)' /> - <param name='mam_mtRNA' type='boolean' label='Search for Mammalian mitochondrial tRNA genes' - truevalue='-mtmam' falsevalue='' checked="false" help='-i switch will be ignored. Mammalian mitochondrial genetic code used. (-mtmam)' /> - <param name='introns' type='boolean' label='Search for tRNA genes with introns in anticodon loop' - truevalue='-i' falsevalue='' checked="false" help='With a maximum length of 3000 bases. (-i).' 
/> - <param name='secondary_structure' type='boolean' label='Print out secondary structure' - truevalue='-fasta' falsevalue='-fon' checked="false" help='(-fasta,-fon)' /> - <param name="gff3_output" type='boolean' label='Convert output to GFF3' truevalue='True' falsevalue='' checked="false" help='' /> - <param name="gff3_model" type='boolean' label='Full gene model for GFF3 data' truevalue='--full' checked='false' help='' /> + <param name="tmRNA" type="boolean" label="Search for tmRNA genes" truevalue="-m" falsevalue="" checked="true" help="(-m)" /> + <param name="tRNA" type="boolean" label="Search for tRNA genes" truevalue="-t" falsevalue="" checked="true" help="(-t)" /> + <param name="mtRNA" type="boolean" label="Search for Metazoan mitochondrial tRNA genes" truevalue="-mt" falsevalue="" checked="false" help="-i switch will be ignored. Composite Metazoan mitochondrial genetic code used. (-mt)" /> + <param name="mam_mtRNA" type="boolean" label="Search for Mammalian mitochondrial tRNA genes" truevalue="-mtmam" falsevalue="" checked="false" help="-i switch will be ignored. Mammalian mitochondrial genetic code used. (-mtmam)" /> + <param name="introns" type="boolean" label="Search for tRNA genes with introns in anticodon loop" truevalue="-i" falsevalue="" checked="false" help="With a maximum length of 3000 bases. (-i)." 
/> + <param name="secondary_structure" type="boolean" label="Print out secondary structure" truevalue="-fasta" falsevalue="-fon" checked="false" help="(-fasta,-fon)" /> + <param name="gff3_output" type="boolean" label="Convert output to GFF3" truevalue="True" falsevalue="" checked="false" help="" /> + <param name="gff3_model" type="boolean" label="Full gene model for GFF3 data" truevalue="--full" checked="false" help="" /> </inputs> <outputs> - <data name="output" format="fasta"> + <data name="output" format="fasta" label="${tool.name} on ${on_string}: Secondary structure (FASTA)"> <change_format> - <when input="secondary_structure" value="-fasta" format="txt"/> - </change_format> + <when input="secondary_structure" value="-fasta" format="txt"/> + </change_format> </data> - <data format="gff3" name="gff3_output_file" > + <data format="gff3" name="gff3_output_file" label="${tool.name} on ${on_string}: GFF3"> <filter>gff3_output</filter> </data> </outputs> <tests> - <test> + <test expect_num_outputs="1"> <param name="input" value="trna_arabidopsis.fasta" ftype="fasta" /> <param name="genbank_gencode" value="1" /> <param name="topology" value="-c" /> @@ -95,12 +95,36 @@ <param name="mtRNA" value="False" /> <param name="mam_mtRNA" value="False" /> <param name="introns" value="False" /> - <param name="secondary_structure" value="-fon" /> - <param name="gff3_output" value="false" /> - <output name="output" file="aragorn_tansl-table-1_tmRNA_tRNA.fasta" ftype="fasta" /> + <param name="secondary_structure" value="True" /> + <param name="gff3_output" value="False" /> + <output name="output" ftype="txt"> + <assert_contents> + <has_line_matching expression="\s{15}a"/> + <has_line_matching expression="\s{13}g-c"/> + <has_line_matching expression="\s{13}g-c"/> + <has_line_matching expression="\s{13}g\+t"/> + <has_line_matching expression="\s{13}g-c"/> + <has_line_matching expression="\s{13}a-t"/> + <has_line_matching expression="\s{13}t-a"/> + <has_line_matching 
expression="\s{13}g-c tt"/> + <has_line_matching expression="\s{12}t gtccc a"/> + <has_line_matching expression="\s{5}ta a !!!!! g"/> + <has_line_matching expression="\s{4}a ctcg caggg c"/> + <has_line_matching expression="\s{3}t !!!! a tt"/> + <has_line_matching expression="\s{3}g gagc c"/> + <has_line_matching expression="\s{4}gta g g"/> + <has_line_matching expression="\s{12}c-gag"/> + <has_line_matching expression="\s{12}t-a"/> + <has_line_matching expression="\s{12}c-g"/> + <has_line_matching expression="\s{12}g-c"/> + <has_line_matching expression="\s{12}c-g"/> + <has_line_matching expression="\s{11}t t"/> + <has_line_matching expression="\s{11}t a"/> + <has_line_matching expression="\s{12}tgc"/> + </assert_contents> + </output> </test> - - <test> + <test expect_num_outputs="1"> <param name="input" value="trna_arabidopsis.fasta" ftype="fasta" /> <param name="genbank_gencode" value="1" /> <param name="topology" value="-c" /> @@ -109,11 +133,17 @@ <param name="mtRNA" value="False" /> <param name="mam_mtRNA" value="False" /> <param name="introns" value="False" /> - <param name="secondary_structure" value="-fasta" /> - <param name="gff3_output" value="false" /> - <output name="output" file="aragorn_tansl-table-1_tmRNA_tRNA.txt" ftype="txt" lines_diff="2" /> + <param name="secondary_structure" value="False" /> + <param name="gff3_output" value="False" /> + <output name="output" ftype="fasta"> + <assert_contents> + <has_line line=">1-1 tRNA-Ala(tgc) [381,453]"/> + <has_line line="ggggatgtagctcatatggtagagcgctcgctttgcatgcgagaggcaca"/> + <has_line line="gggttcgattccctgcatctcca"/> + </assert_contents> + </output> </test> - <test> + <test expect_num_outputs="2"> <param name="input" value="trna_arabidopsis.fasta" ftype="fasta" /> <param name="genbank_gencode" value="1" /> <param name="topology" value="-c" /> @@ -123,9 +153,13 @@ <param name="mam_mtRNA" value="False" /> <param name="introns" value="False" /> <param name="gff3_output" value="True" /> - <output 
name="gff3_output_file" file="aragorn_tansl-table-1_tmRNA_tRNA.gff3" ftype="gff3" /> + <output name="gff3_output_file" ftype="gff3"> + <assert_contents> + <has_line_matching expression="gi|240255695:23036500-23037000\taragorn\ttRNA\t381\t453\t.\t+\t.\tID=tRNAgi|240255695:23036500-23037000.1;Name=trnA-UGC;product=tRNA-Ala"/> + </assert_contents> + </output> </test> - <test> + <test expect_num_outputs="2"> <param name="input" value="genome_with_introns.fa" ftype="fasta" /> <param name="genbank_gencode" value="11" /> <param name="topology" value="-c" /> @@ -136,27 +170,37 @@ <param name="introns" value="True" /> <param name="gff3_output" value="True" /> <param name="gff3_model" value="True" /> - <output name="gff3_output_file" file="aragorn_tansl-table-11_introns.gff3" ftype="gff3" /> + <output name="gff3_output_file" ftype="gff3"> + <assert_contents> + <has_n_lines n="32" /> + <has_line_matching expression="genome_with_introns\taragorn\tgene\t1533\t4118\t.\t-\t.\tID=genegenome_with_introns.1;Name=trnK-UUU;product=tRNA-Lys"/> + <has_line_matching expression="genome_with_introns\taragorn\ttRNA\t1533\t4118\t.\t-\t.\tID=tRNAgenome_with_introns.1;Parent=genegenome_with_introns.1;Name=trnK-UUU;product=tRNA-Lys"/> + <has_line_matching expression="genome_with_introns\taragorn\ttRNA\t12597\t12670\t.\t\+\t.\tID=tRNAgenome_with_introns.5;Parent=genegenome_with_introns.5;Name=trnW-CCA;product=tRNA-Trp"/> + <has_line_matching expression="genome_with_introns\taragorn\texon\t12597\t12670\t.\t\+\t.\tParent=tRNAgenome_with_introns.5"/> + <has_line_matching expression="genome_with_introns\taragorn\ttRNA\t29759\t29833\t.\t\+\t.\tID=tRNAgenome_with_introns.9;Parent=genegenome_with_introns.9;Name=trnH-GUG;product=tRNA-His"/> + <has_line_matching expression="genome_with_introns\taragorn\texon\t29759\t29833\t.\t\+\t.\tParent=tRNAgenome_with_introns.9"/> + </assert_contents> + </output> </test> </tests> <help> -<![CDATA[ + <![CDATA[ **What it does** Aragorn_ predicts tRNA (and tmRNA) in 
nucleotide sequences. -.. _Aragorn: http://mbio-serv2.mbioekol.lu.se/ARAGORN/ +.. _Aragorn: https://www.trna.se/ **Input** As input a genome sequence FASTA file is needed. Select the right genetic code and the topology for your organism and choose what you want to have analyzed. -By default, ARAGORN assumes that each sequence has a circular topology (search wraps around ends), that both strands should be searched, that the progress of the search is not reported, both tRNA and tmRNA genes are detected, and tRNA genes containing C‐loop introns are not detected. +By default, ARAGORN assumes that each sequence has a circular topology (search wraps around ends), that both strands should be searched, that the progress of the search is not reported, both tRNA and tmRNA genes are detected, and tRNA genes containing C‐loop introns are not detected. **Output** -The output of Aragorn reports the proposed tRNA secondary structure and, for tmRNA genes, the secondary structure of the tRNA domain, the tmRNA gene sequence, the tag peptide and a list of organisms with matching tmRNA peptide tags. +The output of Aragorn reports the proposed tRNA secondary structure and, for tmRNA genes, the secondary structure of the tRNA domain, the tmRNA gene sequence, the tag peptide and a list of organisms with matching tmRNA peptide tags. Optionally, your output can be converted to GFF3.
--- a/aragorn_out_to_gff3.py Wed Jul 26 10:14:05 2017 -0400 +++ b/aragorn_out_to_gff3.py Tue Oct 28 09:15:42 2025 +0000 @@ -2,138 +2,168 @@ import sys full_gene_model = False -if '--full' in sys.argv: +if "--full" in sys.argv: full_gene_model = True genome_id = None stdin_data = [] -KEY_ORDER = ('parent', 'source', 'type', 'start', 'end', 'score', 'strand', - '8', 'quals') +KEY_ORDER = ( + "parent", + "source", + "type", + "start", + "end", + "score", + "strand", + "8", + "quals", +) # Table of amino acids aa_table = { - 'Ala' : 'A', - 'Arg' : 'R', - 'Asn' : 'N', - 'Asp' : 'D', - 'Cys' : 'C', - 'Gln' : 'Q', - 'Glu' : 'E', - 'Gly' : 'G', - 'His' : 'H', - 'Ile' : 'I', - 'Leu' : 'L', - 'Lys' : 'K', - 'Met' : 'M', - 'Phe' : 'F', - 'Pro' : 'P', - 'Ser' : 'S', - 'Thr' : 'T', - 'Trp' : 'W', - 'Tyr' : 'Y', - 'Val' : 'V', - 'Pyl' : 'O', - 'seC' : 'U', - '???' : 'X' } + "Ala": "A", + "Arg": "R", + "Asn": "N", + "Asp": "D", + "Cys": "C", + "Gln": "Q", + "Glu": "E", + "Gly": "G", + "His": "H", + "Ile": "I", + "Leu": "L", + "Lys": "K", + "Met": "M", + "Phe": "F", + "Pro": "P", + "Ser": "S", + "Thr": "T", + "Trp": "W", + "Tyr": "Y", + "Val": "V", + "Pyl": "O", + "seC": "U", + "???": "X", +} + def output_line(gff3): - print '\t'.join(str(gff3[x]) for x in KEY_ORDER) + print("\t".join(str(gff3[x]) for x in KEY_ORDER)) -print '##gff-version 3' + +print("##gff-version 3") for line in sys.stdin: - if line.startswith('>'): + if line.startswith(">"): genome_id = line[1:].strip() - if ' ' in genome_id: - genome_id = genome_id[0:genome_id.index(' ')] + if " " in genome_id: + genome_id = genome_id[0: genome_id.index(" ")] else: data = line.split() if len(data) == 5: # Parse data - strand = '-' if data[2].startswith('c') else '+' - start, end = data[2][data[2].index('[') + 1:-1].split(',') + strand = "-" if data[2].startswith("c") else "+" + start, end = data[2][data[2].index("[") + 1: -1].split(",") gff3 = { - 'parent': genome_id, - 'source': 'aragorn', - 'start': int(start), - 'end': 
int(end), - 'strand': strand, - 'score': '.', - '8': '.', + "parent": genome_id, + "source": "aragorn", + "start": int(start), + "end": int(end), + "strand": strand, + "score": ".", + "8": ".", } aa_long = data[1][5:] aa_short = aa_table[aa_long] - anticodon = data[4][1:data[4].index(")")].upper().replace("T", "U") - name = 'trn{}-{}'.format(aa_short, anticodon) + anticodon = data[4][1: data[4].index(")")].upper().replace("T", "U") + name = "trn{}-{}".format(aa_short, anticodon) if not full_gene_model: - gff3.update({ - 'type': 'tRNA', - 'quals': 'ID=tRNA{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name), - }) + gff3.update( + { + "type": "tRNA", + "quals": "ID=tRNA{0}.{1};Name={name};product={2}".format( + genome_id, *data, name=name + ), + } + ) output_line(gff3) else: - gff3.update({ - 'type': 'gene', - 'quals': 'ID=gene{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name), - }) + gff3.update( + { + "type": "gene", + "quals": "ID=gene{0}.{1};Name={name};product={2}".format( + genome_id, *data, name=name + ), + } + ) output_line(gff3) - gff3.update({ - 'type': 'tRNA', - 'quals': 'ID=tRNA{0}.{1};Parent=gene{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name), - }) + gff3.update( + { + "type": "tRNA", + "quals": "ID=tRNA{0}.{1};Parent=gene{0}.{1};Name={name};product={2}".format( + genome_id, *data, name=name + ), + } + ) output_line(gff3) # If no introns - if ')i(' not in data[4]: - gff3['type'] = 'exon' - gff3['quals'] = 'Parent=tRNA{0}.{1}'.format(genome_id, *data) + if ")i(" not in data[4]: + gff3["type"] = "exon" + gff3["quals"] = "Parent=tRNA{0}.{1}".format(genome_id, *data) output_line(gff3) else: - intron_location = data[4][data[4].rindex('(') + 1:-1].split(',') + intron_location = data[4][data[4].rindex("(") + 1: -1].split(",") intron_start, intron_length = map(int, intron_location) - if strand == '+': - original_end = gff3['end'] + if strand == "+": + original_end = gff3["end"] else: - original_end = 
gff3['start'] + original_end = gff3["start"] # EXON - gff3.update({ - 'type': 'exon', - 'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data), - }) - if strand == '+': - gff3['end'] = gff3['start'] + intron_start - 2 + gff3.update( + { + "type": "exon", + "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data), + } + ) + if strand == "+": + gff3["end"] = gff3["start"] + intron_start - 2 else: - gff3['start'] = gff3['end'] - intron_start + 2 + gff3["start"] = gff3["end"] - intron_start + 2 output_line(gff3) # INTRON - gff3.update({ - 'type': 'intron', - 'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data), - }) - if strand == '+': - gff3['start'] = gff3['end'] + 1 - gff3['end'] = gff3['start'] + intron_length + 2 + gff3.update( + { + "type": "intron", + "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data), + } + ) + if strand == "+": + gff3["start"] = gff3["end"] + 1 + gff3["end"] = gff3["start"] + intron_length + 2 else: - gff3['end'] = gff3['start'] - 1 - gff3['start'] = gff3['end'] - intron_length + 1 + gff3["end"] = gff3["start"] - 1 + gff3["start"] = gff3["end"] - intron_length + 1 output_line(gff3) # EXON - gff3.update({ - 'type': 'exon', - 'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data), - }) - if strand == '+': - gff3['start'] = gff3['end'] + 1 - gff3['end'] = original_end + gff3.update( + { + "type": "exon", + "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data), + } + ) + if strand == "+": + gff3["start"] = gff3["end"] + 1 + gff3["end"] = original_end else: - gff3['end'] = gff3['start'] - 1 - gff3['start'] = original_end + gff3["end"] = gff3["start"] - 1 + gff3["start"] = original_end output_line(gff3)
--- a/tRNAscan.py Wed Jul 26 10:14:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ -#!/usr/bin/env python - -""" - Converts tRNAScan output back to fasta-sequences. -""" -import sys -from Bio import SeqIO -from Bio.SeqRecord import SeqRecord -import subprocess - -def main(args): - """ - Call from galaxy: - tRNAscan.py $organism $mode $showPrimSecondOpt $disablePseudo $showCodons $tabular_output $inputfile $fasta_output - - tRNAscan-SE $organism $mode $showPrimSecondOpt $disablePseudo $showCodons -Q -y -q -b -o $tabular_output $inputfile; - """ - cmd = """tRNAscan-SE -Q -y -q -b %s""" % ' '.join( args[:-1] ) - child = subprocess.Popen(cmd.split(), - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = child.communicate() - return_code = child.returncode - if return_code: - sys.stdout.write(stdout) - sys.stderr.write(stderr) - sys.stderr.write("Return error code %i from command:\n" % return_code) - sys.stderr.write("%s\n" % cmd) - else: - sys.stdout.write(stdout) - sys.stdout.write(stderr) - - outfile = args[-1] - sequence_file = args[-2] - tRNAScan_file = args[-3] - - with open( sequence_file ) as sequences: - sequence_recs = SeqIO.to_dict(SeqIO.parse(sequences, "fasta")) - - tRNAs = [] - with open(tRNAScan_file) as tRNA_handle: - for line in tRNA_handle: - line = line.strip() - if not line or line.startswith('#'): - continue - cols = line.split() - iid = cols[0].strip() - start = int(cols[2]) - end = int(cols[3]) - aa = cols[4] - codon = cols[5] - rec = sequence_recs[ iid ] - if start > end: - new_rec = rec[end:start] - new_rec.seq = new_rec.seq.reverse_complement() - new_rec.description = "%s %s %s %s %s" % (rec.description, aa, codon, start, end) - new_rec.id = rec.id - new_rec.name = rec.name - tRNAs.append( new_rec ) - else: - new_rec = rec[start:end] - new_rec.id = rec.id - new_rec.name = rec.name - new_rec.description = "%s %s %s %s %s" % (rec.description, aa, codon, start, end) - tRNAs.append( new_rec ) - - 
SeqIO.write(tRNAs, open(outfile, 'w+'), "fasta") - - -if __name__ == '__main__': - main(sys.argv[1:])
--- a/tRNAscan.xml Wed Jul 26 10:14:05 2017 -0400 +++ b/tRNAscan.xml Tue Oct 28 09:15:42 2025 +0000 @@ -1,74 +1,112 @@ -<tool id="trnascan" name="tRNA prediction" version="0.4"> - <description>(tRNAscan)</description> +<tool id="trnascan" name="tRNA prediction" version="@TOOL_VERSION@" profile="24.0"> + <description>using tRNAscan, EufindtRNA & tRNA covariance models</description> + <macros> + <token name="@TOOL_VERSION@">2.0.12</token> + </macros> + <xrefs> + <xref type="bio.tools">trnascan-se</xref> + </xrefs> <requirements> - <requirement type="package" version="1.3.1">trnascan-se</requirement> - <requirement type="package" version="1.0.2">infernal</requirement> - <requirement type="package" version="1.70">biopython</requirement> - <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="@TOOL_VERSION@">trnascan-se</requirement> </requirements> <command> - <![CDATA[ -python '$__tool_directory__/tRNAscan.py' -#if $organism - $organism -#end if + <![CDATA[ +tRNAscan-SE --forceow --hitsrc --quiet --brief +$organism #if $mode $mode #end if -#if $showPrimSecondOpt - $showPrimSecondOpt +$breakdown +$nopseudo +$codons +#if $missed +--missed '$fasta_missed' #end if -#if $disablePseudo - $disablePseudo -#end if -#if $showCodons - $showCodons +#if $stats +--stats '$stats_txt' #end if -o '$tabular_output' +--fasta '$fasta_output' +--thread \${GALAXY_SLOTS:-4} '$inputfile' -'$fasta_output' ]]> </command> <inputs> <param name="inputfile" type="data" format="fasta" label="Genome Sequence" help="Dataset missing? 
See TIP below"/> - <param name="organism" type="select" label="Select Organism"> - <option value="" selected="true">Eukaryotic</option> - <option value="-G">general tRNA model</option> + <param name="organism" type="select" label="Select Organism/Domain"> + <option value="-E" selected="true">Eukaryotic</option> <option value="-B">Bacterial</option> <option value="-A">Archaeal</option> - <option value="-O">Mitochondrial/Chloroplast</option> + <option value="-G">General tRNA model (incl. Eukaryotic, Bacterial and Archaeal cytosolic tRNAs)</option> + <option value="-M mammal">Mammalian Mitochondrial tRNAs</option> + <option value="-M vert">Vertebrate Mitochondrial tRNAs</option> + <option value="-O">Other organellar tRNAs</option> </param> <param name="mode" type="select" label="Select Mode"> <option value="" selected="true">Default</option> - <option value="-C">Covariance model analysis only (slow)</option> - <option value="-T">tRNAscan only</option> - <option value="-E">EufindtRNA only</option> - <option value="--infernal">Infernal cm analysis (max sensitivity, very slow)</option> - <option value="--newscan">Infernal and new cm models</option> + <option value="-I">Search using Infernal</option> + <option value="--max">Search using Infernal without hmm filter (very slow)</option> + <option value="-L">Search using the legacy method (tRNAscan, EufindtRNA, and COVE)</option> + <option value="--cove">Covariance model analysis only (legacy, extremely slow)</option> + <option value="--tscan">tRNAscan only</option> + <option value="--eufind">EufindtRNA only</option> </param> - <param name="disablePseudo" type="boolean" label="Disable pseudogene checking" truevalue="-D" falsevalue="" /> - <param name="showPrimSecondOpt" type="boolean" label="Show primary and secondary structure components to Cove scores" truevalue="-H" falsevalue="" /> - <param name="showCodons" type="boolean" label="Show codons instead of tRNA anticodons" truevalue="-N" falsevalue="" /> + <param 
argument="--nopseudo" type="boolean" label="Disable pseudogene checking" truevalue="--nopseudo" falsevalue="" checked="False"/> + <param argument="--breakdown" type="boolean" label="Show primary and secondary structure components to Cove scores" truevalue="--breakdown" falsevalue="" checked="False"/> + <param argument="--codons" type="boolean" label="Show codons instead of tRNA anticodons" truevalue="--codons" falsevalue="" /> + <param argument="--missed" type="boolean" label="Save all seqs that do NOT have at least one tRNA prediction in them" checked="False"/> + <param argument="--stats" type="boolean" label="Save statistics (speed, # tRNAs found in each part of search, etc)" checked="False"/> </inputs> <outputs> <data format="tabular" name="tabular_output" label="${tool.name} on ${on_string}: tabular" /> - <data format="fasta" name="fasta_output" label="${tool.name} on ${on_string}: fasta" /> + <data format="fasta" name="fasta_output" label="${tool.name} on ${on_string}: predicted fasta" /> + <data format="fasta" name="fasta_missed" label="${tool.name} on ${on_string}: missed fasta"> + <filter>missed is True</filter> + </data> + <data format="txt" name="stats_txt" label="${tool.name} on ${on_string}: stats"> + <filter>stats is True</filter> + </data> </outputs> <tests> - <test> + <test expect_num_outputs="4"> <param name="inputfile" value="trna_arabidopsis.fasta" ftype="fasta" /> - <param name="organism" value="" /> - <param name="mode" value="--infernal" /> <!-- Infernal test not working due to cmsearch error--> - <param name="disablePseudo" value="" /> - <param name="showPrimSecondOpt" value="" /> - <param name="showCodons" value="" /> - <output name="fasta_output" file="tRNAscan_eukaryotic_infernal.fasta" ftype="fasta" /> - <output name="tabular_output" file="tRNAscan_eukaryotic_infernal.tabular" ftype="tabular" /> + <param name="organism" value="-E" /> + <param name="mode" value="-I" /> + <param name="breakdown" value="True" /> + <param name="codons" 
value="True" /> + <param name="missed" value="True" /> + <param name="stats" value="True" /> + <output name="tabular_output" ftype="tabular"> + <assert_contents> + <has_line_matching expression="gi|240255695:23036500-23037000 \t1\t381\t453\tAla\tGCA\t0\t0\t66.6\tInf\t"/> + </assert_contents> + </output> + <output name="fasta_output" ftype="fasta"> + <assert_contents> + <has_line line=">gi|240255695:23036500-23037000.trna1 gi|240255695:23036500-23037000:381-453 (+) Ala (TGC) 73 bp Sc: 66.6"/> + <has_line line="GGGGATGTAGCTCATATGGTAGAGCGCTCGCTTTGCATGCGAGAGGCACAGGGTTCGATT"/> + </assert_contents> + </output> + <output name="fasta_missed" ftype="fasta"> + <assert_contents> + <has_text text=">dummy_seq"/> + <has_text text="ACTGACTGATCGTAACTAGTAGCGACTGATCGATCGTACTGCATGGCATGTGACTGTCCG"/> + </assert_contents> + </output> + <output name="stats_txt" ftype="txt"> + <assert_contents> + <has_text text="Bases read: 571"/> + <has_line line="Bases in tRNAs: 73"/> + <has_line line="tRNAs predicted: 1"/> + <has_line line="Infernal-confirmed tRNAs: 1"/> + <has_line_matching expression="Ala : 1\t AGC: GGC: CGC: TGC: "/> + </assert_contents> + </output> </test> </tests> <help> -<![CDATA[ + <![CDATA[ **What it does** @@ -79,7 +117,7 @@ eubacterial sequences, but it may be applied more broadly with a slight reduction in sensitivity. -.. _tRNAscan-SE: http://lowelab.ucsc.edu/tRNAscan-SE/ +.. _tRNAscan-SE: https://github.com/UCSC-LoweLab/tRNAscan-SE **Input** @@ -232,6 +270,6 @@ ]]> </help> <citations> - <citation type="doi">10.1093/nar/25.5.0955</citation> + <citation type="doi">10.1093/nar/gkab688</citation> </citations> </tool>
--- a/test-data/aragorn_tansl-table-11_introns.gff3 Wed Jul 26 10:14:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -##gff-version 3 -genome_with_introns aragorn gene 1533 4118 . - . ID=genegenome_with_introns.1;Name=trnK-UUU;product=tRNA-Lys -genome_with_introns aragorn tRNA 1533 4118 . - . ID=tRNAgenome_with_introns.1;Parent=genegenome_with_introns.1;Name=trnK-UUU;product=tRNA-Lys -genome_with_introns aragorn exon 4081 4118 . - . Parent=tRNAgenome_with_introns.1 -genome_with_introns aragorn intron 1569 4080 . - . Parent=tRNAgenome_with_introns.1 -genome_with_introns aragorn exon 1533 1568 . - . Parent=tRNAgenome_with_introns.1 -genome_with_introns aragorn gene 6453 6524 . - . ID=genegenome_with_introns.2;Name=trnQ-UUG;product=tRNA-Gln -genome_with_introns aragorn tRNA 6453 6524 . - . ID=tRNAgenome_with_introns.2;Parent=genegenome_with_introns.2;Name=trnQ-UUG;product=tRNA-Gln -genome_with_introns aragorn exon 6453 6524 . - . Parent=tRNAgenome_with_introns.2 -genome_with_introns aragorn gene 7730 7819 . - . ID=genegenome_with_introns.3;Name=trnS-GCU;product=tRNA-Ser -genome_with_introns aragorn tRNA 7730 7819 . - . ID=tRNAgenome_with_introns.3;Parent=genegenome_with_introns.3;Name=trnS-GCU;product=tRNA-Ser -genome_with_introns aragorn exon 7730 7819 . - . Parent=tRNAgenome_with_introns.3 -genome_with_introns aragorn gene 12356 12431 . + . ID=genegenome_with_introns.4;Name=trnP-UGG;product=tRNA-Pro -genome_with_introns aragorn tRNA 12356 12431 . + . ID=tRNAgenome_with_introns.4;Parent=genegenome_with_introns.4;Name=trnP-UGG;product=tRNA-Pro -genome_with_introns aragorn exon 12356 12431 . + . Parent=tRNAgenome_with_introns.4 -genome_with_introns aragorn gene 12597 12670 . + . ID=genegenome_with_introns.5;Name=trnW-CCA;product=tRNA-Trp -genome_with_introns aragorn tRNA 12597 12670 . + . ID=tRNAgenome_with_introns.5;Parent=genegenome_with_introns.5;Name=trnW-CCA;product=tRNA-Trp -genome_with_introns aragorn exon 12597 12670 . + . 
Parent=tRNAgenome_with_introns.5 -genome_with_introns aragorn gene 22050 22123 . - . ID=genegenome_with_introns.6;Name=trnR-CCG;product=tRNA-Arg -genome_with_introns aragorn tRNA 22050 22123 . - . ID=tRNAgenome_with_introns.6;Parent=genegenome_with_introns.6;Name=trnR-CCG;product=tRNA-Arg -genome_with_introns aragorn exon 22050 22123 . - . Parent=tRNAgenome_with_introns.6 -genome_with_introns aragorn gene 26588 26660 . - . ID=genegenome_with_introns.7;Name=trnM-CAU;product=tRNA-Met -genome_with_introns aragorn tRNA 26588 26660 . - . ID=tRNAgenome_with_introns.7;Parent=genegenome_with_introns.7;Name=trnM-CAU;product=tRNA-Met -genome_with_introns aragorn exon 26588 26660 . - . Parent=tRNAgenome_with_introns.7 -genome_with_introns aragorn gene 26850 27468 . + . ID=genegenome_with_introns.8;Name=trnY-AUA;product=tRNA-Tyr -genome_with_introns aragorn tRNA 26850 27468 . + . ID=tRNAgenome_with_introns.8;Parent=genegenome_with_introns.8;Name=trnY-AUA;product=tRNA-Tyr -genome_with_introns aragorn exon 26850 26884 . + . Parent=tRNAgenome_with_introns.8 -genome_with_introns aragorn intron 26885 27410 . + . Parent=tRNAgenome_with_introns.8 -genome_with_introns aragorn exon 27411 27468 . + . Parent=tRNAgenome_with_introns.8 -genome_with_introns aragorn gene 29759 29833 . + . ID=genegenome_with_introns.9;Name=trnH-GUG;product=tRNA-His -genome_with_introns aragorn tRNA 29759 29833 . + . ID=tRNAgenome_with_introns.9;Parent=genegenome_with_introns.9;Name=trnH-GUG;product=tRNA-His -genome_with_introns aragorn exon 29759 29833 . + . Parent=tRNAgenome_with_introns.9
--- a/test-data/aragorn_tansl-table-1_tmRNA_tRNA.fasta Wed Jul 26 10:14:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ ->1-1 tRNA-Ala(tgc) [381,453] -ggggatgtagctcatatggtagagcgctcgctttgcatgcgagaggcaca -gggttcgattccctgcatctcca
--- a/test-data/aragorn_tansl-table-1_tmRNA_tRNA.gff3 Wed Jul 26 10:14:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -##gff-version 3 -gi|240255695:23036500-23037000 aragorn tRNA 381 453 . + . ID=tRNAgi|240255695:23036500-23037000.1;Name=trnA-UGC;product=tRNA-Ala
--- a/test-data/aragorn_tansl-table-1_tmRNA_tRNA.txt Wed Jul 26 10:14:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ ------------------------------- -ARAGORN v1.2.36 Dean Laslett ------------------------------- - -Please reference the following paper if you use this -program as part of any published research. - -Laslett, D. and Canback, B. (2004) ARAGORN, a -program for the detection of transfer RNA and -transfer-messenger RNA genes in nucleotide sequences. -Nucleic Acids Research, 32;11-16. - - -Searching for tRNA genes with no introns -Searching for tmRNA genes -Assuming circular topology, search wraps around ends -Searching both strands -Using standard genetic code - - -gi|240255695:23036500-23037000 Arabidopsis thaliana chromosome 3, complete sequence -501 nucleotides in sequence -Mean G+C content = 43.1% - -1. - - - - a - g-c - g-c - g+t - g-c - a-t - t-a - g-c tt - t gtccc a - ta a !!!!! g - a ctcg caggg c - t !!!! a tt - g gagc c - gta g g - c-gag - t-a - c-g - g-c - c-g - t t - t a - tgc - - - - tRNA-Ala(tgc) - 73 bases, %GC = 56.2 - Sequence [381,453] - - - ->tRNA-Ala(tgc) [381,453] -ggggatgtagctcatatggtagagcgctcgctttgcatgcgagaggcaca -gggttcgattccctgcatctcca - - - - -Number of tmRNA genes = 0 - - -Configuration: aragorn /tmp/tmpx1qAPk/files/000/dataset_3.dat -gc1 -m -t -c -o /tmp/tmpx1qAPk/files/000/dataset_4.dat -fasta
--- a/test-data/tRNAscan_eukaryotic_infernal.fasta Wed Jul 26 10:14:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ ->gi|240255695:23036500-23037000 Arabidopsis thaliana chromosome 3, complete sequence Ala TGC 381 453 -GGGATGTAGCTCATATGGTAGAGCGCTCGCTTTGCATGCGAGAGGCACAGGGTTCGATTC -CCTGCATCTCCA
--- a/test-data/tRNAscan_eukaryotic_infernal.tabular Wed Jul 26 10:14:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -gi|240255695:23036500-23037000 1 381 453 Ala TGC 0 0 67.36
--- a/test-data/trna_arabidopsis.fasta Wed Jul 26 10:14:05 2017 -0400 +++ b/test-data/trna_arabidopsis.fasta Tue Oct 28 09:15:42 2025 +0000 @@ -7,4 +7,5 @@ ATTTAGTTAAAAAAGTGTTGAGAATCATTCGGGGATGTAGCTCATATGGTAGAGCGCTCGCTTTGCATGC GAGAGGCACAGGGTTCGATTCCCTGCATCTCCATTTTTATTTTCTTTTTTTTATAACTTTTGGTGAGCTT AATGGCCCAAT - +>dummy_seq +ACTGACTGATCGTAACTAGTAGCGACTGATCGATCGTACTGCATGGCATGTGACTGTCCGTGACTGTACG
