Mercurial > repos > iuc > exonerate
changeset 3:a03dead1bede draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/exonerate commit a141c63903d1a598569985e400125d4e7de42801"
author | iuc |
---|---|
date | Sun, 01 Mar 2020 04:48:34 -0500 |
parents | b03ae2ba8688 |
children | |
files | exonerate.xml exonerategff_to_gff3.py macros.xml test-data/coding2coding.gff test-data/coding2coding.gff3 test-data/est2genome.gff test-data/est2genome.gff3 test-data/est2genome_introns.gff test-data/est2genome_introns.gff3 test-data/out_query.gff test-data/out_query.gff3 test-data/out_target.gff test-data/out_target.gff3 test-data/protein2genome.gff test-data/protein2genome.gff3 |
diffstat | 15 files changed, 256 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/exonerate.xml Fri Jun 28 11:21:08 2019 -0400 +++ b/exonerate.xml Sun Mar 01 04:48:34 2020 -0500 @@ -1,4 +1,4 @@ -<tool id="exonerate" name="Exonerate" profile="16.04" version="@VERSION@+galaxy1"> +<tool id="exonerate" name="Exonerate" profile="16.04" version="@VERSION@+galaxy2"> <description>pairwise sequence comparison</description> <macros> <import>macros.xml</import> @@ -32,6 +32,16 @@ --querytype dna --targettype dna #end if + #if $minintron + --minintron ${minintron} + #end if + + #if $maxintron + --maxintron ${maxintron} + #end if + + --cores \${GALAXY_SLOTS:-1} + #if str($outformat) == "alignment" --showalignment yes --showvulgar no > '${output_ali}' #elif str($outformat) == "targetgff" @@ -40,7 +50,10 @@ --showalignment no --showvulgar no --showtargetgff no --showquerygff yes > '${output_gff}' #end if - --cores \${GALAXY_SLOTS:-1} + ## Produce a more standard GFF3 + #if str($outformat) in ["targetgff", "querygff"] + && python '$__tool_directory__/exonerategff_to_gff3.py' '${output_gff}' > '${output_gff3}' + #end if ]]></command> <inputs> @@ -83,9 +96,14 @@ <param name='score' type='integer' min="0" max="10000" value="100" label="Score threshold for gapped alignment"/> <param name='percent' type='float' min="0" max="100" value="0.0" label="Report alignment over a percentage of the maximum score attainable by each query"/> <param name='bestn' type='integer' min="0" max="10000" value="0" label="Report best N results per query (0 to report all)"/> + <param argument='--minintron' type='integer' optional="true" label="Minimum intron length limit" help="This is not a hard limit - it only affects size of introns which are sought during heuristic alignment."/> + <param argument='--maxintron' type='integer' optional="true" label="Maximum intron length limit" help="This is not a hard limit - it only affects size of introns which are sought during heuristic alignment."/> </inputs> <outputs> - <data name="output_gff" format="gff" label="${tool.name} on 
$on_string"> + <data name="output_gff" format="gff" label="${tool.name} on $on_string - Raw GFF"> + <filter>outformat != 'alignment'</filter> + </data> + <data name="output_gff3" format="gff3" label="${tool.name} on $on_string - GFF3"> <filter>outformat != 'alignment'</filter> </data> <data name="output_ali" format="txt" label="${tool.name} on $on_string"> @@ -101,6 +119,7 @@ </conditional> <param name="outformat" value="targetgff"/> <output name="output_gff" file="out_target.gff" lines_diff="8"/> + <output name="output_gff3" file="out_target.gff3" lines_diff="8"/> </test> <test> <param name="query" value="genome.fa"/> @@ -110,6 +129,7 @@ </conditional> <param name="outformat" value="targetgff"/> <output name="output_gff" file="out_target.gff" lines_diff="8"/> + <output name="output_gff3" file="out_target.gff3" lines_diff="8"/> </test> <test> <param name="query" value="genome.fa"/> @@ -119,6 +139,7 @@ </conditional> <param name="outformat" value="querygff"/> <output name="output_gff" file="out_query.gff" lines_diff="8"/> + <output name="output_gff3" file="out_query.gff3" lines_diff="8"/> </test> <test> <param name="query" value="genome.fa"/> @@ -138,6 +159,7 @@ <param name="model" value="est2genome"/> <param name="outformat" value="targetgff"/> <output name="output_gff" file="est2genome.gff" lines_diff="4"/> + <output name="output_gff3" file="est2genome.gff3" lines_diff="4"/> </test> <test> <param name="query" value="proteome.fa"/> @@ -148,6 +170,7 @@ <param name="model" value="protein2genome"/> <param name="outformat" value="targetgff"/> <output name="output_gff" file="protein2genome.gff" lines_diff="2"/> + <output name="output_gff3" file="protein2genome.gff3" lines_diff="2"/> </test> <test> <param name="query" value="genome.fa"/> @@ -158,6 +181,20 @@ <param name="model" value="coding2coding"/> <param name="outformat" value="targetgff"/> <output name="output_gff" file="coding2coding.gff" lines_diff="4"/> + <output name="output_gff3" file="coding2coding.gff3" 
lines_diff="4"/> + </test> + <test> + <param name="query" value="transcriptome.fa"/> + <conditional name="ref_seq"> + <param name="ref_seq_selector" value="personal"/> + <param name="input_fasta" value="genome.fa"/> + </conditional> + <param name="model" value="est2genome"/> + <param name="outformat" value="targetgff"/> + <param name="minintron" value="100"/> + <param name="maxintron" value="200"/> + <output name="output_gff" file="est2genome_introns.gff" lines_diff="6"/> + <output name="output_gff3" file="est2genome_introns.gff3" lines_diff="4"/> </test> </tests> <help><![CDATA[
#!/usr/bin/env python3
# Added in this changeset as b/exonerategff_to_gff3.py (previously /dev/null),
# Sun Mar 01 04:48:34 2020 -0500.

"""
Converts a GFF produced by exonerate into a more standard GFF3 (e.g. usable in JBrowse)

Usage: exonerategff_to_gff3.py [infile] [outfile]

Both arguments are optional; stdin / stdout are used when they are omitted.
Each exonerate "gene" becomes a gene -> mRNA hierarchy; the exons, UTRs and
generated CDS features that follow it are attached to that mRNA. "similarity"
features seen before any gene are kept as top-level "match" features.
"""

import argparse
import sys

# exonerate feature types that are not copied under the mRNA.
SKIPPED_TYPES = frozenset(['splice3', 'splice5', 'similarity', 'intron'])

# exonerate UTR feature type -> feature type written to the GFF3.
UTR_TYPES = {'utr5': 'five_prime_UTR', 'utr3': 'three_prime_UTR'}


def cds_spans(exon_start, exon_end, utr=None):
    """Return the coding part(s) of an exon as a list of (start, end) tuples.

    exon_start, exon_end -- boundaries of the exon.
    utr -- None, or a dict with 'start' and 'end' keys describing the UTR
           that was seen immediately before this exon.

    Without a UTR the whole exon is coding. Otherwise the result is the exon
    minus the UTR: nothing when the UTR covers the exon, one span when the UTR
    sits at one edge, two spans when the UTR lies strictly inside the exon.
    """
    if utr is None:
        return [(exon_start, exon_end)]

    spans = []
    # Part of the exon located before the UTR.
    if exon_start < utr['start']:
        spans.append((exon_start, min(exon_end, utr['start'])))
    # Part of the exon located after the UTR.
    if exon_end > utr['end']:
        spans.append((max(exon_start, utr['end']), exon_end))
    return spans


def convert_scaffold(scaff, gene_number=0):
    """Restructure, in place, the features of one record returned by GFF.parse.

    scaff -- the record to convert; its annotations and seq are cleared and
             its features are replaced by the converted top-level features.
    gene_number -- number of genes already converted in previous records, so
                   that generated IDs keep increasing across records.

    Returns the updated gene counter.
    """
    # Imported here rather than at module level so that cds_spans() stays
    # importable (and testable) on a system without Biopython.
    from Bio.SeqFeature import FeatureLocation, SeqFeature

    def make_child(template, start, end, feature_type, feature_id):
        # The strand is given to the location: SeqFeature(strand=...) is
        # deprecated in recent Biopython releases.
        location = FeatureLocation(start, end, strand=template.location.strand)
        child = SeqFeature(location, type=feature_type)
        child.sub_features = []
        child.qualifiers['source'] = template.qualifiers['source']
        child.qualifiers['ID'] = feature_id
        return child

    scaff.annotations = {}
    scaff.seq = ""
    kept_features = []
    current_gene = None
    # Reset for every record so that a feature can never be attached to a
    # gene that belongs to a previous record.
    mrna_feature = None
    mrna_id = None
    exon_number = 0
    last_utr = None

    for feature in scaff.features:

        if feature.type == "gene":
            gene_number += 1
            mrna_id = "mRNA_" + str(gene_number)
            mrna_feature = make_child(feature, feature.location.start, feature.location.end, "mRNA", mrna_id)
            feature.sub_features = [mrna_feature]
            feature.qualifiers['ID'] = "gene_" + str(gene_number)
            feature.qualifiers.pop('gene_orientation', None)
            if current_gene is not None:
                kept_features.append(current_gene)

            current_gene = feature
            exon_number = 0
            last_utr = None
            continue

        if feature.type == 'similarity':
            if current_gene is None:
                # We haven't seen any gene, just convert similarity to match
                feature.type = 'match'
                kept_features.append(feature)

            last_utr = None
            continue

        if feature.type in SKIPPED_TYPES:
            # Dropped from the output; last_utr is deliberately left untouched.
            continue

        if mrna_feature is None:
            # No gene seen yet in this record: keep the feature at top level
            # instead of failing on an undefined parent.
            kept_features.append(feature)
            continue

        if feature.type in UTR_TYPES:
            feature.type = UTR_TYPES[feature.type]
            feature.qualifiers['ID'] = '%s_%s' % (mrna_id, feature.type)
            mrna_feature.sub_features.append(feature)
            last_utr = {'start': feature.location.start, 'end': feature.location.end}

        elif feature.type == 'exon':
            exon_number += 1
            feature.qualifiers['ID'] = '%s_exon_%s' % (mrna_id, exon_number)
            mrna_feature.sub_features.append(feature)

            # The CDS is whatever part of the exon is not covered by the UTR
            # seen just before it (the whole exon when there was none).
            for start, end in cds_spans(feature.location.start, feature.location.end, last_utr):
                mrna_feature.sub_features.append(make_child(feature, start, end, "CDS", mrna_id + "_CDS"))

            last_utr = None

        else:
            mrna_feature.sub_features.append(feature)
            last_utr = None

    # For the last one
    if current_gene is not None:
        kept_features.append(current_gene)

    scaff.features = kept_features
    return gene_number


def main():
    """Parse the command line, convert every record and write the GFF3."""
    # Imported here for the same reason as in convert_scaffold().
    from BCBio import GFF

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
    parser.add_argument('outfile', nargs='?', type=argparse.FileType('a'), default=sys.stdout)
    args = parser.parse_args()

    scaffs = []
    gene_number = 0
    for scaff in GFF.parse(args.infile):
        gene_number = convert_scaffold(scaff, gene_number)
        if scaff.features:
            scaffs.append(scaff)

    # Single write so that the "##gff-version 3" header is emitted only once,
    # and nothing at all is written when no feature was kept.
    if scaffs:
        GFF.write(scaffs, args.outfile)


if __name__ == "__main__":
    main()
--- a/macros.xml Fri Jun 28 11:21:08 2019 -0400 +++ b/macros.xml Sun Mar 01 04:48:34 2020 -0500 @@ -3,6 +3,8 @@ <xml name="requirements"> <requirements> <requirement type="package" version="@VERSION@">exonerate</requirement> + <requirement type="package" version="3.7">python</requirement> + <requirement type="package" version="0.6.6">bcbiogff</requirement> <yield /> </requirements> </xml>
--- a/test-data/coding2coding.gff Fri Jun 28 11:21:08 2019 -0400 +++ b/test-data/coding2coding.gff Sun Mar 01 04:48:34 2020 -0500 @@ -3,7 +3,7 @@ # ##gff-version 2 ##source-version exonerate:coding2coding 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # # @@ -17,7 +17,7 @@ # ##gff-version 2 ##source-version exonerate:coding2coding 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # #
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/coding2coding.gff3 Sun Mar 01 04:48:34 2020 -0500 @@ -0,0 +1,3 @@ +##gff-version 3 +sample exonerate:coding2coding match 1 3536 6207 + . Align=1 1 2463,2466 2464 3,2469 2469 1068;Query=sample;alignment_id=1 +sample exonerate:coding2coding match 2 3535 6151 - . Align=3536 3536 3534;Query=sample;alignment_id=1
--- a/test-data/est2genome.gff Fri Jun 28 11:21:08 2019 -0400 +++ b/test-data/est2genome.gff Sun Mar 01 04:48:34 2020 -0500 @@ -3,7 +3,7 @@ # ##gff-version 2 ##source-version exonerate:est2genome 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # # @@ -19,7 +19,7 @@ # ##gff-version 2 ##source-version exonerate:est2genome 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # #
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/est2genome.gff3 Sun Mar 01 04:48:34 2020 -0500 @@ -0,0 +1,11 @@ +##gff-version 3 +sample exonerate:est2genome gene 2742 2819 102 - . ID=gene_1;gene_id=1;identity=58.97;sequence=sample;similarity=58.97 +sample exonerate:est2genome mRNA 2742 2819 . - . ID=mRNA_1;Parent=gene_1 +sample exonerate:est2genome exon 2742 2819 . - . ID=mRNA_1_exon_1;Parent=mRNA_1;deletions=0;identity=58.97;insertions=0;similarity=58.97 +sample exonerate:est2genome CDS 2742 2819 . - 0 ID=mRNA_1_CDS;Parent=mRNA_1 +sample exonerate:est2genome gene 758 3050 7309 + . ID=gene_2;gene_id=1;identity=99.93;sequence=sample;similarity=99.93 +sample exonerate:est2genome mRNA 758 3050 . + . ID=mRNA_2;Parent=gene_2 +sample exonerate:est2genome five_prime_UTR 758 1332 . + . ID=mRNA_2_five_prime_UTR;Parent=mRNA_2 +sample exonerate:est2genome exon 758 1332 . + . ID=mRNA_2_exon_1;Parent=mRNA_2;deletions=2;identity=99.83;insertions=0;similarity=99.83 +sample exonerate:est2genome exon 2152 3050 . + . ID=mRNA_2_exon_2;Parent=mRNA_2;deletions=0;identity=100.00;insertions=0;similarity=100.00 +sample exonerate:est2genome CDS 2152 3050 . + 0 ID=mRNA_2_CDS;Parent=mRNA_2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/est2genome_introns.gff Sun Mar 01 04:48:34 2020 -0500 @@ -0,0 +1,48 @@ +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:est2genome 2.4.0 +##date 2020-02-26 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:est2genome gene 2742 2819 102 - . gene_id 1 ; sequence sample ; gene_orientation . ; identity 58.97 ; similarity 58.97 +sample exonerate:est2genome exon 2742 2819 . - . insertions 0 ; deletions 0 ; identity 58.97 ; similarity 58.97 +sample exonerate:est2genome similarity 2742 2819 102 - . alignment_id 1 ; Query sample ; Align 2820 1168 78 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:est2genome 2.4.0 +##date 2020-02-26 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:est2genome gene 2146 3050 4525 + . gene_id 1 ; sequence sample ; gene_orientation . ; identity 100.00 ; similarity 100.00 +sample exonerate:est2genome exon 2146 3050 . + . insertions 0 ; deletions 0 ; identity 100.00 ; similarity 100.00 +sample exonerate:est2genome similarity 2146 3050 4525 + . alignment_id 1 ; Query sample ; Align 2146 572 905 +# --- END OF GFF DUMP --- +# +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:est2genome 2.4.0 +##date 2020-02-26 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +sample exonerate:est2genome gene 758 1357 2861 + . gene_id 2 ; sequence sample ; gene_orientation . ; identity 98.33 ; similarity 98.33 +sample exonerate:est2genome exon 758 1357 . + . insertions 1 ; deletions 4 ; identity 98.33 ; similarity 98.33 +sample exonerate:est2genome similarity 758 1357 2861 + . alignment_id 2 ; Query sample ; Align 758 1 572 ; Align 1330 575 14 ; Align 1345 589 8 ; Align 1353 599 5 +# --- END OF GFF DUMP --- +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/est2genome_introns.gff3 Sun Mar 01 04:48:34 2020 -0500 @@ -0,0 +1,13 @@ +##gff-version 3 +sample exonerate:est2genome gene 2742 2819 102 - . ID=gene_1;gene_id=1;identity=58.97;sequence=sample;similarity=58.97 +sample exonerate:est2genome mRNA 2742 2819 . - . ID=mRNA_1;Parent=gene_1 +sample exonerate:est2genome exon 2742 2819 . - . ID=mRNA_1_exon_1;Parent=mRNA_1;deletions=0;identity=58.97;insertions=0;similarity=58.97 +sample exonerate:est2genome CDS 2742 2819 . - 0 ID=mRNA_1_CDS;Parent=mRNA_1 +sample exonerate:est2genome gene 2146 3050 4525 + . ID=gene_2;gene_id=1;identity=100.00;sequence=sample;similarity=100.00 +sample exonerate:est2genome mRNA 2146 3050 . + . ID=mRNA_2;Parent=gene_2 +sample exonerate:est2genome exon 2146 3050 . + . ID=mRNA_2_exon_1;Parent=mRNA_2;deletions=0;identity=100.00;insertions=0;similarity=100.00 +sample exonerate:est2genome CDS 2146 3050 . + 0 ID=mRNA_2_CDS;Parent=mRNA_2 +sample exonerate:est2genome gene 758 1357 2861 + . ID=gene_3;gene_id=2;identity=98.33;sequence=sample;similarity=98.33 +sample exonerate:est2genome mRNA 758 1357 . + . ID=mRNA_3;Parent=gene_3 +sample exonerate:est2genome exon 758 1357 . + . ID=mRNA_3_exon_1;Parent=mRNA_3;deletions=4;identity=98.33;insertions=1;similarity=98.33 +sample exonerate:est2genome CDS 758 1357 . + 0 ID=mRNA_3_CDS;Parent=mRNA_3
--- a/test-data/out_query.gff Fri Jun 28 11:21:08 2019 -0400 +++ b/test-data/out_query.gff Sun Mar 01 04:48:34 2020 -0500 @@ -3,7 +3,7 @@ # ##gff-version 2 ##source-version exonerate:ungapped:dna2dna 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # # @@ -17,7 +17,7 @@ # ##gff-version 2 ##source-version exonerate:ungapped:dna2dna 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # # @@ -31,7 +31,7 @@ # ##gff-version 2 ##source-version exonerate:ungapped:dna2dna 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # # @@ -45,7 +45,7 @@ # ##gff-version 2 ##source-version exonerate:ungapped:dna2dna 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # #
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_query.gff3 Sun Mar 01 04:48:34 2020 -0500 @@ -0,0 +1,5 @@ +##gff-version 3 +sample exonerate:ungapped:dna2dna match 1 3536 17680 + . Align=1 1 3536;Target=sample;alignment_id=1 +sample exonerate:ungapped:dna2dna match 608 634 108 + . Align=608 596 27;Target=sample;alignment_id=2 +sample exonerate:ungapped:dna2dna match 596 622 108 + . Align=596 608 27;Target=sample;alignment_id=3 +sample exonerate:ungapped:dna2dna match 2742 2819 102 + . Align=2742 2820 78;Target=sample;alignment_id=1
--- a/test-data/out_target.gff Fri Jun 28 11:21:08 2019 -0400 +++ b/test-data/out_target.gff Sun Mar 01 04:48:34 2020 -0500 @@ -3,7 +3,7 @@ # ##gff-version 2 ##source-version exonerate:ungapped:dna2dna 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # # @@ -17,7 +17,7 @@ # ##gff-version 2 ##source-version exonerate:ungapped:dna2dna 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # # @@ -31,7 +31,7 @@ # ##gff-version 2 ##source-version exonerate:ungapped:dna2dna 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # # @@ -45,7 +45,7 @@ # ##gff-version 2 ##source-version exonerate:ungapped:dna2dna 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # #
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_target.gff3 Sun Mar 01 04:48:34 2020 -0500 @@ -0,0 +1,5 @@ +##gff-version 3 +sample exonerate:ungapped:dna2dna match 1 3536 17680 + . Align=1 1 3536;Query=sample;alignment_id=1 +sample exonerate:ungapped:dna2dna match 596 622 108 + . Align=596 608 27;Query=sample;alignment_id=2 +sample exonerate:ungapped:dna2dna match 608 634 108 + . Align=608 596 27;Query=sample;alignment_id=3 +sample exonerate:ungapped:dna2dna match 2742 2819 102 - . Align=2820 2742 78;Query=sample;alignment_id=1
--- a/test-data/protein2genome.gff Fri Jun 28 11:21:08 2019 -0400 +++ b/test-data/protein2genome.gff Sun Mar 01 04:48:34 2020 -0500 @@ -3,7 +3,7 @@ # ##gff-version 2 ##source-version exonerate:protein2genome:local 2.4.0 -##date 2018-08-02 +##date 2020-02-26 ##type DNA # #
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/protein2genome.gff3 Sun Mar 01 04:48:34 2020 -0500 @@ -0,0 +1,9 @@ +##gff-version 3 +sample exonerate:protein2genome:local gene 1181 2291 456 + . ID=gene_1;gene_id=1;identity=98.97;sequence=sample;similarity=100.00 +sample exonerate:protein2genome:local mRNA 1181 2291 . + . ID=mRNA_1;Parent=gene_1 +sample exonerate:protein2genome:local cds 1181 1326 . + . Parent=mRNA_1 +sample exonerate:protein2genome:local exon 1181 1326 . + . ID=mRNA_1_exon_1;Parent=mRNA_1;deletions=0;identity=100.00;insertions=0;similarity=100.00 +sample exonerate:protein2genome:local CDS 1181 1326 . + 0 ID=mRNA_1_CDS;Parent=mRNA_1 +sample exonerate:protein2genome:local cds 2144 2291 . + . Parent=mRNA_1 +sample exonerate:protein2genome:local exon 2144 2291 . + . ID=mRNA_1_exon_2;Parent=mRNA_1;deletions=0;identity=97.96;insertions=0;similarity=100.00 +sample exonerate:protein2genome:local CDS 2144 2291 . + 0 ID=mRNA_1_CDS;Parent=mRNA_1