# HG changeset patch # User crs4 # Date 1382705991 14400 # Node ID 4bafaa074484a4c590e286d80d90be0ba8ac112b # Parent 4b6f16a79fe4a3bfe31f439a4cc942bbbe3966d5 Merge with Lionel Guy's wrapper. Directly call prokka, remove prokka.py . Add locustag, increment, gffver, compliant, addgenes, genus, species, strain, plasmid, gcode, usegenus, metagenome, fast, evalue, norrna, notrna params. Update BLAST+ dependency to 2.2.28. Add dependencies on prodigal and barrnap. Add readme.rst . diff -r 4b6f16a79fe4 -r 4bafaa074484 COPYING --- a/COPYING Thu Sep 26 12:39:52 2013 -0400 +++ b/COPYING Fri Oct 25 08:59:51 2013 -0400 @@ -1,7 +1,9 @@ Copyright © 2013 CRS4 Srl. http://www.crs4.it/ +Copyright © 2013 Lionel Guy Created by: Paolo Uva Nicola Soranzo +Lionel Guy Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff -r 4b6f16a79fe4 -r 4bafaa074484 prokka.py --- a/prokka.py Thu Sep 26 12:39:52 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Wrapper for Prokka - Prokaryotic annotation tool -Author: Paolo Uva paolo dot uva at crs4 dot it -Date: February 14, 2013 -Update: March 14, 2013 - Added more options -""" - -import optparse -import shutil -import subprocess -import sys - - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - parser.add_option('--cpus', dest='cpus', type='int', help='Number of CPUs to use [0=all]') - parser.add_option('--fasta', dest='fasta', help='FASTA file with contigs') - parser.add_option('--kingdom', dest='kingdom', choices=['Archaea', 'Bacteria', 'Viruses'], default='Bacteria', help='Kingdom') - parser.add_option('--mincontig', dest='mincontig', type='int', help='Minimun contig size') - parser.add_option('--rfam', action="store_true", dest="rfam", help="Enable searching for ncRNAs") - parser.add_option('--centre', dest="centre", default="CRS4", help="Sequencing centre") - 
parser.add_option('--gff', dest="gff", help="This is the master annotation in GFF3 format, containing both sequences and annotations") - parser.add_option('--gbk', dest="gbk", help="This is a standard GenBank file derived from the master .gff. If the input to prokka was a multi-FASTA, then this will be a multi-GenBank, with one record for each sequence") - parser.add_option('--fna', dest="fna", help="Nucleotide FASTA file of the input contig sequences") - parser.add_option('--faa', dest="faa", help="Protein FASTA file of the translated CDS sequences") - parser.add_option('--ffn', dest="ffn", help="Nucleotide FASTA file of all the annotated sequences, not just CDS") - parser.add_option('--sqn', dest="sqn", help="An ASN1 format Sequin file for submission to GenBank. It needs to be edited to set the correct taxonomy, authors, related publication, etc.") - parser.add_option('--fsa', dest="fsa", help="Nucleotide FASTA file of the input contig sequences, used by tbl2asn to create the .sqn file. It is mostly the same as the .fna file, but with extra Sequin tags in the sequence description lines") - parser.add_option('--tbl', dest="tbl", help="Feature Table file, used by tbl2asn to create the .sqn file") - parser.add_option('--err', dest="err", help="Unacceptable annotations - the NCBI discrepancy report") - parser.add_option('--txt', dest='txt', help='Statistics relating to the annotated features found') - parser.add_option('--log', dest="log", help="Contains all the output that Prokka produced during its run") - (options, args) = parser.parse_args() - if len(args) > 0: - parser.error('Wrong number of arguments') - - # Build command - cpus = "--cpus %d" % (options.cpus) if options.cpus is not None else '' - rfam = '--rfam' if options.rfam else '' - mincontig = "--mincontig %d" % options.mincontig if options.mincontig is not None else '' - - cl = "prokka --force --outdir . 
--prefix prokka --kingdom %s %s --centre %s %s %s %s" % (options.kingdom, mincontig, options.centre, rfam, cpus, options.fasta) - print '\nProkka command to be executed:\n %s' % cl - - # Run command - log = open(options.log, 'w') if options.log else sys.stdout - try: - subprocess.check_call(cl, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because prokka writes many logging info there - finally: - if log != sys.stdout: - log.close() - - # Rename output files - suffix = ['gff', 'gbk', 'fna', 'faa', 'ffn', 'sqn', 'fsa', 'tbl', 'err', 'txt'] - for s in suffix: - shutil.move('prokka.' + s, getattr(options, s)) - -if __name__ == "__main__": - __main__() diff -r 4b6f16a79fe4 -r 4bafaa074484 prokka.xml --- a/prokka.xml Thu Sep 26 12:39:52 2013 -0400 +++ b/prokka.xml Fri Oct 25 08:59:51 2013 -0400 @@ -1,76 +1,156 @@ - - Prokaryotic Annotation + + prokaryotic genome annotation - blast+ + blast+ hmmer aragorn + prodigal + tbl2asn + parallel + barrnap infernal prokka prokka --version - - prokka.py + + prokka \${PROKKA_SITE_OPTIONS:---cpus 8} - ## Reads in FASTA format - --fasta=$fasta_file - ## Additional inputs - --kingdom $kingdom_type.kingdom - #if str($mincontig) - --mincontig $mincontig + --quiet ## to avoid non-error messages written to stderr + --outdir outdir --prefix prokka ## used in outputs section + #if $locustag + --locustag "$locustag" + #end if + #if str($increment) + --increment $increment #end if - #if $rfam - --rfam + --gffver $gffver + #if $compliant.compliant_select == "no" + #if $compliant.addgenes + --addgenes + #end if + #if str($compliant.mincontig) + --mincontig $compliant.mincontig + #end if + #else + --compliant #end if #if $centre --centre "$centre" #end if - ## Output files - --gff=$out_gff - --gbk=$out_gbk - --fna=$out_fna - --faa=$out_faa - --ffn=$out_ffn - --sqn=$out_sqn - --fsa=$out_fsa - --tbl=$out_tbl - --err=$out_err - --txt=$out_txt - --log=$out_log + #if $genus + --genus "$genus" + #end if + #if $species + 
--species "$species" + #end if + #if $strain + --strain "$strain" + #end if + #if $plasmid + --plasmid "$plasmid" + #end if + --kingdom $kingdom.kingdom_select + #if str($kingdom.gcode) + --gcode $kingdom.gcode + #end if + #if $usegenus + --usegenus + #end if + #if $metagenome + --metagenome + #end if + #if $fast + --fast + #end if + #if str($evalue) + --evalue $evalue + #end if + #if $rfam + --rfam + #end if + #if $norrna + --norrna + #end if + #if $notrna + --notrna + #end if + $input - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + + + + + + + + + + - - - + + + + + + + + + - - - - - - - - - - - + + + + + + + + + + + - **What it does** -Prokka_ is a software tool to annotate bacterial, archaeal and viral genomes very rapidly, and produce output files that require only minor tweaking to submit to GenBank/ENA/DDBJ. +Prokka_ is a software tool to rapidly annotate bacterial, archaeal and viral genomes, and produce output files that require only minor tweaking to submit to GenBank/ENA/DDBJ. .. _Prokka: http://www.vicbioinformatics.com/software.prokka.shtml @@ -79,7 +159,7 @@ Prokka creates several output files: gff - This is the master annotation in GFF3 format, containing both sequences and annotations + This is the master annotation in GFF format, containing both sequences and annotations gbk This is a standard GenBank file derived from the master .gff . If the input to prokka was a multi-FASTA, then this will be a multi-GenBank, with one record for each sequence fna @@ -103,7 +183,7 @@ **License and citation** -This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_, Lionel Guy and is released under the `MIT license`_. .. _CRS4 Srl.: http://www.crs4.it/ .. 
_MIT license: http://opensource.org/licenses/MIT diff -r 4b6f16a79fe4 -r 4bafaa074484 readme.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Fri Oct 25 08:59:51 2013 -0400 @@ -0,0 +1,35 @@ +Prokka wrapper +============== + +Warning +------- + +Prokka includes custom databases and is thus about a 2.0 GB download! + +Dependencies of Prokka which need to be installed separately +------------------------------------------------------------- + +- Perl core modules: File\::Copy, FindBin, Getopt::Long, List::Util, Scalar::Util, Time::Piece, Time::Seconds; +- Perl modules: Bio::SeqIO from BioPerl_ >= 1.6.900, `XML::Simple`_; +- `GNU Parallel`_ >= 20130422 is required, but is shipped with Prokka and thus is not managed by the tool dependency system; +- tbl2asn_ >= 21.0 is required. This dependency is not managed here since versions are increasing very rapidly; +- SignalP_ >= 3.0 is an optional dependency to find signal peptides. For licensing reasons, it is not used in the tool wrapper. + +.. _BioPerl: http://search.cpan.org/dist/BioPerl/ +.. _XML::Simple: http://search.cpan.org/dist/XML-Simple/ +.. _GNU Parallel: http://www.gnu.org/software/parallel/ +.. _tbl2asn: http://www.ncbi.nlm.nih.gov/genbank/tbl2asn2/ +.. _SignalP: http://www.cbs.dtu.dk/services/SignalP/ + +Configuration +------------- + +Change the PROKKA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of CPUs to use (--cpus). + +Version history +--------------- + +- v0.1 (LG): initial release in the toolshed, supports Prokka 1.6. +- v0.2 (LG): added this readme file, supports Prokka 1.7, and adds dependencies management. 
+- v1.1.0: merge the wrappers by CRS4 and Lionel Guy, add COPYING file with MIT license, make all params optional, add gffver param, correctly quote text params in command, use float type for 'evalue' param, describe output files in help, upgrade BLAST+ dependency to version 2.2.28, depend on package_aragorn_1_2_36 instead of trna_prediction, depend on package_prodigal_2_60 instead of prodigal, depend on package_barrnap_0_2 instead of barrnap, add PROKKA_SITE_OPTIONS to env.sh and remove 'cpus' param. + diff -r 4b6f16a79fe4 -r 4bafaa074484 tool_dependencies.xml --- a/tool_dependencies.xml Thu Sep 26 12:39:52 2013 -0400 +++ b/tool_dependencies.xml Fri Oct 25 08:59:51 2013 -0400 @@ -1,7 +1,7 @@ - - + + @@ -9,13 +9,19 @@ + + + + + + - http://www.vicbioinformatics.com/prokka-1.7.tar.gz + http://www.vicbioinformatics.com/prokka-1.7.tar.gz . $INSTALL_DIR @@ -29,6 +35,8 @@ +Warning: Prokka includes custom databases and is thus about a 2.0 GB download! + Dependencies of Prokka which needs to be installed separately: - Perl core modules: File::Copy, FindBin, Getopt::Long, List::Util, Scalar::Util, Time::Piece, Time::Seconds; - Perl modules: Bio::SeqIO from BioPerl ( http://search.cpan.org/dist/BioPerl/ ) >= 1.6.900, XML::Simple ( http://search.cpan.org/dist/XML-Simple/ ); @@ -37,9 +45,7 @@ - GNU Parallel ( http://www.gnu.org/software/parallel/ ) >= 20130422 ; - Barrnap ( http://www.vicbioinformatics.com/software.barrnap.shtml ) >= 0.1 . -Change the PROKKA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of CPUs to use (--cpus). - -Note: Prokka is about a 2.0 GB download due to included custom databases. +Configuration: Change the PROKKA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of CPUs to use (--cpus).