annotate prokka.py @ 0:95505a9fa26f draft

Uploaded
author crs4
date Tue, 10 Sep 2013 13:11:26 -0400
parents
children 4b6f16a79fe4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
95505a9fa26f Uploaded
crs4
parents:
diff changeset
1 # -*- coding: utf-8 -*-
95505a9fa26f Uploaded
crs4
parents:
diff changeset
2 """
95505a9fa26f Uploaded
crs4
parents:
diff changeset
3 Wrapper for Prokka - Prokaryotic annotation tool
95505a9fa26f Uploaded
crs4
parents:
diff changeset
4 Author: Paolo Uva paolo dot uva at crs4 dot it
95505a9fa26f Uploaded
crs4
parents:
diff changeset
5 Date: February 14, 2013
95505a9fa26f Uploaded
crs4
parents:
diff changeset
6 Update: March 14, 2013 - Added more options
95505a9fa26f Uploaded
crs4
parents:
diff changeset
7 """
95505a9fa26f Uploaded
crs4
parents:
diff changeset
8
95505a9fa26f Uploaded
crs4
parents:
diff changeset
9 import optparse
95505a9fa26f Uploaded
crs4
parents:
diff changeset
10 import shutil
95505a9fa26f Uploaded
crs4
parents:
diff changeset
11 import subprocess
95505a9fa26f Uploaded
crs4
parents:
diff changeset
12 import sys
95505a9fa26f Uploaded
crs4
parents:
diff changeset
13
95505a9fa26f Uploaded
crs4
parents:
diff changeset
14
95505a9fa26f Uploaded
crs4
parents:
diff changeset
def _build_command(options):
    """Return the prokka invocation for *options* as an argument list.

    A list (rather than a shell string) lets file names and centre names
    containing spaces or shell metacharacters reach prokka unmodified.
    """
    command = ['prokka', '--force', '--outdir', '.', '--prefix', 'prokka',
               '--kingdom', options.kingdom]
    if options.mincontig is not None:
        command.extend(['--mincontig', str(options.mincontig)])
    command.extend(['--centre', options.centre])
    if options.rfam:
        command.append('--rfam')
    if options.cpus is not None:
        command.extend(['--cpus', str(options.cpus)])
    command.append(options.fasta)
    return command


def __main__():
    """Run prokka on a FASTA file and move its outputs to the requested paths.

    Command-line options are read from ``sys.argv``. Prokka is executed in
    the current directory with the fixed prefix ``prokka``; afterwards each
    ``prokka.<suffix>`` result file is moved to the path given by the
    matching ``--<suffix>`` option. Result files whose option was not
    supplied are left in place.

    Raises:
        SystemExit: on invalid command-line usage (via ``parser.error``).
        subprocess.CalledProcessError: if prokka exits with a non-zero status.
    """
    # Parse command line
    parser = optparse.OptionParser()
    parser.add_option('--cpus', dest='cpus', type='int', help='Number of CPUs to use [0=all]')
    parser.add_option('--fasta', dest='fasta', help='FASTA file with contigs')
    parser.add_option('--kingdom', dest='kingdom', choices=['Archaea', 'Bacteria', 'Viruses'], default='Bacteria', help='Kingdom')
    parser.add_option('--mincontig', dest='mincontig', type='int', help='Minimum contig size')
    parser.add_option('--rfam', action="store_true", dest="rfam", help="Enable searching for ncRNAs")
    parser.add_option('--centre', dest="centre", default="CRS4", help="Sequencing centre")
    parser.add_option('--gff', dest="gff", help="This is the master annotation in GFF3 format, containing both sequences and annotations. It can be viewed directly in Artemis or IGV")
    parser.add_option('--gbk', dest="gbk", help="This is a standard Genbank file derived from the master .gff. If the input to prokka was a multi-FASTA, then this will be a multi-Genbank, with one record for each sequence")
    parser.add_option('--fna', dest="fna", help="Nucleotide FASTA file of the input contig sequences")
    parser.add_option('--faa', dest="faa", help="Protein FASTA file of the translated CDS sequences")
    parser.add_option('--ffn', dest="ffn", help="Nucleotide FASTA file of all the annotated sequences, not just CDS")
    parser.add_option('--sqn', dest="sqn", help="An ASN1 format Sequin file for submission to Genbank. It needs to be edited to set the correct taxonomy, authors, related publication etc")
    parser.add_option('--fsa', dest="fsa", help="Nucleotide FASTA file of the input contig sequences, used by tbl2asn to create the .sqn file. It is mostly the same as the .fna file, but with extra Sequin tags in the sequence description lines")
    parser.add_option('--tbl', dest="tbl", help="Feature Table file, used by tbl2asn to create the .sqn file")
    parser.add_option('--err', dest="err", help="Unacceptable annotations - the NCBI discrepancy report")
    parser.add_option('--log', dest="log", help="Contains all the output that Prokka produced during its run")
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    if not options.fasta:
        # Without this check the literal string "None" would be handed to prokka.
        parser.error('Missing required option --fasta')

    # Build command
    command = _build_command(options)
    print('\nProkka command to be executed: \n %s' % ' '.join(command))
    # Flush so the banner precedes prokka's own output when both share stdout.
    sys.stdout.flush()

    # Run command
    log = open(options.log, 'w') if options.log else sys.stdout
    try:
        # need to redirect stderr because prokka writes many logging info there
        subprocess.check_call(command, stdout=log, stderr=subprocess.STDOUT)
    finally:
        if log is not sys.stdout:
            log.close()

    # Rename output files
    for suffix in ('gbk', 'fna', 'faa', 'ffn', 'sqn', 'fsa', 'tbl', 'err', 'gff'):
        destination = getattr(options, suffix)
        if destination:
            # Only move results the caller actually asked for.
            shutil.move('prokka.' + suffix, destination)
95505a9fa26f Uploaded
crs4
parents:
diff changeset
58
95505a9fa26f Uploaded
crs4
parents:
diff changeset
# Run the wrapper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    __main__()