annotate prokka.py @ 1:4b6f16a79fe4 draft

Add txt output file. Use a definition list instead of a block quote in <help>. Correct 2 dependency minimum versions.
author crs4
date Thu, 26 Sep 2013 12:39:52 -0400
parents 95505a9fa26f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
95505a9fa26f Uploaded
crs4
parents:
diff changeset
1 # -*- coding: utf-8 -*-
95505a9fa26f Uploaded
crs4
parents:
diff changeset
2 """
95505a9fa26f Uploaded
crs4
parents:
diff changeset
3 Wrapper for Prokka - Prokaryotic annotation tool
95505a9fa26f Uploaded
crs4
parents:
diff changeset
4 Author: Paolo Uva paolo dot uva at crs4 dot it
95505a9fa26f Uploaded
crs4
parents:
diff changeset
5 Date: February 14, 2013
95505a9fa26f Uploaded
crs4
parents:
diff changeset
6 Update: March 14, 2013 - Added more options
95505a9fa26f Uploaded
crs4
parents:
diff changeset
7 """
95505a9fa26f Uploaded
crs4
parents:
diff changeset
8
95505a9fa26f Uploaded
crs4
parents:
diff changeset
9 import optparse
95505a9fa26f Uploaded
crs4
parents:
diff changeset
10 import shutil
95505a9fa26f Uploaded
crs4
parents:
diff changeset
11 import subprocess
95505a9fa26f Uploaded
crs4
parents:
diff changeset
12 import sys
95505a9fa26f Uploaded
crs4
parents:
diff changeset
13
95505a9fa26f Uploaded
crs4
parents:
diff changeset
14
95505a9fa26f Uploaded
crs4
parents:
diff changeset
# Extensions of the files Prokka writes as 'prokka.<ext>' (because of the
# '--outdir . --prefix prokka' arguments below). Each extension is also the
# name of the command-line option that holds the destination path.
_OUTPUT_SUFFIXES = ('gff', 'gbk', 'fna', 'faa', 'ffn', 'sqn', 'fsa', 'tbl', 'err', 'txt')


def _build_parser():
    """Return the optparse parser describing the wrapper's command line."""
    parser = optparse.OptionParser()
    parser.add_option('--cpus', dest='cpus', type='int', help='Number of CPUs to use [0=all]')
    parser.add_option('--fasta', dest='fasta', help='FASTA file with contigs')
    parser.add_option('--kingdom', dest='kingdom', choices=['Archaea', 'Bacteria', 'Viruses'], default='Bacteria', help='Kingdom')
    parser.add_option('--mincontig', dest='mincontig', type='int', help='Minimum contig size')
    parser.add_option('--rfam', action="store_true", dest="rfam", help="Enable searching for ncRNAs")
    parser.add_option('--centre', dest="centre", default="CRS4", help="Sequencing centre")
    parser.add_option('--gff', dest="gff", help="This is the master annotation in GFF3 format, containing both sequences and annotations")
    parser.add_option('--gbk', dest="gbk", help="This is a standard GenBank file derived from the master .gff. If the input to prokka was a multi-FASTA, then this will be a multi-GenBank, with one record for each sequence")
    parser.add_option('--fna', dest="fna", help="Nucleotide FASTA file of the input contig sequences")
    parser.add_option('--faa', dest="faa", help="Protein FASTA file of the translated CDS sequences")
    parser.add_option('--ffn', dest="ffn", help="Nucleotide FASTA file of all the annotated sequences, not just CDS")
    parser.add_option('--sqn', dest="sqn", help="An ASN1 format Sequin file for submission to GenBank. It needs to be edited to set the correct taxonomy, authors, related publication, etc.")
    parser.add_option('--fsa', dest="fsa", help="Nucleotide FASTA file of the input contig sequences, used by tbl2asn to create the .sqn file. It is mostly the same as the .fna file, but with extra Sequin tags in the sequence description lines")
    parser.add_option('--tbl', dest="tbl", help="Feature Table file, used by tbl2asn to create the .sqn file")
    parser.add_option('--err', dest="err", help="Unacceptable annotations - the NCBI discrepancy report")
    parser.add_option('--txt', dest='txt', help='Statistics relating to the annotated features found')
    parser.add_option('--log', dest="log", help="Contains all the output that Prokka produced during its run")
    return parser


def _build_command(options):
    """Return the prokka invocation for the parsed options as an argv list."""
    command = ['prokka', '--force', '--outdir', '.', '--prefix', 'prokka',
               '--kingdom', options.kingdom]
    if options.mincontig is not None:
        command += ['--mincontig', str(options.mincontig)]
    command += ['--centre', options.centre]
    if options.rfam:
        command.append('--rfam')
    if options.cpus is not None:
        command += ['--cpus', str(options.cpus)]
    command.append(options.fasta)
    return command


def __main__():
    """Run Prokka on the given FASTA file and move its outputs into place.

    Reads the options from sys.argv, runs prokka in the current directory
    with the fixed prefix 'prokka', sends prokka's stdout and stderr to the
    --log file (or to this process's stdout when --log is not given), and
    finally moves every 'prokka.<ext>' result to the path given by the
    matching option.

    Exits through parser.error() (status 2) when positional arguments are
    supplied or when --fasta is missing.

    Raises:
        subprocess.CalledProcessError: if prokka exits with a non-zero status.
    """
    # Parse command line
    parser = _build_parser()
    (options, args) = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments')
    if not options.fasta:
        # Without this check the literal string 'None' would be handed to
        # prokka as the input file name.
        parser.error('Missing required option --fasta')

    # Build command as an argument list: no shell is involved, so file names
    # or centre names containing spaces or shell metacharacters are passed to
    # prokka verbatim instead of being interpreted.
    command = _build_command(options)
    print('\nProkka command to be executed:\n %s' % ' '.join(command))
    # Flush so the banner is emitted before prokka writes to the same stream.
    sys.stdout.flush()

    # Run command
    log = open(options.log, 'w') if options.log else sys.stdout
    try:
        # need to redirect stderr because prokka writes many logging info there
        subprocess.check_call(command, stdout=log, stderr=subprocess.STDOUT)
    finally:
        if log is not sys.stdout:
            log.close()

    # Rename output files; outputs whose destination option was not supplied
    # are left where prokka wrote them instead of crashing on a None path.
    for suffix in _OUTPUT_SUFFIXES:
        destination = getattr(options, suffix)
        if destination:
            shutil.move('prokka.' + suffix, destination)
0
95505a9fa26f Uploaded
crs4
parents:
diff changeset
59
95505a9fa26f Uploaded
crs4
parents:
diff changeset
# Run the wrapper only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    __main__()