annotate tRNAscan.py @ 1:65d282ef088e draft

Uploaded
author bjoern-gruening
date Tue, 19 Mar 2013 16:56:25 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
1 #!/usr/bin/env python
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
2
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
3 """
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
4
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
5 Converts tRNAScan output back to fasta-sequences.
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
6
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
7 """
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
8
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
9 import sys
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
10 from Bio import SeqIO
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
11 from Bio.SeqRecord import SeqRecord
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
12 import subprocess
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
13
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
14
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
def main(args):
    """
    Run tRNAscan-SE and write every predicted tRNA as a FASTA record.

    Call from galaxy:
    tRNAscan.py $organism $mode $showPrimSecondOpt $disablePseudo $showCodons $tabular_output $inputfile $fasta_output

    tRNAscan-SE $organism $mode $showPrimSecondOpt $disablePseudo $showCodons -d -Q -y -q -b -o $tabular_output $inputfile;

    args -- list of command-line arguments. Everything except the last
            element is forwarded to tRNAscan-SE. The last three elements
            are read positionally as: the tabular tRNAscan-SE output file,
            the input FASTA file, and the FASTA file to write.

    Raises KeyError if the tabular output names a sequence id that is not
    present in the input FASTA file.
    """
    # NOTE: the command is joined and re-split on whitespace, so empty
    # arguments disappear and arguments containing blanks are split up.
    cmd = """tRNAscan-SE -Q -y -q -b %s""" % ' '.join(args[:-1])
    # universal_newlines=True makes communicate() return text instead of
    # bytes, which is what sys.stdout/sys.stderr.write() need on Python 3.
    child = subprocess.Popen(cmd.split(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
    stdout, stderr = child.communicate()
    return_code = child.returncode
    if return_code:
        sys.stdout.write(stdout)
        sys.stderr.write(stderr)
        sys.stderr.write("Return error code %i from command:\n" % return_code)
        sys.stderr.write("%s\n" % cmd)
        # NOTE(review): processing continues after a failed run, as before;
        # confirm whether the script should stop here instead.
    else:
        sys.stdout.write(stdout)
        sys.stdout.write(stderr)

    outfile = args[-1]
    sequence_file = args[-2]
    tRNAScan_file = args[-3]

    with open(sequence_file) as sequences:
        sequence_recs = SeqIO.to_dict(SeqIO.parse(sequences, "fasta"))

    tRNAs = []
    with open(tRNAScan_file) as tRNA_handle:
        for line in tRNA_handle:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            cols = line.split()
            iid = cols[0].strip()
            start = int(cols[2])
            end = int(cols[3])
            aa = cols[4]
            codon = cols[5]
            rec = sequence_recs[iid]
            # tRNAscan-SE coordinates are 1-based and inclusive, Python
            # slices are 0-based and half-open: shift the lower bound by one.
            if start > end:
                # start > end marks a hit on the reverse strand.
                new_rec = rec[end - 1:start]
                new_rec.seq = new_rec.seq.reverse_complement()
            else:
                new_rec = rec[start - 1:end]
            new_rec.id = rec.id
            new_rec.name = rec.name
            new_rec.description = "%s %s %s %s %s" % (rec.description, aa, codon, start, end)
            tRNAs.append(new_rec)

    # Close the handle explicitly so the FASTA output is flushed to disk.
    with open(outfile, 'w+') as fasta_handle:
        SeqIO.write(tRNAs, fasta_handle, "fasta")
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
71
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
72
65d282ef088e Uploaded
bjoern-gruening
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: hand everything after the program name to main().
    cli_args = sys.argv[1:]
    main(cli_args)