Mercurial > repos > iuc > transtermhp
comparison transtermhp.py @ 0:c28817831a24 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
author | iuc |
---|---|
date | Fri, 09 Oct 2015 09:22:42 -0400 |
parents | |
children | 1a1ec22a7e28 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c28817831a24 |
---|---|
1 #!/usr/bin/env python | |
2 import sys | |
3 import re | |
4 import subprocess | |
5 from Bio import SeqIO | |
6 from BCBio import GFF | |
7 from Bio.SeqFeature import SeqFeature, FeatureLocation | |
8 | |
9 | |
10 def main(expterm, fasta, gff3): | |
11 with open(fasta, 'r') as handle: | |
12 seq_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta")) | |
13 | |
14 # Build coords file | |
15 with open(gff3, 'r') as handle: | |
16 for rec in GFF.parse(handle, base_dict=seq_dict): | |
17 with open('tmp.coords', 'w') as coords: | |
18 for feat in rec.features: | |
19 if feat.type == 'gene': | |
20 coords.write('\t'.join([ | |
21 feat.id, | |
22 str(feat.location.start + 1), | |
23 str(feat.location.end), | |
24 rec.id, | |
25 ]) + '\n') | |
26 with open('tmp.fasta', 'w') as fasta_handle: | |
27 SeqIO.write(rec, fasta_handle, 'fasta') | |
28 | |
29 cmd = ['transterm', '-p', expterm, fasta, 'tmp.coords'] | |
30 output = subprocess.check_output(cmd) | |
31 # TERM 1 4342 - 4366 + F 93 -11.5 -3.22878 | opp_overlap 4342, overlap 4340 4357 | |
32 ttre = re.compile( | |
33 '^ (?P<name>.*) (?P<start>\d+) - (?P<end>\d+)\s+' | |
34 '(?P<strand>[-+])\s+(?P<loc>[GFRTHNgfr]+)\s+' | |
35 '(?P<conf>\d+)\s+(?P<hp>[0-9.-]+)\s+(?P<tail>[0-9.-]+)' | |
36 ) | |
37 | |
38 rec.features = [] | |
39 batches = output.split('SEQUENCE ') | |
40 for batch in batches[1:]: | |
41 batch_lines = batch.split('\n') | |
42 # Strip the header | |
43 interesting = batch_lines[2:] | |
44 unformatted = [x for x in interesting if x.startswith(' ')][0::2] | |
45 for terminator in unformatted: | |
46 m = ttre.match(terminator) | |
47 if m: | |
48 start = int(m.group('start')) - 1 | |
49 end = int(m.group('end')) | |
50 if m.group('strand') == '+': | |
51 strand = 1 | |
52 else: | |
53 strand = 0 | |
54 | |
55 feature = SeqFeature( | |
56 FeatureLocation(start, end), | |
57 type="terminator", | |
58 strand=strand, | |
59 qualifiers={ | |
60 "source": "TransTermHP_2.09", | |
61 "score": m.group('conf'), | |
62 "ID": m.group('name'), | |
63 } | |
64 ) | |
65 rec.features.append(feature) | |
66 yield rec | |
67 | |
68 if __name__ == '__main__': | |
69 for record in main(*sys.argv[1:4]): | |
70 GFF.write([record], sys.stdout) |