Mercurial > repos > iuc > transtermhp
annotate transtermhp.py @ 0:c28817831a24 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
author | iuc |
---|---|
date | Fri, 09 Oct 2015 09:22:42 -0400 |
parents | |
children | 1a1ec22a7e28 |
rev | line source |
---|---|
0
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
2 import sys |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
3 import re |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
4 import subprocess |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
5 from Bio import SeqIO |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
6 from BCBio import GFF |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
7 from Bio.SeqFeature import SeqFeature, FeatureLocation |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
8 |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
9 |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
# Matches one terminator line of TransTermHP output, e.g.
#   TERM 1  4342 - 4366  + F  93 -11.5 -3.22878 | opp_overlap 4342, overlap 4340 4357
# Compiled once at import time instead of once per record; raw strings keep
# the ``\d`` / ``\s`` escapes from being treated as (invalid) string escapes.
_TERM_RE = re.compile(
    r'^ (?P<name>.*) (?P<start>\d+) - (?P<end>\d+)\s+'
    r'(?P<strand>[-+])\s+(?P<loc>[GFRTHNgfr]+)\s+'
    r'(?P<conf>\d+)\s+(?P<hp>[0-9.-]+)\s+(?P<tail>[0-9.-]+)'
)


def _write_gene_coords(rec, path):
    """Write the ``gene`` features of *rec* to *path* as a tab-separated coords file."""
    with open(path, 'w') as coords:
        for feat in rec.features:
            if feat.type != 'gene':
                continue
            # Biopython locations are 0-based half-open; the coords file gets
            # a 1-based inclusive start, hence the +1 on start only.
            # NOTE(review): the gene strand is not encoded here; TransTermHP's
            # coords format may expect start > end for reverse-strand genes --
            # confirm against the TransTermHP usage notes.
            coords.write('\t'.join([
                feat.id,
                str(feat.location.start + 1),
                str(feat.location.end),
                rec.id,
            ]) + '\n')


def _parse_terminators(output):
    """Yield ``(name, start, end, strand, conf)`` for each terminator in *output*.

    *output* is the text printed by ``transterm``.  ``start``/``end`` are
    returned 0-based half-open, ``strand`` is ``1`` for ``+`` and ``-1`` for
    ``-``, and ``conf`` is the confidence column as the original string.
    """
    # Everything before the first 'SEQUENCE ' marker is preamble.
    for batch in output.split('SEQUENCE ')[1:]:
        # Drop the first two lines of the section (header).
        body = batch.split('\n')[2:]
        indented = [line for line in body if line.startswith(' ')]
        # Only every other indented line is examined; presumably terminator
        # lines alternate with a hairpin-sequence line -- confirm against
        # real transterm output.
        for line in indented[0::2]:
            m = _TERM_RE.match(line)
            if not m:
                continue
            first = int(m.group('start'))
            last = int(m.group('end'))
            # Order-independent conversion from 1-based inclusive to 0-based
            # half-open, so a terminator reported as "high - low" still
            # produces a valid location.
            start = min(first, last) - 1
            end = max(first, last)
            strand = 1 if m.group('strand') == '+' else -1
            # The greedy name group swallows column padding; strip it so the
            # feature ID carries no stray whitespace.
            yield m.group('name').strip(), start, end, strand, m.group('conf')


def main(expterm, fasta, gff3):
    """Run TransTermHP once per GFF3 record and yield the annotated records.

    For every record parsed from *gff3* (with sequences attached from
    *fasta*), the record's ``gene`` features are written to ``tmp.coords``
    and its sequence to ``tmp.fasta`` in the current working directory,
    ``transterm`` is executed on those two files, and the record's features
    are replaced by the ``terminator`` features parsed from the output.

    Args:
        expterm: Path handed to ``transterm -p``.
        fasta: Path to the FASTA file holding the sequences.
        gff3: Path to the GFF3 file holding the gene annotations.

    Yields:
        Each GFF3 record, with ``features`` replaced by the terminators
        predicted for it.

    Raises:
        subprocess.CalledProcessError: If ``transterm`` exits with a
            non-zero status.
    """
    with open(fasta, 'r') as handle:
        seq_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))

    with open(gff3, 'r') as handle:
        for rec in GFF.parse(handle, base_dict=seq_dict):
            _write_gene_coords(rec, 'tmp.coords')
            with open('tmp.fasta', 'w') as fasta_handle:
                SeqIO.write(rec, fasta_handle, 'fasta')

            # Run on the per-record FASTA that was just written, not on the
            # full input: otherwise terminators of every sequence in the
            # input would be attached to each record.
            cmd = ['transterm', '-p', expterm, 'tmp.fasta', 'tmp.coords']
            # universal_newlines=True yields text on both Python 2 and 3, so
            # the str-based parsing below works either way.
            output = subprocess.check_output(cmd, universal_newlines=True)

            rec.features = [
                SeqFeature(
                    # Strand lives on the location; -1 (not 0) marks the
                    # reverse strand so writers emit '-' rather than '.'.
                    FeatureLocation(start, end, strand=strand),
                    type="terminator",
                    qualifiers={
                        "source": "TransTermHP_2.09",
                        "score": conf,
                        "ID": name,
                    },
                )
                for name, start, end, strand, conf in _parse_terminators(output)
            ]
            yield rec
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
67 |
c28817831a24
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
iuc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Up to three positional command-line arguments are forwarded to main()
    # in order: expterm, fasta, gff3.
    cli_args = sys.argv[1:4]
    # Each annotated record is written to stdout as soon as it is produced.
    for annotated in main(*cli_args):
        GFF.write([annotated], sys.stdout)