Mercurial > repos > iuc > transtermhp
diff transtermhp.py @ 0:c28817831a24 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transtermhp commit 799339e22181d28cb2b145454d353d6025779636
author | iuc |
---|---|
date | Fri, 09 Oct 2015 09:22:42 -0400 |
parents | |
children | 1a1ec22a7e28 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/transtermhp.py Fri Oct 09 09:22:42 2015 -0400 @@ -0,0 +1,70 @@ +#!/usr/bin/env python +import sys +import re +import subprocess +from Bio import SeqIO +from BCBio import GFF +from Bio.SeqFeature import SeqFeature, FeatureLocation + + +def main(expterm, fasta, gff3): + with open(fasta, 'r') as handle: + seq_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta")) + + # Build coords file + with open(gff3, 'r') as handle: + for rec in GFF.parse(handle, base_dict=seq_dict): + with open('tmp.coords', 'w') as coords: + for feat in rec.features: + if feat.type == 'gene': + coords.write('\t'.join([ + feat.id, + str(feat.location.start + 1), + str(feat.location.end), + rec.id, + ]) + '\n') + with open('tmp.fasta', 'w') as fasta_handle: + SeqIO.write(rec, fasta_handle, 'fasta') + + cmd = ['transterm', '-p', expterm, fasta, 'tmp.coords'] + output = subprocess.check_output(cmd) + # TERM 1 4342 - 4366 + F 93 -11.5 -3.22878 | opp_overlap 4342, overlap 4340 4357 + ttre = re.compile( + '^ (?P<name>.*) (?P<start>\d+) - (?P<end>\d+)\s+' + '(?P<strand>[-+])\s+(?P<loc>[GFRTHNgfr]+)\s+' + '(?P<conf>\d+)\s+(?P<hp>[0-9.-]+)\s+(?P<tail>[0-9.-]+)' + ) + + rec.features = [] + batches = output.split('SEQUENCE ') + for batch in batches[1:]: + batch_lines = batch.split('\n') + # Strip the header + interesting = batch_lines[2:] + unformatted = [x for x in interesting if x.startswith(' ')][0::2] + for terminator in unformatted: + m = ttre.match(terminator) + if m: + start = int(m.group('start')) - 1 + end = int(m.group('end')) + if m.group('strand') == '+': + strand = 1 + else: + strand = 0 + + feature = SeqFeature( + FeatureLocation(start, end), + type="terminator", + strand=strand, + qualifiers={ + "source": "TransTermHP_2.09", + "score": m.group('conf'), + "ID": m.group('name'), + } + ) + rec.features.append(feature) + yield rec + +if __name__ == '__main__': + for record in main(*sys.argv[1:4]): + GFF.write([record], sys.stdout)