comparison coverage2gff.py @ 15:3151a72a6671 draft

Uploaded
author petr-novak
date Tue, 03 Sep 2019 05:20:02 -0400
parents
children 0e820310d4dc
comparison
equal deleted inserted replaced
14:a6c55d1bdb6c 15:3151a72a6671
1 #!/usr/bin/env python3
2 import argparse
3 import tempfile
4 import shutil
5 import sys
6
7 def parse_args():
8 '''Argument parsin'''
9 description = """
10 parsing cap3 assembly aln output
11 """
12
13 parser = argparse.ArgumentParser(
14 description=description,
15 formatter_class=argparse.RawTextHelpFormatter)
16 parser.add_argument(
17 '-g',
18 '--gff_file',
19 default=None,
20 required=True,
21 help="input gff3 file for appending coverage information",
22 type=str,
23 action='store')
24 parser.add_argument(
25 '-p',
26 '--profile',
27 default=None,
28 required=True,
29 help="output file for coverage profile",
30 type=str,
31 action="store")
32 return parser.parse_args()
33
34 def read_coverage(profile):
35 with open(profile) as p:
36 d = {}
37 for name, prof in zip(p, p):
38 d[name[1:].strip()] = [int(i) for i in prof.split()]
39 print(d, file=sys.stderr)
40 return d
41
42
43 def main():
44 args = parse_args()
45 coverage_hash = read_coverage(args.profile)
46 gff_tmp = tempfile.NamedTemporaryFile()
47 with open(args.gff_file) as f, open(gff_tmp.name, 'w') as out:
48 for line in f:
49 if line[0] == "#":
50 out.write(line)
51 else:
52 line_parts = line.split()
53 start = int(line_parts[3])
54 end = int(line_parts[4])
55 coverage = round( sum(coverage_hash[line_parts[0]][(
56 start - 1):end]) / (end - start + 1), 3)
57 new_line = "{};Coverage={}\n".format(line.strip(), coverage)
58 out.write(new_line)
59
60 shutil.copyfile(gff_tmp.name, args.gff_file)
61
62
63 if __name__ == "__main__":
64
65 main()