annotate coverage2gff.py @ 22:1eabd42e00ef draft

Uploaded
author petr-novak
date Fri, 03 Apr 2020 07:27:59 -0400
parents 0e820310d4dc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env python3
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
2 import argparse
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
3 import tempfile
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
4 import shutil
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
5 import sys
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
6
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
def parse_args():
    """Parse the command-line arguments of the script.

    Reads ``sys.argv`` through ``argparse``. Both options are mandatory;
    argparse exits with a usage message when either one is missing.

    Returns:
        argparse.Namespace with two string attributes:
            gff_file -- path of the GFF3 file to annotate
            profile  -- path of the coverage profile file to read
    """
    # The previous description ("parsing cap3 assembly aln output") was
    # inherited from another tool and did not describe this script.
    description = """
    append mean coverage (Coverage=<value>) to the features of a gff3 file,
    computed from a coverage profile
    """

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawTextHelpFormatter)
    # required=True already guarantees a value, and type=str / action='store'
    # are the argparse defaults, so they are not spelled out.
    parser.add_argument(
        '-g',
        '--gff_file',
        required=True,
        help="input gff3 file for appending coverage information "
             "(the file is rewritten in place)")
    parser.add_argument(
        '-p',
        '--profile',
        required=True,
        # This file is only ever read, so it is an input, not an output.
        help="input file with coverage profile")
    return parser.parse_args()
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
33
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
def read_coverage(profile):
    """Load coverage values from a file made of two-line records.

    The file is consumed two lines at a time: a header line followed by a
    line of whitespace-separated integers. The dictionary key is the header
    with its first character removed and surrounding whitespace stripped
    (presumably the first character is a ">" marker -- TODO confirm against
    the producer of the profile file).

    A final header line that has no following values line is ignored.

    Args:
        profile: path of the coverage profile file.

    Returns:
        dict mapping each record name to its list of integer values.

    Raises:
        ValueError: if a value on a values line is not an integer.
    """
    coverage_by_name = {}
    with open(profile) as handle:
        lines = iter(handle)
        for header in lines:
            values_line = next(lines, None)
            if values_line is None:
                # Unpaired trailing header: nothing to store for it.
                break
            record_name = header[1:].strip()
            coverage_by_name[record_name] = list(map(int, values_line.split()))
    return coverage_by_name
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
40
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
41
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
def main():
    """Append a ``Coverage`` attribute to every feature of the GFF3 file.

    For each feature line the mean of the coverage profile over the
    1-based inclusive interval [start, end] (columns 4 and 5) of the
    sequence named in column 1 is computed, rounded to three decimals and
    appended to the line as ``;Coverage=<value>``. Lines starting with
    ``#`` and blank lines are copied unchanged.

    The result is written to a temporary file first and then copied over
    the input GFF3 file, so the input is replaced in place.

    Raises:
        KeyError: if a feature refers to a sequence missing from the profile.
        ValueError: if the start/end columns are not integers.
    """
    args = parse_args()
    coverage_hash = read_coverage(args.profile)
    # A single handle inside a context manager: the temporary file is
    # always closed (and thereby removed), even when processing fails.
    with tempfile.NamedTemporaryFile(mode='w') as out:
        with open(args.gff_file) as f:
            for line in f:
                # Blank lines carry no columns; treating them as features
                # would fail on the column lookup below.
                if not line.strip() or line[0] == "#":
                    out.write(line)
                    continue
                line_parts = line.split()
                start = int(line_parts[3])
                end = int(line_parts[4])
                # GFF coordinates are 1-based and inclusive, hence start - 1.
                covered = coverage_hash[line_parts[0]][(start - 1):end]
                coverage = round(sum(covered) / (end - start + 1), 3)
                new_line = "{};Coverage={}\n".format(line.strip(), coverage)
                out.write(new_line)
        # Make sure everything is on disk before the file is copied by name.
        out.flush()
        shutil.copyfile(out.name, args.gff_file)
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
60
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
61
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
# Script entry point: annotate the GFF3 file only when this module is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()