annotate coverage2gff.py @ 15:3151a72a6671 draft

Uploaded
author petr-novak
date Tue, 03 Sep 2019 05:20:02 -0400
parents
children 0e820310d4dc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env python3
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
2 import argparse
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
3 import tempfile
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
4 import shutil
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
5 import sys
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
6
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
def parse_args(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default), argparse falls back to ``sys.argv[1:]``, which is the
            behaviour existing callers rely on.

    Returns:
        argparse.Namespace with two string attributes:
        ``gff_file`` (path given with -g/--gff_file) and
        ``profile`` (path given with -p/--profile).
        Both options are required; argparse exits with a usage error when
        either one is missing.
    """
    description = (
        "append mean coverage computed from a coverage profile\n"
        "to the features of a gff3 file"
    )

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-g',
        '--gff_file',
        required=True,
        help="input gff3 file for appending coverage information",
        type=str)
    parser.add_argument(
        '-p',
        '--profile',
        required=True,
        # The profile is only ever read by this script, so it is an input.
        help="input file with coverage profile",
        type=str)
    return parser.parse_args(argv)
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
33
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
def read_coverage(profile):
    """Read a coverage profile file into a dictionary.

    The file is consumed two lines at a time: a header line followed by a
    line of whitespace-separated integers. The first character of the header
    is dropped (presumably a '>' marker -- confirm against the producer of
    the profile) and the rest, stripped of surrounding whitespace, becomes
    the key.

    Args:
        profile: Path to the coverage profile file.

    Returns:
        dict mapping each header name to a list of ints, one per value on
        the line that follows the header. A trailing unpaired line is
        ignored.

    Raises:
        ValueError: If a value line contains a token that is not an integer.
    """
    d = {}
    with open(profile) as p:
        # zip(p, p) pulls two consecutive lines from the same iterator on
        # every step, i.e. (header, values) pairs.
        for name, prof in zip(p, p):
            d[name[1:].strip()] = [int(i) for i in prof.split()]
    return d
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
41
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
42
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
def _append_coverage(line, coverage_hash):
    """Return GFF feature *line* with a ``Coverage`` attribute appended.

    Args:
        line: One feature line; fields are split on whitespace, field 1 is
            the sequence name and fields 4 and 5 are the start and end.
        coverage_hash: dict mapping sequence name to a list of per-position
            coverage values.

    Returns:
        The stripped input line followed by ``;Coverage=<mean>`` and a
        newline, where <mean> is the mean coverage over start..end rounded
        to 3 decimal places.

    Raises:
        KeyError: If the sequence name is missing from *coverage_hash*.
        IndexError: If the line has fewer than five fields.
        ValueError: If start or end is not an integer.
    """
    line_parts = line.split()
    start = int(line_parts[3])
    end = int(line_parts[4])
    # Coordinates are treated as 1-based and inclusive: the slice starts at
    # start - 1 and the divisor is the inclusive interval length.
    profile = coverage_hash[line_parts[0]]
    coverage = round(sum(profile[(start - 1):end]) / (end - start + 1), 3)
    return "{};Coverage={}\n".format(line.strip(), coverage)


def main():
    """Append a ``Coverage`` attribute to every feature of the GFF file.

    Reads the coverage profile given on the command line, writes an
    annotated copy of the GFF file to a temporary file and then copies that
    over the original, so the input GFF file is rewritten in place. Comment
    lines (starting with '#') and blank lines are copied unchanged.
    """
    args = parse_args()
    coverage_hash = read_coverage(args.profile)
    # The annotated copy is written through the temporary file's own handle
    # and the file is removed automatically when the block exits.
    with tempfile.NamedTemporaryFile(mode='w') as out:
        with open(args.gff_file) as f:
            for line in f:
                # Blank lines carry no feature; pass them through instead
                # of failing on the missing start/end fields.
                if line.startswith("#") or not line.strip():
                    out.write(line)
                else:
                    out.write(_append_coverage(line, coverage_hash))
        # The input is closed at this point; flush so copyfile sees all
        # data before the original is overwritten.
        out.flush()
        shutil.copyfile(out.name, args.gff_file)
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
61
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
62
3151a72a6671 Uploaded
petr-novak
parents:
diff changeset
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()