Mercurial > repos > petr-novak > dante
diff coverage2gff.py @ 15:3151a72a6671 draft
Uploaded
author | petr-novak |
---|---|
date | Tue, 03 Sep 2019 05:20:02 -0400 |
parents | |
children | 0e820310d4dc |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/coverage2gff.py Tue Sep 03 05:20:02 2019 -0400 @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +import argparse +import tempfile +import shutil +import sys + +def parse_args(): + '''Argument parsin''' + description = """ + parsing cap3 assembly aln output + """ + + parser = argparse.ArgumentParser( + description=description, + formatter_class=argparse.RawTextHelpFormatter) + parser.add_argument( + '-g', + '--gff_file', + default=None, + required=True, + help="input gff3 file for appending coverage information", + type=str, + action='store') + parser.add_argument( + '-p', + '--profile', + default=None, + required=True, + help="output file for coverage profile", + type=str, + action="store") + return parser.parse_args() + +def read_coverage(profile): + with open(profile) as p: + d = {} + for name, prof in zip(p, p): + d[name[1:].strip()] = [int(i) for i in prof.split()] + print(d, file=sys.stderr) + return d + + +def main(): + args = parse_args() + coverage_hash = read_coverage(args.profile) + gff_tmp = tempfile.NamedTemporaryFile() + with open(args.gff_file) as f, open(gff_tmp.name, 'w') as out: + for line in f: + if line[0] == "#": + out.write(line) + else: + line_parts = line.split() + start = int(line_parts[3]) + end = int(line_parts[4]) + coverage = round( sum(coverage_hash[line_parts[0]][( + start - 1):end]) / (end - start + 1), 3) + new_line = "{};Coverage={}\n".format(line.strip(), coverage) + out.write(new_line) + + shutil.copyfile(gff_tmp.name, args.gff_file) + + +if __name__ == "__main__": + + main()