Mercurial > repos > davidvanzessen > shm_csr
diff igm_naive_mutations.py @ 92:cf8ad181628f draft
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
author | rhpvorderman |
---|---|
date | Mon, 12 Dec 2022 12:32:44 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python3

"""
Find naive mutations (< 2% mutated) for IGM genes
"""

import argparse
import contextlib

# Default mutated fraction (mutations / region length) below which an IGM
# sequence is written to the naive output.
DEFAULT_PERCENTAGE_CUTOFF = 0.02


def find_naive_mutations(mutation_file, naive_file, naive_memory_file,
                         percentage_cutoff=DEFAULT_PERCENTAGE_CUTOFF):
    """Split the IGM rows of a mutation table into naive and memory files.

    The input is a tab-separated text file with one header line followed by
    rows of exactly five fields: sequence id, best match, number of
    mutations, region length and one trailing field that is ignored. The
    header is copied to both outputs. Rows whose best match is not "IGM"
    are dropped. Remaining rows are written unchanged to ``naive_file`` when
    ``mutations / region_length`` is strictly below ``percentage_cutoff``,
    and to ``naive_memory_file`` otherwise.

    An empty input file produces two empty output files. IGM rows with a
    region length of 0 have no defined mutated fraction and are skipped.

    :param mutation_file: path of the table to read.
    :param naive_file: path to write rows below the cutoff to.
    :param naive_memory_file: path to write all other IGM rows to.
    :param percentage_cutoff: cutoff expressed as a fraction (0.02 == 2%).
    :raises ValueError: when a row does not have exactly five fields, or
        when the mutation count or region length of an IGM row is not an
        integer.
    """
    # The original author reported that a multi-item with statement is a
    # syntax error on the Python 3.7.1 shipped in the container, so the
    # three files are managed with an ExitStack instead.
    with contextlib.ExitStack() as stack:
        mutations = stack.enter_context(open(mutation_file, "rt"))
        naive = stack.enter_context(open(naive_file, "wt"))
        naive_memory = stack.enter_context(open(naive_memory_file, "wt"))

        header = next(mutations, None)
        if header is None:
            # Empty input: nothing to classify, leave both outputs empty
            # instead of leaking a bare StopIteration to the caller.
            return
        naive.write(header)
        naive_memory.write(header)

        for line in mutations:
            _, best_match, mutation_no, region_length, _ = \
                line.strip('\n').split('\t')
            if best_match != "IGM":
                continue
            mutation_no = int(mutation_no)
            region_length = int(region_length)
            if region_length == 0:
                # The mutated fraction is undefined without any region
                # positions; skip the row rather than divide by zero.
                continue
            if (mutation_no / region_length) < percentage_cutoff:
                naive.write(line)
            else:
                naive_memory.write(line)


def main(argv=None):
    """Command line entry point.

    :param argv: argument list to parse; ``None`` (the default) makes
        argparse read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("mutation_file", help="scatter.txt")
    parser.add_argument("naive_file")
    parser.add_argument("naive_memory_file")
    parser.add_argument(
        "--percentage-cutoff", type=float,
        default=DEFAULT_PERCENTAGE_CUTOFF,
        help="Mutated fraction below which an IGM sequence is naive "
             "(default: %(default)s)")
    args = parser.parse_args(argv)
    find_naive_mutations(args.mutation_file, args.naive_file,
                         args.naive_memory_file,
                         percentage_cutoff=args.percentage_cutoff)


if __name__ == "__main__":
    main()