Mercurial > repos > davidvanzessen > shm_csr
comparison igm_naive_mutations.py @ 92:cf8ad181628f draft
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
| author | rhpvorderman |
|---|---|
| date | Mon, 12 Dec 2022 12:32:44 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 91:f387cc1580c6 | 92:cf8ad181628f |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 | |
| 3 """ | |
| 4 Find naive mutations (< 2% mutated) for IGM genes | |
| 5 """ | |
| 6 | |
| 7 import argparse | |
| 8 import contextlib | |
| 9 | |
| 10 | |
def find_naive_mutations(mutation_file, naive_file, naive_memory_file,
                         percentage_cutoff=0.02):
    """
    Split the IGM rows of a mutation table into naive and naive memory files.

    The input is a tab-separated text file with a header line followed by
    rows of exactly five columns: sequence id, best match, number of
    mutations, region length and one further column that is not used here.
    The header is copied to both output files. Rows whose best match is not
    "IGM" are dropped. Each remaining row is written unchanged to naive_file
    when mutations / region length is below percentage_cutoff, and to
    naive_memory_file otherwise.

    An empty input file (no header line) leaves both output files empty.

    :param mutation_file: Path of the table to read.
    :param naive_file: Path to write the rows below the cutoff to.
    :param naive_memory_file: Path to write the remaining IGM rows to.
    :param percentage_cutoff: Cutoff expressed as a fraction, not a
                              percentage: the default of 0.02 means 2%.
    :raises ValueError: If a row does not have exactly five tab-separated
                        columns, or if the mutation count or region length
                        of an IGM row is not an integer.
    :raises ZeroDivisionError: If an IGM row has a region length of 0.
    """
    # A compound with statement throws a syntax error with the included python
    # 3.7.1 in the container, so use an exit stack instead.
    with contextlib.ExitStack() as stack:
        mutations = stack.enter_context(open(mutation_file, "rt"))
        naive = stack.enter_context(open(naive_file, "wt"))
        naive_memory = stack.enter_context(open(naive_memory_file, "wt"))
        # Use a default so that an empty file does not leak a bare
        # StopIteration out of this function.
        header = next(mutations, None)
        if header is None:
            return
        naive.write(header)
        naive_memory.write(header)
        for line in mutations:
            # Only strip the newline: empty leading or trailing columns must
            # survive so that the row still splits into five fields.
            sequence_id, best_match, mutation_no, region_length, _ = \
                line.strip('\n').split('\t')
            if best_match != "IGM":
                continue
            mutation_no = int(mutation_no)
            region_length = int(region_length)
            if (mutation_no / region_length) < percentage_cutoff:
                naive.write(line)
            else:
                naive_memory.write(line)
| 33 | |
| 34 | |
def main():
    """
    Command line entry point.

    Takes three positional arguments (the mutation table to read, the naive
    output path and the naive memory output path) and hands them to
    find_naive_mutations, which then runs with its default cutoff.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("mutation_file", help="scatter.txt")
    # The two output paths are plain positionals without help text.
    for output_argument in ("naive_file", "naive_memory_file"):
        cli.add_argument(output_argument)
    options = cli.parse_args()
    find_naive_mutations(
        mutation_file=options.mutation_file,
        naive_file=options.naive_file,
        naive_memory_file=options.naive_memory_file,
    )


# Only run the command line interface when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
