annotate igm_naive_mutations.py @ 98:d714f5ea83d7 draft default tip

planemo upload commit 1a01065a084a817382872154f779b94090a35ebf
author rhpvorderman
date Wed, 10 Jan 2024 12:32:47 +0000
parents cf8ad181628f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
92
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
1 #!/usr/bin/env python3
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
2
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
3 """
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
4 Find naive mutations (< 2% mutated) for IGM genes
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
5 """
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
6
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
7 import argparse
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
8 import contextlib
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
9
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
10
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
def find_naive_mutations(mutation_file, naive_file, naive_memory_file,
                         percentage_cutoff=0.02):
    """Split the IGM rows of a mutation table into naive and naive memory.

    The input is a tab-separated text file with a header line followed by
    rows of exactly five fields: sequence id, best match, number of
    mutations, region length and one trailing field that is ignored here.
    The header is copied to both output files. Rows whose best match is not
    "IGM" are dropped; every remaining row is copied unchanged to exactly
    one of the two outputs.

    :param mutation_file: path of the table to read (scatter.txt).
    :param naive_file: path that receives the IGM rows whose mutated
        fraction is strictly below ``percentage_cutoff``.
    :param naive_memory_file: path that receives all other IGM rows.
    :param percentage_cutoff: threshold for mutations / region length.
        Despite the name this is a fraction, not a percentage: the default
        of 0.02 means 2%.
    :raises ValueError: if a row does not have exactly five tab-separated
        fields, or if the counts of an IGM row are not integers.
    """
    # A compound with statement throws a syntax error with the included python
    # 3.7.1 in the container, so use an exit stack instead.
    with contextlib.ExitStack() as stack:
        mutations = stack.enter_context(open(mutation_file, "rt"))
        naive = stack.enter_context(open(naive_file, "wt"))
        naive_memory = stack.enter_context(open(naive_memory_file, "wt"))

        # An empty input has no header and no rows. Both outputs were
        # already created (empty) by opening them, so there is nothing left
        # to do; a bare next() would raise StopIteration here.
        header = next(mutations, None)
        if header is None:
            return
        naive.write(header)
        naive_memory.write(header)

        for line in mutations:
            sequence_id, best_match, mutation_no, region_length, _ = \
                line.strip('\n').split('\t')
            if best_match != "IGM":
                continue
            mutation_no = int(mutation_no)
            region_length = int(region_length)
            if region_length:
                mutated_fraction = mutation_no / region_length
            elif mutation_no:
                # Mutations reported on a zero-length region: the fraction
                # is unbounded, so the row can never be below the cutoff.
                mutated_fraction = float("inf")
            else:
                # Nothing sequenced and nothing mutated: count the row as
                # unmutated instead of failing with ZeroDivisionError.
                mutated_fraction = 0.0
            if mutated_fraction < percentage_cutoff:
                naive.write(line)
            else:
                naive_memory.write(line)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
33
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
34
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
def main():
    """Read the three file paths from the command line and run the split.

    The positional arguments are, in order: the mutation table to read, the
    output file for naive rows and the output file for naive memory rows.
    The cutoff is left at the default of find_naive_mutations.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("mutation_file", help="scatter.txt")
    for output_name in ("naive_file", "naive_memory_file"):
        cli.add_argument(output_name)
    options = cli.parse_args()
    find_naive_mutations(
        mutation_file=options.mutation_file,
        naive_file=options.naive_file,
        naive_memory_file=options.naive_memory_file,
    )
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
43
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
44
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
# Run the command line interface only when this file is executed as a
# script, so that importing the module has no side effects.
if __name__ == "__main__":
    main()