Mercurial > repos > davidvanzessen > shm_csr
annotate igm_naive_mutations.py @ 94:84e9e5c8c101 draft
"planemo upload commit d4be85014b638f1d50b318d4b735be7f6e973140"
author | rhpvorderman |
---|---|
date | Fri, 24 Mar 2023 16:58:28 +0000 |
parents | cf8ad181628f |
children |
rev | line source |
---|---|
92
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
2 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
3 """ |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
4 Find naive mutations (< 2% mutated) for IGM genes |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
5 """ |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
6 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
7 import argparse |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
8 import contextlib |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
9 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
10 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
def find_naive_mutations(mutation_file, naive_file, naive_memory_file,
                         percentage_cutoff=0.02):
    """
    Split the IGM rows of a mutation table into naive and naive memory files.

    The input is read as a tab-separated table whose first line is a header.
    Every following row must have exactly five columns: sequence id, best
    match, number of mutations, region length and one column that is not
    used here. Rows whose best match is not "IGM" are dropped. For the
    remaining rows the fraction ``mutations / region length`` decides the
    destination: below ``percentage_cutoff`` the unmodified row goes to
    ``naive_file``, otherwise it goes to ``naive_memory_file``. The header
    line is copied to both output files.

    Blank lines are skipped. IGM rows with a region length of 0 are skipped
    as well, because no mutation fraction can be computed for them. An empty
    input file yields two empty output files.

    :param mutation_file: Path of the tab-separated input table.
    :param naive_file: Output path for rows below the cutoff.
    :param naive_memory_file: Output path for rows at or above the cutoff.
    :param percentage_cutoff: Exclusive upper bound on the mutated fraction
                              (0.02 means 2%) for a row to count as naive.
    :raises ValueError: If a non-blank row does not have exactly five
                        columns, or an IGM row has non-integer counts.
    """
    # A compound with statement throws a syntax error with the included python
    # 3.7.1 in the container, so use an exit stack instead.
    with contextlib.ExitStack() as stack:
        mutations = stack.enter_context(open(mutation_file, "rt"))
        naive = stack.enter_context(open(naive_file, "wt"))
        naive_memory = stack.enter_context(open(naive_memory_file, "wt"))
        # An empty input has no header line. The default keeps next() from
        # raising StopIteration and simply leaves both outputs empty.
        header = next(mutations, "")
        naive.write(header)
        naive_memory.write(header)
        for line in mutations:
            # A blank (e.g. trailing) line carries no record; unpacking it
            # below would fail, so skip it.
            if not line.strip():
                continue
            sequence_id, best_match, mutation_no, region_length, _ = \
                line.strip('\n').split('\t')
            if best_match != "IGM":
                continue
            mutation_no = int(mutation_no)
            region_length = int(region_length)
            # Without any region nucleotides the mutated fraction is
            # undefined, so the row cannot be classified either way.
            if region_length == 0:
                continue
            if (mutation_no / region_length) < percentage_cutoff:
                naive.write(line)
            else:
                naive_memory.write(line)
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
33 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
34 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
def main():
    """
    Command line entry point.

    Takes three positional arguments (the mutation table to read, the naive
    output path and the naive memory output path) and passes them on to
    find_naive_mutations, which is called with its default cutoff.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("mutation_file", help="scatter.txt")
    cli.add_argument("naive_file")
    cli.add_argument("naive_memory_file")
    options = cli.parse_args()
    find_naive_mutations(
        mutation_file=options.mutation_file,
        naive_file=options.naive_file,
        naive_memory_file=options.naive_memory_file,
    )
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
43 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
44 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()