comparison shm_csr.py @ 96:385dea3c6cb5 draft

planemo upload commit 423a48569c69301fdbf893ac3a649128404dfff5
author rhpvorderman
date Fri, 05 Jan 2024 08:53:22 +0000
parents 6809c63d9161
children
comparison
equal deleted inserted replaced
95:d63eff357515 96:385dea3c6cb5
1 import argparse 1 import argparse
2 import logging 2 import logging
3 import sys 3 import sys
4 import os 4 import os
5 import traceback
5 import typing 6 import typing
6 from typing import Optional 7 from typing import Optional
7 8
8 from collections import defaultdict 9 from collections import defaultdict
9 10
10 REGION_FILTERS = ("leader", "FR1", "CDR1", "FR2", "CDR2") 11 REGION_FILTERS = ("leader", "FR1", "CDR1", "FR2", "CDR2", "None")
11 12
13
def int_or_zero(value: typing.Any) -> int:
    """Convert *value* to an int, returning 0 when it cannot be converted.

    Used when parsing length columns that may be empty or otherwise
    non-numeric, so one bad cell does not abort processing of the file.

    :param value: Anything accepted by ``int()``, or an unparsable value.
    :return: ``int(value)`` on success, otherwise ``0``.
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        # ValueError: unparsable strings such as "" or "NA".
        # TypeError: objects int() cannot handle at all (e.g. None); the
        # parameter is typed Any, so those must fall back to 0 as well.
        return 0
12 19
13 class Mutation(typing.NamedTuple): 20 class Mutation(typing.NamedTuple):
14 """Represent a mutation type as a tuple""" 21 """Represent a mutation type as a tuple"""
15 frm: str # 'from' is a reserved python keyword. 22 frm: str # 'from' is a reserved python keyword.
16 where: int 23 where: int
175 mutationdic[ID + "_FR3"] = [Mutation.from_string(x) for x in linesplt[fr3Index].split("|") if x] 182 mutationdic[ID + "_FR3"] = [Mutation.from_string(x) for x in linesplt[fr3Index].split("|") if x]
176 183
177 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] 184 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
178 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] 185 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
179 186
180 fr1Length = int(linesplt[fr1LengthIndex]) 187 fr1Length = int_or_zero(linesplt[fr1LengthIndex])
181 fr2Length = int(linesplt[fr2LengthIndex]) 188 fr2Length = int_or_zero(linesplt[fr2LengthIndex])
182 fr3Length = int(linesplt[fr3LengthIndex]) 189 fr3Length = int_or_zero(linesplt[fr3LengthIndex])
183 cdr1Length = int(linesplt[cdr1LengthIndex]) 190 cdr1Length = int_or_zero(linesplt[cdr1LengthIndex])
184 cdr2Length = int(linesplt[cdr2LengthIndex]) 191 cdr2Length = int_or_zero(linesplt[cdr2LengthIndex])
185 LengthDic[ID] = (fr1Length, cdr1Length, fr2Length, cdr2Length, fr3Length) 192 LengthDic[ID] = (fr1Length, cdr1Length, fr2Length, cdr2Length, fr3Length)
186 193
187 cdr1AALengthDic[ID] = int(linesplt[cdr1AALengthIndex]) 194 cdr1AALengthDic[ID] = int_or_zero(linesplt[cdr1AALengthIndex])
188 cdr2AALengthDic[ID] = int(linesplt[cdr2AALengthIndex]) 195 cdr2AALengthDic[ID] = int_or_zero(linesplt[cdr2AALengthIndex])
189 196
190 IDlist += [ID] 197 IDlist += [ID]
191 print("len(mutationdic) =", len(mutationdic)) 198 print("len(mutationdic) =", len(mutationdic))
192 199
193 with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle: 200 with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle:
220 # sum(region_lengths[0:]) (Equivalent to everything) 227 # sum(region_lengths[0:]) (Equivalent to everything)
221 # sum(region_lengths[1:]) Gets everything except FR1 etc. 228 # sum(region_lengths[1:]) Gets everything except FR1 etc.
222 # We determine the position to start summing below. 229 # We determine the position to start summing below.
223 # This returns 0 for leader, 1 for FR1 etc. 230 # This returns 0 for leader, 1 for FR1 etc.
224 length_start_pos = REGION_FILTERS.index(empty_region_filter) 231 length_start_pos = REGION_FILTERS.index(empty_region_filter)
232 if empty_region_filter == "None":
233 length_start_pos = 0
225 234
226 o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n") 235 o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n")
227 for ID in IDlist: 236 for ID in IDlist:
228 mutations = mutationListByID[ID] 237 mutations = mutationListByID[ID]
229 region_length = sum(LengthDic[ID][length_start_pos:]) 238 region_length = sum(LengthDic[ID][length_start_pos:])