Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 11:84a1a3f70407 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Mon, 15 Feb 2021 21:53:24 +0000 |
parents | e18c5293aac7 |
children | 7a418148319d |
comparison
equal
deleted
inserted
replaced
10:e18c5293aac7 | 11:84a1a3f70407 |
---|---|
8 Looks for reads with mutation at known | 8 Looks for reads with mutation at known |
9 positions and calculates frequencies and stats. | 9 positions and calculates frequencies and stats. |
10 | 10 |
11 ======= ========== ================= ================================ | 11 ======= ========== ================= ================================ |
12 Version Date Author Description | 12 Version Date Author Description |
13 0.2.1 2019-10-27 Gundula Povysil - | 13 2.0.0 2020-10-30 Gundula Povysil - |
14 ======= ========== ================= ================================ | 14 ======= ========== ================= ================================ |
15 | 15 |
16 | 16 |
17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam | 17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam |
18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json | 18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json |
21 """ | 21 """ |
22 | 22 |
23 from __future__ import division | 23 from __future__ import division |
24 | 24 |
25 import argparse | 25 import argparse |
26 import itertools | |
27 import json | 26 import json |
28 import operator | 27 import operator |
29 import os | 28 import os |
30 import re | 29 import re |
31 import sys | 30 import sys |
45 parser.add_argument('--inputJson', | 44 parser.add_argument('--inputJson', |
46 help='JSON file with data collected by mut2read.py.') | 45 help='JSON file with data collected by mut2read.py.') |
47 parser.add_argument('--sscsJson', | 46 parser.add_argument('--sscsJson', |
48 help='JSON file with SSCS counts collected by mut2sscs.py.') | 47 help='JSON file with SSCS counts collected by mut2sscs.py.') |
49 parser.add_argument('--outputFile', | 48 parser.add_argument('--outputFile', |
50 help='Output xlsx file with summary of mutations.') | 49 help='Output xlsx file of mutation details.') |
51 parser.add_argument('--outputFile2', | |
52 help='Output xlsx file with allele frequencies of mutations.') | |
53 parser.add_argument('--outputFile3', | |
54 help='Output xlsx file with examples of the tier classification.') | |
55 parser.add_argument('--thresh', type=int, default=0, | 50 parser.add_argument('--thresh', type=int, default=0, |
56 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') | 51 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') |
57 parser.add_argument('--phred', type=int, default=20, | 52 parser.add_argument('--phred', type=int, default=20, |
58 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') | 53 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') |
59 parser.add_argument('--trim', type=int, default=10, | 54 parser.add_argument('--trim', type=int, default=10, |
60 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') | 55 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') |
61 parser.add_argument('--chimera_correction', action="store_true", | 56 parser.add_argument('--chimera_correction', action="store_true", |
62 help='Count chimeric variants and correct the variant frequencies') | 57 help='Count chimeric variants and correct the variant frequencies') |
63 parser.add_argument('--softclipping_dist', type=int, default=15, | |
64 help='Count mutation as an artifact if mutation lies within this parameter away from the softclipping part of the read.') | |
65 parser.add_argument('--reads_threshold', type=float, default=1.0, | |
66 help='Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, means all reads of a family have to be softclipped.') | |
67 return parser | 58 return parser |
68 | 59 |
69 | 60 |
70 def safe_div(x, y): | 61 def safe_div(x, y): |
71 if y == 0: | 62 if y == 0: |
79 file1 = args.mutFile | 70 file1 = args.mutFile |
80 file2 = args.bamFile | 71 file2 = args.bamFile |
81 json_file = args.inputJson | 72 json_file = args.inputJson |
82 sscs_json = args.sscsJson | 73 sscs_json = args.sscsJson |
83 outfile = args.outputFile | 74 outfile = args.outputFile |
84 outfile2 = args.outputFile2 | |
85 outfile3 = args.outputFile3 | |
86 thresh = args.thresh | 75 thresh = args.thresh |
87 phred_score = args.phred | 76 phred_score = args.phred |
88 trim = args.trim | 77 trim = args.trim |
89 chimera_correction = args.chimera_correction | 78 chimera_correction = args.chimera_correction |
90 thr = args.softclipping_dist | |
91 threshold_reads = args.reads_threshold | |
92 | 79 |
93 if os.path.isfile(file1) is False: | 80 if os.path.isfile(file1) is False: |
94 sys.exit("Error: Could not find '{}'".format(file1)) | 81 sys.exit("Error: Could not find '{}'".format(file1)) |
95 if os.path.isfile(file2) is False: | 82 if os.path.isfile(file2) is False: |
96 sys.exit("Error: Could not find '{}'".format(file2)) | 83 sys.exit("Error: Could not find '{}'".format(file2)) |
100 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) | 87 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) |
101 if phred_score < 0: | 88 if phred_score < 0: |
102 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) | 89 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) |
103 if trim < 0: | 90 if trim < 0: |
104 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) | 91 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) |
105 if thr <= 0: | |
106 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thr)) | |
107 | 92 |
108 # load dicts | 93 # load dicts |
109 with open(json_file, "r") as f: | 94 with open(json_file, "r") as f: |
110 (tag_dict, cvrg_dict) = json.load(f) | 95 (tag_dict, cvrg_dict) = json.load(f) |
111 | 96 |
112 with open(sscs_json, "r") as f: | 97 with open(sscs_json, "r") as f: |
113 (mut_pos_dict, ref_pos_dict) = json.load(f) | 98 (mut_pos_dict, ref_pos_dict) = json.load(f) |
114 | 99 |
115 # read bam file | 100 # read bam file |
116 # pysam.index(file2) | |
117 bam = pysam.AlignmentFile(file2, "rb") | 101 bam = pysam.AlignmentFile(file2, "rb") |
118 | 102 |
119 # create mut_dict | 103 # create mut_dict |
120 mut_dict = {} | 104 mut_dict = {} |
121 mut_read_pos_dict = {} | 105 mut_read_pos_dict = {} |
122 mut_read_dict = {} | 106 mut_read_dict = {} |
123 reads_dict = {} | 107 reads_dict = {} |
124 mut_read_cigar_dict = {} | |
125 i = 0 | 108 i = 0 |
126 mut_array = [] | 109 mut_array = [] |
127 | 110 |
128 for count, variant in enumerate(VCF(file1)): | 111 for variant in VCF(file1): |
129 #if count == 2000: | |
130 # break | |
131 chrom = variant.CHROM | 112 chrom = variant.CHROM |
132 stop_pos = variant.start | 113 stop_pos = variant.start |
133 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) | 114 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) |
134 ref = variant.REF | 115 ref = variant.REF |
135 if len(variant.ALT) == 0: | 116 alt = variant.ALT[0] |
136 continue | |
137 else: | |
138 alt = variant.ALT[0] | |
139 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt | 117 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt |
140 | 118 |
141 if len(ref) == len(alt): | 119 if len(ref) == len(alt): |
142 mut_array.append([chrom, stop_pos, ref, alt]) | 120 mut_array.append([chrom, stop_pos, ref, alt]) |
143 i += 1 | 121 i += 1 |
144 mut_dict[chrom_stop_pos] = {} | 122 mut_dict[chrom_stop_pos] = {} |
145 mut_read_pos_dict[chrom_stop_pos] = {} | 123 mut_read_pos_dict[chrom_stop_pos] = {} |
146 reads_dict[chrom_stop_pos] = {} | 124 reads_dict[chrom_stop_pos] = {} |
147 mut_read_cigar_dict[chrom_stop_pos] = {} | |
148 | 125 |
149 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): | 126 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): |
150 if pileupcolumn.reference_pos == stop_pos: | 127 if pileupcolumn.reference_pos == stop_pos: |
151 count_alt = 0 | 128 count_alt = 0 |
152 count_ref = 0 | 129 count_ref = 0 |
153 count_indel = 0 | 130 count_indel = 0 |
154 count_n = 0 | 131 count_n = 0 |
155 count_other = 0 | 132 count_other = 0 |
156 count_lowq = 0 | 133 count_lowq = 0 |
157 n = 0 | 134 n = 0 |
158 #print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), | 135 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), |
159 # "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) | 136 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) |
160 for pileupread in pileupcolumn.pileups: | 137 for pileupread in pileupcolumn.pileups: |
161 n += 1 | 138 n += 1 |
162 if not pileupread.is_del and not pileupread.is_refskip: | 139 if not pileupread.is_del and not pileupread.is_refskip: |
163 tag = pileupread.alignment.query_name | 140 tag = pileupread.alignment.query_name |
164 nuc = pileupread.alignment.query_sequence[pileupread.query_position] | 141 nuc = pileupread.alignment.query_sequence[pileupread.query_position] |
170 if nuc in mut_dict[chrom_stop_pos][tag]: | 147 if nuc in mut_dict[chrom_stop_pos][tag]: |
171 mut_dict[chrom_stop_pos][tag][nuc] += 1 | 148 mut_dict[chrom_stop_pos][tag][nuc] += 1 |
172 else: | 149 else: |
173 mut_dict[chrom_stop_pos][tag][nuc] = 1 | 150 mut_dict[chrom_stop_pos][tag][nuc] = 1 |
174 if tag not in mut_read_pos_dict[chrom_stop_pos]: | 151 if tag not in mut_read_pos_dict[chrom_stop_pos]: |
175 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1] | 152 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 |
176 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)] | 153 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence) |
177 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring] | |
178 else: | 154 else: |
179 mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1) | 155 mut_read_pos_dict[chrom_stop_pos][tag] = np.append( |
180 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence)) | 156 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1) |
181 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring) | 157 reads_dict[chrom_stop_pos][tag] = np.append( |
158 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence)) | |
159 | |
182 if nuc == alt: | 160 if nuc == alt: |
183 count_alt += 1 | 161 count_alt += 1 |
184 if tag not in mut_read_dict: | 162 if tag not in mut_read_dict: |
185 mut_read_dict[tag] = {} | 163 mut_read_dict[tag] = {} |
186 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) | 164 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) |
195 else: | 173 else: |
196 count_other += 1 | 174 count_other += 1 |
197 else: | 175 else: |
198 count_indel += 1 | 176 count_indel += 1 |
199 | 177 |
200 #print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) | 178 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) |
201 #else: | 179 else: |
202 # print("indels are currently not evaluated") | 180 print("indels are currently not evaluated") |
181 | |
203 mut_array = np.array(mut_array) | 182 mut_array = np.array(mut_array) |
204 for read in bam.fetch(until_eof=True): | 183 for read in bam.fetch(until_eof=True): |
205 if read.is_unmapped: | 184 if read.is_unmapped: |
206 pure_tag = read.query_name[:-5] | 185 pure_tag = read.query_name[:-5] |
207 nuc = "na" | 186 nuc = "na" |
217 bam.close() | 196 bam.close() |
218 | 197 |
219 # create pure_tags_dict | 198 # create pure_tags_dict |
220 pure_tags_dict = {} | 199 pure_tags_dict = {} |
221 for key1, value1 in sorted(mut_dict.items()): | 200 for key1, value1 in sorted(mut_dict.items()): |
222 #if len(np.where(np.array(['#'.join(str(i) for i in z) | |
223 # for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0: | |
224 # continue | |
225 | |
226 i = np.where(np.array(['#'.join(str(i) for i in z) | 201 i = np.where(np.array(['#'.join(str(i) for i in z) |
227 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] | 202 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
228 ref = mut_array[i, 2] | 203 ref = mut_array[i, 2] |
229 alt = mut_array[i, 3] | 204 alt = mut_array[i, 3] |
230 pure_tags_dict[key1] = {} | 205 pure_tags_dict[key1] = {} |
244 if len(value) < thresh: | 219 if len(value) < thresh: |
245 pure_tags_dict_short[key] = value | 220 pure_tags_dict_short[key] = value |
246 else: | 221 else: |
247 pure_tags_dict_short = pure_tags_dict | 222 pure_tags_dict_short = pure_tags_dict |
248 | 223 |
249 # whole_array = [] | |
250 # for k in pure_tags_dict.values(): | |
251 # if len(k) != 0: | |
252 # keys = k.keys() | |
253 # if len(keys) > 1: | |
254 # for k1 in keys: | |
255 # whole_array.append(k1) | |
256 # else: | |
257 # whole_array.append(keys[0]) | |
258 | |
259 # output summary with threshold | 224 # output summary with threshold |
260 workbook = xlsxwriter.Workbook(outfile) | 225 workbook = xlsxwriter.Workbook(outfile) |
261 workbook2 = xlsxwriter.Workbook(outfile2) | 226 workbook2 = xlsxwriter.Workbook(outfile2) |
262 workbook3 = xlsxwriter.Workbook(outfile3) | 227 workbook3 = xlsxwriter.Workbook(outfile3) |
263 ws1 = workbook.add_worksheet("Results") | 228 ws1 = workbook.add_worksheet("Results") |
282 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | 247 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', |
283 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', | 248 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', |
284 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', | 249 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', |
285 'in phase', 'chimeric tag') | 250 'in phase', 'chimeric tag') |
286 ws1.write_row(0, 0, header_line) | 251 ws1.write_row(0, 0, header_line) |
287 | |
288 counter_tier11 = 0 | 252 counter_tier11 = 0 |
289 counter_tier12 = 0 | 253 counter_tier12 = 0 |
290 counter_tier21 = 0 | 254 counter_tier21 = 0 |
291 counter_tier22 = 0 | 255 counter_tier22 = 0 |
292 counter_tier23 = 0 | 256 counter_tier23 = 0 |
293 counter_tier24 = 0 | 257 counter_tier24 = 0 |
294 counter_tier31 = 0 | 258 counter_tier31 = 0 |
295 counter_tier32 = 0 | 259 counter_tier32 = 0 |
296 counter_tier41 = 0 | 260 counter_tier41 = 0 |
297 counter_tier42 = 0 | 261 counter_tier42 = 0 |
298 # if chimera_correction: | 262 counter_tier5 = 0 |
299 # counter_tier43 = 0 | |
300 counter_tier51 = 0 | |
301 counter_tier52 = 0 | |
302 counter_tier53 = 0 | |
303 counter_tier54 = 0 | |
304 counter_tier55 = 0 | |
305 counter_tier6 = 0 | |
306 | |
307 row = 1 | 263 row = 1 |
308 tier_dict = {} | 264 tier_dict = {} |
309 chimera_dict = {} | 265 chimera_dict = {} |
310 for key1, value1 in sorted(mut_dict.items()): | 266 for key1, value1 in sorted(mut_dict.items()): |
311 counts_mut = 0 | 267 counts_mut = 0 |
312 chimeric_tag_list = [] | |
313 chimeric_tag = {} | 268 chimeric_tag = {} |
314 if key1 in pure_tags_dict_short.keys(): | 269 if key1 in pure_tags_dict_short.keys(): |
315 i = np.where(np.array(['#'.join(str(i) for i in z) | 270 i = np.where(np.array(['#'.join(str(i) for i in z) |
316 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] | 271 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
317 ref = mut_array[i, 2] | 272 ref = mut_array[i, 2] |
318 alt = mut_array[i, 3] | 273 alt = mut_array[i, 3] |
319 dcs_median = cvrg_dict[key1][2] | 274 dcs_median = cvrg_dict[key1][2] |
320 whole_array = pure_tags_dict_short[key1].keys() | 275 whole_array = list(pure_tags_dict_short[key1].keys()) |
321 | 276 |
322 tier_dict[key1] = {} | 277 tier_dict[key1] = {} |
323 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), | 278 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0)] |
324 ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), | |
325 ("tier 6", 0)] | |
326 for k, v in values_tier_dict: | 279 for k, v in values_tier_dict: |
327 tier_dict[key1][k] = v | 280 tier_dict[key1][k] = v |
328 | 281 |
329 used_keys = [] | 282 used_keys = [] |
330 if 'ab' in mut_pos_dict[key1].keys(): | 283 if 'ab' in mut_pos_dict[key1].keys(): |
349 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): | 302 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): |
350 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): | 303 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): |
351 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) | 304 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) |
352 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 305 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
353 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] | 306 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] |
354 # na1f = na1/total1 | 307 else: |
355 else: | |
356 # na1 = na1f = 0 | |
357 na1 = 0 | 308 na1 = 0 |
358 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 309 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
359 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ'] | 310 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ'] |
360 # lowq1f = lowq1 / total1 | 311 else: |
361 else: | |
362 # lowq1 = lowq1f = 0 | |
363 lowq1 = 0 | 312 lowq1 = 0 |
364 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 313 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
365 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref] | 314 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref] |
366 ref1f = ref1 / (total1 - na1 - lowq1) | 315 ref1f = ref1 / (total1 - na1 - lowq1) |
367 else: | 316 else: |
392 | 341 |
393 if key2[:-5] + '.ab.2' in mut_dict[key1].keys(): | 342 if key2[:-5] + '.ab.2' in mut_dict[key1].keys(): |
394 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values()) | 343 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values()) |
395 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | 344 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): |
396 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na'] | 345 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na'] |
397 # na2f = na2 / total2 | 346 else: |
398 else: | |
399 # na2 = na2f = 0 | |
400 na2 = 0 | 347 na2 = 0 |
401 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | 348 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): |
402 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ'] | 349 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ'] |
403 # lowq2f = lowq2 / total2 | 350 else: |
404 else: | |
405 # lowq2 = lowq2f = 0 | |
406 lowq2 = 0 | 351 lowq2 = 0 |
407 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | 352 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): |
408 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref] | 353 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref] |
409 ref2f = ref2 / (total2 - na2 - lowq2) | 354 ref2f = ref2 / (total2 - na2 - lowq2) |
410 else: | 355 else: |
435 | 380 |
436 if key2[:-5] + '.ba.1' in mut_dict[key1].keys(): | 381 if key2[:-5] + '.ba.1' in mut_dict[key1].keys(): |
437 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values()) | 382 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values()) |
438 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | 383 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): |
439 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na'] | 384 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na'] |
440 # na3f = na3 / total3 | 385 else: |
441 else: | |
442 # na3 = na3f = 0 | |
443 na3 = 0 | 386 na3 = 0 |
444 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | 387 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): |
445 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ'] | 388 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ'] |
446 # lowq3f = lowq3 / total3 | 389 else: |
447 else: | |
448 # lowq3 = lowq3f = 0 | |
449 lowq3 = 0 | 390 lowq3 = 0 |
450 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | 391 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): |
451 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref] | 392 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref] |
452 ref3f = ref3 / (total3 - na3 - lowq3) | 393 ref3f = ref3 / (total3 - na3 - lowq3) |
453 else: | 394 else: |
474 | 415 |
475 if key2[:-5] + '.ba.2' in mut_dict[key1].keys(): | 416 if key2[:-5] + '.ba.2' in mut_dict[key1].keys(): |
476 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values()) | 417 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values()) |
477 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | 418 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): |
478 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na'] | 419 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na'] |
479 # na4f = na4 / total4 | 420 else: |
480 else: | |
481 # na4 = na4f = 0 | |
482 na4 = 0 | 421 na4 = 0 |
483 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | 422 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): |
484 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ'] | 423 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ'] |
485 # lowq4f = lowq4 / total4 | 424 else: |
486 else: | |
487 # lowq4 = lowq4f = 0 | |
488 lowq4 = 0 | 425 lowq4 = 0 |
489 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | 426 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): |
490 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref] | 427 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref] |
491 ref4f = ref4 / (total4 - na4 - lowq4) | 428 ref4f = ref4 / (total4 - na4 - lowq4) |
492 else: | 429 else: |
511 total4 = total4new = na4 = lowq4 = 0 | 448 total4 = total4new = na4 = lowq4 = 0 |
512 ref4 = alt4 = ref4f = alt4f = 0 | 449 ref4 = alt4 = ref4f = alt4f = 0 |
513 | 450 |
514 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 | 451 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 |
515 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 | 452 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 |
516 cigars_dcs1 = cigars_dcs2 = cigars_dcs3 = cigars_dcs4 = [] | 453 |
517 pos_read1 = pos_read2 = pos_read3 = pos_read4 = [] | |
518 end_read1 = end_read2 = end_read3 = end_read4 = [] | |
519 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): | 454 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): |
520 read_pos1 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.1'])) | 455 read_pos1 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.1']) |
521 read_len_median1 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.1'])) | 456 read_len_median1 = np.median(reads_dict[key1][key2[:-5] + '.ab.1']) |
522 cigars_dcs1 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.1'] | |
523 #print(mut_read_cigar_dict[key1][key2[:-5] + '.ab.1']) | |
524 pos_read1 = mut_read_pos_dict[key1][key2[:-5] + '.ab.1'] | |
525 #print(cigars_dcs1) | |
526 end_read1 = reads_dict[key1][key2[:-5] + '.ab.1'] | |
527 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): | 457 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): |
528 read_pos2 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.2'])) | 458 read_pos2 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.2']) |
529 read_len_median2 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.2'])) | 459 read_len_median2 = np.median(reads_dict[key1][key2[:-5] + '.ab.2']) |
530 cigars_dcs2 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.2'] | |
531 pos_read2 = mut_read_pos_dict[key1][key2[:-5] + '.ab.2'] | |
532 end_read2 = reads_dict[key1][key2[:-5] + '.ab.2'] | |
533 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): | 460 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): |
534 read_pos3 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.1'])) | 461 read_pos3 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.1']) |
535 read_len_median3 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.1'])) | 462 read_len_median3 = np.median(reads_dict[key1][key2[:-5] + '.ba.1']) |
536 cigars_dcs3 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.1'] | |
537 pos_read3 = mut_read_pos_dict[key1][key2[:-5] + '.ba.1'] | |
538 end_read3 = reads_dict[key1][key2[:-5] + '.ba.1'] | |
539 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): | 463 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): |
540 read_pos4 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.2'])) | 464 read_pos4 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.2']) |
541 read_len_median4 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.2'])) | 465 read_len_median4 = np.median(reads_dict[key1][key2[:-5] + '.ba.2']) |
542 #print(mut_read_cigar_dict[key1][key2[:-5] + '.ba.2']) | |
543 cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'] | |
544 | |
545 pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2'] | |
546 #print(cigars_dcs4) | |
547 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2'] | |
548 | 466 |
549 used_keys.append(key2[:-5]) | 467 used_keys.append(key2[:-5]) |
550 counts_mut += 1 | 468 counts_mut += 1 |
551 if (alt1f + alt2f + alt3f + alt4f) > 0.5: | 469 if (alt1f + alt2f + alt3f + alt4f) > 0.5: |
552 if total1new == 0: | 470 if total1new == 0: |
575 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) | 493 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) |
576 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) | 494 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) |
577 | 495 |
578 trimmed = False | 496 trimmed = False |
579 contradictory = False | 497 contradictory = False |
580 softclipped_mutation_allMates = False | 498 |
581 softclipped_mutation_oneOfTwoMates = False | 499 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): |
582 softclipped_mutation_oneOfTwoSSCS = False | |
583 softclipped_mutation_oneMate = False | |
584 softclipped_mutation_oneMateOneSSCS = False | |
585 print() | |
586 print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3) | |
587 dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = [] | |
588 dist_end_read1 = dist_end_read2 = dist_end_read3 = dist_end_read4 = [] | |
589 ratio_dist_start1 = ratio_dist_start2 = ratio_dist_start3 = ratio_dist_start4 = False | |
590 ratio_dist_end1 = ratio_dist_end2 = ratio_dist_end3 = ratio_dist_end4 = False | |
591 | |
592 # mate 1 - SSCS ab | |
593 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] | |
594 ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads | |
595 | |
596 if any(ij is True for ij in softclipped_idx1): | |
597 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] | |
598 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1] | |
599 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] | |
600 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] | |
601 dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)] | |
602 | |
603 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
604 if any(ij is True for ij in softclipped_both_ends_idx1): | |
605 print(softclipped_both_ends_idx1) | |
606 for nr, indx in enumerate(softclipped_both_ends_idx1): | |
607 if indx: | |
608 if dist_start_read1[nr] <= dist_end_read1[nr]: | |
609 dist_end_read1[nr] = thr + 1000 # use dist of start and set start to very large number | |
610 else: | |
611 dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number | |
612 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads | |
613 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads | |
614 print(key1, "mate1 ab", dist_start_read1, dist_end_read1, cigars_dcs1, ratio1, ratio_dist_start1, ratio_dist_end1) | |
615 | |
616 # mate 1 - SSCS ba | |
617 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] | |
618 ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads | |
619 if any(ij is True for ij in softclipped_idx4): | |
620 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] | |
621 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4] | |
622 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4] | |
623 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] | |
624 dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)] | |
625 | |
626 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
627 if any(ij is True for ij in softclipped_both_ends_idx4): | |
628 print(softclipped_both_ends_idx4) | |
629 for nr, indx in enumerate(softclipped_both_ends_idx4): | |
630 if indx: | |
631 if dist_start_read4[nr] <= dist_end_read4[nr]: | |
632 dist_end_read4[nr] = thr + 1000 # use dist of start and set start to very large number | |
633 else: | |
634 dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number | |
635 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads | |
636 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads | |
637 print(key1, "mate1 ba", dist_start_read4, dist_end_read4,cigars_dcs4, ratio4, ratio_dist_start4, ratio_dist_end4) | |
638 | |
639 # mate 2 - SSCS ab | |
640 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] | |
641 #print(sum(softclipped_idx2)) | |
642 ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads | |
643 if any(ij is True for ij in softclipped_idx2): | |
644 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] | |
645 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2] | |
646 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] | |
647 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)] | |
648 dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)] | |
649 | |
650 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
651 if any(ij is True for ij in softclipped_both_ends_idx2): | |
652 print(softclipped_both_ends_idx2) | |
653 for nr, indx in enumerate(softclipped_both_ends_idx2): | |
654 if indx: | |
655 if dist_start_read2[nr] <= dist_end_read2[nr]: | |
656 dist_end_read2[nr] = thr + 1000 # use dist of start and set start to very large number | |
657 else: | |
658 dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number | |
659 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads | |
660 #print(ratio_dist_end2) | |
661 #print([True if x <= thr else False for x in ratio_dist_end2]) | |
662 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads | |
663 print(key1, "mate2 ab", dist_start_read2, dist_end_read2,cigars_dcs2, ratio2, ratio_dist_start2, ratio_dist_end2) | |
664 | |
665 # mate 2 - SSCS ba | |
666 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] | |
667 ratio3 = safe_div(sum(softclipped_idx3), float(len(softclipped_idx3))) >= threshold_reads | |
668 if any(ij is True for ij in softclipped_idx3): | |
669 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] | |
670 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3] | |
671 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] | |
672 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] | |
673 dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)] | |
674 | |
675 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
676 if any(ij is True for ij in softclipped_both_ends_idx3): | |
677 print(softclipped_both_ends_idx3) | |
678 for nr, indx in enumerate(softclipped_both_ends_idx3): | |
679 if indx: | |
680 if dist_start_read3[nr] <= dist_end_read3[nr]: | |
681 dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh | |
682 else: | |
683 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number | |
684 #print([True if x <= thr else False for x in dist_start_read3]) | |
685 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads | |
686 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads | |
687 print(key1, "mate2 ba", dist_start_read3, dist_end_read3,cigars_dcs3, ratio3, ratio_dist_start3, ratio_dist_end3) | |
688 | |
689 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant | |
690 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | | |
691 (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & | |
692 all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): | |
693 alt1ff = 0 | 500 alt1ff = 0 |
694 alt4ff = 0 | 501 alt4ff = 0 |
695 alt2ff = 0 | 502 alt2ff = 0 |
696 alt3ff = 0 | 503 alt3ff = 0 |
697 trimmed = False | 504 trimmed = False |
698 contradictory = True | 505 contradictory = True |
699 # softclipping tiers | |
700 # information of both mates available --> all reads for both mates and SSCS are softclipped | |
701 elif (ratio1 & ratio4 & ratio2 & ratio3 & | |
702 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & | |
703 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | |
704 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
705 softclipped_mutation_allMates = True | |
706 softclipped_mutation_oneOfTwoMates = False | |
707 softclipped_mutation_oneOfTwoSSCS = False | |
708 softclipped_mutation_oneMate = False | |
709 softclipped_mutation_oneMateOneSSCS = False | |
710 alt1ff = 0 | |
711 alt4ff = 0 | |
712 alt2ff = 0 | |
713 alt3ff = 0 | |
714 trimmed = False | |
715 contradictory = False | |
716 print(key1, "softclipped_mutation_allMates", softclipped_mutation_allMates) | |
717 # information of both mates available --> only one mate softclipped | |
718 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) | | |
719 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) & | |
720 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | |
721 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
722 softclipped_mutation_allMates = False | |
723 softclipped_mutation_oneOfTwoMates = True | |
724 softclipped_mutation_oneOfTwoSSCS = False | |
725 softclipped_mutation_oneMate = False | |
726 softclipped_mutation_oneMateOneSSCS = False | |
727 alt1ff = 0 | |
728 alt4ff = 0 | |
729 alt2ff = 0 | |
730 alt3ff = 0 | |
731 trimmed = False | |
732 contradictory = False | |
733 print(key1, "softclipped_mutation_oneOfTwoMates", softclipped_mutation_oneOfTwoMates) | |
734 # information of both mates available --> only one mate softclipped | |
735 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & | |
736 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & | |
737 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | |
738 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
739 softclipped_mutation_allMates = False | |
740 softclipped_mutation_oneOfTwoMates = False | |
741 softclipped_mutation_oneOfTwoSSCS = True | |
742 softclipped_mutation_oneMate = False | |
743 softclipped_mutation_oneMateOneSSCS = False | |
744 alt1ff = 0 | |
745 alt4ff = 0 | |
746 alt2ff = 0 | |
747 alt3ff = 0 | |
748 trimmed = False | |
749 contradictory = False | |
750 print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff]) | |
751 # information of one mate available --> all reads of one mate are softclipped | |
752 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & | |
753 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | | |
754 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & | |
755 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available | |
756 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
757 #if ((((len(dist_start_read1) > 0 | len(dist_end_read1) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1))) & | |
758 # ((len(dist_start_read4) > 0 | len(dist_end_read4) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4)))) | | |
759 # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) & | |
760 # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))): | |
761 softclipped_mutation_allMates = False | |
762 softclipped_mutation_oneOfTwoMates = False | |
763 softclipped_mutation_oneOfTwoSSCS = False | |
764 softclipped_mutation_oneMate = True | |
765 softclipped_mutation_oneMateOneSSCS = False | |
766 alt1ff = 0 | |
767 alt4ff = 0 | |
768 alt2ff = 0 | |
769 alt3ff = 0 | |
770 trimmed = False | |
771 contradictory = False | |
772 print(key1, "softclipped_mutation_oneMate", softclipped_mutation_oneMate) | |
773 # information of one mate available --> only one SSCS is softclipped | |
774 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & | |
775 (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) | | |
776 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & | |
777 (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available | |
778 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
779 #if ((all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1)) | | |
780 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4))) | | |
781 # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) | | |
782 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))): | |
783 softclipped_mutation_allMates = False | |
784 softclipped_mutation_oneOfTwoMates = False | |
785 softclipped_mutation_oneOfTwoSSCS = False | |
786 softclipped_mutation_oneMate = False | |
787 softclipped_mutation_oneMateOneSSCS = True | |
788 alt1ff = 0 | |
789 alt4ff = 0 | |
790 alt2ff = 0 | |
791 alt3ff = 0 | |
792 trimmed = False | |
793 contradictory = False | |
794 print(key1, "softclipped_mutation_oneMateOneSSCS", softclipped_mutation_oneMateOneSSCS) | |
795 | |
796 else: | 506 else: |
797 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): | 507 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): |
798 beg1 = total1new | 508 beg1 = total1new |
799 total1new = 0 | 509 total1new = 0 |
800 alt1ff = 0 | 510 alt1ff = 0 |
823 trimmed = True | 533 trimmed = True |
824 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) | 534 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) |
825 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) | 535 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) |
826 | 536 |
827 # assign tiers | 537 # assign tiers |
828 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & | 538 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | (all(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): |
829 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | |
830 (all(int(ij) >= 3 for ij in [total2new, total3new]) & | |
831 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
832 tier = "1.1" | 539 tier = "1.1" |
833 counter_tier11 += 1 | 540 counter_tier11 += 1 |
834 tier_dict[key1]["tier 1.1"] += 1 | 541 tier_dict[key1]["tier 1.1"] += 1 |
835 | 542 |
836 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & | 543 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) |
837 any(int(ij) >= 3 for ij in [total1new, total4new]) & | 544 & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): |
838 any(int(ij) >= 3 for ij in [total2new, total3new]) & | |
839 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | |
840 tier = "1.2" | 545 tier = "1.2" |
841 counter_tier12 += 1 | 546 counter_tier12 += 1 |
842 tier_dict[key1]["tier 1.2"] += 1 | 547 tier_dict[key1]["tier 1.2"] += 1 |
843 | 548 |
844 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | 549 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
845 any(int(ij) >= 3 for ij in [total1new, total4new]) & | 550 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): |
846 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | |
847 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
848 any(int(ij) >= 3 for ij in [total2new, total3new]) & | |
849 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
850 tier = "2.1" | 551 tier = "2.1" |
851 counter_tier21 += 1 | 552 counter_tier21 += 1 |
852 tier_dict[key1]["tier 2.1"] += 1 | 553 tier_dict[key1]["tier 2.1"] += 1 |
853 | 554 |
854 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & | 555 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): |
855 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | |
856 tier = "2.2" | 556 tier = "2.2" |
857 counter_tier22 += 1 | 557 counter_tier22 += 1 |
858 tier_dict[key1]["tier 2.2"] += 1 | 558 tier_dict[key1]["tier 2.2"] += 1 |
859 | 559 |
860 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | 560 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) |
861 any(int(ij) >= 3 for ij in [total2new, total3new]) & | 561 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))): |
862 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & | |
863 any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) | | |
864 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
865 any(int(ij) >= 3 for ij in [total1new, total4new]) & | |
866 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & | |
867 any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))): | |
868 tier = "2.3" | 562 tier = "2.3" |
869 counter_tier23 += 1 | 563 counter_tier23 += 1 |
870 tier_dict[key1]["tier 2.3"] += 1 | 564 tier_dict[key1]["tier 2.3"] += 1 |
871 | 565 |
872 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | 566 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
873 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | 567 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): |
874 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
875 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
876 tier = "2.4" | 568 tier = "2.4" |
877 counter_tier24 += 1 | 569 counter_tier24 += 1 |
878 tier_dict[key1]["tier 2.4"] += 1 | 570 tier_dict[key1]["tier 2.4"] += 1 |
879 | 571 |
880 elif ((len(pure_tags_dict_short[key1]) > 1) & | 572 elif ((len(pure_tags_dict_short[key1]) > 1) & (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): |
881 (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | | |
882 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | |
883 tier = "3.1" | 573 tier = "3.1" |
884 counter_tier31 += 1 | 574 counter_tier31 += 1 |
885 tier_dict[key1]["tier 3.1"] += 1 | 575 tier_dict[key1]["tier 3.1"] += 1 |
886 | 576 |
887 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | 577 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) |
888 all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) | | 578 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): |
889 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
890 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | |
891 tier = "3.2" | 579 tier = "3.2" |
892 counter_tier32 += 1 | 580 counter_tier32 += 1 |
893 tier_dict[key1]["tier 3.2"] += 1 | 581 tier_dict[key1]["tier 3.2"] += 1 |
894 | 582 |
895 elif (trimmed): | 583 elif (trimmed): |
900 elif (contradictory): | 588 elif (contradictory): |
901 tier = "4.2" | 589 tier = "4.2" |
902 counter_tier42 += 1 | 590 counter_tier42 += 1 |
903 tier_dict[key1]["tier 4.2"] += 1 | 591 tier_dict[key1]["tier 4.2"] += 1 |
904 | 592 |
905 elif softclipped_mutation_allMates: | 593 else: |
906 tier = "5.1" | 594 tier = "5" |
907 counter_tier51 += 1 | 595 counter_tier5 += 1 |
908 tier_dict[key1]["tier 5.1"] += 1 | 596 tier_dict[key1]["tier 5"] += 1 |
909 | |
910 elif softclipped_mutation_oneOfTwoMates: | |
911 tier = "5.2" | |
912 counter_tier52 += 1 | |
913 tier_dict[key1]["tier 5.2"] += 1 | |
914 | |
915 elif softclipped_mutation_oneOfTwoSSCS: | |
916 tier = "5.3" | |
917 counter_tier53 += 1 | |
918 tier_dict[key1]["tier 5.3"] += 1 | |
919 | |
920 elif softclipped_mutation_oneMate: | |
921 tier = "5.4" | |
922 counter_tier54 += 1 | |
923 tier_dict[key1]["tier 5.4"] += 1 | |
924 | |
925 elif softclipped_mutation_oneMateOneSSCS: | |
926 tier = "5.5" | |
927 counter_tier55 += 1 | |
928 tier_dict[key1]["tier 5.5"] += 1 | |
929 | |
930 else: | |
931 tier = "6" | |
932 counter_tier6 += 1 | |
933 tier_dict[key1]["tier 6"] += 1 | |
934 | 597 |
935 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) | 598 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) |
936 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) | 599 var_id = '-'.join([chrom, str(int(pos) + 1), ref, alt]) |
937 sample_tag = key2[:-5] | 600 sample_tag = key2[:-5] |
938 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time | 601 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time |
939 # exclude identical tag from array2, to prevent comparison to itself | 602 # exclude identical tag from array2, to prevent comparison to itself |
940 same_tag = np.where(array2 == sample_tag) | 603 same_tag = np.where(array2 == sample_tag) |
941 index_array2 = np.arange(0, len(array2), 1) | 604 index_array2 = np.arange(0, len(array2), 1) |
960 half1_mate1 = array1_half2 | 623 half1_mate1 = array1_half2 |
961 half2_mate1 = array1_half | 624 half2_mate1 = array1_half |
962 half1_mate2 = array2_half2 | 625 half1_mate2 = array2_half2 |
963 half2_mate2 = array2_half | 626 half2_mate2 = array2_half |
964 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" | 627 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
965 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) | 628 dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2]) |
966 min_index = np.where(dist == dist.min()) # get index of min HD | 629 min_index = np.where(dist == dist.min()) # get index of min HD |
967 # get all "b's" of the tag or all "a's" of the tag with minimum HD | 630 # get all "b's" of the tag or all "a's" of the tag with minimum HD |
968 min_tag_half2 = half2_mate2[min_index] | 631 min_tag_half2 = half2_mate2[min_index] |
969 min_tag_array2 = array2[min_index] # get whole tag with min HD | 632 min_tag_array2 = array2[min_index] # get whole tag with min HD |
970 min_value = dist.min() | 633 min_value = dist.min() |
971 # calculate HD of "b" to all "b's" or "a" to all "a's" | 634 # calculate HD of "b" to all "b's" or "a" to all "a's" |
972 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) | 635 dist_second_half = np.array([sum(map(operator.ne, half2_mate1, e)) |
973 for e in min_tag_half2]) | 636 for e in min_tag_half2]) |
974 | 637 |
975 dist2 = dist_second_half.max() | 638 dist2 = dist_second_half.max() |
976 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD | 639 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
977 max_tag = min_tag_array2[max_index] | 640 max_tag = min_tag_array2[max_index] |
978 | 641 |
979 # tags which have identical parts: | 642 # tags which have identical parts: |
980 if min_value == 0 or dist2 == 0: | 643 if min_value == 0 or dist2 == 0: |
981 min_tags_list_zeros.append(tag) | 644 min_tags_list_zeros.append(tag) |
982 chimera_tags.append(max_tag) | 645 chimera_tags.append(max_tag) |
983 # chimeric = True | 646 |
984 # else: | |
985 # chimeric = False | |
986 | |
987 # if mate_b is False: | |
988 # text = "pos {}: sample tag: {}; HD a = {}; HD b' = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, min_value, dist2, list(max_tag), chimeric) | |
989 # else: | |
990 # text = "pos {}: sample tag: {}; HD a' = {}; HD b = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, dist2, min_value, list(max_tag), chimeric) | |
991 i += 1 | 647 i += 1 |
992 chimera_tags = [x for x in chimera_tags if x != []] | 648 chimera_tags = [x for x in chimera_tags if x != []] |
993 chimera_tags_new = [] | 649 chimera_tags_new = [] |
994 for i in chimera_tags: | 650 for i in chimera_tags: |
995 if len(i) > 1: | 651 if len(i) > 1: |
1025 | 681 |
1026 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 682 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1027 {'type': 'formula', | 683 {'type': 'formula', |
1028 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 684 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
1029 'format': format1, | 685 'format': format1, |
1030 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 686 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)}) |
1031 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 687 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1032 {'type': 'formula', | 688 {'type': 'formula', |
1033 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1), | 689 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1), |
1034 'format': format3, | 690 'format': format3, |
1035 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 691 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)}) |
1036 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 692 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1037 {'type': 'formula', | 693 {'type': 'formula', |
1038 'criteria': '=$B${}>="3"'.format(row + 1), | 694 'criteria': '=$B${}>="3"'.format(row + 1), |
1039 'format': format2, | 695 'format': format2, |
1040 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 696 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)}) |
1041 | |
1042 row += 3 | 697 row += 3 |
698 | |
1043 if chimera_correction: | 699 if chimera_correction: |
1044 chimeric_dcs_high_tiers = 0 | 700 chimeric_dcs_high_tiers = 0 |
1045 chimeric_dcs = 0 | 701 chimeric_dcs = 0 |
1046 for keys_chimera in chimeric_tag.keys(): | 702 for keys_chimera in chimeric_tag.keys(): |
1047 tiers = chimeric_tag[keys_chimera] | 703 tiers = chimeric_tag[keys_chimera] |
1050 if high_tiers == len(tiers): | 706 if high_tiers == len(tiers): |
1051 chimeric_dcs_high_tiers += high_tiers - 1 | 707 chimeric_dcs_high_tiers += high_tiers - 1 |
1052 else: | 708 else: |
1053 chimeric_dcs_high_tiers += high_tiers | 709 chimeric_dcs_high_tiers += high_tiers |
1054 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) | 710 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) |
711 | |
1055 # sheet 2 | 712 # sheet 2 |
1056 if chimera_correction: | 713 if chimera_correction: |
1057 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 714 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
1058 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', | 715 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', |
1059 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 716 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
1060 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') | 717 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') |
1061 else: | 718 else: |
1062 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 719 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
1063 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', | 720 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', |
1064 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 721 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
1065 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') | 722 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') |
1066 | 723 |
1067 ws2.write_row(0, 0, header_line2) | 724 ws2.write_row(0, 0, header_line2) |
1068 row = 0 | 725 row = 0 |
1069 | 726 |
1070 for key1, value1 in sorted(tier_dict.items()): | 727 for key1, value1 in sorted(tier_dict.items()): |
1076 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) | 733 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) |
1077 ref_count = cvrg_dict[key1][0] | 734 ref_count = cvrg_dict[key1][0] |
1078 alt_count = cvrg_dict[key1][1] | 735 alt_count = cvrg_dict[key1][1] |
1079 cvrg = ref_count + alt_count | 736 cvrg = ref_count + alt_count |
1080 | 737 |
1081 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) | 738 var_id = '-'.join([chrom, str(int(pos) + 1), ref, alt]) |
1082 lst = [var_id, cvrg] | 739 lst = [var_id, cvrg] |
1083 used_tiers = [] | 740 used_tiers = [] |
1084 cum_af = [] | 741 cum_af = [] |
1085 for key2, value2 in sorted(value1.items()): | 742 for key2, value2 in sorted(value1.items()): |
1086 # calculate cummulative AF | 743 # calculate cummulative AF |
1087 used_tiers.append(value2) | 744 used_tiers.append(value2) |
1088 if len(used_tiers) > 1: | 745 if len(used_tiers) > 1: |
1089 cum = safe_div(sum(used_tiers), cvrg) | 746 cum = safe_div(sum(used_tiers), cvrg) |
1090 cum_af.append(cum) | 747 cum_af.append(cum) |
1091 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place | |
1092 continue | |
1093 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) | 748 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) |
1094 if chimera_correction: | 749 if chimera_correction: |
1095 chimeras_all = chimera_dict[key1][0] | 750 chimeras_all = chimera_dict[key1][0] |
1096 new_alt = sum(used_tiers) - chimeras_all | 751 new_alt = sum(used_tiers) - chimeras_all |
1097 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers))) | 752 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers))) |
1112 lst.extend(used_tiers) | 767 lst.extend(used_tiers) |
1113 lst.extend(cum_af) | 768 lst.extend(cum_af) |
1114 lst = tuple(lst) | 769 lst = tuple(lst) |
1115 ws2.write_row(row + 1, 0, lst) | 770 ws2.write_row(row + 1, 0, lst) |
1116 if chimera_correction: | 771 if chimera_correction: |
1117 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format12, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) | 772 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format1, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) |
1118 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format32, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) | 773 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format3, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) |
1119 ws2.conditional_format('V{}:AE{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format22, 'multi_range': 'V{}:AE{} V1:AE1'.format(row + 2, row + 2)}) | 774 ws2.conditional_format('V{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format2, 'multi_range': 'V{}:Z{} V1:Z1'.format(row + 2, row + 2)}) |
1120 else: | 775 else: |
1121 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format12, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) | 776 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) |
1122 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format32, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) | 777 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) |
1123 ws2.conditional_format('P{}:Y{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format22, 'multi_range': 'P{}:Y{} P1:Y1'.format(row + 2, row + 2)}) | 778 ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:T{} P1:T1'.format(row + 2, row + 2)}) |
1124 row += 1 | 779 row += 1 |
1125 | 780 |
1126 # sheet 3 | 781 # sheet 3 |
1127 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), | 782 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), |
1128 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), | 783 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), |
1129 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41), | 784 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41), |
1130 ("tier 4.2", counter_tier42), ("tier 5.1", counter_tier51), ("tier 5.2", counter_tier52), | 785 ("tier 4.2", counter_tier42), ("tier 5", counter_tier5)] |
1131 ("tier 5.3", counter_tier53), ("tier 5.4", counter_tier54), ("tier 5.5", counter_tier55), ("tier 6", counter_tier6)] | |
1132 | 786 |
1133 header = ("tier", "count") | 787 header = ("tier", "count") |
1134 ws3.write_row(0, 0, header) | 788 ws3.write_row(0, 0, header) |
1135 | 789 |
1136 for i in range(len(sheet3)): | 790 for i in range(len(sheet3)): |
1146 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), | 800 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), |
1147 {'type': 'formula', | 801 {'type': 'formula', |
1148 'criteria': '=$A${}>="3"'.format(i + 2), | 802 'criteria': '=$A${}>="3"'.format(i + 2), |
1149 'format': format2}) | 803 'format': format2}) |
1150 | 804 |
1151 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), | 805 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), ("Tier 5", "remaining variants")] |
1152 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), | |
1153 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), | |
1154 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), | |
1155 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), | |
1156 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), | |
1157 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), | |
1158 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), | |
1159 ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), | |
1160 ("Tier 5.1", "variant is close to softclipping in both mates"), | |
1161 ("Tier 5.2", "variant is close to softclipping in one of the mates"), | |
1162 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), | |
1163 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), | |
1164 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), | |
1165 ("Tier 6", "remaining variants")] | |
1166 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | 806 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", |
1167 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 807 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
1168 "4081", "4098", "5", "10", "", ""), | 808 "4081", "4098", "5", "10", "", ""), |
1169 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, | 809 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, |
1170 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, | 810 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, |
1226 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", | 866 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", |
1227 "0", "0", "0", "1", "1", "5348", "5350", "", ""), | 867 "0", "0", "0", "1", "1", "5348", "5350", "", ""), |
1228 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", | 868 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", |
1229 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", | 869 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", |
1230 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 870 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
1231 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], | 871 [("Chr5:5-20000-13983-G-C", "5", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", |
1232 [("Chr5:5-20000-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", | |
1233 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", | 872 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", |
1234 "0", "1", "1", "5348", "5350", "", ""), | 873 "0", "1", "1", "5348", "5350", "", ""), |
1235 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 874 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
1236 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 875 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
1237 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] | 876 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] |
1238 | 877 |
1239 start_row = 20 | 878 start_row = 15 |
1240 ws3.write(start_row, 0, "Description of tiers with examples") | 879 ws3.write(start_row, 0, "Description of tiers with examples") |
1241 ws3.write_row(start_row + 1, 0, header_line) | 880 ws3.write_row(start_row + 1, 0, header_line) |
1242 row = 0 | 881 row = 0 |
1243 for i in range(len(description_tiers)): | 882 for i in range(len(description_tiers)): |
1244 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i]) | 883 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i]) |
1245 ex = examples_tiers[i] | 884 ex = examples_tiers[i] |
1246 for k in range(len(ex)): | 885 for k in range(len(ex)): |
1247 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) | 886 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) |
1248 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format13, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) | 887 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) |
1249 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), | 888 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), |
1250 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), | 889 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), |
1251 'format': format33, | 890 'format': format3, |
1252 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) | 891 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) |
1253 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), | 892 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), |
1254 {'type': 'formula', | 893 {'type': 'formula', |
1255 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2), | 894 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2), |
1256 'format': format23, | 895 'format': format2, |
1257 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) | 896 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) |
1258 row += 3 | 897 row += 3 |
1259 workbook.close() | 898 workbook.close() |
1260 workbook2.close() | 899 workbook2.close() |
1261 workbook3.close() | 900 workbook3.close() |
1262 | 901 |
1263 | 902 |
1264 if __name__ == '__main__': | 903 if __name__ == '__main__': |
1265 sys.exit(read2mut(sys.argv)) | 904 sys.exit(read2mut(sys.argv)) |
1266 |