Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 43:d21960b45a6b draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Tue, 02 Mar 2021 15:32:41 +0000 |
parents | da224c392a54 |
children | 29226ceba7cd |
comparison
equal
deleted
inserted
replaced
42:da224c392a54 | 43:d21960b45a6b |
---|---|
8 Looks for reads with mutation at known | 8 Looks for reads with mutation at known |
9 positions and calculates frequencies and stats. | 9 positions and calculates frequencies and stats. |
10 | 10 |
11 ======= ========== ================= ================================ | 11 ======= ========== ================= ================================ |
12 Version Date Author Description | 12 Version Date Author Description |
13 2.0.0 2020-10-30 Gundula Povysil - | 13 0.2.1 2019-10-27 Gundula Povysil - |
14 ======= ========== ================= ================================ | 14 ======= ========== ================= ================================ |
15 | 15 |
16 | 16 |
17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam | 17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam |
18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json | 18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json |
19 --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim5 10 --trim3 10 --chimera_correction | 19 --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10 --chimera_correction |
20 | 20 |
21 """ | 21 """ |
22 | 22 |
23 from __future__ import division | 23 from __future__ import division |
24 | 24 |
25 import argparse | 25 import argparse |
26 import csv | 26 import itertools |
27 import json | 27 import json |
28 import operator | 28 import operator |
29 import os | 29 import os |
30 import re | 30 import re |
31 import sys | 31 import sys |
32 | |
33 | 32 |
34 import numpy as np | 33 import numpy as np |
35 import pysam | 34 import pysam |
36 import xlsxwriter | 35 import xlsxwriter |
37 from cyvcf2 import VCF | 36 from cyvcf2 import VCF |
47 help='JSON file with data collected by mut2read.py.') | 46 help='JSON file with data collected by mut2read.py.') |
48 parser.add_argument('--sscsJson', | 47 parser.add_argument('--sscsJson', |
49 help='JSON file with SSCS counts collected by mut2sscs.py.') | 48 help='JSON file with SSCS counts collected by mut2sscs.py.') |
50 parser.add_argument('--outputFile', | 49 parser.add_argument('--outputFile', |
51 help='Output xlsx file with summary of mutations.') | 50 help='Output xlsx file with summary of mutations.') |
52 parser.add_argument('--outputFile_csv', | |
53 help='Output csv file with summary of mutations.') | |
54 parser.add_argument('--outputFile2', | 51 parser.add_argument('--outputFile2', |
55 help='Output xlsx file with allele frequencies of mutations.') | 52 help='Output xlsx file with allele frequencies of mutations.') |
56 parser.add_argument('--outputFile3', | 53 parser.add_argument('--outputFile3', |
57 help='Output xlsx file with examples of the tier classification.') | 54 help='Output xlsx file with examples of the tier classification.') |
58 parser.add_argument('--thresh', type=int, default=0, | 55 parser.add_argument('--thresh', type=int, default=0, |
59 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') | 56 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') |
60 parser.add_argument('--phred', type=int, default=20, | 57 parser.add_argument('--phred', type=int, default=20, |
61 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') | 58 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') |
62 parser.add_argument('--trim5', type=int, default=10, | 59 parser.add_argument('--trim', type=int, default=10, |
63 help='Integer threshold for assigning mutations at the beginning of the reads to lower tier. Default 10.') | 60 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') |
64 parser.add_argument('--trim3', type=int, default=10, | |
65 help='Integer threshold for assigning mutations at the end of the reads to lower tier. Default 10.') | |
66 parser.add_argument('--chimera_correction', action="store_true", | 61 parser.add_argument('--chimera_correction', action="store_true", |
67 help='Count chimeric variants and correct the variant frequencies') | 62 help='Count chimeric variants and correct the variant frequencies') |
63 parser.add_argument('--softclipping_dist', type=int, default=15, | |
64 help='Count mutation as an artifact if mutation lies within this parameter away from the softclipping part of the read.') | |
65 parser.add_argument('--reads_threshold', type=float, default=1.0, | |
66 help='Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, means all reads of a family have to be softclipped.') | |
68 return parser | 67 return parser |
69 | 68 |
70 | 69 |
71 def safe_div(x, y): | 70 def safe_div(x, y): |
72 if y == 0: | 71 if y == 0: |
82 json_file = args.inputJson | 81 json_file = args.inputJson |
83 sscs_json = args.sscsJson | 82 sscs_json = args.sscsJson |
84 outfile = args.outputFile | 83 outfile = args.outputFile |
85 outfile2 = args.outputFile2 | 84 outfile2 = args.outputFile2 |
86 outfile3 = args.outputFile3 | 85 outfile3 = args.outputFile3 |
87 outputFile_csv = args.outputFile_csv | |
88 thresh = args.thresh | 86 thresh = args.thresh |
89 phred_score = args.phred | 87 phred_score = args.phred |
90 trim5 = args.trim5 | 88 trim = args.trim |
91 trim3 = args.trim3 | |
92 chimera_correction = args.chimera_correction | 89 chimera_correction = args.chimera_correction |
90 thr = args.softclipping_dist | |
91 threshold_reads = args.reads_threshold | |
93 | 92 |
94 if os.path.isfile(file1) is False: | 93 if os.path.isfile(file1) is False: |
95 sys.exit("Error: Could not find '{}'".format(file1)) | 94 sys.exit("Error: Could not find '{}'".format(file1)) |
96 if os.path.isfile(file2) is False: | 95 if os.path.isfile(file2) is False: |
97 sys.exit("Error: Could not find '{}'".format(file2)) | 96 sys.exit("Error: Could not find '{}'".format(file2)) |
99 sys.exit("Error: Could not find '{}'".format(json_file)) | 98 sys.exit("Error: Could not find '{}'".format(json_file)) |
100 if thresh < 0: | 99 if thresh < 0: |
101 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) | 100 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) |
102 if phred_score < 0: | 101 if phred_score < 0: |
103 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) | 102 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) |
104 if trim5 < 0: | 103 if trim < 0: |
105 sys.exit("Error: trim5 is '{}', but only non-negative integers allowed".format(trim5)) | 104 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) |
106 if trim3 < 0: | 105 if thr <= 0: |
107 sys.exit("Error: trim3 is '{}', but only non-negative integers allowed".format(trim3)) | 106 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thr)) |
108 | 107 |
109 # load dicts | 108 # load dicts |
110 with open(json_file, "r") as f: | 109 with open(json_file, "r") as f: |
111 (tag_dict, cvrg_dict) = json.load(f) | 110 (tag_dict, cvrg_dict) = json.load(f) |
112 | 111 |
113 with open(sscs_json, "r") as f: | 112 with open(sscs_json, "r") as f: |
114 (mut_pos_dict, ref_pos_dict) = json.load(f) | 113 (mut_pos_dict, ref_pos_dict) = json.load(f) |
115 | 114 |
116 # read bam file | 115 # read bam file |
116 # pysam.index(file2) | |
117 bam = pysam.AlignmentFile(file2, "rb") | 117 bam = pysam.AlignmentFile(file2, "rb") |
118 | 118 |
119 # create mut_dict | 119 # create mut_dict |
120 mut_dict = {} | 120 mut_dict = {} |
121 mut_read_pos_dict = {} | 121 mut_read_pos_dict = {} |
122 mut_read_dict = {} | 122 mut_read_dict = {} |
123 reads_dict = {} | 123 reads_dict = {} |
124 mut_read_cigar_dict = {} | |
124 i = 0 | 125 i = 0 |
125 mut_array = [] | 126 mut_array = [] |
126 | 127 |
127 for variant in VCF(file1): | 128 for count, variant in enumerate(VCF(file1)): |
129 #if count == 2000: | |
130 # break | |
128 chrom = variant.CHROM | 131 chrom = variant.CHROM |
129 stop_pos = variant.start | 132 stop_pos = variant.start |
130 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) | 133 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) |
131 ref = variant.REF | 134 ref = variant.REF |
132 alt = variant.ALT[0] | 135 if len(variant.ALT) == 0: |
136 continue | |
137 else: | |
138 alt = variant.ALT[0] | |
133 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt | 139 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt |
134 | 140 |
135 if len(ref) == len(alt): | 141 if len(ref) == len(alt): |
136 mut_array.append([chrom, stop_pos, ref, alt]) | 142 mut_array.append([chrom, stop_pos, ref, alt]) |
137 i += 1 | 143 i += 1 |
138 mut_dict[chrom_stop_pos] = {} | 144 mut_dict[chrom_stop_pos] = {} |
139 mut_read_pos_dict[chrom_stop_pos] = {} | 145 mut_read_pos_dict[chrom_stop_pos] = {} |
140 reads_dict[chrom_stop_pos] = {} | 146 reads_dict[chrom_stop_pos] = {} |
147 mut_read_cigar_dict[chrom_stop_pos] = {} | |
141 | 148 |
142 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): | 149 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): |
143 if pileupcolumn.reference_pos == stop_pos: | 150 if pileupcolumn.reference_pos == stop_pos: |
144 count_alt = 0 | 151 count_alt = 0 |
145 count_ref = 0 | 152 count_ref = 0 |
146 count_indel = 0 | 153 count_indel = 0 |
147 count_n = 0 | 154 count_n = 0 |
148 count_other = 0 | 155 count_other = 0 |
149 count_lowq = 0 | 156 count_lowq = 0 |
150 n = 0 | 157 n = 0 |
151 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), | 158 #print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), |
152 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) | 159 # "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) |
153 for pileupread in pileupcolumn.pileups: | 160 for pileupread in pileupcolumn.pileups: |
154 n += 1 | 161 n += 1 |
155 if not pileupread.is_del and not pileupread.is_refskip: | 162 if not pileupread.is_del and not pileupread.is_refskip: |
156 tag = pileupread.alignment.query_name | 163 tag = pileupread.alignment.query_name |
157 nuc = pileupread.alignment.query_sequence[pileupread.query_position] | 164 nuc = pileupread.alignment.query_sequence[pileupread.query_position] |
163 if nuc in mut_dict[chrom_stop_pos][tag]: | 170 if nuc in mut_dict[chrom_stop_pos][tag]: |
164 mut_dict[chrom_stop_pos][tag][nuc] += 1 | 171 mut_dict[chrom_stop_pos][tag][nuc] += 1 |
165 else: | 172 else: |
166 mut_dict[chrom_stop_pos][tag][nuc] = 1 | 173 mut_dict[chrom_stop_pos][tag][nuc] = 1 |
167 if tag not in mut_read_pos_dict[chrom_stop_pos]: | 174 if tag not in mut_read_pos_dict[chrom_stop_pos]: |
168 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 | 175 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1] |
169 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence) | 176 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)] |
177 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring] | |
178 | |
179 #alignedRefPositions = pileupread.get_reference_positions()[0] | |
170 else: | 180 else: |
171 mut_read_pos_dict[chrom_stop_pos][tag] = np.append( | 181 mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1) |
172 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1) | 182 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence)) |
173 reads_dict[chrom_stop_pos][tag] = np.append( | 183 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring) |
174 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence)) | |
175 | |
176 if nuc == alt: | 184 if nuc == alt: |
177 count_alt += 1 | 185 count_alt += 1 |
178 if tag not in mut_read_dict: | 186 if tag not in mut_read_dict: |
179 mut_read_dict[tag] = {} | 187 mut_read_dict[tag] = {} |
180 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) | 188 mut_read_dict[tag][chrom_stop_pos] = (alt, ref) |
189 else: | 197 else: |
190 count_other += 1 | 198 count_other += 1 |
191 else: | 199 else: |
192 count_indel += 1 | 200 count_indel += 1 |
193 | 201 |
194 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) | 202 #print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) |
195 else: | 203 #else: |
196 print("indels are currently not evaluated") | 204 # print("indels are currently not evaluated") |
197 | |
198 mut_array = np.array(mut_array) | 205 mut_array = np.array(mut_array) |
199 for read in bam.fetch(until_eof=True): | 206 for read in bam.fetch(until_eof=True): |
200 if read.is_unmapped: | 207 if read.is_unmapped: |
201 pure_tag = read.query_name[:-5] | 208 pure_tag = read.query_name[:-5] |
202 nuc = "na" | 209 nuc = "na" |
212 bam.close() | 219 bam.close() |
213 | 220 |
214 # create pure_tags_dict | 221 # create pure_tags_dict |
215 pure_tags_dict = {} | 222 pure_tags_dict = {} |
216 for key1, value1 in sorted(mut_dict.items()): | 223 for key1, value1 in sorted(mut_dict.items()): |
224 #if len(np.where(np.array(['#'.join(str(i) for i in z) | |
225 # for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0: | |
226 # continue | |
227 | |
217 i = np.where(np.array(['#'.join(str(i) for i in z) | 228 i = np.where(np.array(['#'.join(str(i) for i in z) |
218 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] | 229 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
219 ref = mut_array[i, 2] | 230 ref = mut_array[i, 2] |
220 alt = mut_array[i, 3] | 231 alt = mut_array[i, 3] |
221 pure_tags_dict[key1] = {} | 232 pure_tags_dict[key1] = {} |
235 if len(value) < thresh: | 246 if len(value) < thresh: |
236 pure_tags_dict_short[key] = value | 247 pure_tags_dict_short[key] = value |
237 else: | 248 else: |
238 pure_tags_dict_short = pure_tags_dict | 249 pure_tags_dict_short = pure_tags_dict |
239 | 250 |
240 csv_data = open(outputFile_csv, "wb") | 251 # whole_array = [] |
241 csv_writer = csv.writer(csv_data, delimiter=",") | 252 # for k in pure_tags_dict.values(): |
253 # if len(k) != 0: | |
254 # keys = k.keys() | |
255 # if len(keys) > 1: | |
256 # for k1 in keys: | |
257 # whole_array.append(k1) | |
258 # else: | |
259 # whole_array.append(keys[0]) | |
242 | 260 |
243 # output summary with threshold | 261 # output summary with threshold |
244 workbook = xlsxwriter.Workbook(outfile) | 262 workbook = xlsxwriter.Workbook(outfile) |
245 workbook2 = xlsxwriter.Workbook(outfile2) | 263 workbook2 = xlsxwriter.Workbook(outfile2) |
246 workbook3 = xlsxwriter.Workbook(outfile3) | 264 workbook3 = xlsxwriter.Workbook(outfile3) |
266 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | 284 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', |
267 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', | 285 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', |
268 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', | 286 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', |
269 'in phase', 'chimeric tag') | 287 'in phase', 'chimeric tag') |
270 ws1.write_row(0, 0, header_line) | 288 ws1.write_row(0, 0, header_line) |
271 csv_writer.writerow(header_line) | 289 |
272 counter_tier11 = 0 | 290 counter_tier11 = 0 |
273 counter_tier12 = 0 | 291 counter_tier12 = 0 |
274 counter_tier21 = 0 | 292 counter_tier21 = 0 |
275 counter_tier22 = 0 | 293 counter_tier22 = 0 |
276 counter_tier23 = 0 | 294 counter_tier23 = 0 |
277 counter_tier24 = 0 | 295 counter_tier24 = 0 |
278 counter_tier31 = 0 | 296 counter_tier31 = 0 |
279 counter_tier32 = 0 | 297 counter_tier32 = 0 |
280 counter_tier41 = 0 | 298 counter_tier33 = 0 |
281 counter_tier42 = 0 | 299 counter_tier4 = 0 |
282 counter_tier5 = 0 | 300 # if chimera_correction: |
301 # counter_tier43 = 0 | |
302 counter_tier51 = 0 | |
303 counter_tier52 = 0 | |
304 counter_tier53 = 0 | |
305 counter_tier54 = 0 | |
306 counter_tier55 = 0 | |
283 counter_tier6 = 0 | 307 counter_tier6 = 0 |
308 counter_tier7 = 0 | |
309 | |
284 row = 1 | 310 row = 1 |
285 tier_dict = {} | 311 tier_dict = {} |
286 chimera_dict = {} | 312 chimera_dict = {} |
287 for key1, value1 in sorted(mut_dict.items()): | 313 for key1, value1 in sorted(mut_dict.items()): |
288 counts_mut = 0 | 314 counts_mut = 0 |
315 chimeric_tag_list = [] | |
289 chimeric_tag = {} | 316 chimeric_tag = {} |
290 if key1 in pure_tags_dict_short.keys(): | 317 if key1 in pure_tags_dict_short.keys(): |
291 i = np.where(np.array(['#'.join(str(i) for i in z) | 318 i = np.where(np.array(['#'.join(str(i) for i in z) |
292 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] | 319 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
293 ref = mut_array[i, 2] | 320 ref = mut_array[i, 2] |
294 alt = mut_array[i, 3] | 321 alt = mut_array[i, 3] |
295 dcs_median = cvrg_dict[key1][2] | 322 dcs_median = cvrg_dict[key1][2] |
296 whole_array = list(pure_tags_dict_short[key1].keys()) | 323 whole_array = pure_tags_dict_short[key1].keys() |
297 | 324 |
298 tier_dict[key1] = {} | 325 tier_dict[key1] = {} |
299 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), | 326 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), |
300 ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0), ("tier 6", 0)] | 327 ("tier 3.2", 0), ("tier 3.3", 0), ("tier 4", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), |
328 ("tier 6", 0), ("tier 7", 0)] | |
301 for k, v in values_tier_dict: | 329 for k, v in values_tier_dict: |
302 tier_dict[key1][k] = v | 330 tier_dict[key1][k] = v |
303 | 331 |
304 used_keys = [] | 332 used_keys = [] |
305 if 'ab' in mut_pos_dict[key1].keys(): | 333 if 'ab' in mut_pos_dict[key1].keys(): |
324 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): | 352 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): |
325 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): | 353 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): |
326 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) | 354 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) |
327 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 355 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
328 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] | 356 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] |
329 else: | 357 # na1f = na1/total1 |
358 else: | |
359 # na1 = na1f = 0 | |
330 na1 = 0 | 360 na1 = 0 |
331 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 361 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
332 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ'] | 362 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ'] |
333 else: | 363 # lowq1f = lowq1 / total1 |
364 else: | |
365 # lowq1 = lowq1f = 0 | |
334 lowq1 = 0 | 366 lowq1 = 0 |
335 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | 367 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): |
336 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref] | 368 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref] |
337 ref1f = ref1 / (total1 - na1 - lowq1) | 369 ref1f = ref1 / (total1 - na1 - lowq1) |
338 else: | 370 else: |
363 | 395 |
364 if key2[:-5] + '.ab.2' in mut_dict[key1].keys(): | 396 if key2[:-5] + '.ab.2' in mut_dict[key1].keys(): |
365 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values()) | 397 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values()) |
366 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | 398 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): |
367 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na'] | 399 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na'] |
368 else: | 400 # na2f = na2 / total2 |
401 else: | |
402 # na2 = na2f = 0 | |
369 na2 = 0 | 403 na2 = 0 |
370 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | 404 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): |
371 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ'] | 405 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ'] |
372 else: | 406 # lowq2f = lowq2 / total2 |
407 else: | |
408 # lowq2 = lowq2f = 0 | |
373 lowq2 = 0 | 409 lowq2 = 0 |
374 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | 410 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): |
375 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref] | 411 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref] |
376 ref2f = ref2 / (total2 - na2 - lowq2) | 412 ref2f = ref2 / (total2 - na2 - lowq2) |
377 else: | 413 else: |
402 | 438 |
403 if key2[:-5] + '.ba.1' in mut_dict[key1].keys(): | 439 if key2[:-5] + '.ba.1' in mut_dict[key1].keys(): |
404 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values()) | 440 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values()) |
405 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | 441 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): |
406 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na'] | 442 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na'] |
407 else: | 443 # na3f = na3 / total3 |
444 else: | |
445 # na3 = na3f = 0 | |
408 na3 = 0 | 446 na3 = 0 |
409 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | 447 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): |
410 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ'] | 448 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ'] |
411 else: | 449 # lowq3f = lowq3 / total3 |
450 else: | |
451 # lowq3 = lowq3f = 0 | |
412 lowq3 = 0 | 452 lowq3 = 0 |
413 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | 453 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): |
414 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref] | 454 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref] |
415 ref3f = ref3 / (total3 - na3 - lowq3) | 455 ref3f = ref3 / (total3 - na3 - lowq3) |
416 else: | 456 else: |
437 | 477 |
438 if key2[:-5] + '.ba.2' in mut_dict[key1].keys(): | 478 if key2[:-5] + '.ba.2' in mut_dict[key1].keys(): |
439 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values()) | 479 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values()) |
440 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | 480 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): |
441 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na'] | 481 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na'] |
442 else: | 482 # na4f = na4 / total4 |
483 else: | |
484 # na4 = na4f = 0 | |
443 na4 = 0 | 485 na4 = 0 |
444 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | 486 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): |
445 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ'] | 487 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ'] |
446 else: | 488 # lowq4f = lowq4 / total4 |
489 else: | |
490 # lowq4 = lowq4f = 0 | |
447 lowq4 = 0 | 491 lowq4 = 0 |
448 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | 492 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): |
449 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref] | 493 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref] |
450 ref4f = ref4 / (total4 - na4 - lowq4) | 494 ref4f = ref4 / (total4 - na4 - lowq4) |
451 else: | 495 else: |
470 total4 = total4new = na4 = lowq4 = 0 | 514 total4 = total4new = na4 = lowq4 = 0 |
471 ref4 = alt4 = ref4f = alt4f = 0 | 515 ref4 = alt4 = ref4f = alt4f = 0 |
472 | 516 |
473 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 | 517 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 |
474 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 | 518 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 |
475 | 519 cigars_dcs1 = cigars_dcs2 = cigars_dcs3 = cigars_dcs4 = [] |
520 pos_read1 = pos_read2 = pos_read3 = pos_read4 = [] | |
521 end_read1 = end_read2 = end_read3 = end_read4 = [] | |
476 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): | 522 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): |
477 read_pos1 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.1']) | 523 read_pos1 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.1'])) |
478 read_len_median1 = np.median(reads_dict[key1][key2[:-5] + '.ab.1']) | 524 read_len_median1 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.1'])) |
525 cigars_dcs1 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.1'] | |
526 #print(mut_read_cigar_dict[key1][key2[:-5] + '.ab.1']) | |
527 pos_read1 = mut_read_pos_dict[key1][key2[:-5] + '.ab.1'] | |
528 #print(cigars_dcs1) | |
529 end_read1 = reads_dict[key1][key2[:-5] + '.ab.1'] | |
479 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): | 530 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): |
480 read_pos2 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.2']) | 531 read_pos2 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.2'])) |
481 read_len_median2 = np.median(reads_dict[key1][key2[:-5] + '.ab.2']) | 532 read_len_median2 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.2'])) |
533 cigars_dcs2 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.2'] | |
534 pos_read2 = mut_read_pos_dict[key1][key2[:-5] + '.ab.2'] | |
535 end_read2 = reads_dict[key1][key2[:-5] + '.ab.2'] | |
482 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): | 536 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): |
483 read_pos3 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.1']) | 537 read_pos3 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.1'])) |
484 read_len_median3 = np.median(reads_dict[key1][key2[:-5] + '.ba.1']) | 538 read_len_median3 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.1'])) |
539 cigars_dcs3 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.1'] | |
540 pos_read3 = mut_read_pos_dict[key1][key2[:-5] + '.ba.1'] | |
541 end_read3 = reads_dict[key1][key2[:-5] + '.ba.1'] | |
485 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): | 542 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): |
486 read_pos4 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.2']) | 543 read_pos4 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.2'])) |
487 read_len_median4 = np.median(reads_dict[key1][key2[:-5] + '.ba.2']) | 544 read_len_median4 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.2'])) |
545 #print(mut_read_cigar_dict[key1][key2[:-5] + '.ba.2']) | |
546 cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'] | |
547 | |
548 pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2'] | |
549 #print(cigars_dcs4) | |
550 end_read4 = reads_dict[key1][key2[:-5] + '.ba.2'] | |
488 | 551 |
489 used_keys.append(key2[:-5]) | 552 used_keys.append(key2[:-5]) |
490 counts_mut += 1 | 553 counts_mut += 1 |
491 if (alt1f + alt2f + alt3f + alt4f) > 0.5: | 554 if (alt1f + alt2f + alt3f + alt4f) > 0.5: |
492 if total1new == 0: | 555 if total1new == 0: |
513 beg1 = beg4 = beg2 = beg3 = 0 | 576 beg1 = beg4 = beg2 = beg3 = 0 |
514 | 577 |
515 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) | 578 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) |
516 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) | 579 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) |
517 | 580 |
518 trimmed_five = False | 581 trimmed = False |
519 trimmed_three = False | |
520 contradictory = False | 582 contradictory = False |
521 | 583 softclipped_mutation_allMates = False |
522 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): | 584 softclipped_mutation_oneOfTwoMates = False |
585 softclipped_mutation_oneOfTwoSSCS = False | |
586 softclipped_mutation_oneMate = False | |
587 softclipped_mutation_oneMateOneSSCS = False | |
588 print() | |
589 print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3) | |
590 dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = [] | |
591 dist_end_read1 = dist_end_read2 = dist_end_read3 = dist_end_read4 = [] | |
592 ratio_dist_start1 = ratio_dist_start2 = ratio_dist_start3 = ratio_dist_start4 = False | |
593 ratio_dist_end1 = ratio_dist_end2 = ratio_dist_end3 = ratio_dist_end4 = False | |
594 | |
595 # mate 1 - SSCS ab | |
596 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] | |
597 ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads | |
598 | |
599 if any(ij is True for ij in softclipped_idx1): | |
600 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] | |
601 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1] | |
602 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] | |
603 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] | |
604 dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)] | |
605 | |
606 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
607 if any(ij is True for ij in softclipped_both_ends_idx1): | |
608 print(softclipped_both_ends_idx1) | |
609 for nr, indx in enumerate(softclipped_both_ends_idx1): | |
610 if indx: | |
611 if dist_start_read1[nr] <= dist_end_read1[nr]: | |
612 dist_end_read1[nr] = thr + 1000 # use dist of start and set start to very large number | |
613 else: | |
614 dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number | |
615 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads | |
616 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads | |
617 print(key1, "mate1 ab", dist_start_read1, dist_end_read1, cigars_dcs1, ratio1, ratio_dist_start1, ratio_dist_end1) | |
618 | |
619 # mate 1 - SSCS ba | |
620 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] | |
621 ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads | |
622 if any(ij is True for ij in softclipped_idx4): | |
623 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] | |
624 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4] | |
625 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4] | |
626 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] | |
627 dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)] | |
628 | |
629 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
630 if any(ij is True for ij in softclipped_both_ends_idx4): | |
631 print(softclipped_both_ends_idx4) | |
632 for nr, indx in enumerate(softclipped_both_ends_idx4): | |
633 if indx: | |
634 if dist_start_read4[nr] <= dist_end_read4[nr]: | |
635 dist_end_read4[nr] = thr + 1000 # use dist of start and set start to very large number | |
636 else: | |
637 dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number | |
638 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads | |
639 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads | |
640 print(key1, "mate1 ba", dist_start_read4, dist_end_read4,cigars_dcs4, ratio4, ratio_dist_start4, ratio_dist_end4) | |
641 | |
642 # mate 2 - SSCS ab | |
643 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] | |
644 #print(sum(softclipped_idx2)) | |
645 ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads | |
646 if any(ij is True for ij in softclipped_idx2): | |
647 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] | |
648 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2] | |
649 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] | |
650 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)] | |
651 dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)] | |
652 | |
653 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
654 if any(ij is True for ij in softclipped_both_ends_idx2): | |
655 print(softclipped_both_ends_idx2) | |
656 for nr, indx in enumerate(softclipped_both_ends_idx2): | |
657 if indx: | |
658 if dist_start_read2[nr] <= dist_end_read2[nr]: | |
659 dist_end_read2[nr] = thr + 1000 # use dist of start and set start to very large number | |
660 else: | |
661 dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number | |
662 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads | |
663 #print(ratio_dist_end2) | |
664 #print([True if x <= thr else False for x in ratio_dist_end2]) | |
665 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads | |
666 print(key1, "mate2 ab", dist_start_read2, dist_end_read2,cigars_dcs2, ratio2, ratio_dist_start2, ratio_dist_end2) | |
667 | |
668 # mate 2 - SSCS ba | |
669 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] | |
670 ratio3 = safe_div(sum(softclipped_idx3), float(len(softclipped_idx3))) >= threshold_reads | |
671 if any(ij is True for ij in softclipped_idx3): | |
672 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] | |
673 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3] | |
674 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] | |
675 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] | |
676 dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)] | |
677 | |
678 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | |
679 if any(ij is True for ij in softclipped_both_ends_idx3): | |
680 print(softclipped_both_ends_idx3) | |
681 for nr, indx in enumerate(softclipped_both_ends_idx3): | |
682 if indx: | |
683 if dist_start_read3[nr] <= dist_end_read3[nr]: | |
684 dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh | |
685 else: | |
686 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number | |
687 #print([True if x <= thr else False for x in dist_start_read3]) | |
688 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads | |
689 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads | |
690 print(key1, "mate2 ba", dist_start_read3, dist_end_read3,cigars_dcs3, ratio3, ratio_dist_start3, ratio_dist_end3) | |
691 | |
692 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant | |
693 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | | |
694 (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & | |
695 all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): | |
523 alt1ff = 0 | 696 alt1ff = 0 |
524 alt4ff = 0 | 697 alt4ff = 0 |
525 alt2ff = 0 | 698 alt2ff = 0 |
526 alt3ff = 0 | 699 alt3ff = 0 |
527 trimmed_five = False | 700 trimmed = False |
528 trimmed_three = False | |
529 contradictory = True | 701 contradictory = True |
530 else: | 702 # softclipping tiers |
531 if ((read_pos1 >= 0) and (read_pos1 <= trim5)): | 703 # information of both mates available --> all reads for both mates and SSCS are softclipped |
704 elif (ratio1 & ratio4 & ratio2 & ratio3 & | |
705 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & | |
706 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | |
707 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
708 softclipped_mutation_allMates = True | |
709 softclipped_mutation_oneOfTwoMates = False | |
710 softclipped_mutation_oneOfTwoSSCS = False | |
711 softclipped_mutation_oneMate = False | |
712 softclipped_mutation_oneMateOneSSCS = False | |
713 alt1ff = 0 | |
714 alt4ff = 0 | |
715 alt2ff = 0 | |
716 alt3ff = 0 | |
717 trimmed = False | |
718 contradictory = False | |
719 print(key1, "softclipped_mutation_allMates", softclipped_mutation_allMates) | |
720 # information of both mates available --> only one mate softclipped | |
721 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) | | |
722 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) & | |
723 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | |
724 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
725 softclipped_mutation_allMates = False | |
726 softclipped_mutation_oneOfTwoMates = True | |
727 softclipped_mutation_oneOfTwoSSCS = False | |
728 softclipped_mutation_oneMate = False | |
729 softclipped_mutation_oneMateOneSSCS = False | |
730 alt1ff = 0 | |
731 alt4ff = 0 | |
732 alt2ff = 0 | |
733 alt3ff = 0 | |
734 trimmed = False | |
735 contradictory = False | |
736 print(key1, "softclipped_mutation_oneOfTwoMates", softclipped_mutation_oneOfTwoMates) | |
737 # information of both mates available --> only one mate softclipped | |
738 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & | |
739 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & | |
740 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | |
741 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
742 softclipped_mutation_allMates = False | |
743 softclipped_mutation_oneOfTwoMates = False | |
744 softclipped_mutation_oneOfTwoSSCS = True | |
745 softclipped_mutation_oneMate = False | |
746 softclipped_mutation_oneMateOneSSCS = False | |
747 alt1ff = 0 | |
748 alt4ff = 0 | |
749 alt2ff = 0 | |
750 alt3ff = 0 | |
751 trimmed = False | |
752 contradictory = False | |
753 print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff]) | |
754 # information of one mate available --> all reads of one mate are softclipped | |
755 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & | |
756 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | | |
757 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & | |
758 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available | |
759 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
760 #if ((((len(dist_start_read1) > 0 | len(dist_end_read1) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1))) & | |
761 # ((len(dist_start_read4) > 0 | len(dist_end_read4) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4)))) | | |
762 # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) & | |
763 # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))): | |
764 softclipped_mutation_allMates = False | |
765 softclipped_mutation_oneOfTwoMates = False | |
766 softclipped_mutation_oneOfTwoSSCS = False | |
767 softclipped_mutation_oneMate = True | |
768 softclipped_mutation_oneMateOneSSCS = False | |
769 alt1ff = 0 | |
770 alt4ff = 0 | |
771 alt2ff = 0 | |
772 alt3ff = 0 | |
773 trimmed = False | |
774 contradictory = False | |
775 print(key1, "softclipped_mutation_oneMate", softclipped_mutation_oneMate) | |
776 # information of one mate available --> only one SSCS is softclipped | |
777 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & | |
778 (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) | | |
779 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & | |
780 (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available | |
781 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | |
782 #if ((all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1)) | | |
783 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4))) | | |
784 # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) | | |
785 # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))): | |
786 softclipped_mutation_allMates = False | |
787 softclipped_mutation_oneOfTwoMates = False | |
788 softclipped_mutation_oneOfTwoSSCS = False | |
789 softclipped_mutation_oneMate = False | |
790 softclipped_mutation_oneMateOneSSCS = True | |
791 alt1ff = 0 | |
792 alt4ff = 0 | |
793 alt2ff = 0 | |
794 alt3ff = 0 | |
795 trimmed = False | |
796 contradictory = False | |
797 print(key1, "softclipped_mutation_oneMateOneSSCS", softclipped_mutation_oneMateOneSSCS) | |
798 | |
799 else: | |
800 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): | |
532 beg1 = total1new | 801 beg1 = total1new |
533 total1new = 0 | 802 total1new = 0 |
534 alt1ff = 0 | 803 alt1ff = 0 |
535 alt1f = 0 | 804 alt1f = 0 |
536 trimmed_five = True | 805 trimmed = True |
537 | 806 |
538 if ((read_pos1 >= 0) and (abs(read_len_median1 - read_pos1) <= trim3)): | 807 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))): |
539 beg1 = total1new | |
540 total1new = 0 | |
541 alt1ff = 0 | |
542 alt1f = 0 | |
543 trimmed_three = True | |
544 | |
545 if ((read_pos4 >= 0) and (read_pos4 <= trim5)): | |
546 beg4 = total4new | 808 beg4 = total4new |
547 total4new = 0 | 809 total4new = 0 |
548 alt4ff = 0 | 810 alt4ff = 0 |
549 alt4f = 0 | 811 alt4f = 0 |
550 trimmed_five = True | 812 trimmed = True |
551 | 813 |
552 if ((read_pos4 >= 0) and (abs(read_len_median4 - read_pos4) <= trim3)): | 814 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): |
553 beg4 = total4new | |
554 total4new = 0 | |
555 alt4ff = 0 | |
556 alt4f = 0 | |
557 trimmed_three = True | |
558 | |
559 if ((read_pos2 >= 0) and (read_pos2 <= trim5)): | |
560 beg2 = total2new | 815 beg2 = total2new |
561 total2new = 0 | 816 total2new = 0 |
562 alt2ff = 0 | 817 alt2ff = 0 |
563 alt2f = 0 | 818 alt2f = 0 |
564 trimmed_five = True | 819 trimmed = True |
565 | 820 |
566 if ((read_pos2 >= 0) and (abs(read_len_median2 - read_pos2) <= trim3)): | 821 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): |
567 beg2 = total2new | |
568 total2new = 0 | |
569 alt2ff = 0 | |
570 alt2f = 0 | |
571 trimmed_three = True | |
572 | |
573 if ((read_pos3 >= 0) and (read_pos3 <= trim5)): | |
574 beg3 = total3new | 822 beg3 = total3new |
575 total3new = 0 | 823 total3new = 0 |
576 alt3ff = 0 | 824 alt3ff = 0 |
577 alt3f = 0 | 825 alt3f = 0 |
578 trimmed_five = True | 826 trimmed = True |
579 | |
580 if ((read_pos3 >= 0) and (abs(read_len_median3 - read_pos3) <= trim3)): | |
581 beg3 = total3new | |
582 total3new = 0 | |
583 alt3ff = 0 | |
584 alt3f = 0 | |
585 trimmed_three = True | |
586 | |
587 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) | 827 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) |
588 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) | 828 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) |
589 | 829 |
590 # assign tiers | 830 # assign tiers |
591 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | (all(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | 831 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & |
832 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | |
833 (all(int(ij) >= 3 for ij in [total2new, total3new]) & | |
834 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
592 tier = "1.1" | 835 tier = "1.1" |
593 counter_tier11 += 1 | 836 counter_tier11 += 1 |
594 tier_dict[key1]["tier 1.1"] += 1 | 837 tier_dict[key1]["tier 1.1"] += 1 |
595 | 838 |
596 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) | 839 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & |
597 & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | 840 any(int(ij) >= 3 for ij in [total1new, total4new]) & |
841 any(int(ij) >= 3 for ij in [total2new, total3new]) & | |
842 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | |
598 tier = "1.2" | 843 tier = "1.2" |
599 counter_tier12 += 1 | 844 counter_tier12 += 1 |
600 tier_dict[key1]["tier 1.2"] += 1 | 845 tier_dict[key1]["tier 1.2"] += 1 |
601 | 846 |
602 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | 847 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & |
603 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | 848 any(int(ij) >= 3 for ij in [total1new, total4new]) & |
849 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | |
850 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
851 any(int(ij) >= 3 for ij in [total2new, total3new]) & | |
852 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
604 tier = "2.1" | 853 tier = "2.1" |
605 counter_tier21 += 1 | 854 counter_tier21 += 1 |
606 tier_dict[key1]["tier 2.1"] += 1 | 855 tier_dict[key1]["tier 2.1"] += 1 |
607 | 856 |
608 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | 857 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & |
858 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | |
609 tier = "2.2" | 859 tier = "2.2" |
610 counter_tier22 += 1 | 860 counter_tier22 += 1 |
611 tier_dict[key1]["tier 2.2"] += 1 | 861 tier_dict[key1]["tier 2.2"] += 1 |
612 | 862 |
613 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & any(int(ij) >= 3 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) | 863 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & |
614 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & any(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))): | 864 any(int(ij) >= 3 for ij in [total2new, total3new]) & |
865 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & | |
866 any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) | | |
867 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
868 any(int(ij) >= 3 for ij in [total1new, total4new]) & | |
869 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & | |
870 any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))): | |
615 tier = "2.3" | 871 tier = "2.3" |
616 counter_tier23 += 1 | 872 counter_tier23 += 1 |
617 tier_dict[key1]["tier 2.3"] += 1 | 873 tier_dict[key1]["tier 2.3"] += 1 |
618 | 874 |
619 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | 875 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & |
620 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | 876 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | |
877 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
878 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
621 tier = "2.4" | 879 tier = "2.4" |
622 counter_tier24 += 1 | 880 counter_tier24 += 1 |
623 tier_dict[key1]["tier 2.4"] += 1 | 881 tier_dict[key1]["tier 2.4"] += 1 |
624 | 882 |
625 elif ((len(pure_tags_dict_short[key1]) > 1) & (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | 883 elif ((len(pure_tags_dict_short[key1]) > 1) & |
884 (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | | |
885 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | |
626 tier = "3.1" | 886 tier = "3.1" |
627 counter_tier31 += 1 | 887 counter_tier31 += 1 |
628 tier_dict[key1]["tier 3.1"] += 1 | 888 tier_dict[key1]["tier 3.1"] += 1 |
629 | 889 |
630 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) | 890 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & |
631 | (all(int(ij) >= 1 for ij in [total2new, total3new]) & all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | 891 all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) | |
892 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
893 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | |
632 tier = "3.2" | 894 tier = "3.2" |
633 counter_tier32 += 1 | 895 counter_tier32 += 1 |
634 tier_dict[key1]["tier 3.2"] += 1 | 896 tier_dict[key1]["tier 3.2"] += 1 |
635 | 897 |
636 elif trimmed_five: | 898 elif (trimmed) and (len(pure_tags_dict_short[key1]) > 1): |
637 tier = "4.1" | 899 tier = "3.3" |
638 counter_tier41 += 1 | 900 counter_tier33 += 1 |
639 tier_dict[key1]["tier 4.1"] += 1 | 901 tier_dict[key1]["tier 3.3"] += 1 |
640 | 902 |
641 elif trimmed_three: | 903 elif (trimmed): |
642 tier = "4.2" | 904 tier = "4" |
643 counter_tier42 += 1 | 905 counter_tier4 += 1 |
644 tier_dict[key1]["tier 4.2"] += 1 | 906 tier_dict[key1]["tier 4"] += 1 |
645 | 907 |
646 elif contradictory: | 908 elif softclipped_mutation_allMates: |
647 tier = "5" | 909 tier = "5.1" |
648 counter_tier5 += 1 | 910 counter_tier51 += 1 |
649 tier_dict[key1]["tier 5"] += 1 | 911 tier_dict[key1]["tier 5.1"] += 1 |
650 else: | 912 |
913 elif softclipped_mutation_oneOfTwoMates: | |
914 tier = "5.2" | |
915 counter_tier52 += 1 | |
916 tier_dict[key1]["tier 5.2"] += 1 | |
917 | |
918 elif softclipped_mutation_oneOfTwoSSCS: | |
919 tier = "5.3" | |
920 counter_tier53 += 1 | |
921 tier_dict[key1]["tier 5.3"] += 1 | |
922 | |
923 elif softclipped_mutation_oneMate: | |
924 tier = "5.4" | |
925 counter_tier54 += 1 | |
926 tier_dict[key1]["tier 5.4"] += 1 | |
927 | |
928 elif softclipped_mutation_oneMateOneSSCS: | |
929 tier = "5.5" | |
930 counter_tier55 += 1 | |
931 tier_dict[key1]["tier 5.5"] += 1 | |
932 | |
933 elif (contradictory): | |
651 tier = "6" | 934 tier = "6" |
652 counter_tier6 += 1 | 935 counter_tier6 += 1 |
653 tier_dict[key1]["tier 6"] += 1 | 936 tier_dict[key1]["tier 6"] += 1 |
654 | 937 |
938 else: | |
939 tier = "7" | |
940 counter_tier7 += 1 | |
941 tier_dict[key1]["tier 7"] += 1 | |
942 | |
655 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) | 943 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) |
656 var_id = '-'.join([chrom, str(int(pos) + 1), ref, alt]) | 944 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) |
657 sample_tag = key2[:-5] | 945 sample_tag = key2[:-5] |
658 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time | 946 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time |
659 # exclude identical tag from array2, to prevent comparison to itself | 947 # exclude identical tag from array2, to prevent comparison to itself |
660 same_tag = np.where(array2 == sample_tag) | 948 same_tag = np.where(array2 == sample_tag) |
661 index_array2 = np.arange(0, len(array2), 1) | 949 index_array2 = np.arange(0, len(array2), 1) |
680 half1_mate1 = array1_half2 | 968 half1_mate1 = array1_half2 |
681 half2_mate1 = array1_half | 969 half2_mate1 = array1_half |
682 half1_mate2 = array2_half2 | 970 half1_mate2 = array2_half2 |
683 half2_mate2 = array2_half | 971 half2_mate2 = array2_half |
684 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" | 972 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" |
685 dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2]) | 973 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) |
686 min_index = np.where(dist == dist.min()) # get index of min HD | 974 min_index = np.where(dist == dist.min()) # get index of min HD |
687 # get all "b's" of the tag or all "a's" of the tag with minimum HD | 975 # get all "b's" of the tag or all "a's" of the tag with minimum HD |
688 min_tag_half2 = half2_mate2[min_index] | 976 min_tag_half2 = half2_mate2[min_index] |
689 min_tag_array2 = array2[min_index] # get whole tag with min HD | 977 min_tag_array2 = array2[min_index] # get whole tag with min HD |
690 min_value = dist.min() | 978 min_value = dist.min() |
691 # calculate HD of "b" to all "b's" or "a" to all "a's" | 979 # calculate HD of "b" to all "b's" or "a" to all "a's" |
692 dist_second_half = np.array([sum(map(operator.ne, half2_mate1, e)) | 980 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) |
693 for e in min_tag_half2]) | 981 for e in min_tag_half2]) |
694 | 982 |
695 dist2 = dist_second_half.max() | 983 dist2 = dist_second_half.max() |
696 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD | 984 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD |
697 max_tag = min_tag_array2[max_index] | 985 max_tag = min_tag_array2[max_index] |
698 | 986 |
699 # tags which have identical parts: | 987 # tags which have identical parts: |
700 if min_value == 0 or dist2 == 0: | 988 if min_value == 0 or dist2 == 0: |
701 min_tags_list_zeros.append(tag) | 989 min_tags_list_zeros.append(tag) |
702 chimera_tags.append(max_tag) | 990 chimera_tags.append(max_tag) |
703 | 991 # chimeric = True |
992 # else: | |
993 # chimeric = False | |
994 | |
995 # if mate_b is False: | |
996 # text = "pos {}: sample tag: {}; HD a = {}; HD b' = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, min_value, dist2, list(max_tag), chimeric) | |
997 # else: | |
998 # text = "pos {}: sample tag: {}; HD a' = {}; HD b = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, dist2, min_value, list(max_tag), chimeric) | |
704 i += 1 | 999 i += 1 |
705 chimera_tags = [x for x in chimera_tags if x != []] | 1000 chimera_tags = [x for x in chimera_tags if x != []] |
706 chimera_tags_new = [] | 1001 chimera_tags_new = [] |
707 for i in chimera_tags: | 1002 for i in chimera_tags: |
708 if len(i) > 1: | 1003 if len(i) > 1: |
731 read_pos2 = read_len_median2 = None | 1026 read_pos2 = read_len_median2 = None |
732 if (read_pos3 == -1): | 1027 if (read_pos3 == -1): |
733 read_pos3 = read_len_median3 = None | 1028 read_pos3 = read_len_median3 = None |
734 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) | 1029 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) |
735 ws1.write_row(row, 0, line) | 1030 ws1.write_row(row, 0, line) |
736 csv_writer.writerow(line) | |
737 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) | 1031 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) |
738 ws1.write_row(row + 1, 0, line) | 1032 ws1.write_row(row + 1, 0, line) |
739 csv_writer.writerow(line) | |
740 | 1033 |
741 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1034 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
742 {'type': 'formula', | 1035 {'type': 'formula', |
743 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 1036 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
744 'format': format1, | 1037 'format': format1, |
745 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)}) | 1038 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
746 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1039 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
747 {'type': 'formula', | 1040 {'type': 'formula', |
748 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1), | 1041 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1), |
749 'format': format3, | 1042 'format': format3, |
750 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)}) | 1043 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
751 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1044 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
752 {'type': 'formula', | 1045 {'type': 'formula', |
753 'criteria': '=$B${}>="3"'.format(row + 1), | 1046 'criteria': '=$B${}>="3"'.format(row + 1), |
754 'format': format2, | 1047 'format': format2, |
755 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1)}) | 1048 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1049 | |
756 row += 3 | 1050 row += 3 |
757 | |
758 if chimera_correction: | 1051 if chimera_correction: |
759 chimeric_dcs_high_tiers = 0 | 1052 chimeric_dcs_high_tiers = 0 |
760 chimeric_dcs = 0 | 1053 chimeric_dcs = 0 |
761 for keys_chimera in chimeric_tag.keys(): | 1054 for keys_chimera in chimeric_tag.keys(): |
762 tiers = chimeric_tag[keys_chimera] | 1055 tiers = chimeric_tag[keys_chimera] |
765 if high_tiers == len(tiers): | 1058 if high_tiers == len(tiers): |
766 chimeric_dcs_high_tiers += high_tiers - 1 | 1059 chimeric_dcs_high_tiers += high_tiers - 1 |
767 else: | 1060 else: |
768 chimeric_dcs_high_tiers += high_tiers | 1061 chimeric_dcs_high_tiers += high_tiers |
769 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) | 1062 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) |
770 #csv_data.close() | |
771 | |
772 # sheet 2 | 1063 # sheet 2 |
773 if chimera_correction: | 1064 if chimera_correction: |
774 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1065 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
775 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', | 1066 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', |
776 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1067 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
777 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5', 'AF 1.1-6') | 1068 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') |
778 else: | 1069 else: |
779 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1070 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
780 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', | 1071 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', |
781 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1072 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
782 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5', 'AF 1.1-6') | 1073 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6') |
783 | 1074 |
784 ws2.write_row(0, 0, header_line2) | 1075 ws2.write_row(0, 0, header_line2) |
785 #ws2.conditional_format('J1', {'type': 'formula', 'criteria': 'containing', 'value': 'tier 1.1', 'format': format1, 'multi_range': 'J1:K1'}) | |
786 | |
787 row = 0 | 1076 row = 0 |
788 | 1077 |
789 for key1, value1 in sorted(tier_dict.items()): | 1078 for key1, value1 in sorted(tier_dict.items()): |
790 if key1 in pure_tags_dict_short.keys(): | 1079 if key1 in pure_tags_dict_short.keys(): |
791 i = np.where(np.array(['#'.join(str(i) for i in z) | 1080 i = np.where(np.array(['#'.join(str(i) for i in z) |
795 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) | 1084 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) |
796 ref_count = cvrg_dict[key1][0] | 1085 ref_count = cvrg_dict[key1][0] |
797 alt_count = cvrg_dict[key1][1] | 1086 alt_count = cvrg_dict[key1][1] |
798 cvrg = ref_count + alt_count | 1087 cvrg = ref_count + alt_count |
799 | 1088 |
800 var_id = '-'.join([chrom, str(int(pos) + 1), ref, alt]) | 1089 var_id = '-'.join([chrom, str(int(pos)+1), ref, alt]) |
801 lst = [var_id, cvrg] | 1090 lst = [var_id, cvrg] |
802 used_tiers = [] | 1091 used_tiers = [] |
803 cum_af = [] | 1092 cum_af = [] |
804 for key2, value2 in sorted(value1.items()): | 1093 for key2, value2 in sorted(value1.items()): |
805 # calculate cummulative AF | 1094 # calculate cummulative AF |
806 used_tiers.append(value2) | 1095 used_tiers.append(value2) |
807 if len(used_tiers) > 1: | 1096 if len(used_tiers) > 1: |
808 cum = safe_div(sum(used_tiers), cvrg) | 1097 cum = safe_div(sum(used_tiers), cvrg) |
809 cum_af.append(cum) | 1098 cum_af.append(cum) |
1099 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place | |
1100 continue | |
810 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) | 1101 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) |
811 if chimera_correction: | 1102 if chimera_correction: |
812 chimeras_all = chimera_dict[key1][0] | 1103 chimeras_all = chimera_dict[key1][0] |
813 new_alt = sum(used_tiers) - chimeras_all | 1104 new_alt = sum(used_tiers) - chimeras_all |
814 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers))) | 1105 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers))) |
815 if fraction_chimeras is None: | 1106 if fraction_chimeras is None: |
816 fraction_chimeras = 0. | 1107 fraction_chimeras = 0. |
817 new_cvrg = cvrg * (1. - fraction_chimeras) | 1108 new_cvrg = cvrg * (1. - fraction_chimeras) |
818 lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)]) | 1109 lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)]) |
819 lst.extend([(cvrg - sum(used_tiers[-6:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-6:])))]) | 1110 lst.extend([(cvrg - sum(used_tiers[-11:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-11:])))]) |
820 if chimera_correction: | 1111 if chimera_correction: |
821 chimeras_all = chimera_dict[key1][1] | 1112 chimeras_all = chimera_dict[key1][1] |
822 new_alt = sum(used_tiers[0:6]) - chimeras_all | 1113 new_alt = sum(used_tiers[0:6]) - chimeras_all |
823 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:6]))) | 1114 fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:6]))) |
824 if fraction_chimeras is None: | 1115 if fraction_chimeras is None: |
825 fraction_chimeras = 0. | 1116 fraction_chimeras = 0. |
826 new_cvrg = (cvrg - sum(used_tiers[-6:])) * (1. - fraction_chimeras) | 1117 new_cvrg = (cvrg - sum(used_tiers[-11:])) * (1. - fraction_chimeras) |
827 lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)]) | 1118 lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)]) |
828 lst.extend([alt_count, safe_div(alt_count, cvrg)]) | 1119 lst.extend([alt_count, safe_div(alt_count, cvrg)]) |
829 lst.extend(used_tiers) | 1120 lst.extend(used_tiers) |
830 lst.extend(cum_af) | 1121 lst.extend(cum_af) |
831 lst = tuple(lst) | 1122 lst = tuple(lst) |
832 ws2.write_row(row + 1, 0, lst) | 1123 ws2.write_row(row + 1, 0, lst) |
833 if chimera_correction: | 1124 if chimera_correction: |
834 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format12, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) | 1125 ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format12, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) |
835 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format32, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) | 1126 ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format32, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) |
836 ws2.conditional_format('V{}:AA{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format22, 'multi_range': 'V{}:AA{} V1:AA1'.format(row + 2, row + 2)}) | 1127 ws2.conditional_format('V{}:AF{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format22, 'multi_range': 'V{}:AF{} V1:AF1'.format(row + 2, row + 2)}) |
837 else: | 1128 else: |
838 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format12, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) | 1129 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format12, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) |
839 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format32, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) | 1130 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format32, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) |
840 ws2.conditional_format('P{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format22, 'multi_range': 'P{}:U{} P1:U1'.format(row + 2, row + 2)}) | 1131 ws2.conditional_format('P{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format22, 'multi_range': 'P{}:Z{} P1:Z1'.format(row + 2, row + 2)}) |
841 row += 1 | 1132 row += 1 |
842 | 1133 |
843 # sheet 3 | 1134 # sheet 3 |
844 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), | 1135 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), |
845 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), | 1136 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), |
846 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41), | 1137 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 3.3", counter_tier33), ("tier 4", counter_tier), |
847 ("tier 4.2", counter_tier42), ("tier 5", counter_tier5), ("tier 6", counter_tier6)] | 1138 ("tier 5.1", counter_tier51), ("tier 5.2", counter_tier52), |
1139 ("tier 5.3", counter_tier53), ("tier 5.4", counter_tier54), ("tier 5.5", counter_tier55), ("tier 6", counter_tier6), ("tier 7", counter_tier7)] | |
848 | 1140 |
849 header = ("tier", "count") | 1141 header = ("tier", "count") |
850 ws3.write_row(0, 0, header) | 1142 ws3.write_row(0, 0, header) |
851 | 1143 |
852 for i in range(len(sheet3)): | 1144 for i in range(len(sheet3)): |
853 ws3.write_row(i + 1, 0, sheet3[i]) | 1145 ws3.write_row(i + 1, 0, sheet3[i]) |
854 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), | 1146 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), |
855 {'type': 'formula', | 1147 {'type': 'formula', |
856 'criteria': '=OR($A${}="tier 1.1", $A${}="tier 1.2")'.format(i + 2, i + 2), | 1148 'criteria': '=OR($A${}="tier 1.1", $A${}="tier 1.2")'.format(i + 2, i + 2), |
857 'format': format13}) | 1149 'format': format1}) |
858 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), | 1150 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), |
859 {'type': 'formula', | 1151 {'type': 'formula', |
860 'criteria': '=OR($A${}="tier 2.1", $A${}="tier 2.2", $A${}="tier 2.3", $A${}="tier 2.4")'.format(i + 2, i + 2, i + 2, i + 2), | 1152 'criteria': '=OR($A${}="tier 2.1", $A${}="tier 2.2", $A${}="tier 2.3", $A${}="tier 2.4")'.format(i + 2, i + 2, i + 2, i + 2), |
861 'format': format33}) | 1153 'format': format3}) |
862 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), | 1154 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), |
863 {'type': 'formula', | 1155 {'type': 'formula', |
864 'criteria': '=$A${}>="3"'.format(i + 2), | 1156 'criteria': '=$A${}>="3"'.format(i + 2), |
865 'format': format23}) | 1157 'format': format2}) |
866 | 1158 |
867 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), | 1159 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), |
868 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), | 1160 ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), |
869 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), | 1161 ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), |
870 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), | 1162 ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), |
871 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), | 1163 ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), |
872 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), | 1164 ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), |
873 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), | 1165 ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), |
874 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), | 1166 ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), |
875 ("Tier 4.1", "variants at the beginning of the reads"), | 1167 ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), |
876 ("Tier 4.2", "variants at the end of the reads"), | 1168 ("Tier 5.1", "variant is close to softclipping in both mates"), |
877 ("Tier 5", "mates with contradictory information"), | 1169 ("Tier 5.2", "variant is close to softclipping in one of the mates"), |
1170 ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"), | |
1171 ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate"), | |
1172 ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate"), | |
878 ("Tier 6", "remaining variants")] | 1173 ("Tier 6", "remaining variants")] |
879 examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | 1174 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", |
880 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1175 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
881 "4081", "4098", "5", "10", "", ""), | 1176 "4081", "4098", "5", "10", "", ""), |
882 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, | 1177 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, |
883 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, | 1178 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, |
884 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], | 1179 "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], |
885 [("chr5-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", | 1180 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", |
886 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0", | 1181 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0", |
887 "0", "4081", "4098", "5", "10", "", ""), | 1182 "0", "4081", "4098", "5", "10", "", ""), |
888 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289", | 1183 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289", |
889 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", | 1184 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", |
890 "7", "0", "0", "4081", "4098", "5", "10", "", "")], | 1185 "7", "0", "0", "4081", "4098", "5", "10", "", "")], |
891 [("chr5-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", | 1186 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", |
892 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", | 1187 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", |
893 "0", "0", "1", "6", "47170", "41149", "", ""), | 1188 "0", "0", "1", "6", "47170", "41149", "", ""), |
894 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", | 1189 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", |
895 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", | 1190 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", |
896 "0", "0", "1", "6", "47170", "41149", "", "")], | 1191 "0", "0", "1", "6", "47170", "41149", "", "")], |
897 [("chr5-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", | 1192 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", |
898 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1193 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
899 "4081", "4098", "5", "10", "", ""), | 1194 "4081", "4098", "5", "10", "", ""), |
900 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, | 1195 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, |
901 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", | 1196 "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", |
902 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], | 1197 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], |
903 [("chr5-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", | 1198 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", |
904 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1199 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
905 "4081", "4098", "5", "10", "", ""), | 1200 "4081", "4098", "5", "10", "", ""), |
906 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", | 1201 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", |
907 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", | 1202 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", |
908 "4081", "4098", "5", "10", "", "")], | 1203 "4081", "4098", "5", "10", "", "")], |
909 [("chr5-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, | 1204 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, |
910 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", | 1205 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", |
911 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), | 1206 "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), |
912 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", | 1207 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", |
913 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0", | 1208 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0", |
914 "0", "0", "4081", "4098", "5", "10", "", "")], | 1209 "0", "0", "4081", "4098", "5", "10", "", "")], |
915 [("chr5-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", | 1210 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", |
916 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", | 1211 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", |
917 "4098", "5", "10", "", ""), | 1212 "4098", "5", "10", "", ""), |
918 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", | 1213 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", |
919 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081", | 1214 "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081", |
920 "4098", "5", "10", "", "")], | 1215 "4098", "5", "10", "", "")], |
921 [("chr5-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", | 1216 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", |
922 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", | 1217 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", |
923 "0", "0", "3", "3", "47170", "41149", "", ""), | 1218 "0", "0", "3", "3", "47170", "41149", "", ""), |
924 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, | 1219 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, |
925 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", | 1220 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", |
926 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")], | 1221 "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")], |
927 [("chr5-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", | 1222 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", |
928 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | 1223 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", |
929 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), | 1224 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), |
930 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", | 1225 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", |
931 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | 1226 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", |
932 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], | 1227 "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], |
933 [("chr5-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269", | 1228 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269", |
934 "5", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), | 1229 "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), |
935 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 1230 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
936 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 1231 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
937 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 1232 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
938 [("chr5-13983-G-C", "4.2", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "20", "270", "255", "276", "269", | 1233 [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", |
939 "5", "6", "5", "0", "0", "0", "5", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "6", "1", "1", "5348", "5350", "", ""), | |
940 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, | |
941 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | |
942 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | |
943 [("chr5-13963-T-C", "5", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", | |
944 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", | 1234 "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", |
945 "0", "0", "0", "1", "1", "5348", "5350", "", ""), | 1235 "0", "0", "0", "1", "1", "5348", "5350", "", ""), |
946 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", | 1236 ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", |
947 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", | 1237 "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", |
948 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], | 1238 "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], |
949 [("chr5-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", | 1239 [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], [("" * 34), ("" * 34)], |
1240 [("Chr5:5-20000-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", | |
950 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", | 1241 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", |
951 "0", "1", "1", "5348", "5350", "", ""), | 1242 "0", "1", "1", "5348", "5350", "", ""), |
952 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, | 1243 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, |
953 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | 1244 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", |
954 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] | 1245 "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")]] |
955 | 1246 |
956 start_row = 15 | 1247 start_row = 20 |
957 ws3.write(start_row, 0, "Description of tiers with examples") | 1248 ws3.write(start_row, 0, "Description of tiers with examples") |
958 ws3.write_row(start_row + 1, 0, header_line) | 1249 ws3.write_row(start_row + 1, 0, header_line) |
959 row = 0 | 1250 row = 0 |
960 for i in range(len(description_tiers)): | 1251 for i in range(len(description_tiers)): |
961 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i]) | 1252 ws3.write_row(start_row + 2 + row + i + 1, 0, description_tiers[i]) |
962 ex = examples_tiers[i] | 1253 ex = examples_tiers[i] |
963 for k in range(len(ex)): | 1254 for k in range(len(ex)): |
964 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) | 1255 ws3.write_row(start_row + 2 + row + i + k + 2, 0, ex[k]) |
965 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format13, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) | 1256 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), 'format': format13, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) |
966 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), | 1257 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), |
967 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), | 1258 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 2), |
968 'format': format33, | 1259 'format': format33, |
969 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) | 1260 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) |
970 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), | 1261 ws3.conditional_format('L{}:M{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3), |
971 {'type': 'formula', | 1262 {'type': 'formula', |
972 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2), | 1263 'criteria': '=$B${}>="3"'.format(start_row + 2 + row + i + k + 2), |
973 'format': format23, | 1264 'format': format23, |
974 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) | 1265 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3)}) |
975 row += 3 | 1266 row += 3 |
976 workbook.close() | 1267 workbook.close() |
977 workbook2.close() | 1268 workbook2.close() |
978 workbook3.close() | 1269 workbook3.close() |
979 csv_data.close() | |
980 | |
981 | 1270 |
982 | 1271 |
983 if __name__ == '__main__': | 1272 if __name__ == '__main__': |
984 sys.exit(read2mut(sys.argv)) | 1273 sys.exit(read2mut(sys.argv)) |
1274 |