Mercurial > repos > iuc > variant_analyzer
comparison read2mut.py @ 0:8d29173d49a9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
| author | iuc |
|---|---|
| date | Wed, 20 Nov 2019 17:47:35 -0500 |
| parents | |
| children | 3556001ff2db |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8d29173d49a9 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """read2mut.py | |
| 4 | |
| 5 Author -- Gundula Povysil | |
| 6 Contact -- povysil@bioinf.jku.at | |
| 7 | |
| 8 Looks for reads with mutation at known | |
| 9 positions and calculates frequencies and stats. | |
| 10 | |
| 11 ======= ========== ================= ================================ | |
| 12 Version Date Author Description | |
| 13 0.2.1 2019-10-27 Gundula Povysil - | |
| 14 ======= ========== ================= ================================ | |
| 15 | |
| 16 | |
| 17 USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam | |
| 18 --inputJson tag_count_dict.json --sscsJson SSCS_counts.json | |
| 19 --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10 | |
| 20 | |
| 21 """ | |
| 22 | |
| 23 from __future__ import division | |
| 24 | |
| 25 import argparse | |
| 26 import itertools | |
| 27 import json | |
| 28 import operator | |
| 29 import os | |
| 30 import re | |
| 31 import sys | |
| 32 | |
| 33 import numpy as np | |
| 34 import pysam | |
| 35 import xlsxwriter | |
| 36 | |
| 37 | |
| 38 def make_argparser(): | |
| 39 parser = argparse.ArgumentParser(description='Takes a tabular file with mutations, a BAM file and JSON files as input and prints stats about variants to a user specified output file.') | |
| 40 parser.add_argument('--mutFile', | |
| 41 help='TABULAR file with DCS mutations.') | |
| 42 parser.add_argument('--bamFile', | |
| 43 help='BAM file with aligned raw reads of selected tags (FASTQ created by mut2read.py - trimming with Trimmomatic - alignment with bwa).') | |
| 44 parser.add_argument('--inputJson', | |
| 45 help='JSON file with data collected by mut2read.py.') | |
| 46 parser.add_argument('--sscsJson', | |
| 47 help='JSON file with SSCS counts collected by mut2sscs.py.') | |
| 48 parser.add_argument('--outputFile', | |
| 49 help='Output xlsx file of mutation details.') | |
| 50 parser.add_argument('--thresh', type=int, default=0, | |
| 51 help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') | |
| 52 parser.add_argument('--phred', type=int, default=20, | |
| 53 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') | |
| 54 parser.add_argument('--trim', type=int, default=10, | |
| 55 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') | |
| 56 return parser | |
| 57 | |
| 58 | |
| 59 def safe_div(x, y): | |
| 60 if y == 0: | |
| 61 return None | |
| 62 return x / y | |
| 63 | |
| 64 | |
| 65 def read2mut(argv): | |
| 66 parser = make_argparser() | |
| 67 args = parser.parse_args(argv[1:]) | |
| 68 file1 = args.mutFile | |
| 69 file2 = args.bamFile | |
| 70 json_file = args.inputJson | |
| 71 sscs_json = args.sscsJson | |
| 72 outfile = args.outputFile | |
| 73 thresh = args.thresh | |
| 74 phred_score = args.phred | |
| 75 trim = args.trim | |
| 76 | |
| 77 if os.path.isfile(file1) is False: | |
| 78 sys.exit("Error: Could not find '{}'".format(file1)) | |
| 79 if os.path.isfile(file2) is False: | |
| 80 sys.exit("Error: Could not find '{}'".format(file2)) | |
| 81 if os.path.isfile(json_file) is False: | |
| 82 sys.exit("Error: Could not find '{}'".format(json_file)) | |
| 83 if thresh < 0: | |
| 84 sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) | |
| 85 if phred_score < 0: | |
| 86 sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) | |
| 87 if trim < 0: | |
| 88 sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) | |
| 89 | |
| 90 # 1. read mut file | |
| 91 with open(file1, 'r') as mut: | |
| 92 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') | |
| 93 | |
| 94 # 2. load dicts | |
| 95 with open(json_file, "r") as f: | |
| 96 (tag_dict, cvrg_dict) = json.load(f) | |
| 97 | |
| 98 with open(sscs_json, "r") as f: | |
| 99 (mut_pos_dict, ref_pos_dict) = json.load(f) | |
| 100 | |
| 101 # 3. read bam file | |
| 102 # pysam.index(file2) | |
| 103 bam = pysam.AlignmentFile(file2, "rb") | |
| 104 | |
| 105 # 4. create mut_dict | |
| 106 mut_dict = {} | |
| 107 mut_read_pos_dict = {} | |
| 108 mut_read_dict = {} | |
| 109 reads_dict = {} | |
| 110 if mut_array.shape == (13, ): | |
| 111 mut_array = mut_array.reshape((1, len(mut_array))) | |
| 112 | |
| 113 for m in range(0, len(mut_array[:, 0])): | |
| 114 print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) | |
| 115 # for m in range(0, 5): | |
| 116 chrom = mut_array[m, 1] | |
| 117 stop_pos = mut_array[m, 2].astype(int) | |
| 118 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) | |
| 119 ref = mut_array[m, 9] | |
| 120 alt = mut_array[m, 10] | |
| 121 mut_dict[chrom_stop_pos] = {} | |
| 122 mut_read_pos_dict[chrom_stop_pos] = {} | |
| 123 reads_dict[chrom_stop_pos] = {} | |
| 124 | |
| 125 for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000): | |
| 126 if pileupcolumn.reference_pos == stop_pos - 1: | |
| 127 count_alt = 0 | |
| 128 count_ref = 0 | |
| 129 count_indel = 0 | |
| 130 count_n = 0 | |
| 131 count_other = 0 | |
| 132 count_lowq = 0 | |
| 133 n = 0 | |
| 134 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), | |
| 135 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) | |
| 136 for pileupread in pileupcolumn.pileups: | |
| 137 n += 1 | |
| 138 if not pileupread.is_del and not pileupread.is_refskip: | |
| 139 tag = pileupread.alignment.query_name | |
| 140 nuc = pileupread.alignment.query_sequence[pileupread.query_position] | |
| 141 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 | |
| 142 if phred < phred_score: | |
| 143 nuc = "lowQ" | |
| 144 if tag not in mut_dict[chrom_stop_pos]: | |
| 145 mut_dict[chrom_stop_pos][tag] = {} | |
| 146 if nuc in mut_dict[chrom_stop_pos][tag]: | |
| 147 mut_dict[chrom_stop_pos][tag][nuc] += 1 | |
| 148 else: | |
| 149 mut_dict[chrom_stop_pos][tag][nuc] = 1 | |
| 150 if tag not in mut_read_pos_dict[chrom_stop_pos]: | |
| 151 mut_read_pos_dict[chrom_stop_pos][tag] = np.array(pileupread.query_position) + 1 | |
| 152 reads_dict[chrom_stop_pos][tag] = len(pileupread.alignment.query_sequence) | |
| 153 else: | |
| 154 mut_read_pos_dict[chrom_stop_pos][tag] = np.append( | |
| 155 mut_read_pos_dict[chrom_stop_pos][tag], pileupread.query_position + 1) | |
| 156 reads_dict[chrom_stop_pos][tag] = np.append( | |
| 157 reads_dict[chrom_stop_pos][tag], len(pileupread.alignment.query_sequence)) | |
| 158 | |
| 159 if nuc == alt: | |
| 160 count_alt += 1 | |
| 161 if tag not in mut_read_dict: | |
| 162 mut_read_dict[tag] = {} | |
| 163 mut_read_dict[tag][chrom_stop_pos] = alt | |
| 164 else: | |
| 165 mut_read_dict[tag][chrom_stop_pos] = alt | |
| 166 elif nuc == ref: | |
| 167 count_ref += 1 | |
| 168 elif nuc == "N": | |
| 169 count_n += 1 | |
| 170 elif nuc == "lowQ": | |
| 171 count_lowq += 1 | |
| 172 else: | |
| 173 count_other += 1 | |
| 174 else: | |
| 175 count_indel += 1 | |
| 176 | |
| 177 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) | |
| 178 | |
| 179 for read in bam.fetch(until_eof=True): | |
| 180 if read.is_unmapped: | |
| 181 pure_tag = read.query_name[:-5] | |
| 182 nuc = "na" | |
| 183 for key in tag_dict[pure_tag].keys(): | |
| 184 if key not in mut_dict: | |
| 185 mut_dict[key] = {} | |
| 186 if read.query_name not in mut_dict[key]: | |
| 187 mut_dict[key][read.query_name] = {} | |
| 188 if nuc in mut_dict[key][read.query_name]: | |
| 189 mut_dict[key][read.query_name][nuc] += 1 | |
| 190 else: | |
| 191 mut_dict[key][read.query_name][nuc] = 1 | |
| 192 bam.close() | |
| 193 | |
| 194 # 5. create pure_tags_dict | |
| 195 pure_tags_dict = {} | |
| 196 for key1, value1 in sorted(mut_dict.items()): | |
| 197 i = np.where(np.array(['#'.join(str(i) for i in z) | |
| 198 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] | |
| 199 ref = mut_array[i, 9] | |
| 200 alt = mut_array[i, 10] | |
| 201 pure_tags_dict[key1] = {} | |
| 202 for key2, value2 in sorted(value1.items()): | |
| 203 for key3, value3 in value2.items(): | |
| 204 pure_tag = key2[:-5] | |
| 205 if key3 == alt: | |
| 206 if pure_tag in pure_tags_dict[key1]: | |
| 207 pure_tags_dict[key1][pure_tag] += 1 | |
| 208 else: | |
| 209 pure_tags_dict[key1][pure_tag] = 1 | |
| 210 | |
| 211 # 6. create pure_tags_dict_short with thresh | |
| 212 if thresh > 0: | |
| 213 pure_tags_dict_short = {} | |
| 214 for key, value in sorted(pure_tags_dict.items()): | |
| 215 if len(value) < thresh: | |
| 216 pure_tags_dict_short[key] = value | |
| 217 else: | |
| 218 pure_tags_dict_short = pure_tags_dict | |
| 219 | |
| 220 whole_array = [] | |
| 221 for k in pure_tags_dict.values(): | |
| 222 if len(k) != 0: | |
| 223 keys = k.keys() | |
| 224 if len(keys) > 1: | |
| 225 for k1 in keys: | |
| 226 whole_array.append(k1) | |
| 227 else: | |
| 228 whole_array.append(keys[0]) | |
| 229 | |
| 230 # 7. output summary with threshold | |
| 231 workbook = xlsxwriter.Workbook(outfile) | |
| 232 ws1 = workbook.add_worksheet("Results") | |
| 233 ws2 = workbook.add_worksheet("Allele frequencies") | |
| 234 ws3 = workbook.add_worksheet("Tiers") | |
| 235 | |
| 236 format1 = workbook.add_format({'bg_color': '#BCF5A9'}) # green | |
| 237 format2 = workbook.add_format({'bg_color': '#FFC7CE'}) # red | |
| 238 format3 = workbook.add_format({'bg_color': '#FACC2E'}) # yellow | |
| 239 | |
| 240 header_line = ('variant ID', 'tier', 'tag', 'mate', 'read pos.ab', 'read pos.ba', 'read median length.ab', | |
| 241 'read median length.ba', 'DCS median length', | |
| 242 'FS.ab', 'FS.ba', 'FSqc.ab', 'FSqc.ba', 'ref.ab', 'ref.ba', 'alt.ab', 'alt.ba', | |
| 243 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | |
| 244 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', | |
| 245 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', | |
| 246 'other mut', 'chimeric tag') | |
| 247 ws1.write_row(0, 0, header_line) | |
| 248 | |
| 249 counter_tier11 = 0 | |
| 250 counter_tier12 = 0 | |
| 251 counter_tier21 = 0 | |
| 252 counter_tier22 = 0 | |
| 253 counter_tier23 = 0 | |
| 254 counter_tier24 = 0 | |
| 255 counter_tier31 = 0 | |
| 256 counter_tier32 = 0 | |
| 257 counter_tier41 = 0 | |
| 258 counter_tier42 = 0 | |
| 259 | |
| 260 row = 1 | |
| 261 tier_dict = {} | |
| 262 for key1, value1 in sorted(mut_dict.items()): | |
| 263 counts_mut = 0 | |
| 264 if key1 in pure_tags_dict_short.keys(): | |
| 265 i = np.where(np.array(['#'.join(str(i) for i in z) | |
| 266 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] | |
| 267 ref = mut_array[i, 9] | |
| 268 alt = mut_array[i, 10] | |
| 269 dcs_median = cvrg_dict[key1][2] | |
| 270 | |
| 271 tier_dict[key1] = {} | |
| 272 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0)] | |
| 273 for k, v in values_tier_dict: | |
| 274 tier_dict[key1][k] = v | |
| 275 | |
| 276 used_keys = [] | |
| 277 if 'ab' in mut_pos_dict[key1].keys(): | |
| 278 sscs_mut_ab = mut_pos_dict[key1]['ab'] | |
| 279 else: | |
| 280 sscs_mut_ab = 0 | |
| 281 if 'ba' in mut_pos_dict[key1].keys(): | |
| 282 sscs_mut_ba = mut_pos_dict[key1]['ba'] | |
| 283 else: | |
| 284 sscs_mut_ba = 0 | |
| 285 if 'ab' in ref_pos_dict[key1].keys(): | |
| 286 sscs_ref_ab = ref_pos_dict[key1]['ab'] | |
| 287 else: | |
| 288 sscs_ref_ab = 0 | |
| 289 if 'ba' in ref_pos_dict[key1].keys(): | |
| 290 sscs_ref_ba = ref_pos_dict[key1]['ba'] | |
| 291 else: | |
| 292 sscs_ref_ba = 0 | |
| 293 for key2, value2 in sorted(value1.items()): | |
| 294 add_mut14 = "" | |
| 295 add_mut23 = "" | |
| 296 if (key2[:-5] in pure_tags_dict_short[key1].keys()) and (key2[:-5] not in used_keys) and (key1 in tag_dict[key2[:-5]].keys()): | |
| 297 if key2[:-5] + '.ab.1' in mut_dict[key1].keys(): | |
| 298 total1 = sum(mut_dict[key1][key2[:-5] + '.ab.1'].values()) | |
| 299 if 'na' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | |
| 300 na1 = mut_dict[key1][key2[:-5] + '.ab.1']['na'] | |
| 301 # na1f = na1/total1 | |
| 302 else: | |
| 303 # na1 = na1f = 0 | |
| 304 na1 = 0 | |
| 305 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | |
| 306 lowq1 = mut_dict[key1][key2[:-5] + '.ab.1']['lowQ'] | |
| 307 # lowq1f = lowq1 / total1 | |
| 308 else: | |
| 309 # lowq1 = lowq1f = 0 | |
| 310 lowq1 = 0 | |
| 311 if ref in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | |
| 312 ref1 = mut_dict[key1][key2[:-5] + '.ab.1'][ref] | |
| 313 ref1f = ref1 / (total1 - na1 - lowq1) | |
| 314 else: | |
| 315 ref1 = ref1f = 0 | |
| 316 if alt in mut_dict[key1][key2[:-5] + '.ab.1'].keys(): | |
| 317 alt1 = mut_dict[key1][key2[:-5] + '.ab.1'][alt] | |
| 318 alt1f = alt1 / (total1 - na1 - lowq1) | |
| 319 else: | |
| 320 alt1 = alt1f = 0 | |
| 321 total1new = total1 - na1 - lowq1 | |
| 322 if (key2[:-5] + '.ab.1') in mut_read_dict.keys(): | |
| 323 k1 = mut_read_dict[(key2[:-5] + '.ab.1')].keys() | |
| 324 add_mut1 = len(k1) | |
| 325 if add_mut1 > 1: | |
| 326 for k, v in mut_read_dict[(key2[:-5] + '.ab.1')].items(): | |
| 327 if k != key1: | |
| 328 if len(add_mut14) == 0: | |
| 329 add_mut14 = str(k) + "_" + v | |
| 330 else: | |
| 331 add_mut14 = add_mut14 + ", " + str(k) + "_" + v | |
| 332 else: | |
| 333 k1 = [] | |
| 334 else: | |
| 335 total1 = total1new = na1 = lowq1 = 0 | |
| 336 ref1 = alt1 = ref1f = alt1f = 0 | |
| 337 | |
| 338 if key2[:-5] + '.ab.2' in mut_dict[key1].keys(): | |
| 339 total2 = sum(mut_dict[key1][key2[:-5] + '.ab.2'].values()) | |
| 340 if 'na' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | |
| 341 na2 = mut_dict[key1][key2[:-5] + '.ab.2']['na'] | |
| 342 # na2f = na2 / total2 | |
| 343 else: | |
| 344 # na2 = na2f = 0 | |
| 345 na2 = 0 | |
| 346 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | |
| 347 lowq2 = mut_dict[key1][key2[:-5] + '.ab.2']['lowQ'] | |
| 348 # lowq2f = lowq2 / total2 | |
| 349 else: | |
| 350 # lowq2 = lowq2f = 0 | |
| 351 lowq2 = 0 | |
| 352 if ref in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | |
| 353 ref2 = mut_dict[key1][key2[:-5] + '.ab.2'][ref] | |
| 354 ref2f = ref2 / (total2 - na2 - lowq2) | |
| 355 else: | |
| 356 ref2 = ref2f = 0 | |
| 357 if alt in mut_dict[key1][key2[:-5] + '.ab.2'].keys(): | |
| 358 alt2 = mut_dict[key1][key2[:-5] + '.ab.2'][alt] | |
| 359 alt2f = alt2 / (total2 - na2 - lowq2) | |
| 360 else: | |
| 361 alt2 = alt2f = 0 | |
| 362 total2new = total2 - na2 - lowq2 | |
| 363 if (key2[:-5] + '.ab.2') in mut_read_dict.keys(): | |
| 364 k2 = mut_read_dict[(key2[:-5] + '.ab.2')].keys() | |
| 365 add_mut2 = len(k2) | |
| 366 if add_mut2 > 1: | |
| 367 for k, v in mut_read_dict[(key2[:-5] + '.ab.2')].items(): | |
| 368 if k != key1: | |
| 369 if len(add_mut23) == 0: | |
| 370 add_mut23 = str(k) + "_" + v | |
| 371 else: | |
| 372 add_mut23 = add_mut23 + ", " + str(k) + "_" + v | |
| 373 else: | |
| 374 k2 = [] | |
| 375 else: | |
| 376 total2 = total2new = na2 = lowq2 = 0 | |
| 377 ref2 = alt2 = ref2f = alt2f = 0 | |
| 378 | |
| 379 if key2[:-5] + '.ba.1' in mut_dict[key1].keys(): | |
| 380 total3 = sum(mut_dict[key1][key2[:-5] + '.ba.1'].values()) | |
| 381 if 'na' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | |
| 382 na3 = mut_dict[key1][key2[:-5] + '.ba.1']['na'] | |
| 383 # na3f = na3 / total3 | |
| 384 else: | |
| 385 # na3 = na3f = 0 | |
| 386 na3 = 0 | |
| 387 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | |
| 388 lowq3 = mut_dict[key1][key2[:-5] + '.ba.1']['lowQ'] | |
| 389 # lowq3f = lowq3 / total3 | |
| 390 else: | |
| 391 # lowq3 = lowq3f = 0 | |
| 392 lowq3 = 0 | |
| 393 if ref in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | |
| 394 ref3 = mut_dict[key1][key2[:-5] + '.ba.1'][ref] | |
| 395 ref3f = ref3 / (total3 - na3 - lowq3) | |
| 396 else: | |
| 397 ref3 = ref3f = 0 | |
| 398 if alt in mut_dict[key1][key2[:-5] + '.ba.1'].keys(): | |
| 399 alt3 = mut_dict[key1][key2[:-5] + '.ba.1'][alt] | |
| 400 alt3f = alt3 / (total3 - na3 - lowq3) | |
| 401 else: | |
| 402 alt3 = alt3f = 0 | |
| 403 total3new = total3 - na3 - lowq3 | |
| 404 if (key2[:-5] + '.ba.1') in mut_read_dict.keys(): | |
| 405 add_mut3 = len(mut_read_dict[(key2[:-5] + '.ba.1')].keys()) | |
| 406 if add_mut3 > 1: | |
| 407 for k, v in mut_read_dict[(key2[:-5] + '.ba.1')].items(): | |
| 408 if k != key1 and k not in k2: | |
| 409 if len(add_mut23) == 0: | |
| 410 add_mut23 = str(k) + "_" + v | |
| 411 else: | |
| 412 add_mut23 = add_mut23 + ", " + str(k) + "_" + v | |
| 413 else: | |
| 414 total3 = total3new = na3 = lowq3 = 0 | |
| 415 ref3 = alt3 = ref3f = alt3f = 0 | |
| 416 | |
| 417 if key2[:-5] + '.ba.2' in mut_dict[key1].keys(): | |
| 418 total4 = sum(mut_dict[key1][key2[:-5] + '.ba.2'].values()) | |
| 419 if 'na' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | |
| 420 na4 = mut_dict[key1][key2[:-5] + '.ba.2']['na'] | |
| 421 # na4f = na4 / total4 | |
| 422 else: | |
| 423 # na4 = na4f = 0 | |
| 424 na4 = 0 | |
| 425 if 'lowQ' in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | |
| 426 lowq4 = mut_dict[key1][key2[:-5] + '.ba.2']['lowQ'] | |
| 427 # lowq4f = lowq4 / total4 | |
| 428 else: | |
| 429 # lowq4 = lowq4f = 0 | |
| 430 lowq4 = 0 | |
| 431 if ref in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | |
| 432 ref4 = mut_dict[key1][key2[:-5] + '.ba.2'][ref] | |
| 433 ref4f = ref4 / (total4 - na4 - lowq4) | |
| 434 else: | |
| 435 ref4 = ref4f = 0 | |
| 436 if alt in mut_dict[key1][key2[:-5] + '.ba.2'].keys(): | |
| 437 alt4 = mut_dict[key1][key2[:-5] + '.ba.2'][alt] | |
| 438 alt4f = alt4 / (total4 - na4 - lowq4) | |
| 439 else: | |
| 440 alt4 = alt4f = 0 | |
| 441 total4new = total4 - na4 - lowq4 | |
| 442 if (key2[:-5] + '.ba.2') in mut_read_dict.keys(): | |
| 443 add_mut4 = len(mut_read_dict[(key2[:-5] + '.ba.2')].keys()) | |
| 444 if add_mut4 > 1: | |
| 445 for k, v in mut_read_dict[(key2[:-5] + '.ba.2')].items(): | |
| 446 if k != key1 and k not in k1: | |
| 447 if len(add_mut14) == 0: | |
| 448 add_mut14 = str(k) + "_" + v | |
| 449 else: | |
| 450 add_mut14 = add_mut14 + ", " + str(k) + "_" + v | |
| 451 else: | |
| 452 total4 = total4new = na4 = lowq4 = 0 | |
| 453 ref4 = alt4 = ref4f = alt4f = 0 | |
| 454 | |
| 455 read_pos1 = read_pos2 = read_pos3 = read_pos4 = -1 | |
| 456 read_len_median1 = read_len_median2 = read_len_median3 = read_len_median4 = 0 | |
| 457 | |
| 458 if key2[:-5] + '.ab.1' in mut_read_pos_dict[key1].keys(): | |
| 459 read_pos1 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.1']) | |
| 460 read_len_median1 = np.median(reads_dict[key1][key2[:-5] + '.ab.1']) | |
| 461 if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): | |
| 462 read_pos2 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ab.2']) | |
| 463 read_len_median2 = np.median(reads_dict[key1][key2[:-5] + '.ab.2']) | |
| 464 if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): | |
| 465 read_pos3 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.1']) | |
| 466 read_len_median3 = np.median(reads_dict[key1][key2[:-5] + '.ba.1']) | |
| 467 if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): | |
| 468 read_pos4 = np.median(mut_read_pos_dict[key1][key2[:-5] + '.ba.2']) | |
| 469 read_len_median4 = np.median(reads_dict[key1][key2[:-5] + '.ba.2']) | |
| 470 | |
| 471 used_keys.append(key2[:-5]) | |
| 472 counts_mut += 1 | |
| 473 if (alt1f + alt2f + alt3f + alt4f) > 0.5: | |
| 474 if total1new == 0: | |
| 475 ref1f = alt1f = None | |
| 476 alt1ff = -1 | |
| 477 else: | |
| 478 alt1ff = alt1f | |
| 479 if total2new == 0: | |
| 480 ref2f = alt2f = None | |
| 481 alt2ff = -1 | |
| 482 else: | |
| 483 alt2ff = alt2f | |
| 484 if total3new == 0: | |
| 485 ref3f = alt3f = None | |
| 486 alt3ff = -1 | |
| 487 else: | |
| 488 alt3ff = alt3f | |
| 489 if total4new == 0: | |
| 490 ref4f = alt4f = None | |
| 491 alt4ff = -1 | |
| 492 else: | |
| 493 alt4ff = alt4f | |
| 494 | |
| 495 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4) | |
| 496 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3) | |
| 497 trimmed = False | |
| 498 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): | |
| 499 total1new = 0 | |
| 500 alt1ff = 0 | |
| 501 trimmed = True | |
| 502 | |
| 503 if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))): | |
| 504 total4new = 0 | |
| 505 alt4ff = 0 | |
| 506 trimmed = True | |
| 507 | |
| 508 if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): | |
| 509 total2new = 0 | |
| 510 alt2ff = 0 | |
| 511 trimmed = True | |
| 512 | |
| 513 if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): | |
| 514 total3new = 0 | |
| 515 alt3ff = 0 | |
| 516 trimmed = True | |
| 517 | |
| 518 chrom, pos = re.split(r'\#', key1) | |
| 519 # assign tiers | |
| 520 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & | |
| 521 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | |
| 522 (all(int(ij) >= 3 for ij in [total2new, total3new]) & | |
| 523 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
| 524 tier = "1.1" | |
| 525 counter_tier11 += 1 | |
| 526 tier_dict[key1]["tier 1.1"] += 1 | |
| 527 | |
| 528 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & | |
| 529 any(int(ij) >= 3 for ij in [total1new, total4new]) & | |
| 530 any(int(ij) >= 3 for ij in [total2new, total3new]) & | |
| 531 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | |
| 532 tier = "1.2" | |
| 533 counter_tier12 += 1 | |
| 534 tier_dict[key1]["tier 1.2"] += 1 | |
| 535 | |
| 536 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | |
| 537 any(int(ij) >= 3 for ij in [total1new, total4new]) & | |
| 538 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | |
| 539 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
| 540 any(int(ij) >= 3 for ij in [total2new, total3new]) & | |
| 541 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
| 542 tier = "2.1" | |
| 543 counter_tier21 += 1 | |
| 544 tier_dict[key1]["tier 2.1"] += 1 | |
| 545 | |
| 546 elif (all(int(ij) >= 1 for ij in [total1new, total2new, total3new, total4new]) & | |
| 547 all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): | |
| 548 tier = "2.2" | |
| 549 counter_tier22 += 1 | |
| 550 tier_dict[key1]["tier 2.2"] += 1 | |
| 551 | |
| 552 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | |
| 553 any(int(ij) >= 3 for ij in [total2new, total3new]) & | |
| 554 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]) & | |
| 555 any(float(ij) >= 0.75 for ij in [alt2ff, alt3ff])) | | |
| 556 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
| 557 any(int(ij) >= 3 for ij in [total1new, total4new]) & | |
| 558 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]) & | |
| 559 any(float(ij) >= 0.75 for ij in [alt1ff, alt4ff]))): | |
| 560 tier = "2.3" | |
| 561 counter_tier23 += 1 | |
| 562 tier_dict[key1]["tier 2.3"] += 1 | |
| 563 | |
| 564 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | |
| 565 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | |
| 566 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
| 567 all(float(ij) >= 0.75 for ij in [alt2ff, alt3ff]))): | |
| 568 tier = "2.4" | |
| 569 counter_tier24 += 1 | |
| 570 tier_dict[key1]["tier 2.4"] += 1 | |
| 571 | |
| 572 elif ((len(pure_tags_dict_short[key1]) > 1) & | |
| 573 (all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) | | |
| 574 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | |
| 575 tier = "3.1" | |
| 576 counter_tier31 += 1 | |
| 577 tier_dict[key1]["tier 3.1"] += 1 | |
| 578 | |
| 579 elif ((all(int(ij) >= 1 for ij in [total1new, total4new]) & | |
| 580 all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff])) | | |
| 581 (all(int(ij) >= 1 for ij in [total2new, total3new]) & | |
| 582 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | |
| 583 tier = "3.2" | |
| 584 counter_tier32 += 1 | |
| 585 tier_dict[key1]["tier 3.2"] += 1 | |
| 586 | |
| 587 elif (trimmed): | |
| 588 tier = "4.1" | |
| 589 counter_tier41 += 1 | |
| 590 tier_dict[key1]["tier 4.1"] += 1 | |
| 591 | |
| 592 else: | |
| 593 tier = "4.2" | |
| 594 counter_tier42 += 1 | |
| 595 tier_dict[key1]["tier 4.2"] += 1 | |
| 596 | |
| 597 var_id = '-'.join([chrom, pos, ref, alt]) | |
| 598 sample_tag = key2[:-5] | |
| 599 array2 = np.unique(whole_array) # remove duplicate sequences to decrease running time | |
| 600 # exclude identical tag from array2, to prevent comparison to itself | |
| 601 same_tag = np.where(array2 == sample_tag) | |
| 602 index_array2 = np.arange(0, len(array2), 1) | |
| 603 index_withoutSame = np.delete(index_array2, same_tag) # delete identical tag from the data | |
| 604 array2 = array2[index_withoutSame] | |
| 605 if len(array2) != 0: # only perform chimera analysis if there is more than 1 variant | |
| 606 array1_half = sample_tag[0:int(len(sample_tag) / 2)] # mate1 part1 | |
| 607 array1_half2 = sample_tag[int(len(sample_tag) / 2):int(len(sample_tag))] # mate1 part 2 | |
| 608 array2_half = np.array([ii[0:int(len(ii) / 2)] for ii in array2]) # mate2 part1 | |
| 609 array2_half2 = np.array([ii[int(len(ii) / 2):int(len(ii))] for ii in array2]) # mate2 part2 | |
| 610 | |
| 611 min_tags_list_zeros = [] | |
| 612 chimera_tags = [] | |
| 613 for mate_b in [False, True]: | |
| 614 i = 0 # counter, only used to see how many HDs of tags were already calculated | |
| 615 if mate_b is False: # HD calculation for all a's | |
| 616 half1_mate1 = array1_half | |
| 617 half2_mate1 = array1_half2 | |
| 618 half1_mate2 = array2_half | |
| 619 half2_mate2 = array2_half2 | |
| 620 elif mate_b is True: # HD calculation for all b's | |
| 621 half1_mate1 = array1_half2 | |
| 622 half2_mate1 = array1_half | |
| 623 half1_mate2 = array2_half2 | |
| 624 half2_mate2 = array2_half | |
| 625 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" | |
| 626 dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) | |
| 627 min_index = np.where(dist == dist.min()) # get index of min HD | |
| 628 # get all "b's" of the tag or all "a's" of the tag with minimum HD | |
| 629 min_tag_half2 = half2_mate2[min_index] | |
| 630 min_tag_array2 = array2[min_index] # get whole tag with min HD | |
| 631 min_value = dist.min() | |
| 632 # calculate HD of "b" to all "b's" or "a" to all "a's" | |
| 633 dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) | |
| 634 for e in min_tag_half2]) | |
| 635 | |
| 636 dist2 = dist_second_half.max() | |
| 637 max_index = np.where(dist_second_half == dist_second_half.max())[0] # get index of max HD | |
| 638 max_tag = min_tag_array2[max_index] | |
| 639 | |
| 640 # tags which have identical parts: | |
| 641 if min_value == 0 or dist2 == 0: | |
| 642 min_tags_list_zeros.append(tag) | |
| 643 chimera_tags.append(max_tag) | |
| 644 # chimeric = True | |
| 645 # else: | |
| 646 # chimeric = False | |
| 647 | |
| 648 # if mate_b is False: | |
| 649 # text = "pos {}: sample tag: {}; HD a = {}; HD b' = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, min_value, dist2, list(max_tag), chimeric) | |
| 650 # else: | |
| 651 # text = "pos {}: sample tag: {}; HD a' = {}; HD b = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, dist2, min_value, list(max_tag), chimeric) | |
| 652 i += 1 | |
| 653 chimera_tags = [x for x in chimera_tags if x != []] | |
| 654 chimera_tags_new = [] | |
| 655 for i in chimera_tags: | |
| 656 if len(i) > 1: | |
| 657 for t in i: | |
| 658 chimera_tags_new.append(t) | |
| 659 else: | |
| 660 chimera_tags_new.extend(i) | |
| 661 chimera_tags_new = np.asarray(chimera_tags_new) | |
| 662 chimera = ", ".join(chimera_tags_new) | |
| 663 else: | |
| 664 chimera = "" | |
| 665 | |
| 666 if (read_pos1 == -1): | |
| 667 read_pos1 = read_len_median1 = None | |
| 668 if (read_pos4 == -1): | |
| 669 read_pos4 = read_len_median4 = None | |
| 670 if (read_pos2 == -1): | |
| 671 read_pos2 = read_len_median2 = None | |
| 672 if (read_pos3 == -1): | |
| 673 read_pos3 = read_len_median3 = None | |
| 674 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) | |
| 675 ws1.write_row(row, 0, line) | |
| 676 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) | |
| 677 ws1.write_row(row + 1, 0, line) | |
| 678 | |
| 679 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | |
| 680 {'type': 'formula', | |
| 681 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | |
| 682 'format': format1, | |
| 683 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | |
| 684 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | |
| 685 {'type': 'formula', | |
| 686 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(row + 1, row + 1, row + 1, row + 1), | |
| 687 'format': format3, | |
| 688 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | |
| 689 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | |
| 690 {'type': 'formula', | |
| 691 'criteria': '=$B${}>="3"'.format(row + 1), | |
| 692 'format': format2, | |
| 693 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | |
| 694 | |
| 695 row += 3 | |
| 696 | |
| 697 # sheet 2 | |
| 698 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (Du Novo)', 'AF (Du Novo)', | |
| 699 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', | |
| 700 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | |
| 701 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2') | |
| 702 | |
| 703 ws2.write_row(0, 0, header_line2) | |
| 704 row = 0 | |
| 705 | |
| 706 for key1, value1 in sorted(tier_dict.items()): | |
| 707 if key1 in pure_tags_dict_short.keys(): | |
| 708 i = np.where(np.array(['#'.join(str(i) for i in z) | |
| 709 for z in zip(mut_array[:, 1], mut_array[:, 2])]) == key1)[0][0] | |
| 710 ref = mut_array[i, 9] | |
| 711 alt = mut_array[i, 10] | |
| 712 chrom, pos = re.split(r'\#', key1) | |
| 713 ref_count = cvrg_dict[key1][0] | |
| 714 alt_count = cvrg_dict[key1][1] | |
| 715 cvrg = ref_count + alt_count | |
| 716 | |
| 717 var_id = '-'.join([chrom, pos, ref, alt]) | |
| 718 lst = [var_id, cvrg] | |
| 719 used_tiers = [] | |
| 720 cum_af = [] | |
| 721 for key2, value2 in sorted(value1.items()): | |
| 722 # calculate cummulative AF | |
| 723 used_tiers.append(value2) | |
| 724 if len(used_tiers) > 1: | |
| 725 cum = safe_div(sum(used_tiers), cvrg) | |
| 726 cum_af.append(cum) | |
| 727 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg), (cvrg - sum(used_tiers[-4:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-4:]))), alt_count, safe_div(alt_count, cvrg)]) | |
| 728 lst.extend(used_tiers) | |
| 729 lst.extend(cum_af) | |
| 730 lst = tuple(lst) | |
| 731 ws2.write_row(row + 1, 0, lst) | |
| 732 ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) | |
| 733 ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) | |
| 734 ws2.conditional_format('P{}:S{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:S{} P1:S1'.format(row + 2, row + 2)}) | |
| 735 row += 1 | |
| 736 | |
| 737 # sheet 3 | |
| 738 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), | |
| 739 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), | |
| 740 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), | |
| 741 ("tier 4.1", counter_tier41), ("tier 4.2", counter_tier42)] | |
| 742 | |
| 743 header = ("tier", "count") | |
| 744 ws3.write_row(0, 0, header) | |
| 745 | |
| 746 for i in range(len(sheet3)): | |
| 747 ws3.write_row(i + 1, 0, sheet3[i]) | |
| 748 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), | |
| 749 {'type': 'formula', | |
| 750 'criteria': '=OR($A${}="tier 1.1", $A${}="tier 1.2")'.format(i + 2, i + 2), | |
| 751 'format': format1}) | |
| 752 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), | |
| 753 {'type': 'formula', | |
| 754 'criteria': '=OR($A${}="tier 2.1", $A${}="tier 2.2", $A${}="tier 2.3", $A${}="tier 2.4")'.format(i + 2, i + 2, i + 2, i + 2), | |
| 755 'format': format3}) | |
| 756 ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), | |
| 757 {'type': 'formula', | |
| 758 'criteria': '=OR($A${}="tier 3.1", $A${}="tier 3.2", $A${}="tier 4.1", $A${}="tier 4.2")'.format(i + 2, i + 2, i + 2, i + 2), | |
| 759 'format': format2}) | |
| 760 | |
| 761 description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "remaining variants")] | |
| 762 examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", | |
| 763 "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", | |
| 764 "4081", "4098", "5", "10", "", ""), | |
| 765 ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, | |
| 766 "289", "0", "0", "0", "0", "0", "0", "3", "6", None, None, None, None, | |
| 767 "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], | |
| 768 [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", | |
| 769 "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", | |
| 770 "0", "4081", "4098", "5", "10", "", ""), | |
| 771 ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "11068", "268", "268", "270", "288", "289", | |
| 772 "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", | |
| 773 "7", "4081", "4098", "5", "10", "", "")], | |
| 774 [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", | |
| 775 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "1", | |
| 776 "6", "47170", "41149", "", ""), | |
| 777 ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", | |
| 778 "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "1", | |
| 779 "6", "47170", "41149", "", "")], | |
| 780 [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", | |
| 781 "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", | |
| 782 "4081", "4098", "5", "10", "", ""), | |
| 783 ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, | |
| 784 "289", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", None, None, "0", "0", | |
| 785 "0", "0", "4081", "4098", "5", "10", "", "")], | |
| 786 [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", | |
| 787 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", | |
| 788 "4081", "4098", "5", "10", "", ""), | |
| 789 ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", | |
| 790 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", | |
| 791 "4081", "4098", "5", "10", "", "")], | |
| 792 [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, | |
| 793 "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", | |
| 794 "0", "0", "0", "4081", "4098", "5", "10", "", ""), | |
| 795 ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", | |
| 796 "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "1", "7", | |
| 797 "4081", "4098", "5", "10", "", "")], | |
| 798 [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", | |
| 799 "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "4081", | |
| 800 "4098", "5", "10", "", ""), | |
| 801 ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", | |
| 802 "1", "1", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "4081", | |
| 803 "4098", "5", "10", "", "")], | |
| 804 [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", | |
| 805 "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", | |
| 806 "3", "3", "47170", "41149", "", ""), | |
| 807 ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, | |
| 808 "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", | |
| 809 "0", "0", "0", "1", "3", "3", "47170", "41149", "", "")], | |
| 810 [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", | |
| 811 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | |
| 812 "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), | |
| 813 ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", | |
| 814 "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", | |
| 815 "0.666666666666667", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], | |
| 816 [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "0", "255", "276", "269", | |
| 817 "5", "6", "5", "6", "0", "0", "5", "6", "0", "0", "1", "1", "0", "0", "0", "0", "1", | |
| 818 "1", "5348", "5350", "", ""), | |
| 819 ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, | |
| 820 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | |
| 821 "0", "0", "0", "1", "1", "5348", "5350", "", "")], | |
| 822 [("Chr5:5-20000-13983-G-C", "4.2", "ATGTTGTGAATAACCCACAC", "ab1.ba2", "209", "186", "255", "276", "269", | |
| 823 "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "1", | |
| 824 "1", "5348", "5350", "", ""), | |
| 825 ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, | |
| 826 "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", | |
| 827 "0", "0", "0", "1", "1", "5348", "5350", "", "")]] | |
| 828 | |
| 829 ws3.write(11, 0, "Description of tiers with examples") | |
| 830 ws3.write_row(12, 0, header_line) | |
| 831 row = 0 | |
| 832 for i in range(len(description_tiers)): | |
| 833 ws3.write_row(13 + row + i + 1, 0, description_tiers[i]) | |
| 834 ex = examples_tiers[i] | |
| 835 for k in range(len(ex)): | |
| 836 ws3.write_row(13 + row + i + k + 2, 0, ex[k]) | |
| 837 ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), {'type': 'formula', 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(13 + row + i + k + 2, 13 + row + i + k + 2), 'format': format1, 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) | |
| 838 ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), | |
| 839 {'type': 'formula', 'criteria': '=OR($B${}="2.1",$B${}="2.2", $B${}="2.3", $B${}="2.4")'.format(13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2, 13 + row + i + k + 2), | |
| 840 'format': format3, | |
| 841 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) | |
| 842 ws3.conditional_format('L{}:M{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3), | |
| 843 {'type': 'formula', | |
| 844 'criteria': '=$B${}>="3"'.format(13 + row + i + k + 2), | |
| 845 'format': format2, | |
| 846 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3, 13 + row + i + k + 2, 13 + row + i + k + 3)}) | |
| 847 row += 3 | |
| 848 workbook.close() | |
| 849 | |
| 850 | |
| 851 if __name__ == '__main__': | |
| 852 sys.exit(read2mut(sys.argv)) |
