Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 75:6ccff403db8a draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
| author | mheinzl |
|---|---|
| date | Tue, 23 Mar 2021 15:18:17 +0000 |
| parents | eca1365eb42c |
| children | 56f271641828 |
comparison
equal
deleted
inserted
replaced
| 74:5023186c2061 | 75:6ccff403db8a |
|---|---|
| 129 real_start_end = {} | 129 real_start_end = {} |
| 130 i = 0 | 130 i = 0 |
| 131 mut_array = [] | 131 mut_array = [] |
| 132 | 132 |
| 133 for count, variant in enumerate(VCF(file1)): | 133 for count, variant in enumerate(VCF(file1)): |
| 134 #if count == 2000: | |
| 135 # break | |
| 136 chrom = variant.CHROM | 134 chrom = variant.CHROM |
| 137 stop_pos = variant.start | 135 stop_pos = variant.start |
| 138 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) | |
| 139 ref = variant.REF | 136 ref = variant.REF |
| 140 if len(variant.ALT) == 0: | 137 if len(variant.ALT) == 0: |
| 141 continue | 138 continue |
| 142 else: | 139 else: |
| 143 alt = variant.ALT[0] | 140 alt = variant.ALT[0] |
| 159 count_indel = 0 | 156 count_indel = 0 |
| 160 count_n = 0 | 157 count_n = 0 |
| 161 count_other = 0 | 158 count_other = 0 |
| 162 count_lowq = 0 | 159 count_lowq = 0 |
| 163 n = 0 | 160 n = 0 |
| 164 #print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), | |
| 165 # "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) | |
| 166 for pileupread in pileupcolumn.pileups: | 161 for pileupread in pileupcolumn.pileups: |
| 167 n += 1 | 162 n += 1 |
| 168 if not pileupread.is_del and not pileupread.is_refskip: | 163 if not pileupread.is_del and not pileupread.is_refskip: |
| 169 tag = pileupread.alignment.query_name | 164 tag = pileupread.alignment.query_name |
| 170 nuc = pileupread.alignment.query_sequence[pileupread.query_position] | 165 nuc = pileupread.alignment.query_sequence[pileupread.query_position] |
| 171 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 | 166 phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 |
| 172 | |
| 173 # if read is softclipped, store real position in reference | 167 # if read is softclipped, store real position in reference |
| 174 if "S" in pileupread.alignment.cigarstring: | 168 if "S" in pileupread.alignment.cigarstring: |
| 175 # spftclipped at start | 169 # spftclipped at start |
| 176 if re.search(r"^[0-9]+S", pileupread.alignment.cigarstring): | 170 if re.search(r"^[0-9]+S", pileupread.alignment.cigarstring): |
| 177 start = pileupread.alignment.reference_start - int(pileupread.alignment.cigarstring.split("S")[0]) | 171 start = pileupread.alignment.reference_start - int(pileupread.alignment.cigarstring.split("S")[0]) |
| 178 end = pileupread.alignment.reference_end | 172 end = pileupread.alignment.reference_end |
| 179 # softclipped at end | 173 # softclipped at end |
| 196 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1] | 190 mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1] |
| 197 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)] | 191 reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)] |
| 198 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring] | 192 mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring] |
| 199 real_start_end[chrom_stop_pos][tag] = [(start, end)] | 193 real_start_end[chrom_stop_pos][tag] = [(start, end)] |
| 200 else: | 194 else: |
| 201 mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1) | 195 mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1) |
| 202 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence)) | 196 reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence)) |
| 203 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring) | 197 mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring) |
| 204 real_start_end[chrom_stop_pos][tag].append((start, end)) | 198 real_start_end[chrom_stop_pos][tag].append((start, end)) |
| 205 if nuc == alt: | 199 if nuc == alt: |
| 206 count_alt += 1 | 200 count_alt += 1 |
| 207 if tag not in mut_read_dict: | 201 if tag not in mut_read_dict: |
| 208 mut_read_dict[tag] = {} | 202 mut_read_dict[tag] = {} |
| 218 else: | 212 else: |
| 219 count_other += 1 | 213 count_other += 1 |
| 220 else: | 214 else: |
| 221 count_indel += 1 | 215 count_indel += 1 |
| 222 | 216 |
| 223 #print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s\n" % (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, count_indel, count_lowq)) | |
| 224 #else: | |
| 225 # print("indels are currently not evaluated") | |
| 226 mut_array = np.array(mut_array) | 217 mut_array = np.array(mut_array) |
| 227 for read in bam.fetch(until_eof=True): | 218 for read in bam.fetch(until_eof=True): |
| 228 if read.is_unmapped: | 219 if read.is_unmapped: |
| 229 pure_tag = read.query_name[:-5] | 220 pure_tag = read.query_name[:-5] |
| 230 nuc = "na" | 221 nuc = "na" |
| 240 bam.close() | 231 bam.close() |
| 241 | 232 |
| 242 # create pure_tags_dict | 233 # create pure_tags_dict |
| 243 pure_tags_dict = {} | 234 pure_tags_dict = {} |
| 244 for key1, value1 in sorted(mut_dict.items()): | 235 for key1, value1 in sorted(mut_dict.items()): |
| 245 #if len(np.where(np.array(['#'.join(str(i) for i in z) | |
| 246 # for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0: | |
| 247 # continue | |
| 248 | |
| 249 i = np.where(np.array(['#'.join(str(i) for i in z) | 236 i = np.where(np.array(['#'.join(str(i) for i in z) |
| 250 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] | 237 for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0] |
| 251 ref = mut_array[i, 2] | 238 ref = mut_array[i, 2] |
| 252 alt = mut_array[i, 3] | 239 alt = mut_array[i, 3] |
| 253 pure_tags_dict[key1] = {} | 240 pure_tags_dict[key1] = {} |
| 334 alt = mut_array[i, 3] | 321 alt = mut_array[i, 3] |
| 335 dcs_median = cvrg_dict[key1][2] | 322 dcs_median = cvrg_dict[key1][2] |
| 336 whole_array = pure_tags_dict_short[key1].keys() | 323 whole_array = pure_tags_dict_short[key1].keys() |
| 337 | 324 |
| 338 tier_dict[key1] = {} | 325 tier_dict[key1] = {} |
| 339 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0),("tier 2.5", 0), | 326 values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 2.5", 0), |
| 340 ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), | 327 ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4", 0), ("tier 5.1", 0), ("tier 5.2", 0), ("tier 5.3", 0), ("tier 5.4", 0), ("tier 5.5", 0), |
| 341 ("tier 6", 0), ("tier 7", 0)] | 328 ("tier 6", 0), ("tier 7", 0)] |
| 342 for k, v in values_tier_dict: | 329 for k, v in values_tier_dict: |
| 343 tier_dict[key1][k] = v | 330 tier_dict[key1][k] = v |
| 344 | 331 |
| 617 | 604 |
| 618 # mate 1 - SSCS ab | 605 # mate 1 - SSCS ab |
| 619 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] | 606 softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] |
| 620 ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads | 607 ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads |
| 621 if any(ij is True for ij in softclipped_idx1): | 608 if any(ij is True for ij in softclipped_idx1): |
| 622 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] | 609 softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] |
| 623 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1] | 610 softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1] |
| 624 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] | 611 softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] |
| 625 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] | 612 dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] |
| 626 dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)] | 613 dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)] |
| 627 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | 614 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping |
| 628 if any(ij is True for ij in softclipped_both_ends_idx1): | 615 if any(ij is True for ij in softclipped_both_ends_idx1): |
| 629 for nr, indx in enumerate(softclipped_both_ends_idx1): | 616 for nr, indx in enumerate(softclipped_both_ends_idx1): |
| 630 if indx: | 617 if indx: |
| 631 if dist_start_read1[nr] <= dist_end_read1[nr]: | 618 if dist_start_read1[nr] <= dist_end_read1[nr]: |
| 632 dist_end_read1[nr] = thr + 1000 # use dist of start and set start to very large number | 619 dist_end_read1[nr] = thr + 1000 # use dist of start and set start to very large number |
| 633 else: | 620 else: |
| 634 dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number | 621 dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number |
| 635 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads | 622 ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads |
| 636 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads | 623 ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads |
| 637 | 624 |
| 638 # mate 1 - SSCS ba | 625 # mate 1 - SSCS ba |
| 639 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] | 626 softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] |
| 640 ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads | 627 ratio4 = safe_div(sum(softclipped_idx4), float(len(softclipped_idx4))) >= threshold_reads |
| 641 if any(ij is True for ij in softclipped_idx4): | 628 if any(ij is True for ij in softclipped_idx4): |
| 642 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] | 629 softclipped_both_ends_idx4 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs4] |
| 643 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4] | 630 softclipped_start4 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs4] |
| 644 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4] | 631 softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4] |
| 645 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] | 632 dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] |
| 646 dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)] | 633 dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)] |
| 647 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | 634 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping |
| 648 if any(ij is True for ij in softclipped_both_ends_idx4): | 635 if any(ij is True for ij in softclipped_both_ends_idx4): |
| 649 for nr, indx in enumerate(softclipped_both_ends_idx4): | 636 for nr, indx in enumerate(softclipped_both_ends_idx4): |
| 650 if indx: | 637 if indx: |
| 651 if dist_start_read4[nr] <= dist_end_read4[nr]: | 638 if dist_start_read4[nr] <= dist_end_read4[nr]: |
| 652 dist_end_read4[nr] = thr + 1000 # use dist of start and set start to very large number | 639 dist_end_read4[nr] = thr + 1000 # use dist of start and set start to very large number |
| 653 else: | 640 else: |
| 654 dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number | 641 dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number |
| 655 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads | 642 ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads |
| 656 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads | 643 ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads |
| 657 | 644 |
| 658 # mate 2 - SSCS ab | 645 # mate 2 - SSCS ab |
| 659 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] | 646 softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] |
| 660 ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads | 647 ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads |
| 661 if any(ij is True for ij in softclipped_idx2): | 648 if any(ij is True for ij in softclipped_idx2): |
| 662 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] | 649 softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] |
| 663 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2] | 650 softclipped_start2 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs2] |
| 664 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] | 651 softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] |
| 665 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)] | 652 dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)] |
| 666 dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)] | 653 dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)] |
| 667 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | 654 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping |
| 668 if any(ij is True for ij in softclipped_both_ends_idx2): | 655 if any(ij is True for ij in softclipped_both_ends_idx2): |
| 669 for nr, indx in enumerate(softclipped_both_ends_idx2): | 656 for nr, indx in enumerate(softclipped_both_ends_idx2): |
| 670 if indx: | 657 if indx: |
| 671 if dist_start_read2[nr] <= dist_end_read2[nr]: | 658 if dist_start_read2[nr] <= dist_end_read2[nr]: |
| 672 dist_end_read2[nr] = thr + 1000 # use dist of start and set start to very large number | 659 dist_end_read2[nr] = thr + 1000 # use dist of start and set start to very large number |
| 673 else: | 660 else: |
| 674 dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number | 661 dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number |
| 675 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads | 662 ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads |
| 676 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads | 663 ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads |
| 677 | 664 |
| 678 # mate 2 - SSCS ba | 665 # mate 2 - SSCS ba |
| 679 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] | 666 softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] |
| 682 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] | 669 softclipped_both_ends_idx3 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs3] |
| 683 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3] | 670 softclipped_start3 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs3] |
| 684 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] | 671 softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] |
| 685 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] | 672 dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] |
| 686 dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)] | 673 dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)] |
| 687 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping | 674 # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping |
| 688 if any(ij is True for ij in softclipped_both_ends_idx3): | 675 if any(ij is True for ij in softclipped_both_ends_idx3): |
| 689 for nr, indx in enumerate(softclipped_both_ends_idx3): | 676 for nr, indx in enumerate(softclipped_both_ends_idx3): |
| 690 if indx: | 677 if indx: |
| 691 if dist_start_read3[nr] <= dist_end_read3[nr]: | 678 if dist_start_read3[nr] <= dist_end_read3[nr]: |
| 692 dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh | 679 dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh |
| 693 else: | 680 else: |
| 694 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number | 681 dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number |
| 695 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads | 682 ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads |
| 696 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads | 683 ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads |
| 697 | 684 |
| 698 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant | 685 if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant |
| 699 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | | 686 all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | |
| 705 alt3ff = 0 | 692 alt3ff = 0 |
| 706 trimmed = False | 693 trimmed = False |
| 707 contradictory = True | 694 contradictory = True |
| 708 # softclipping tiers | 695 # softclipping tiers |
| 709 # information of both mates available --> all reads for both mates and SSCS are softclipped | 696 # information of both mates available --> all reads for both mates and SSCS are softclipped |
| 710 elif (ratio1 & ratio4 & ratio2 & ratio3 & | 697 elif (ratio1 & ratio4 & ratio2 & ratio3 & |
| 711 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & | 698 (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & |
| 712 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | 699 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available |
| 713 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | 700 # if distance between softclipping and mutation is at start or end of the read smaller than threshold |
| 714 softclipped_mutation_allMates = True | 701 softclipped_mutation_allMates = True |
| 715 softclipped_mutation_oneOfTwoMates = False | 702 softclipped_mutation_oneOfTwoMates = False |
| 716 softclipped_mutation_oneOfTwoSSCS = False | 703 softclipped_mutation_oneOfTwoSSCS = False |
| 717 softclipped_mutation_oneOfTwoSSCS_diffMates = False | 704 softclipped_mutation_oneOfTwoSSCS_diffMates = False |
| 718 softclipped_mutation_oneMate = False | 705 softclipped_mutation_oneMate = False |
| 724 trimmed = False | 711 trimmed = False |
| 725 contradictory = False | 712 contradictory = False |
| 726 # information of both mates available --> only one mate softclipped | 713 # information of both mates available --> only one mate softclipped |
| 727 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) | | 714 elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) | |
| 728 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) & | 715 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) & |
| 729 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | 716 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available |
| 730 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | 717 # if distance between softclipping and mutation is at start or end of the read smaller than threshold |
| 731 min_start1 = min(min([ij[0] for ij in ref_positions1]), min([ij[0] for ij in ref_positions4])) # red | 718 min_start1 = min(min([ij[0] for ij in ref_positions1]), min([ij[0] for ij in ref_positions4])) # red |
| 732 min_start2 = min(min([ij[0] for ij in ref_positions2]), min([ij[0] for ij in ref_positions3])) # blue | 719 min_start2 = min(min([ij[0] for ij in ref_positions2]), min([ij[0] for ij in ref_positions3])) # blue |
| 733 max_end1 = max(max([ij[1] for ij in ref_positions1]), max([ij[1] for ij in ref_positions4])) # red | 720 max_end1 = max(max([ij[1] for ij in ref_positions1]), max([ij[1] for ij in ref_positions4])) # red |
| 734 max_end2 = max(max([ij[1] for ij in ref_positions2]), max([ij[1] for ij in ref_positions3])) # blue | 721 max_end2 = max(max([ij[1] for ij in ref_positions2]), max([ij[1] for ij in ref_positions3])) # blue |
| 735 if (min_start1 > min_start2) or (max_end1 > max_end2): # if mate1 is red and mate2 is blue | 722 if (min_start1 > min_start2) or (max_end1 > max_end2): # if mate1 is red and mate2 is blue |
| 736 softclipped_mutation_oneOfTwoMates = False | 723 softclipped_mutation_oneOfTwoMates = False |
| 737 # blue mate at beginning softclipped | 724 # blue mate at beginning softclipped |
| 738 if min_start1 > min_start2: | 725 if min_start1 > min_start2: |
| 739 n_spacer_barcode = min_start1 - min_start2 | 726 n_spacer_barcode = min_start1 - min_start2 |
| 740 read_pos2 = read_pos2 - n_spacer_barcode | 727 read_pos2 = read_pos2 - n_spacer_barcode |
| 741 read_pos3 = read_pos3 - n_spacer_barcode | 728 read_pos3 = read_pos3 - n_spacer_barcode |
| 742 read_len_median2 = read_len_median2 - n_spacer_barcode | 729 read_len_median2 = read_len_median2 - n_spacer_barcode |
| 743 read_len_median3 = read_len_median3 - n_spacer_barcode | 730 read_len_median3 = read_len_median3 - n_spacer_barcode |
| 744 # red mate at end softclipped | 731 # red mate at end softclipped |
| 745 if max_end1 > max_end2: | 732 if max_end1 > max_end2: |
| 746 n_spacer_barcode = max_end1 - max_end2 | 733 n_spacer_barcode = max_end1 - max_end2 |
| 747 read_len_median1 = read_len_median1 - n_spacer_barcode | 734 read_len_median1 = read_len_median1 - n_spacer_barcode |
| 748 read_len_median4 = read_len_median4 - n_spacer_barcode | 735 read_len_median4 = read_len_median4 - n_spacer_barcode |
| 749 elif (min_start1 < min_start2) or (max_end1 < max_end2): # if mate1 is blue and mate2 is red | 736 elif (min_start1 < min_start2) or (max_end1 < max_end2): # if mate1 is blue and mate2 is red |
| 750 softclipped_mutation_oneOfTwoMates = False | 737 softclipped_mutation_oneOfTwoMates = False |
| 751 if min_start1 < min_start2: | 738 if min_start1 < min_start2: |
| 752 n_spacer_barcode = min_start2 - min_start1 | 739 n_spacer_barcode = min_start2 - min_start1 |
| 753 read_pos1 = read_pos1 - n_spacer_barcode | 740 read_pos1 = read_pos1 - n_spacer_barcode |
| 754 read_pos4 = read_pos4 - n_spacer_barcode | 741 read_pos4 = read_pos4 - n_spacer_barcode |
| 755 read_len_median1 = read_len_median1 - n_spacer_barcode | 742 read_len_median1 = read_len_median1 - n_spacer_barcode |
| 756 read_len_median4 = read_len_median4 - n_spacer_barcode | 743 read_len_median4 = read_len_median4 - n_spacer_barcode |
| 757 if max_end1 < max_end2: # if mate1 ends after mate 2 starts | 744 if max_end1 < max_end2: # if mate1 ends after mate 2 starts |
| 758 n_spacer_barcode = max_end2 - max_end1 | 745 n_spacer_barcode = max_end2 - max_end1 |
| 759 read_len_median2 = read_len_median2 - n_spacer_barcode | 746 read_len_median2 = read_len_median2 - n_spacer_barcode |
| 760 read_len_median3 = read_len_median3 - n_spacer_barcode | 747 read_len_median3 = read_len_median3 - n_spacer_barcode |
| 761 else: | 748 else: |
| 762 softclipped_mutation_oneOfTwoMates = True | 749 softclipped_mutation_oneOfTwoMates = True |
| 768 contradictory = False | 755 contradictory = False |
| 769 softclipped_mutation_allMates = False | 756 softclipped_mutation_allMates = False |
| 770 softclipped_mutation_oneOfTwoSSCS = False | 757 softclipped_mutation_oneOfTwoSSCS = False |
| 771 softclipped_mutation_oneMate = False | 758 softclipped_mutation_oneMate = False |
| 772 softclipped_mutation_oneMateOneSSCS = False | 759 softclipped_mutation_oneMateOneSSCS = False |
| 773 | 760 |
| 774 if softclipped_mutation_oneOfTwoMates is False: # check trimming tier | 761 if softclipped_mutation_oneOfTwoMates is False: # check trimming tier |
| 775 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): | 762 if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): |
| 776 beg1 = total1new | 763 beg1 = total1new |
| 777 total1new = 0 | 764 total1new = 0 |
| 778 alt1ff = 0 | 765 alt1ff = 0 |
| 779 alt1f = 0 | 766 alt1f = 0 |
| 803 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) | 790 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) |
| 804 | 791 |
| 805 # information of both mates available --> only one mate softclipped | 792 # information of both mates available --> only one mate softclipped |
| 806 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & | 793 elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & |
| 807 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & | 794 ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & |
| 808 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available | 795 all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available |
| 809 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | 796 # if distance between softclipping and mutation is at start or end of the read smaller than threshold |
| 810 softclipped_mutation_allMates = False | 797 softclipped_mutation_allMates = False |
| 811 softclipped_mutation_oneOfTwoMates = False | 798 softclipped_mutation_oneOfTwoMates = False |
| 812 softclipped_mutation_oneOfTwoSSCS = True | 799 softclipped_mutation_oneOfTwoSSCS = True |
| 813 softclipped_mutation_oneOfTwoSSCS_diffMates = False | 800 softclipped_mutation_oneOfTwoSSCS_diffMates = False |
| 814 softclipped_mutation_oneMate = False | 801 softclipped_mutation_oneMate = False |
| 821 contradictory = False | 808 contradictory = False |
| 822 | 809 |
| 823 # information of one mate available --> all reads of one mate are softclipped | 810 # information of one mate available --> all reads of one mate are softclipped |
| 824 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & | 811 elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & |
| 825 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | | 812 all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff])) | |
| 826 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & | 813 (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & |
| 827 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available | 814 all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available |
| 828 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | 815 # if distance between softclipping and mutation is at start or end of the read smaller than threshold |
| 829 softclipped_mutation_allMates = False | 816 softclipped_mutation_allMates = False |
| 830 softclipped_mutation_oneOfTwoMates = False | 817 softclipped_mutation_oneOfTwoMates = False |
| 831 softclipped_mutation_oneOfTwoSSCS = False | 818 softclipped_mutation_oneOfTwoSSCS = False |
| 832 softclipped_mutation_oneOfTwoSSCS_diffMates = False | 819 softclipped_mutation_oneOfTwoSSCS_diffMates = False |
| 833 softclipped_mutation_oneMate = True | 820 softclipped_mutation_oneMate = True |
| 839 trimmed = False | 826 trimmed = False |
| 840 contradictory = False | 827 contradictory = False |
| 841 # information of one mate available --> only one SSCS is softclipped | 828 # information of one mate available --> only one SSCS is softclipped |
| 842 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & | 829 elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & |
| 843 (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) | | 830 (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) | |
| 844 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & | 831 (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & |
| 845 (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available | 832 (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available |
| 846 # if distance between softclipping and mutation is at start or end of the read smaller than threshold | 833 # if distance between softclipping and mutation is at start or end of the read smaller than threshold |
| 847 softclipped_mutation_allMates = False | 834 softclipped_mutation_allMates = False |
| 848 softclipped_mutation_oneOfTwoMates = False | 835 softclipped_mutation_oneOfTwoMates = False |
| 849 softclipped_mutation_oneOfTwoSSCS = False | 836 softclipped_mutation_oneOfTwoSSCS = False |
| 850 softclipped_mutation_oneOfTwoSSCS_diffMates = False | 837 softclipped_mutation_oneOfTwoSSCS_diffMates = False |
| 851 softclipped_mutation_oneMate = False | 838 softclipped_mutation_oneMate = False |
| 959 elif (trimmed): | 946 elif (trimmed): |
| 960 tier = "4" | 947 tier = "4" |
| 961 counter_tier4 += 1 | 948 counter_tier4 += 1 |
| 962 tier_dict[key1]["tier 4"] += 1 | 949 tier_dict[key1]["tier 4"] += 1 |
| 963 | 950 |
| 964 # assign tiers | 951 # assign tiers |
| 965 if ((all(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & | 952 if ((all(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & |
| 966 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) | | 953 all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) | |
| 967 (all(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & | 954 (all(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & |
| 968 all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))): | 955 all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))): |
| 969 trimmed_actual_high_tier = True | 956 trimmed_actual_high_tier = True |
| 1110 if (read_pos2 == -1): | 1097 if (read_pos2 == -1): |
| 1111 read_pos2 = read_len_median2 = None | 1098 read_pos2 = read_len_median2 = None |
| 1112 if (read_pos3 == -1): | 1099 if (read_pos3 == -1): |
| 1113 read_pos3 = read_len_median3 = None | 1100 read_pos3 = read_len_median3 = None |
| 1114 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) | 1101 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) |
| 1115 #ws1.write_row(row, 0, line) | 1102 # ws1.write_row(row, 0, line) |
| 1116 #csv_writer.writerow(line) | 1103 # csv_writer.writerow(line) |
| 1117 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) | 1104 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) |
| 1118 #ws1.write_row(row + 1, 0, line2) | 1105 # ws1.write_row(row + 1, 0, line2) |
| 1119 #csv_writer.writerow(line2) | 1106 # csv_writer.writerow(line2) |
| 1120 | 1107 |
| 1121 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1108 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 1122 # {'type': 'formula', | 1109 # {'type': 'formula', |
| 1123 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 1110 # 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
| 1124 # 'format': format1, | 1111 # 'format': format1, |
| 1125 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1112 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
| 1126 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1113 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 1127 # {'type': 'formula', | 1114 # {'type': 'formula', |
| 1128 # 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), | 1115 # 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), |
| 1129 # 'format': format3, | 1116 # 'format': format3, |
| 1130 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1117 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
| 1131 #ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1118 # ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 1132 # {'type': 'formula', | 1119 # {'type': 'formula', |
| 1133 # 'criteria': '=$B${}>="3"'.format(row + 1), | 1120 # 'criteria': '=$B${}>="3"'.format(row + 1), |
| 1134 # 'format': format2, | 1121 # 'format': format2, |
| 1135 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1122 # 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
| 1136 change_tier_after_print.append((row, line, line2, trimmed_actual_high_tier)) | 1123 change_tier_after_print.append((row, line, line2, trimmed_actual_high_tier)) |
| 1175 csv_writer.writerow(line1) | 1162 csv_writer.writerow(line1) |
| 1176 ws1.write_row(row_number + 1, 0, line2) | 1163 ws1.write_row(row_number + 1, 0, line2) |
| 1177 csv_writer.writerow(line2) | 1164 csv_writer.writerow(line2) |
| 1178 | 1165 |
| 1179 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), | 1166 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), |
| 1180 {'type': 'formula', | 1167 {'type': 'formula', |
| 1181 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), | 1168 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row_number + 1, row_number + 1), |
| 1182 'format': format1, | 1169 'format': format1, |
| 1183 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) | 1170 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) |
| 1184 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), | 1171 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), |
| 1185 {'type': 'formula', | 1172 {'type': 'formula', |
| 1186 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), | 1173 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row_number + 1, row_number + 1, row_number + 1, row_number + 1, row_number + 1), |
| 1187 'format': format3, | 1174 'format': format3, |
| 1188 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) | 1175 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) |
| 1189 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), | 1176 ws1.conditional_format('L{}:M{}'.format(row_number + 1, row_number + 2), |
| 1190 {'type': 'formula', | 1177 {'type': 'formula', |
| 1191 'criteria': '=$B${}>="3"'.format(row_number + 1), | 1178 'criteria': '=$B${}>="3"'.format(row_number + 1), |
| 1192 'format': format2, | 1179 'format': format2, |
| 1193 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) | 1180 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row_number + 1, row_number + 2, row_number + 1, row_number + 2, row_number + 1, row_number + 2)}) |
| 1194 | |
| 1195 # sheet 2 | 1181 # sheet 2 |
| 1196 if chimera_correction: | 1182 if chimera_correction: |
| 1197 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1183 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'chimera-corrected cvrg (tiers 1.1-2.5)', 'chimeras in AC alt (tiers 1.1-2.5)', 'chimera-corrected AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
| 1198 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', | 1184 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', |
| 1199 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1185 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
| 1200 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') | 1186 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') |
| 1201 else: | 1187 else: |
| 1202 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1188 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.5)', 'AC alt (tiers 1.1-2.5)', 'AF (tiers 1.1-2.5)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
| 1203 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', | 1189 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', |
| 1204 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1190 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |
| 1205 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') | 1191 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-2.5', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4', 'AF 1.1-5.1', 'AF 1.1-5.2', 'AF 1.1-5.3', 'AF 1.1-5.4', 'AF 1.1-5.5', 'AF 1.1-6', 'AF 1.1-7') |
| 1226 # calculate cummulative AF | 1212 # calculate cummulative AF |
| 1227 used_tiers.append(value2) | 1213 used_tiers.append(value2) |
| 1228 if len(used_tiers) > 1: | 1214 if len(used_tiers) > 1: |
| 1229 cum = safe_div(sum(used_tiers), cvrg) | 1215 cum = safe_div(sum(used_tiers), cvrg) |
| 1230 cum_af.append(cum) | 1216 cum_af.append(cum) |
| 1231 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place | 1217 if sum(used_tiers) == 0: # skip mutations that are filtered by the VA in the first place |
| 1232 continue | 1218 continue |
| 1233 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) | 1219 lst.extend([sum(used_tiers), safe_div(sum(used_tiers), cvrg)]) |
| 1234 lst.extend([(cvrg - sum(used_tiers[-10:])), sum(used_tiers[0:7]), safe_div(sum(used_tiers[0:7]), (cvrg - sum(used_tiers[-10:])))]) | 1220 lst.extend([(cvrg - sum(used_tiers[-10:])), sum(used_tiers[0:7]), safe_div(sum(used_tiers[0:7]), (cvrg - sum(used_tiers[-10:])))]) |
| 1235 if chimera_correction: | 1221 if chimera_correction: |
| 1236 chimeras_all = chimera_dict[key1][1] | 1222 chimeras_all = chimera_dict[key1][1] |
| 1237 new_alt = sum(used_tiers[0:7]) - chimeras_all | 1223 new_alt = sum(used_tiers[0:7]) - chimeras_all |
| 1255 ws2.conditional_format('Q{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$Q$1="tier 3.1"', 'format': format22, 'multi_range': 'Q{}:Z{} Q1:Z1'.format(row + 2, row + 2)}) | 1241 ws2.conditional_format('Q{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$Q$1="tier 3.1"', 'format': format22, 'multi_range': 'Q{}:Z{} Q1:Z1'.format(row + 2, row + 2)}) |
| 1256 row += 1 | 1242 row += 1 |
| 1257 | 1243 |
| 1258 # sheet 3 | 1244 # sheet 3 |
| 1259 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), | 1245 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), |
| 1260 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), ("tier 2.5", counter_tier25), | 1246 ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), ("tier 2.5", counter_tier25), |
| 1261 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4", counter_tier4), | 1247 ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4", counter_tier4), |
| 1262 ("tier 5.1", counter_tier51), ("tier 5.2", counter_tier52), | 1248 ("tier 5.1", counter_tier51), ("tier 5.2", counter_tier52), |
| 1263 ("tier 5.3", counter_tier53), ("tier 5.4", counter_tier54), ("tier 5.5", counter_tier55), ("tier 6", counter_tier6), ("tier 7", counter_tier7)] | 1249 ("tier 5.3", counter_tier53), ("tier 5.4", counter_tier54), ("tier 5.5", counter_tier55), ("tier 6", counter_tier6), ("tier 7", counter_tier7)] |
| 1264 | 1250 |
| 1265 header = ("tier", "count") | 1251 header = ("tier", "count") |
| 1401 csv_data.close() | 1387 csv_data.close() |
| 1402 | 1388 |
| 1403 | 1389 |
| 1404 if __name__ == '__main__': | 1390 if __name__ == '__main__': |
| 1405 sys.exit(read2mut(sys.argv)) | 1391 sys.exit(read2mut(sys.argv)) |
| 1406 |
