Mercurial > repos > mheinzl > variant_analyzer2
changeset 61:3722268ffac5 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Wed, 17 Mar 2021 13:14:40 +0000 |
parents | 9ce53bf0931c |
children | 66c1245436b9 |
files | read2mut.py |
diffstat | 1 files changed, 147 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
--- a/read2mut.py Mon Mar 15 11:43:41 2021 +0000 +++ b/read2mut.py Wed Mar 17 13:14:40 2021 +0000 @@ -126,6 +126,7 @@ mut_read_dict = {} reads_dict = {} mut_read_cigar_dict = {} + real_start_end = {} i = 0 mut_array = [] @@ -149,6 +150,7 @@ mut_read_pos_dict[chrom_stop_pos] = {} reads_dict[chrom_stop_pos] = {} mut_read_cigar_dict[chrom_stop_pos] = {} + real_start_end[chrom_stop_pos] = {} for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): if pileupcolumn.reference_pos == stop_pos: @@ -167,6 +169,21 @@ tag = pileupread.alignment.query_name nuc = pileupread.alignment.query_sequence[pileupread.query_position] phred = ord(pileupread.alignment.qual[pileupread.query_position]) - 33 + + # if read is softclipped, store real position in reference + if "S" in pileupread.alignment.cigarstring: + # spftclipped at start + if re.search(r"^[0-9]+S", pileupread.alignment.cigarstring): + start = pileupread.alignment.reference_start - int(pileupread.alignment.cigarstring.split("S")[0]) + end = pileupread.alignment.reference_end + # softclipped at end + elif re.search(r"S$", pileupread.alignment.cigarstring): + end = pileupread.alignment.reference_end + int(re.split("[A-Z]", str(pileupread.alignment.cigarstring))[-2]) + start = pileupread.alignment.reference_start + else: + end = pileupread.alignment.reference_end + start = pileupread.alignment.reference_start + if phred < phred_score: nuc = "lowQ" if tag not in mut_dict[chrom_stop_pos]: @@ -179,12 +196,12 @@ mut_read_pos_dict[chrom_stop_pos][tag] = [pileupread.query_position + 1] reads_dict[chrom_stop_pos][tag] = [len(pileupread.alignment.query_sequence)] mut_read_cigar_dict[chrom_stop_pos][tag] = [pileupread.alignment.cigarstring] - - #alignedRefPositions = pileupread.get_reference_positions()[0] + real_start_end[chrom_stop_pos][tag] = [(start, end)] else: mut_read_pos_dict[chrom_stop_pos][tag].append(pileupread.query_position + 1) reads_dict[chrom_stop_pos][tag].append(len(pileupread.alignment.query_sequence)) mut_read_cigar_dict[chrom_stop_pos][tag].append(pileupread.alignment.cigarstring) + real_start_end[chrom_stop_pos][tag].append((start, end)) if nuc == alt: count_alt += 1 if tag not in mut_read_dict: @@ -533,55 +550,63 @@ read_pos1 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.1'])) read_len_median1 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.1'])) cigars_dcs1 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.1'] - #print(mut_read_cigar_dict[key1][key2[:-5] + '.ab.1']) pos_read1 = mut_read_pos_dict[key1][key2[:-5] + '.ab.1'] - #print(cigars_dcs1) end_read1 = reads_dict[key1][key2[:-5] + '.ab.1'] + ref_positions1 = real_start_end[key1][key2[:-5] + '.ab.1'] if key2[:-5] + '.ab.2' in mut_read_pos_dict[key1].keys(): read_pos2 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ab.2'])) read_len_median2 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ab.2'])) cigars_dcs2 = mut_read_cigar_dict[key1][key2[:-5] + '.ab.2'] pos_read2 = mut_read_pos_dict[key1][key2[:-5] + '.ab.2'] end_read2 = reads_dict[key1][key2[:-5] + '.ab.2'] + ref_positions2 = real_start_end[key1][key2[:-5] + '.ab.2'] if key2[:-5] + '.ba.1' in mut_read_pos_dict[key1].keys(): read_pos3 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.1'])) read_len_median3 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.1'])) cigars_dcs3 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.1'] pos_read3 = mut_read_pos_dict[key1][key2[:-5] + '.ba.1'] end_read3 = reads_dict[key1][key2[:-5] + '.ba.1'] + ref_positions3 = real_start_end[key1][key2[:-5] + '.ba.1'] if key2[:-5] + '.ba.2' in mut_read_pos_dict[key1].keys(): read_pos4 = np.median(np.array(mut_read_pos_dict[key1][key2[:-5] + '.ba.2'])) read_len_median4 = np.median(np.array(reads_dict[key1][key2[:-5] + '.ba.2'])) - #print(mut_read_cigar_dict[key1][key2[:-5] + '.ba.2']) cigars_dcs4 = mut_read_cigar_dict[key1][key2[:-5] + '.ba.2'] - pos_read4 = mut_read_pos_dict[key1][key2[:-5] + '.ba.2'] - #print(cigars_dcs4) end_read4 = reads_dict[key1][key2[:-5] + '.ba.2'] + ref_positions4 = real_start_end[key1][key2[:-5] + '.ba.2'] used_keys.append(key2[:-5]) counts_mut += 1 if (alt1f + alt2f + alt3f + alt4f) > 0.5: + total1new_trim, total2new_trim, total3new_trim, total4new_trim = total1new, total2new, total3new, total4new if total1new == 0: ref1f = alt1f = None alt1ff = -1 + alt1ff_trim = -1 else: alt1ff = alt1f + alt1ff_trim = alt1f if total2new == 0: ref2f = alt2f = None alt2ff = -1 + alt2ff_trim = -1 else: alt2ff = alt2f + alt2ff_trim = alt2f if total3new == 0: ref3f = alt3f = None alt3ff = -1 + alt3ff_trim = -1 else: alt3ff = alt3f + alt3ff_trim = alt3f if total4new == 0: ref4f = alt4f = None alt4ff = -1 + alt4ff_trim = alt4f else: alt4ff = alt4f + alt4ff_trim = alt4f beg1 = beg4 = beg2 = beg3 = 0 @@ -596,8 +621,9 @@ softclipped_mutation_oneOfTwoSSCS_diffMates = False softclipped_mutation_oneMate = False softclipped_mutation_oneMateOneSSCS = False - print() - print(key1, cigars_dcs1, cigars_dcs4, cigars_dcs2, cigars_dcs3) + + trimmed_actual_high_tier = False + dist_start_read1 = dist_start_read2 = dist_start_read3 = dist_start_read4 = [] dist_end_read1 = dist_end_read2 = dist_end_read3 = dist_end_read4 = [] ratio_dist_start1 = ratio_dist_start2 = ratio_dist_start3 = ratio_dist_start4 = False @@ -606,14 +632,12 @@ # mate 1 - SSCS ab softclipped_idx1 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs1] ratio1 = safe_div(sum(softclipped_idx1), float(len(softclipped_idx1))) >= threshold_reads - if any(ij is True for ij in softclipped_idx1): softclipped_both_ends_idx1 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs1] softclipped_start1 = [int(string.split("S")[0]) if re.search(r"^[0-9]+S", string) else -1 for string in cigars_dcs1] softclipped_end1 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs1] dist_start_read1 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start1, pos_read1)] dist_end_read1 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end1, pos_read1, end_read1)] - # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping if any(ij is True for ij in softclipped_both_ends_idx1): print(softclipped_both_ends_idx1) @@ -625,7 +649,6 @@ dist_start_read1[nr] = thr + 1000 # use dist of end and set start to very large number ratio_dist_start1 = safe_div(sum([True if x <= thr else False for x in dist_start_read1]), float(sum(softclipped_idx1))) >= threshold_reads ratio_dist_end1 = safe_div(sum([True if x <= thr else False for x in dist_end_read1]), float(sum(softclipped_idx1))) >= threshold_reads - print(key1, "mate1 ab", dist_start_read1, dist_end_read1, cigars_dcs1, ratio1, ratio_dist_start1, ratio_dist_end1) # mate 1 - SSCS ba softclipped_idx4 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs4] @@ -636,7 +659,6 @@ softclipped_end4 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs4] dist_start_read4 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start4, pos_read4)] dist_end_read4 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end4, pos_read4, end_read4)] - # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping if any(ij is True for ij in softclipped_both_ends_idx4): print(softclipped_both_ends_idx4) @@ -648,11 +670,9 @@ dist_start_read4[nr] = thr + 1000 # use dist of end and set start to very large number ratio_dist_start4 = safe_div(sum([True if x <= thr else False for x in dist_start_read4]), float(sum(softclipped_idx4))) >= threshold_reads ratio_dist_end4 = safe_div(sum([True if x <= thr else False for x in dist_end_read4]), float(sum(softclipped_idx4))) >= threshold_reads - print(key1, "mate1 ba", dist_start_read4, dist_end_read4,cigars_dcs4, ratio4, ratio_dist_start4, ratio_dist_end4) # mate 2 - SSCS ab softclipped_idx2 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs2] - #print(sum(softclipped_idx2)) ratio2 = safe_div(sum(softclipped_idx2), float(len(softclipped_idx2))) >= threshold_reads if any(ij is True for ij in softclipped_idx2): softclipped_both_ends_idx2 = [True if (re.search(r"^[0-9]+S", string) and re.search(r"S$", string)) else False for string in cigars_dcs2] @@ -660,7 +680,6 @@ softclipped_end2 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs2] dist_start_read2 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start2, pos_read2)] dist_end_read2 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end2, pos_read2, end_read2)] - # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping if any(ij is True for ij in softclipped_both_ends_idx2): print(softclipped_both_ends_idx2) @@ -671,10 +690,7 @@ else: dist_start_read2[nr] = thr + 1000 # use dist of end and set start to very large number ratio_dist_start2 = safe_div(sum([True if x <= thr else False for x in dist_start_read2]), float(sum(softclipped_idx2))) >= threshold_reads - #print(ratio_dist_end2) - #print([True if x <= thr else False for x in ratio_dist_end2]) ratio_dist_end2 = safe_div(sum([True if x <= thr else False for x in dist_end_read2]), float(sum(softclipped_idx2))) >= threshold_reads - print(key1, "mate2 ab", dist_start_read2, dist_end_read2,cigars_dcs2, ratio2, ratio_dist_start2, ratio_dist_end2) # mate 2 - SSCS ba softclipped_idx3 = [True if re.search(r"^[0-9]+S", string) or re.search(r"S$", string) else False for string in cigars_dcs3] @@ -685,7 +701,6 @@ softclipped_end3 = [int(re.split("[A-Z]", str(string))[-2]) if re.search(r"S$", string) else -1 for string in cigars_dcs3] dist_start_read3 = [(pos - soft) if soft != -1 else thr + 1000 for soft, pos in zip(softclipped_start3, pos_read3)] dist_end_read3 = [(length_read - pos - soft) if soft != -1 else thr + 1000 for soft, pos, length_read in zip(softclipped_end3, pos_read3, end_read3)] - # if read at both ends softclipped --> select end with smallest distance between mut position and softclipping if any(ij is True for ij in softclipped_both_ends_idx3): print(softclipped_both_ends_idx3) @@ -695,10 +710,8 @@ dist_end_read3[nr] = thr + 1000 # use dist of start and set start to a larger number than thresh else: dist_start_read3[nr] = thr + 1000 # use dist of end and set start to very large number - #print([True if x <= thr else False for x in dist_start_read3]) ratio_dist_start3 = safe_div(sum([True if x <= thr else False for x in dist_start_read3]), float(sum(softclipped_idx3))) >= threshold_reads ratio_dist_end3 = safe_div(sum([True if x <= thr else False for x in dist_end_read3]), float(sum(softclipped_idx3))) >= threshold_reads - print(key1, "mate2 ba", dist_start_read3, dist_end_read3,cigars_dcs3, ratio3, ratio_dist_start3, ratio_dist_end3) if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & # contradictory variant all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | @@ -728,25 +741,85 @@ alt3ff = 0 trimmed = False contradictory = False - print(key1, "softclipped_mutation_allMates", softclipped_mutation_allMates) # information of both mates available --> only one mate softclipped elif (((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4)) | (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3))) & all(float(ij) > 0. for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): # all mates available # if distance between softclipping and mutation is at start or end of the read smaller than threshold + min_start1 = min(min([ij[0] for ij in ref_positions1]), min([ij[0] for ij in ref_positions4])) # red + min_start2 = min(min([ij[0] for ij in ref_positions2]), min([ij[0] for ij in ref_positions3])) # blue + max_end1 = max(max([ij[1] for ij in ref_positions1]), max([ij[1] for ij in ref_positions4])) # red + max_end2 = max(max([ij[1] for ij in ref_positions2]), max([ij[1] for ij in ref_positions3])) # blue + if (min_start1 > min_start2) or (max_end1 > max_end2): # if mate1 is red and mate2 is blue + softclipped_mutation_oneOfTwoMates = False + # blue mate at beginning softclipped + if min_start1 > min_start2: + n_spacer_barcode = min_start1 - min_start2 + read_pos2 = read_pos2 - n_spacer_barcode + read_pos3 = read_pos3 - n_spacer_barcode + read_len_median2 = read_len_median2 - n_spacer_barcode + read_len_median3 = read_len_median3 - n_spacer_barcode + # red mate at end softclipped + if max_end1 > max_end2: + n_spacer_barcode = max_end1 - max_end2 + read_len_median1 = read_len_median1 - n_spacer_barcode + read_len_median4 = read_len_median4 - n_spacer_barcode + elif (min_start1 < min_start2) or (max_end1 < max_end2): # if mate1 is blue and mate2 is red + softclipped_mutation_oneOfTwoMates = False + if min_start1 < min_start2: + n_spacer_barcode = min_start2 - min_start1 + read_pos1 = read_pos1 - n_spacer_barcode + read_pos4 = read_pos4 - n_spacer_barcode + read_len_median1 = read_len_median1 - n_spacer_barcode + read_len_median4 = read_len_median4 - n_spacer_barcode + if max_end1 < max_end2: # if mate1 ends after mate 2 starts + n_spacer_barcode = max_end2 - max_end1 + read_len_median2 = read_len_median2 - n_spacer_barcode + read_len_median3 = read_len_median3 - n_spacer_barcode + else: + softclipped_mutation_oneOfTwoMates = True + alt1ff = 0 + alt4ff = 0 + alt2ff = 0 + alt3ff = 0 + trimmed = False + contradictory = False softclipped_mutation_allMates = False - softclipped_mutation_oneOfTwoMates = True softclipped_mutation_oneOfTwoSSCS = False - softclipped_mutation_oneOfTwoSSCS_diffMates = False softclipped_mutation_oneMate = False softclipped_mutation_oneMateOneSSCS = False - alt1ff = 0 - alt4ff = 0 - alt2ff = 0 - alt3ff = 0 - trimmed = False - contradictory = False - print(key1, "softclipped_mutation_oneOfTwoMates", softclipped_mutation_oneOfTwoMates) + + if softclipped_mutation_oneOfTwoMates is False: # check trimming tier + if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): + beg1 = total1new + total1new = 0 + alt1ff = 0 + alt1f = 0 + trimmed = True + + if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))): + beg4 = total4new + total4new = 0 + alt4ff = 0 + alt4f = 0 + trimmed = True + + if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): + beg2 = total2new + total2new = 0 + alt2ff = 0 + alt2f = 0 + trimmed = True + + if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): + beg3 = total3new + total3new = 0 + alt3ff = 0 + alt3f = 0 + trimmed = True + details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) + details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) + # information of both mates available --> only one mate softclipped elif (((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & ((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & @@ -764,7 +837,6 @@ alt3ff = 0 trimmed = False contradictory = False - print(key1, "softclipped_mutation_oneOfTwoSSCS", softclipped_mutation_oneOfTwoSSCS, [alt1ff, alt2ff, alt3ff, alt4ff]) # information of one mate available --> all reads of one mate are softclipped elif ((ratio1 & ratio4 & (ratio_dist_start1 | ratio_dist_end1) & (ratio_dist_start4 | ratio_dist_end4) & @@ -772,10 +844,6 @@ (ratio2 & ratio3 & (ratio_dist_start2 | ratio_dist_end2) & (ratio_dist_start3 | ratio_dist_end3) & all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) > 0. for ij in [alt2ff, alt3ff]))): # all mates available # if distance between softclipping and mutation is at start or end of the read smaller than threshold - #if ((((len(dist_start_read1) > 0 | len(dist_end_read1) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1))) & - # ((len(dist_start_read4) > 0 | len(dist_end_read4) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4)))) | - # (((len(dist_start_read2) > 0 | len(dist_end_read2) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2))) & - # ((len(dist_start_read3) > 0 | len(dist_end_read3) > 0 ) & all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3))))): softclipped_mutation_allMates = False softclipped_mutation_oneOfTwoMates = False softclipped_mutation_oneOfTwoSSCS = False @@ -788,17 +856,12 @@ alt3ff = 0 trimmed = False contradictory = False - print(key1, "softclipped_mutation_oneMate", softclipped_mutation_oneMate) # information of one mate available --> only one SSCS is softclipped elif ((((ratio1 & (ratio_dist_start1 | ratio_dist_end1)) | (ratio4 & (ratio_dist_start4 | ratio_dist_end4))) & (all(float(ij) < 0. for ij in [alt2ff, alt3ff]) & all(float(ij) > 0. for ij in [alt1ff, alt4ff]))) | (((ratio2 & (ratio_dist_start2 | ratio_dist_end2)) | (ratio3 & (ratio_dist_start3 | ratio_dist_end3))) & (all(float(ij) < 0. for ij in [alt1ff, alt4ff]) & all(float(ij) < 0. for ij in [alt2ff, alt3ff])))): # all mates available # if distance between softclipping and mutation is at start or end of the read smaller than threshold - #if ((all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read1, dist_end_read1)) | - # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read4, dist_end_read4))) | - # (all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read2, dist_end_read2)) | - # all(ij <= thr or nm <= thr for ij, nm in zip(dist_start_read3, dist_end_read3)))): softclipped_mutation_allMates = False softclipped_mutation_oneOfTwoMates = False softclipped_mutation_oneOfTwoSSCS = False @@ -811,7 +874,6 @@ alt3ff = 0 trimmed = False contradictory = False - print(key1, "softclipped_mutation_oneMateOneSSCS", softclipped_mutation_oneMateOneSSCS) else: if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): @@ -844,9 +906,6 @@ details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) - - #sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]]) - # assign tiers if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | @@ -915,16 +974,49 @@ counter_tier32 += 1 tier_dict[key1]["tier 3.2"] += 1 - #elif (trimmed) and (sum_highTiers > 1): - # tier = "2.5" - # counter_tier25 += 1 - # tier_dict[key1]["tier 2.5"] += 1 - elif (trimmed): tier = "4" counter_tier4 += 1 tier_dict[key1]["tier 4"] += 1 + # assign tiers + if ((all(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & + all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) | + (all(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & + all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))): + trimmed_actual_high_tier = True + elif (all(int(ij) >= 1 for ij in [total1new_trim, total2new_trim, total3new_trim, total4new_trim]) & + any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & + any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & + all(float(ij) >= 0.75 for ij in [alt1ff, alt2ff, alt3ff, alt4ff])): + trimmed_actual_high_tier = True + elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) & + any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & + all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) | + (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) & + any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & + all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))): + trimmed_actual_high_tier = True + elif (all(int(ij) >= 1 for ij in [total1new_trim, total2new_trim, total3new_trim, total4new_trim]) & + all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt2ff_trim, alt3ff_trim, alt4ff_trim])): + trimmed_actual_high_tier = True + elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) & + any(int(ij) >= 3 for ij in [total2new_trim, total3new_trim]) & + all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim]) & + any(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim])) | + (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) & + any(int(ij) >= 3 for ij in [total1new_trim, total4new_trim]) & + all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]) & + any(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim]))): + trimmed_actual_high_tier = True + elif ((all(int(ij) >= 1 for ij in [total1new_trim, total4new_trim]) & + all(float(ij) >= 0.75 for ij in [alt1ff_trim, alt4ff_trim])) | + (all(int(ij) >= 1 for ij in [total2new_trim, total3new_trim]) & + all(float(ij) >= 0.75 for ij in [alt2ff_trim, alt3ff_trim]))): + trimmed_actual_high_tier = True + else: + trimmed_actual_high_tier = False + elif softclipped_mutation_allMates: tier = "5.1" counter_tier51 += 1 @@ -1008,14 +1100,6 @@ if min_value == 0 or dist2 == 0: min_tags_list_zeros.append(tag) chimera_tags.append(max_tag) - # chimeric = True - # else: - # chimeric = False - - # if mate_b is False: - # text = "pos {}: sample tag: {}; HD a = {}; HD b' = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, min_value, dist2, list(max_tag), chimeric) - # else: - # text = "pos {}: sample tag: {}; HD a' = {}; HD b = {}; similar tag(s): {}; chimeric = {}".format(pos, sample_tag, dist2, min_value, list(max_tag), chimeric) i += 1 chimera_tags = [x for x in chimera_tags if x != []] chimera_tags_new = [] @@ -1072,7 +1156,7 @@ #if key1 not in list(change_tier_after_print.keys()): # change_tier_after_print[key1] = [((row, line, line2))] #else: - change_tier_after_print.append((row, line, line2)) + change_tier_after_print.append((row, line, line2, trimmed_actual_high_tier)) row += 3 @@ -1101,13 +1185,14 @@ tier_dict[key1]["tier 4"] = 0 correct_tier = True - #lines = change_tier_after_print[key1] for sample in change_tier_after_print: row_number = sample[0] line1 = sample[1] line2 = sample[2] + actual_high_tier = sample[3] + current_tier = list(line1)[1] - if correct_tier and list(line1)[1] == "4": + if correct_tier and (current_tier == "4") and actual_high_tier: line1 = list(line1) line1[1] = "2.5" line1 = tuple(line1)