Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 48:e2a655533077 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Wed, 10 Mar 2021 14:37:20 +0000 |
parents | edf8596463a8 |
children | aa45100f5b14 |
comparison
equal
deleted
inserted
replaced
47:edf8596463a8 | 48:e2a655533077 |
---|---|
308 counter_tier7 = 0 | 308 counter_tier7 = 0 |
309 | 309 |
310 row = 1 | 310 row = 1 |
311 tier_dict = {} | 311 tier_dict = {} |
312 chimera_dict = {} | 312 chimera_dict = {} |
313 change_tier_after_print = {} | |
313 for key1, value1 in sorted(mut_dict.items()): | 314 for key1, value1 in sorted(mut_dict.items()): |
314 counts_mut = 0 | 315 counts_mut = 0 |
315 chimeric_tag_list = [] | 316 chimeric_tag_list = [] |
316 chimeric_tag = {} | 317 chimeric_tag = {} |
317 if key1 in pure_tags_dict_short.keys(): | 318 if key1 in pure_tags_dict_short.keys(): |
826 trimmed = True | 827 trimmed = True |
827 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) | 828 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) |
828 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) | 829 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) |
829 | 830 |
830 | 831 |
831 sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]]) | 832 #sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]]) |
832 | 833 |
833 # assign tiers | 834 # assign tiers |
834 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & | 835 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & |
835 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | | 836 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | |
836 (all(int(ij) >= 3 for ij in [total2new, total3new]) & | 837 (all(int(ij) >= 3 for ij in [total2new, total3new]) & |
896 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): | 897 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): |
897 tier = "3.2" | 898 tier = "3.2" |
898 counter_tier32 += 1 | 899 counter_tier32 += 1 |
899 tier_dict[key1]["tier 3.2"] += 1 | 900 tier_dict[key1]["tier 3.2"] += 1 |
900 | 901 |
901 elif (trimmed) and (sum_highTiers > 1): | 902 #elif (trimmed) and (sum_highTiers > 1): |
902 tier = "2.5" | 903 # tier = "2.5" |
903 counter_tier25 += 1 | 904 # counter_tier25 += 1 |
904 tier_dict[key1]["tier 2.5"] += 1 | 905 # tier_dict[key1]["tier 2.5"] += 1 |
905 | 906 |
906 elif (trimmed): | 907 elif (trimmed): |
907 tier = "4" | 908 tier = "4" |
908 counter_tier4 += 1 | 909 counter_tier4 += 1 |
909 tier_dict[key1]["tier 4"] += 1 | 910 tier_dict[key1]["tier 4"] += 1 |
1029 read_pos2 = read_len_median2 = None | 1030 read_pos2 = read_len_median2 = None |
1030 if (read_pos3 == -1): | 1031 if (read_pos3 == -1): |
1031 read_pos3 = read_len_median3 = None | 1032 read_pos3 = read_len_median3 = None |
1032 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) | 1033 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) |
1033 ws1.write_row(row, 0, line) | 1034 ws1.write_row(row, 0, line) |
1034 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) | 1035 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) |
1035 ws1.write_row(row + 1, 0, line) | 1036 ws1.write_row(row + 1, 0, line2) |
1036 | 1037 |
1037 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1038 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1038 {'type': 'formula', | 1039 {'type': 'formula', |
1039 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 1040 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
1040 'format': format1, | 1041 'format': format1, |
1047 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 1048 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
1048 {'type': 'formula', | 1049 {'type': 'formula', |
1049 'criteria': '=$B${}>="3"'.format(row + 1), | 1050 'criteria': '=$B${}>="3"'.format(row + 1), |
1050 'format': format2, | 1051 'format': format2, |
1051 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | 1052 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) |
1053 if trimmed: | |
1054 if key1 not in list(mut_read_pos_dict.keys()): | |
1055 change_tier_after_print[key1] = [((row, line), (row, line2))] | |
1056 else: | |
1057 change_tier_after_print[key1].append(((row, line), (row, line2))) | |
1052 | 1058 |
1053 row += 3 | 1059 row += 3 |
1054 if chimera_correction: | 1060 if chimera_correction: |
1055 chimeric_dcs_high_tiers = 0 | 1061 chimeric_dcs_high_tiers = 0 |
1056 chimeric_dcs = 0 | 1062 chimeric_dcs = 0 |
1061 if high_tiers == len(tiers): | 1067 if high_tiers == len(tiers): |
1062 chimeric_dcs_high_tiers += high_tiers - 1 | 1068 chimeric_dcs_high_tiers += high_tiers - 1 |
1063 else: | 1069 else: |
1064 chimeric_dcs_high_tiers += high_tiers | 1070 chimeric_dcs_high_tiers += high_tiers |
1065 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) | 1071 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) |
1072 | |
1073 # move tier 4 counts to tier 2.5 if there are other mutations with tier <= 2.4 | |
1074 sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]]) | |
1075 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0: | |
1076 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"] | |
1077 tier_dict[key1]["tier 4"] = 0 | |
1078 lines = change_tier_after_print[key1] | |
1079 l_i = 0 | |
1080 for li in lines: | |
1081 row = li[0] | |
1082 new_line = li[1] | |
1083 if l_i == 0: | |
1084 new_line[1] == "2.5" | |
1085 ws1.write_row(row, 0, new_line) | |
1086 else: | |
1087 ws1.write_row(row + 1, 0, new_line) | |
1088 | |
1089 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | |
1090 {'type': 'formula', | |
1091 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | |
1092 'format': format1, | |
1093 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | |
1094 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | |
1095 {'type': 'formula', | |
1096 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1), | |
1097 'format': format3, | |
1098 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | |
1099 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | |
1100 {'type': 'formula', | |
1101 'criteria': '=$B${}>="3"'.format(row + 1), | |
1102 'format': format2, | |
1103 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) | |
1104 | |
1105 l_i += 1 | |
1106 | |
1066 # sheet 2 | 1107 # sheet 2 |
1067 if chimera_correction: | 1108 if chimera_correction: |
1068 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', | 1109 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', |
1069 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', | 1110 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', |
1070 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', | 1111 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', |