comparison read2mut.py @ 48:e2a655533077 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Wed, 10 Mar 2021 14:37:20 +0000
parents edf8596463a8
children aa45100f5b14
comparison
equal deleted inserted replaced
47:edf8596463a8 48:e2a655533077
308 counter_tier7 = 0 308 counter_tier7 = 0
309 309
310 row = 1 310 row = 1
311 tier_dict = {} 311 tier_dict = {}
312 chimera_dict = {} 312 chimera_dict = {}
313 change_tier_after_print = {}
313 for key1, value1 in sorted(mut_dict.items()): 314 for key1, value1 in sorted(mut_dict.items()):
314 counts_mut = 0 315 counts_mut = 0
315 chimeric_tag_list = [] 316 chimeric_tag_list = []
316 chimeric_tag = {} 317 chimeric_tag = {}
317 if key1 in pure_tags_dict_short.keys(): 318 if key1 in pure_tags_dict_short.keys():
826 trimmed = True 827 trimmed = True
827 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) 828 details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4)
828 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) 829 details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
829 830
830 831
831 sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]]) 832 #sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]])
832 833
833 # assign tiers 834 # assign tiers
834 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) & 835 if ((all(int(ij) >= 3 for ij in [total1new, total4new]) &
835 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) | 836 all(float(ij) >= 0.75 for ij in [alt1ff, alt4ff])) |
836 (all(int(ij) >= 3 for ij in [total2new, total3new]) & 837 (all(int(ij) >= 3 for ij in [total2new, total3new]) &
896 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))): 897 all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]))):
897 tier = "3.2" 898 tier = "3.2"
898 counter_tier32 += 1 899 counter_tier32 += 1
899 tier_dict[key1]["tier 3.2"] += 1 900 tier_dict[key1]["tier 3.2"] += 1
900 901
901 elif (trimmed) and (sum_highTiers > 1): 902 #elif (trimmed) and (sum_highTiers > 1):
902 tier = "2.5" 903 # tier = "2.5"
903 counter_tier25 += 1 904 # counter_tier25 += 1
904 tier_dict[key1]["tier 2.5"] += 1 905 # tier_dict[key1]["tier 2.5"] += 1
905 906
906 elif (trimmed): 907 elif (trimmed):
907 tier = "4" 908 tier = "4"
908 counter_tier4 += 1 909 counter_tier4 += 1
909 tier_dict[key1]["tier 4"] += 1 910 tier_dict[key1]["tier 4"] += 1
1029 read_pos2 = read_len_median2 = None 1030 read_pos2 = read_len_median2 = None
1030 if (read_pos3 == -1): 1031 if (read_pos3 == -1):
1031 read_pos3 = read_len_median3 = None 1032 read_pos3 = read_len_median3 = None
1032 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) 1033 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
1033 ws1.write_row(row, 0, line) 1034 ws1.write_row(row, 0, line)
1034 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) 1035 line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
1035 ws1.write_row(row + 1, 0, line) 1036 ws1.write_row(row + 1, 0, line2)
1036 1037
1037 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1038 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1038 {'type': 'formula', 1039 {'type': 'formula',
1039 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 1040 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
1040 'format': format1, 1041 'format': format1,
1047 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 1048 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1048 {'type': 'formula', 1049 {'type': 'formula',
1049 'criteria': '=$B${}>="3"'.format(row + 1), 1050 'criteria': '=$B${}>="3"'.format(row + 1),
1050 'format': format2, 1051 'format': format2,
1051 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)}) 1052 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1053 if trimmed:
1054 if key1 not in list(mut_read_pos_dict.keys()):
1055 change_tier_after_print[key1] = [((row, line), (row, line2))]
1056 else:
1057 change_tier_after_print[key1].append(((row, line), (row, line2)))
1052 1058
1053 row += 3 1059 row += 3
1054 if chimera_correction: 1060 if chimera_correction:
1055 chimeric_dcs_high_tiers = 0 1061 chimeric_dcs_high_tiers = 0
1056 chimeric_dcs = 0 1062 chimeric_dcs = 0
1061 if high_tiers == len(tiers): 1067 if high_tiers == len(tiers):
1062 chimeric_dcs_high_tiers += high_tiers - 1 1068 chimeric_dcs_high_tiers += high_tiers - 1
1063 else: 1069 else:
1064 chimeric_dcs_high_tiers += high_tiers 1070 chimeric_dcs_high_tiers += high_tiers
1065 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) 1071 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers)
1072
1073 # move tier 4 counts to tier 2.5 if there are other mutations with tier <= 2.4
1074 sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]])
1075 if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0:
1076 tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"]
1077 tier_dict[key1]["tier 4"] = 0
1078 lines = change_tier_after_print[key1]
1079 l_i = 0
1080 for li in lines:
1081 row = li[0]
1082 new_line = li[1]
1083 if l_i == 0:
1084 new_line[1] == "2.5"
1085 ws1.write_row(row, 0, new_line)
1086 else:
1087 ws1.write_row(row + 1, 0, new_line)
1088
1089 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1090 {'type': 'formula',
1091 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
1092 'format': format1,
1093 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1094 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1095 {'type': 'formula',
1096 'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
1097 'format': format3,
1098 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1099 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
1100 {'type': 'formula',
1101 'criteria': '=$B${}>="3"'.format(row + 1),
1102 'format': format2,
1103 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
1104
1105 l_i += 1
1106
1066 # sheet 2 1107 # sheet 2
1067 if chimera_correction: 1108 if chimera_correction:
1068 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', 1109 header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',
1069 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5', 1110 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', 'tier 2.5',
1070 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', 1111 'tier 3.1', 'tier 3.2', 'tier 4', 'tier 5.1', 'tier 5.2', 'tier 5.3', 'tier 5.4', 'tier 5.5', 'tier 6', 'tier 7', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2',