diff read2mut.py @ 48:e2a655533077 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Wed, 10 Mar 2021 14:37:20 +0000
parents edf8596463a8
children aa45100f5b14
line wrap: on
line diff
--- a/read2mut.py	Tue Mar 09 12:48:29 2021 +0000
+++ b/read2mut.py	Wed Mar 10 14:37:20 2021 +0000
@@ -310,6 +310,7 @@
     row = 1
     tier_dict = {}
     chimera_dict = {}
+    change_tier_after_print = {}
     for key1, value1 in sorted(mut_dict.items()):
         counts_mut = 0
         chimeric_tag_list = []
@@ -828,7 +829,7 @@
                             details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3)
 
 
-                        sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]])
+                        #sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]])
 
                         # assign tiers
                         if ((all(int(ij) >= 3 for ij in [total1new, total4new]) &
@@ -898,10 +899,10 @@
                             counter_tier32 += 1
                             tier_dict[key1]["tier 3.2"] += 1
 
-                        elif (trimmed) and (sum_highTiers > 1):
-                            tier = "2.5"
-                            counter_tier25 += 1
-                            tier_dict[key1]["tier 2.5"] += 1
+                        #elif (trimmed) and (sum_highTiers > 1):
+                        #    tier = "2.5"
+                        #    counter_tier25 += 1
+                        #    tier_dict[key1]["tier 2.5"] += 1
 
                         elif (trimmed):
                             tier = "4"
@@ -1031,8 +1032,8 @@
                             read_pos3 = read_len_median3 = None
                         line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
                         ws1.write_row(row, 0, line)
-                        line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
-                        ws1.write_row(row + 1, 0, line)
+                        line2 = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
+                        ws1.write_row(row + 1, 0, line2)
 
                         ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
                                                {'type': 'formula',
@@ -1049,6 +1050,11 @@
                                                 'criteria': '=$B${}>="3"'.format(row + 1),
                                                 'format': format2,
                                                 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                        if trimmed:
+                            if key1 not in list(mut_read_pos_dict.keys()):
+                                change_tier_after_print[key1] = [((row, line), (row, line2))]
+                            else:
+                                change_tier_after_print[key1].append(((row, line), (row, line2)))
 
                         row += 3
             if chimera_correction:
@@ -1063,6 +1069,41 @@
                     else:
                         chimeric_dcs_high_tiers += high_tiers
                 chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers)
+
+            # move tier 4 counts to tier 2.5 if there are other mutations with tier <= 2.4
+            sum_highTiers = sum([tier_dict[key1][ij] for ij in tier_dict[key1].keys()[:6]])
+            if tier_dict[key1]["tier 4"] > 0 and sum_highTiers > 0:
+                tier_dict[key1]["tier 2.5"] = tier_dict[key1]["tier 4"]
+                tier_dict[key1]["tier 4"] = 0
+                lines = change_tier_after_print[key1]
+                l_i = 0
+                for li in lines:
+                    row = li[0]
+                    new_line = li[1]
+                    if l_i == 0:
+                        new_line[1] == "2.5"
+                        ws1.write_row(row, 0, new_line)
+                    else:
+                        ws1.write_row(row + 1, 0, new_line)
+
+                    ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
+                                               {'type': 'formula',
+                                                'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
+                                                'format': format1,
+                                                'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                    ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
+                                               {'type': 'formula',
+                                                'criteria': '=OR($B${}="2.1", $B${}="2.2", $B${}="2.3", $B${}="2.4", $B${}="2.5")'.format(row + 1, row + 1, row + 1, row + 1, row + 1),
+                                                'format': format3,
+                                                'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+                    ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
+                                               {'type': 'formula',
+                                                'criteria': '=$B${}>="3"'.format(row + 1),
+                                                'format': format2,
+                                                'multi_range': 'L{}:M{} T{}:U{} B{}'.format(row + 1, row + 2, row + 1, row + 2, row + 1, row + 2)})
+
+                    l_i += 1
+
     # sheet 2
     if chimera_correction:
         header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)',