diff read2mut.py @ 7:ded0dc6a20d3 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Mon, 25 Jan 2021 13:21:55 +0000
parents 11a2a34f8a2b
children ced1a529e7cd
line wrap: on
line diff
--- a/read2mut.py	Mon Jan 18 09:49:15 2021 +0000
+++ b/read2mut.py	Mon Jan 25 13:21:55 2021 +0000
@@ -130,11 +130,11 @@
         #    break
         chrom = variant.CHROM
         stop_pos = variant.start
-        chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
+        #chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
         ref = variant.REF
         alt = variant.ALT[0]
-#        nc = variant.format('NC')
-        ad = variant.format('AD')
+        chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt
+
         if len(ref) == len(alt):
             mut_array.append([chrom, stop_pos, ref, alt])
             i += 1
@@ -216,12 +216,12 @@
     # create pure_tags_dict
     pure_tags_dict = {}
     for key1, value1 in sorted(mut_dict.items()):
-    	if len(np.where(np.array(['#'.join(str(i) for i in z)
-                               for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0:
-    		continue
+    	#if len(np.where(np.array(['#'.join(str(i) for i in z)
+        #                       for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0]) == 0:
+    #		continue
 
         i = np.where(np.array(['#'.join(str(i) for i in z)
-                               for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0]
+                               for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0]
         ref = mut_array[i, 2]
         alt = mut_array[i, 3]
         pure_tags_dict[key1] = {}
@@ -310,7 +310,7 @@
         chimeric_tag = {}
         if key1 in pure_tags_dict_short.keys():
             i = np.where(np.array(['#'.join(str(i) for i in z)
-                                   for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0]
+                                   for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0]
             ref = mut_array[i, 2]
             alt = mut_array[i, 3]
             dcs_median = cvrg_dict[key1][2]
@@ -929,7 +929,7 @@
                             counter_tier6 += 1
                             tier_dict[key1]["tier 6"] += 1
 
-                        chrom, pos = re.split(r'\#', key1)
+                        chrom, pos, ref_a, alt_a = re.split(r'\#', key1)
                         var_id = '-'.join([chrom, str(int(pos)+1), ref, alt])
                         sample_tag = key2[:-5]
                         array2 = np.unique(whole_array)  # remove duplicate sequences to decrease running time
@@ -1067,10 +1067,10 @@
     for key1, value1 in sorted(tier_dict.items()):
         if key1 in pure_tags_dict_short.keys():
             i = np.where(np.array(['#'.join(str(i) for i in z)
-                                   for z in zip(mut_array[:, 0], mut_array[:, 1])]) == key1)[0][0]
+                                   for z in zip(mut_array[:, 0], mut_array[:, 1], mut_array[:, 2], mut_array[:, 3])]) == key1)[0][0]
             ref = mut_array[i, 2]
             alt = mut_array[i, 3]
-            chrom, pos = re.split(r'\#', key1)
+            chrom, pos, ref_a, alt_a = re.split(r'\#', key1)
             ref_count = cvrg_dict[key1][0]
             alt_count = cvrg_dict[key1][1]
             cvrg = ref_count + alt_count
@@ -1154,11 +1154,11 @@
                          ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"),
                          ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"),
                          ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"),
-                         ("Tier 5.1", "variants is close to softclipping in both mates"),
-                         ("Tier 5.2", "variants is close to softclipping in one of the mates"),
-                         ("Tier 5.3", "variants is close to softclipping in one of the SSCS of both mates"),
-                         ("Tier 5.4", "variants is close to softclipping in one mate (no information of second mate"),
-                         ("Tier 5.5", "variants is close to softclipping in one of the SSCS (no information of the second mate"),
+                         ("Tier 5.1", "variant is close to softclipping in both mates"),
+                         ("Tier 5.2", "variant is close to softclipping in one of the mates"),
+                         ("Tier 5.3", "variant is close to softclipping in one of the SSCS of both mates"),
+                         ("Tier 5.4", "variant is close to softclipping in one mate (no information of second mate)"),
+                         ("Tier 5.5", "variant is close to softclipping in one of the SSCS (no information of the second mate)"),
                          ("Tier 6", "remaining variants")]
     examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289",
                         "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0",