diff mut2sscs.py @ 6:11a2a34f8a2b draft

planemo upload for repository https://github.com/gpovysil/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Mon, 18 Jan 2021 09:49:15 +0000
parents e5953c54cfb5
children ded0dc6a20d3
line wrap: on
line diff
--- a/mut2sscs.py	Tue Oct 27 12:46:55 2020 +0000
+++ b/mut2sscs.py	Mon Jan 18 09:49:15 2021 +0000
@@ -27,12 +27,13 @@
 
 import numpy as np
 import pysam
+from cyvcf2 import VCF
 
 
 def make_argparser():
-    parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.')
+    parser = argparse.ArgumentParser(description='Takes a vcf file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.')
     parser.add_argument('--mutFile',
-                        help='TABULAR file with DCS mutations.')
+                        help='VCR file with DCS mutations.')
     parser.add_argument('--bamFile',
                         help='BAM file with aligned SSCS reads.')
     parser.add_argument('--outputJson',
@@ -54,74 +55,77 @@
     if os.path.isfile(file2) is False:
         sys.exit("Error: Could not find '{}'".format(file2))
 
-    # 1. read mut file
-    with open(file1, 'r') as mut:
-        mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)
-
-    # 2 read SSCS bam file
-    # pysam.index(file2)
+    # read SSCS bam file
+#    pysam.index(file2)
     bam = pysam.AlignmentFile(file2, "rb")
 
     # get tags
     mut_pos_dict = {}
     ref_pos_dict = {}
-    if mut_array.shape == (1,13):
-        mut_array = mut_array.reshape((1, len(mut_array)))
 
-    for m in range(0, len(mut_array[:, 0])):
-        print(str(m + 1) + " of " + str(len(mut_array[:, 0])))
-        chrom = mut_array[m, 1]
-        stop_pos = mut_array[m, 2].astype(int)
+    for variant in VCF(file1):
+        chrom = variant.CHROM
+        stop_pos = variant.start
         chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
-        ref = mut_array[m, 9]
-        alt = mut_array[m, 10]
+        ref = variant.REF
+        alt = variant.ALT[0]
+#        nc = variant.format('NC')
+        ad = variant.format('AD')
+
+        if len(ref) == len(alt):
 
-        for pileupcolumn in bam.pileup(chrom.tostring(), stop_pos - 2, stop_pos, max_depth=1000000000):
-            if pileupcolumn.reference_pos == stop_pos - 1:
-                count_alt = 0
-                count_ref = 0
-                count_indel = 0
-                print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
-                      "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
-                for pileupread in pileupcolumn.pileups:
-                    if not pileupread.is_del and not pileupread.is_refskip:
-                        tag = pileupread.alignment.query_name
-                        abba = tag[-2:]
-                        # query position is None if is_del or is_refskip is set.
-                        if pileupread.alignment.query_sequence[pileupread.query_position] == alt:
-                            count_alt += 1
-                            if chrom_stop_pos in mut_pos_dict:
-                                if abba in mut_pos_dict[chrom_stop_pos]:
-                                    mut_pos_dict[chrom_stop_pos][abba] += 1
+            for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000):
+                if pileupcolumn.reference_pos == stop_pos:
+                    count_alt = 0
+                    count_ref = 0
+                    count_indel = 0
+                    print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
+                          "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
+                    for pileupread in pileupcolumn.pileups:
+                        if not pileupread.is_del and not pileupread.is_refskip:
+                            tag = pileupread.alignment.query_name
+                            abba = tag[-2:]
+                            # query position is None if is_del or is_refskip is set.
+                            if pileupread.alignment.query_sequence[pileupread.query_position] == alt:
+                                count_alt += 1
+                                if chrom_stop_pos in mut_pos_dict:
+                                    if abba in mut_pos_dict[chrom_stop_pos]:
+                                        mut_pos_dict[chrom_stop_pos][abba] += 1
+                                    else:
+                                        mut_pos_dict[chrom_stop_pos][abba] = 1
                                 else:
+                                    mut_pos_dict[chrom_stop_pos] = {}
                                     mut_pos_dict[chrom_stop_pos][abba] = 1
-                            else:
-                                mut_pos_dict[chrom_stop_pos] = {}
-                                mut_pos_dict[chrom_stop_pos][abba] = 1
-                        elif pileupread.alignment.query_sequence[pileupread.query_position] == ref:
-                            count_ref += 1
-                            if chrom_stop_pos in ref_pos_dict:
-                                if abba in ref_pos_dict[chrom_stop_pos]:
-                                    ref_pos_dict[chrom_stop_pos][abba] += 1
+                                if chrom_stop_pos not in ref_pos_dict:
+                                    ref_pos_dict[chrom_stop_pos] = {}
+                                    ref_pos_dict[chrom_stop_pos][abba] = 0
+
+                            elif pileupread.alignment.query_sequence[pileupread.query_position] == ref:
+                                count_ref += 1
+                                if chrom_stop_pos in ref_pos_dict:
+                                    if abba in ref_pos_dict[chrom_stop_pos]:
+                                        ref_pos_dict[chrom_stop_pos][abba] += 1
+                                    else:
+                                        ref_pos_dict[chrom_stop_pos][abba] = 1
                                 else:
+                                    ref_pos_dict[chrom_stop_pos] = {}
                                     ref_pos_dict[chrom_stop_pos][abba] = 1
                             else:
-                                ref_pos_dict[chrom_stop_pos] = {}
-                                ref_pos_dict[chrom_stop_pos][abba] = 1
-                        else:
-                            count_indel += 1
+                                count_indel += 1
 
-                print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" %
-                      (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel))
+                    print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" %
+                          (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel))
 
-        # if mutation is in DCS file but not in SSCS, then set counts to NA
-        if chrom_stop_pos not in mut_pos_dict.keys():
-            mut_pos_dict[chrom_stop_pos] = {}
-            mut_pos_dict[chrom_stop_pos]["ab"] = 0
-            mut_pos_dict[chrom_stop_pos]["ba"] = 0
-            ref_pos_dict[chrom_stop_pos] = {}
-            ref_pos_dict[chrom_stop_pos]["ab"] = 0
-            ref_pos_dict[chrom_stop_pos]["ba"] = 0
+            # if mutation is in DCS file but not in SSCS, then set counts to NA
+            if chrom_stop_pos not in mut_pos_dict.keys():
+                mut_pos_dict[chrom_stop_pos] = {}
+                mut_pos_dict[chrom_stop_pos]["ab"] = 0
+                mut_pos_dict[chrom_stop_pos]["ba"] = 0
+                ref_pos_dict[chrom_stop_pos] = {}
+                ref_pos_dict[chrom_stop_pos]["ab"] = 0
+                ref_pos_dict[chrom_stop_pos]["ba"] = 0
+        else:
+            print("indels are currently not evaluated")
     bam.close()
 
     # save counts
@@ -131,3 +135,4 @@
 
 if __name__ == '__main__':
     sys.exit(mut2sscs(sys.argv))
+