diff pileup_to_vcf.py @ 4:fafa105e5f58

Fix base quality
author Jim Johnson <jj@umn.edu>
date Tue, 19 Mar 2013 13:04:40 -0500
parents aa76c8dd97e6
children d90f56ecd2f9
line wrap: on
line diff
--- a/pileup_to_vcf.py	Tue Mar 19 12:49:45 2013 -0500
+++ b/pileup_to_vcf.py	Tue Mar 19 13:04:40 2013 -0500
@@ -155,12 +155,13 @@
       mc = 0 # match count
       while bi < len(bases):
         base = bases[bi] 
-        if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33)
+        # if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33)
         bi += 1
         if base in '<>' :	#reference skip (between paired reads)
+          if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33)
           qi += 1
-          pass;
         elif base in '.,' : # match reference on forward/reverse strand
+          if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33)
           mc += 1
           adp += 1
           qual = ord(quals[qi]) -33
@@ -169,6 +170,7 @@
           if qual >= min_base_quality:
             qdp += 1
         elif base in 'ACGTNacgtn' : # SNP on forward/reverse strand
+          if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33)
           adp += 1
           qual = ord(quals[qi]) -33
           qi += 1
@@ -183,6 +185,7 @@
           bi += len(indel_len_str) # increment the index by the len of the len string
           indel_len = int(indel_len_str)
           indel = bases[bi:bi+indel_len].upper() # get the indel bases
+          if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-2,base,indel)
           bi += indel_len # increment the index by the len of the indel 
           if base == '+':
             # record insertion variant
@@ -198,11 +201,14 @@
             else:
               deletions[indel] = 1
         elif base == '*' : # a deletion from a prior base
+          if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33)
           qi += 1
         elif base == '^' : #start of read (followed by read quality char)
+          if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-1,base,bases[bi])
           read_mapping_qual = ord(bases[bi]) - 33 
           bi += 1
         elif base == '$' : #end of read
+          if debug: print >> sys.stderr, "%3d\t%s" % (bi-1,base)
           pass # no associated quality in either bases or quals
       dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else sdp if dp_as == 'source' else adp
       if debug: print >> sys.stderr, "match count: %d" % mc