Mercurial > repos > jjohnson > pileup_to_vcf
diff pileup_to_vcf.py @ 4:fafa105e5f58
Fix base quality
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 19 Mar 2013 13:04:40 -0500 |
parents | aa76c8dd97e6 |
children | d90f56ecd2f9 |
line wrap: on
line diff
--- a/pileup_to_vcf.py Tue Mar 19 12:49:45 2013 -0500 +++ b/pileup_to_vcf.py Tue Mar 19 13:04:40 2013 -0500 @@ -155,12 +155,13 @@ mc = 0 # match count while bi < len(bases): base = bases[bi] - if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33) + # if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33) bi += 1 if base in '<>' : #reference skip (between paired reads) + if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) qi += 1 - pass; elif base in '.,' : # match reference on forward/reverse strand + if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) mc += 1 adp += 1 qual = ord(quals[qi]) -33 @@ -169,6 +170,7 @@ if qual >= min_base_quality: qdp += 1 elif base in 'ACGTNacgtn' : # SNP on forward/reverse strand + if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) adp += 1 qual = ord(quals[qi]) -33 qi += 1 @@ -183,6 +185,7 @@ bi += len(indel_len_str) # increment the index by the len of the len string indel_len = int(indel_len_str) indel = bases[bi:bi+indel_len].upper() # get the indel bases + if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-2,base,indel) bi += indel_len # increment the index by the len of the indel if base == '+': # record insertion variant @@ -198,11 +201,14 @@ else: deletions[indel] = 1 elif base == '*' : # a deletion from a prior base + if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) qi += 1 elif base == '^' : #start of read (followed by read quality char) + if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-1,base,bases[bi]) read_mapping_qual = ord(bases[bi]) - 33 bi += 1 elif base == '$' : #end of read + if debug: print >> sys.stderr, "%3d\t%s" % (bi-1,base) pass # no associated quality in either bases or quals dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else sdp if dp_as == 'source' else adp if debug: print >> sys.stderr, "match count: %d" % mc