comparison pileup_to_vcf.py @ 4:fafa105e5f58

Fix base quality
author Jim Johnson <jj@umn.edu>
date Tue, 19 Mar 2013 13:04:40 -0500
parents aa76c8dd97e6
children d90f56ecd2f9
comparison
equal deleted inserted replaced
3:aa76c8dd97e6 4:fafa105e5f58
153 qdp = 0 # read coverage depth with base call quality passing minimum 153 qdp = 0 # read coverage depth with base call quality passing minimum
154 variants = dict() # variant -> count 154 variants = dict() # variant -> count
155 mc = 0 # match count 155 mc = 0 # match count
156 while bi < len(bases): 156 while bi < len(bases):
157 base = bases[bi] 157 base = bases[bi]
158 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33) 158 # if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33)
159 bi += 1 159 bi += 1
160 if base in '<>' : #reference skip (between paired reads) 160 if base in '<>' : #reference skip (between paired reads)
161 qi += 1 161 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33)
162 pass; 162 qi += 1
163 elif base in '.,' : # match reference on forward/reverse strand 163 elif base in '.,' : # match reference on forward/reverse strand
164 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33)
164 mc += 1 165 mc += 1
165 adp += 1 166 adp += 1
166 qual = ord(quals[qi]) -33 167 qual = ord(quals[qi]) -33
167 qi += 1 168 qi += 1
168 # count match 169 # count match
169 if qual >= min_base_quality: 170 if qual >= min_base_quality:
170 qdp += 1 171 qdp += 1
171 elif base in 'ACGTNacgtn' : # SNP on forward/reverse strand 172 elif base in 'ACGTNacgtn' : # SNP on forward/reverse strand
173 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33)
172 adp += 1 174 adp += 1
173 qual = ord(quals[qi]) -33 175 qual = ord(quals[qi]) -33
174 qi += 1 176 qi += 1
175 # record snp variant 177 # record snp variant
176 if qual >= min_base_quality: 178 if qual >= min_base_quality:
181 m = re.match(indel_len_pattern,bases[bi:]) 183 m = re.match(indel_len_pattern,bases[bi:])
182 indel_len_str = m.group(0) # get the indel len string 184 indel_len_str = m.group(0) # get the indel len string
183 bi += len(indel_len_str) # increment the index by the len of the len string 185 bi += len(indel_len_str) # increment the index by the len of the len string
184 indel_len = int(indel_len_str) 186 indel_len = int(indel_len_str)
185 indel = bases[bi:bi+indel_len].upper() # get the indel bases 187 indel = bases[bi:bi+indel_len].upper() # get the indel bases
188 if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-2,base,indel)
186 bi += indel_len # increment the index by the len of the indel 189 bi += indel_len # increment the index by the len of the indel
187 if base == '+': 190 if base == '+':
188 # record insertion variant 191 # record insertion variant
189 if indel in insertions: 192 if indel in insertions:
190 insertions[indel] += 1 193 insertions[indel] += 1
196 if indel in deletions: 199 if indel in deletions:
197 deletions[indel] += 1 200 deletions[indel] += 1
198 else: 201 else:
199 deletions[indel] = 1 202 deletions[indel] = 1
200 elif base == '*' : # a deletion from a prior base 203 elif base == '*' : # a deletion from a prior base
204 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33)
201 qi += 1 205 qi += 1
202 elif base == '^' : #start of read (followed by read quality char) 206 elif base == '^' : #start of read (followed by read quality char)
207 if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-1,base,bases[bi])
203 read_mapping_qual = ord(bases[bi]) - 33 208 read_mapping_qual = ord(bases[bi]) - 33
204 bi += 1 209 bi += 1
205 elif base == '$' : #end of read 210 elif base == '$' : #end of read
211 if debug: print >> sys.stderr, "%3d\t%s" % (bi-1,base)
206 pass # no associated quality in either bases or quals 212 pass # no associated quality in either bases or quals
207 dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else sdp if dp_as == 'source' else adp 213 dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else sdp if dp_as == 'source' else adp
208 if debug: print >> sys.stderr, "match count: %d" % mc 214 if debug: print >> sys.stderr, "match count: %d" % mc
209 if debug: print >> sys.stderr, "snps: %s\tins: %s\tdel: %s" % (snps,insertions,deletions) 215 if debug: print >> sys.stderr, "snps: %s\tins: %s\tdel: %s" % (snps,insertions,deletions)
210 if dp < max(1,min_coverage) : 216 if dp < max(1,min_coverage) :