Mercurial > repos > jjohnson > pileup_to_vcf
comparison pileup_to_vcf.py @ 4:fafa105e5f58
Fix base quality
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 19 Mar 2013 13:04:40 -0500 |
parents | aa76c8dd97e6 |
children | d90f56ecd2f9 |
comparison
equal
deleted
inserted
replaced
3:aa76c8dd97e6 | 4:fafa105e5f58 |
---|---|
153 qdp = 0 # read coverage depth with base call quality passing minimum | 153 qdp = 0 # read coverage depth with base call quality passing minimum |
154 variants = dict() # variant -> count | 154 variants = dict() # variant -> count |
155 mc = 0 # match count | 155 mc = 0 # match count |
156 while bi < len(bases): | 156 while bi < len(bases): |
157 base = bases[bi] | 157 base = bases[bi] |
158 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33) | 158 # if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi,base,qi,quals[qi],ord(quals[qi]) - 33) |
159 bi += 1 | 159 bi += 1 |
160 if base in '<>' : #reference skip (between paired reads) | 160 if base in '<>' : #reference skip (between paired reads) |
161 qi += 1 | 161 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) |
162 pass; | 162 qi += 1 |
163 elif base in '.,' : # match reference on forward/reverse strand | 163 elif base in '.,' : # match reference on forward/reverse strand |
164 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) | |
164 mc += 1 | 165 mc += 1 |
165 adp += 1 | 166 adp += 1 |
166 qual = ord(quals[qi]) -33 | 167 qual = ord(quals[qi]) -33 |
167 qi += 1 | 168 qi += 1 |
168 # count match | 169 # count match |
169 if qual >= min_base_quality: | 170 if qual >= min_base_quality: |
170 qdp += 1 | 171 qdp += 1 |
171 elif base in 'ACGTNacgtn' : # SNP on forward/reverse strand | 172 elif base in 'ACGTNacgtn' : # SNP on forward/reverse strand |
173 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) | |
172 adp += 1 | 174 adp += 1 |
173 qual = ord(quals[qi]) -33 | 175 qual = ord(quals[qi]) -33 |
174 qi += 1 | 176 qi += 1 |
175 # record snp variant | 177 # record snp variant |
176 if qual >= min_base_quality: | 178 if qual >= min_base_quality: |
181 m = re.match(indel_len_pattern,bases[bi:]) | 183 m = re.match(indel_len_pattern,bases[bi:]) |
182 indel_len_str = m.group(0) # get the indel len string | 184 indel_len_str = m.group(0) # get the indel len string |
183 bi += len(indel_len_str) # increment the index by the len of the len string | 185 bi += len(indel_len_str) # increment the index by the len of the len string |
184 indel_len = int(indel_len_str) | 186 indel_len = int(indel_len_str) |
185 indel = bases[bi:bi+indel_len].upper() # get the indel bases | 187 indel = bases[bi:bi+indel_len].upper() # get the indel bases |
188 if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-2,base,indel) | |
186 bi += indel_len # increment the index by the len of the indel | 189 bi += indel_len # increment the index by the len of the indel |
187 if base == '+': | 190 if base == '+': |
188 # record insertion variant | 191 # record insertion variant |
189 if indel in insertions: | 192 if indel in insertions: |
190 insertions[indel] += 1 | 193 insertions[indel] += 1 |
196 if indel in deletions: | 199 if indel in deletions: |
197 deletions[indel] += 1 | 200 deletions[indel] += 1 |
198 else: | 201 else: |
199 deletions[indel] = 1 | 202 deletions[indel] = 1 |
200 elif base == '*' : # a deletion from a prior base | 203 elif base == '*' : # a deletion from a prior base |
204 if debug: print >> sys.stderr, "%3d\t%s\t%3d\t%s %3d" % (bi-1,base,qi,quals[qi],ord(quals[qi]) - 33) | |
201 qi += 1 | 205 qi += 1 |
202 elif base == '^' : #start of read (followed by read quality char) | 206 elif base == '^' : #start of read (followed by read quality char) |
207 if debug: print >> sys.stderr, "%3d\t%s%s" % (bi-1,base,bases[bi]) | |
203 read_mapping_qual = ord(bases[bi]) - 33 | 208 read_mapping_qual = ord(bases[bi]) - 33 |
204 bi += 1 | 209 bi += 1 |
205 elif base == '$' : #end of read | 210 elif base == '$' : #end of read |
211 if debug: print >> sys.stderr, "%3d\t%s" % (bi-1,base) | |
206 pass # no associated quality in either bases or quals | 212 pass # no associated quality in either bases or quals |
207 dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else sdp if dp_as == 'source' else adp | 213 dp = rdp if dp_as == 'ref' else qdp if dp_as == 'qual' else sdp if dp_as == 'source' else adp |
208 if debug: print >> sys.stderr, "match count: %d" % mc | 214 if debug: print >> sys.stderr, "match count: %d" % mc |
209 if debug: print >> sys.stderr, "snps: %s\tins: %s\tdel: %s" % (snps,insertions,deletions) | 215 if debug: print >> sys.stderr, "snps: %s\tins: %s\tdel: %s" % (snps,insertions,deletions) |
210 if dp < max(1,min_coverage) : | 216 if dp < max(1,min_coverage) : |