Mercurial > repos > jjohnson > pileup_to_vcf
comparison pileup_to_vcf.py @ 9:c0a6e8f595ec default tip
Add option to set VCF ID field value, this can be used to ID germline variants for SnpSift
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 11 Apr 2013 10:28:10 -0500 |
parents | e77ab15bbce9 |
children |
comparison
equal
deleted
inserted
replaced
8:07cd87e94fbe | 9:c0a6e8f595ec |
---|---|
32 import optparse | 32 import optparse |
33 from optparse import OptionParser | 33 from optparse import OptionParser |
34 | 34 |
35 vcf_header = """\ | 35 vcf_header = """\ |
36 ##fileformat=VCFv4.0 | 36 ##fileformat=VCFv4.0 |
37 ##source=pileup_to_vcf.pyV1.1 | 37 ##source=pileup_to_vcf.pyV1.2 |
38 ##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\"> | 38 ##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\"> |
39 ##INFO=<ID=SAF,Number=.,Type=Float,Description=\"Specific Allele Frequency\"> | 39 ##INFO=<ID=SAF,Number=.,Type=Float,Description=\"Specific Allele Frequency\"> |
40 ##FILTER=<ID=DP,Description=\"Minimum depth of %s\"> | 40 ##FILTER=<ID=DP,Description=\"Minimum depth of %s\"> |
41 ##FILTER=<ID=SAF,Description=\"Allele frequency of at least %s with base quality minimum %d\"> | 41 ##FILTER=<ID=SAF,Description=\"Allele frequency of at least %s with base quality minimum %d\"> |
42 #CHROM POS ID REF ALT QUAL FILTER INFO\ | 42 #CHROM POS ID REF ALT QUAL FILTER INFO\ |
53 parser.add_option( '-r', '--report_depth', type='choice', choices=['source','ref','qual','all'], default='ref', dest='report_depth', help='Coverage depth as: source - as reported in the ipleup source, ref - reads rcovering base, qual - reads with base call above min_base_quality, all - reads spanning base (includes indels)' ) | 53 parser.add_option( '-r', '--report_depth', type='choice', choices=['source','ref','qual','all'], default='ref', dest='report_depth', help='Coverage depth as: source - as reported in the ipleup source, ref - reads rcovering base, qual - reads with base call above min_base_quality, all - reads spanning base (includes indels)' ) |
54 parser.add_option( '-b', '--min_base_quality', type='int', default='0', dest='min_base_quality', help='The minimum base quality for a base call to be counted' ) | 54 parser.add_option( '-b', '--min_base_quality', type='int', default='0', dest='min_base_quality', help='The minimum base quality for a base call to be counted' ) |
55 parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' ) | 55 parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' ) |
56 parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' ) | 56 parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' ) |
57 parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' ) | 57 parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' ) |
58 # ID to use | |
59 parser.add_option( '-I', '--id', dest='id', default=None, help='The value for the VCF ID field' ) | |
58 # select columns | 60 # select columns |
59 parser.add_option( '-C', '--chrom_col', type='int', default='1', dest='chrom_col', help='The ordinal position (starting with 1) of the chromosome column' ) | 61 parser.add_option( '-C', '--chrom_col', type='int', default='1', dest='chrom_col', help='The ordinal position (starting with 1) of the chromosome column' ) |
60 parser.add_option( '-P', '--pos_col', type='int', default='2', dest='pos_col', help='The ordinal position (starting with 1) of the position column' ) | 62 parser.add_option( '-P', '--pos_col', type='int', default='2', dest='pos_col', help='The ordinal position (starting with 1) of the position column' ) |
61 parser.add_option( '-R', '--ref_col', type='int', default='3', dest='ref_col', help='The ordinal position (starting with 1) of the reference base column' ) | 63 parser.add_option( '-R', '--ref_col', type='int', default='3', dest='ref_col', help='The ordinal position (starting with 1) of the reference base column' ) |
62 parser.add_option( '-D', '--coverage_col', type='int', default='4', dest='coverage_col', help='The ordinal position (starting with 1) of the read coverage depth column' ) | 64 parser.add_option( '-D', '--coverage_col', type='int', default='4', dest='coverage_col', help='The ordinal position (starting with 1) of the read coverage depth column' ) |
114 except Exception, e: | 116 except Exception, e: |
115 print >> sys.stderr, "failed: %s" % e | 117 print >> sys.stderr, "failed: %s" % e |
116 exit(3) | 118 exit(3) |
117 else: | 119 else: |
118 outputFile = sys.stdout | 120 outputFile = sys.stdout |
121 | |
122 vcf_id = options.id if options.id else "." | |
119 | 123 |
120 indel_len_pattern = '([1-9][0-9]*)' | 124 indel_len_pattern = '([1-9][0-9]*)' |
121 ref_skip_pattern = '[<>]' | 125 ref_skip_pattern = '[<>]' |
122 | 126 |
123 SAF_header = "SAF"; | 127 SAF_header = "SAF"; |
249 saf = v/float(dp) | 253 saf = v/float(dp) |
250 if saf >= min_allele_freq: | 254 if saf >= min_allele_freq: |
251 alts.append(vcf_ref[:len(vcf_ref) - len(k)]) # TODO alt will be a substring of vcf_ref, test this | 255 alts.append(vcf_ref[:len(vcf_ref) - len(k)]) # TODO alt will be a substring of vcf_ref, test this |
252 safs.append(saf) | 256 safs.append(saf) |
253 if len(alts) > 0: | 257 if len(alts) > 0: |
254 vcf_id = "." | |
255 vcf_qual = "." | 258 vcf_qual = "." |
256 vcf_filter = "PASS" | 259 vcf_filter = "PASS" |
257 # if not allow_multiples, report only the most freq alt | 260 # if not allow_multiples, report only the most freq alt |
258 if not allow_multiples and len(alts) > 1: | 261 if not allow_multiples and len(alts) > 1: |
259 max_saf = max(safs) | 262 max_saf = max(safs) |