Mercurial > repos > jjohnson > pileup_to_vcf
comparison pileup_to_vcf.py @ 9:c0a6e8f595ec default tip
Add option to set VCF ID field value, this can be used to ID germline variants for SnpSift
| author | Jim Johnson <jj@umn.edu> |
|---|---|
| date | Thu, 11 Apr 2013 10:28:10 -0500 |
| parents | e77ab15bbce9 |
| children |
comparison
equal
deleted
inserted
replaced
| 8:07cd87e94fbe | 9:c0a6e8f595ec |
|---|---|
| 32 import optparse | 32 import optparse |
| 33 from optparse import OptionParser | 33 from optparse import OptionParser |
| 34 | 34 |
| 35 vcf_header = """\ | 35 vcf_header = """\ |
| 36 ##fileformat=VCFv4.0 | 36 ##fileformat=VCFv4.0 |
| 37 ##source=pileup_to_vcf.pyV1.1 | 37 ##source=pileup_to_vcf.pyV1.2 |
| 38 ##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\"> | 38 ##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\"> |
| 39 ##INFO=<ID=SAF,Number=.,Type=Float,Description=\"Specific Allele Frequency\"> | 39 ##INFO=<ID=SAF,Number=.,Type=Float,Description=\"Specific Allele Frequency\"> |
| 40 ##FILTER=<ID=DP,Description=\"Minimum depth of %s\"> | 40 ##FILTER=<ID=DP,Description=\"Minimum depth of %s\"> |
| 41 ##FILTER=<ID=SAF,Description=\"Allele frequency of at least %s with base quality minimum %d\"> | 41 ##FILTER=<ID=SAF,Description=\"Allele frequency of at least %s with base quality minimum %d\"> |
| 42 #CHROM POS ID REF ALT QUAL FILTER INFO\ | 42 #CHROM POS ID REF ALT QUAL FILTER INFO\ |
| 53 parser.add_option( '-r', '--report_depth', type='choice', choices=['source','ref','qual','all'], default='ref', dest='report_depth', help='Coverage depth as: source - as reported in the ipleup source, ref - reads rcovering base, qual - reads with base call above min_base_quality, all - reads spanning base (includes indels)' ) | 53 parser.add_option( '-r', '--report_depth', type='choice', choices=['source','ref','qual','all'], default='ref', dest='report_depth', help='Coverage depth as: source - as reported in the ipleup source, ref - reads rcovering base, qual - reads with base call above min_base_quality, all - reads spanning base (includes indels)' ) |
| 54 parser.add_option( '-b', '--min_base_quality', type='int', default='0', dest='min_base_quality', help='The minimum base quality for a base call to be counted' ) | 54 parser.add_option( '-b', '--min_base_quality', type='int', default='0', dest='min_base_quality', help='The minimum base quality for a base call to be counted' ) |
| 55 parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' ) | 55 parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' ) |
| 56 parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' ) | 56 parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' ) |
| 57 parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' ) | 57 parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' ) |
| 58 # ID to use | |
| 59 parser.add_option( '-I', '--id', dest='id', default=None, help='The value for the VCF ID field' ) | |
| 58 # select columns | 60 # select columns |
| 59 parser.add_option( '-C', '--chrom_col', type='int', default='1', dest='chrom_col', help='The ordinal position (starting with 1) of the chromosome column' ) | 61 parser.add_option( '-C', '--chrom_col', type='int', default='1', dest='chrom_col', help='The ordinal position (starting with 1) of the chromosome column' ) |
| 60 parser.add_option( '-P', '--pos_col', type='int', default='2', dest='pos_col', help='The ordinal position (starting with 1) of the position column' ) | 62 parser.add_option( '-P', '--pos_col', type='int', default='2', dest='pos_col', help='The ordinal position (starting with 1) of the position column' ) |
| 61 parser.add_option( '-R', '--ref_col', type='int', default='3', dest='ref_col', help='The ordinal position (starting with 1) of the reference base column' ) | 63 parser.add_option( '-R', '--ref_col', type='int', default='3', dest='ref_col', help='The ordinal position (starting with 1) of the reference base column' ) |
| 62 parser.add_option( '-D', '--coverage_col', type='int', default='4', dest='coverage_col', help='The ordinal position (starting with 1) of the read coverage depth column' ) | 64 parser.add_option( '-D', '--coverage_col', type='int', default='4', dest='coverage_col', help='The ordinal position (starting with 1) of the read coverage depth column' ) |
| 114 except Exception, e: | 116 except Exception, e: |
| 115 print >> sys.stderr, "failed: %s" % e | 117 print >> sys.stderr, "failed: %s" % e |
| 116 exit(3) | 118 exit(3) |
| 117 else: | 119 else: |
| 118 outputFile = sys.stdout | 120 outputFile = sys.stdout |
| 121 | |
| 122 vcf_id = options.id if options.id else "." | |
| 119 | 123 |
| 120 indel_len_pattern = '([1-9][0-9]*)' | 124 indel_len_pattern = '([1-9][0-9]*)' |
| 121 ref_skip_pattern = '[<>]' | 125 ref_skip_pattern = '[<>]' |
| 122 | 126 |
| 123 SAF_header = "SAF"; | 127 SAF_header = "SAF"; |
| 249 saf = v/float(dp) | 253 saf = v/float(dp) |
| 250 if saf >= min_allele_freq: | 254 if saf >= min_allele_freq: |
| 251 alts.append(vcf_ref[:len(vcf_ref) - len(k)]) # TODO alt will be a substring of vcf_ref, test this | 255 alts.append(vcf_ref[:len(vcf_ref) - len(k)]) # TODO alt will be a substring of vcf_ref, test this |
| 252 safs.append(saf) | 256 safs.append(saf) |
| 253 if len(alts) > 0: | 257 if len(alts) > 0: |
| 254 vcf_id = "." | |
| 255 vcf_qual = "." | 258 vcf_qual = "." |
| 256 vcf_filter = "PASS" | 259 vcf_filter = "PASS" |
| 257 # if not allow_multiples, report only the most freq alt | 260 # if not allow_multiples, report only the most freq alt |
| 258 if not allow_multiples and len(alts) > 1: | 261 if not allow_multiples and len(alts) > 1: |
| 259 max_saf = max(safs) | 262 max_saf = max(safs) |
