diff pileup_to_vcf.py @ 9:c0a6e8f595ec default tip

Add an option to set the VCF ID field value; this can be used to identify germline variants for SnpSift.
author Jim Johnson <jj@umn.edu>
date Thu, 11 Apr 2013 10:28:10 -0500
parents e77ab15bbce9
children
line wrap: on
line diff
--- a/pileup_to_vcf.py	Thu Mar 28 14:55:50 2013 -0500
+++ b/pileup_to_vcf.py	Thu Apr 11 10:28:10 2013 -0500
@@ -34,7 +34,7 @@
 
 vcf_header =  """\
 ##fileformat=VCFv4.0
-##source=pileup_to_vcf.pyV1.1
+##source=pileup_to_vcf.pyV1.2
 ##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">
 ##INFO=<ID=SAF,Number=.,Type=Float,Description=\"Specific Allele Frequency\">
 ##FILTER=<ID=DP,Description=\"Minimum depth of %s\">
@@ -55,6 +55,8 @@
   parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' )
   parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' )
   parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' )
+  # value for the VCF ID field (falls back to "." when not given)
+  parser.add_option( '-I', '--id', dest='id', default=None, help='The value for the VCF ID field' )
   # select columns
   parser.add_option( '-C', '--chrom_col', type='int', default='1', dest='chrom_col', help='The ordinal position (starting with 1) of the chromosome column' )
   parser.add_option( '-P', '--pos_col', type='int', default='2', dest='pos_col', help='The ordinal position (starting with 1) of the position column' )
@@ -117,6 +119,8 @@
   else:
     outputFile = sys.stdout
 
+  vcf_id = options.id if options.id else "."
+
   indel_len_pattern = '([1-9][0-9]*)'
   ref_skip_pattern = '[<>]'
 
@@ -251,7 +255,6 @@
             alts.append(vcf_ref[:len(vcf_ref) - len(k)])   # TODO alt will be a substring of vcf_ref,  test this
             safs.append(saf)
       if len(alts) > 0:
-        vcf_id = "."
         vcf_qual = "." 
         vcf_filter = "PASS"
         # if not allow_multiples, report only the most freq alt