Mercurial > repos > jjohnson > pileup_to_vcf
diff pileup_to_vcf.py @ 9:c0a6e8f595ec default tip
Add an option to set the VCF ID field value; this can be used to ID germline variants for SnpSift
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 11 Apr 2013 10:28:10 -0500 |
parents | e77ab15bbce9 |
children |
line wrap: on
line diff
--- a/pileup_to_vcf.py Thu Mar 28 14:55:50 2013 -0500
+++ b/pileup_to_vcf.py Thu Apr 11 10:28:10 2013 -0500
@@ -34,7 +34,7 @@
 vcf_header = """\
 ##fileformat=VCFv4.0
-##source=pileup_to_vcf.pyV1.1
+##source=pileup_to_vcf.pyV1.2
 ##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">
 ##INFO=<ID=SAF,Number=.,Type=Float,Description=\"Specific Allele Frequency\">
 ##FILTER=<ID=DP,Description=\"Minimum depth of %s\">
@@ -55,6 +55,8 @@
 parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' )
 parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' )
 parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' )
+ # ID to use
+ parser.add_option( '-I', '--id', dest='id', default=None, help='The value for the VCF ID field' )
 # select columns
 parser.add_option( '-C', '--chrom_col', type='int', default='1', dest='chrom_col', help='The ordinal position (starting with 1) of the chromosome column' )
 parser.add_option( '-P', '--pos_col', type='int', default='2', dest='pos_col', help='The ordinal position (starting with 1) of the position column' )
@@ -117,6 +119,8 @@
 else:
 outputFile = sys.stdout
+ vcf_id = options.id if options.id else "."
+
 indel_len_pattern = '([1-9][0-9]*)'
 ref_skip_pattern = '[<>]'
@@ -251,7 +255,6 @@
 alts.append(vcf_ref[:len(vcf_ref) - len(k)]) # TODO alt will be a substring of vcf_ref, test this
 safs.append(saf)
 if len(alts) > 0:
- vcf_id = "."
 vcf_qual = "."
 vcf_filter = "PASS"
 # if not allow_multiples, report only the most freq alt