# HG changeset patch # User Jim Johnson # Date 1365694090 18000 # Node ID c0a6e8f595ec207b624b5fe5e31fdf7b215cb573 # Parent 07cd87e94fbec9a50c9ddfd24d69c5f0f4fe0ca8 Add option to set VCF ID field value, this can be used to ID germline variants for SnpSift diff -r 07cd87e94fbe -r c0a6e8f595ec README --- a/README Thu Mar 28 14:55:50 2013 -0500 +++ b/README Thu Apr 11 10:28:10 2013 -0500 @@ -2,4 +2,4 @@ Filters on read coverage, base quality and the frequency of a variant. The VCF info is populated with a SAF tag that give the specific frequency (0 - 1) of a variant being observed. This is used for filtering in the mmuff ( Missense Mutation and Frameshift Finder) workflow. - +The VCF ID field can be optionally set, which can be used with SnpSift annotate or SnpSift filter. diff -r 07cd87e94fbe -r c0a6e8f595ec pileup_to_vcf.py --- a/pileup_to_vcf.py Thu Mar 28 14:55:50 2013 -0500 +++ b/pileup_to_vcf.py Thu Apr 11 10:28:10 2013 -0500 @@ -34,7 +34,7 @@ vcf_header = """\ ##fileformat=VCFv4.0 -##source=pileup_to_vcf.pyV1.1 +##source=pileup_to_vcf.pyV1.2 ##INFO= ##INFO= ##FILTER= @@ -55,6 +55,8 @@ parser.add_option( '-f', '--min_allele_freq', type='float', default='.5', dest='min_allele_freq', help='The minimum frequency of an allele for it to be reported (default .5)' ) parser.add_option( '-m', '--allow_multiples', action="store_true", dest='allow_multiples', default=False, help='Allow multiple alleles to be reported' ) parser.add_option( '-s', '--snps_only', action="store_true", dest='snps_only', default=False, help='Only report SNPs, not indels' ) + # ID to use + parser.add_option( '-I', '--id', dest='id', default=None, help='The value for the VCF ID field' ) # select columns parser.add_option( '-C', '--chrom_col', type='int', default='1', dest='chrom_col', help='The ordinal position (starting with 1) of the chromosome column' ) parser.add_option( '-P', '--pos_col', type='int', default='2', dest='pos_col', help='The ordinal position (starting with 1) of the position column' ) @@ -117,6 +119,8 @@ else: outputFile = sys.stdout + vcf_id = options.id if options.id else "." + indel_len_pattern = '([1-9][0-9]*)' ref_skip_pattern = '[<>]' @@ -251,7 +255,6 @@ alts.append(vcf_ref[:len(vcf_ref) - len(k)]) # TODO alt will be a substring of vcf_ref, test this safs.append(saf) if len(alts) > 0: - vcf_id = "." vcf_qual = "." vcf_filter = "PASS" # if not allow_multiples, report only the most freq alt diff -r 07cd87e94fbe -r c0a6e8f595ec pileup_to_vcf.xml --- a/pileup_to_vcf.xml Thu Mar 28 14:55:50 2013 -0500 +++ b/pileup_to_vcf.xml Thu Apr 11 10:28:10 2013 -0500 @@ -1,4 +1,4 @@ - + Converts a pileup to VCF with filtering pileup_to_vcf.py -i $input_file -o $output_file #if $min_cvrg.__str__ != '': @@ -15,6 +15,9 @@ #end if $allow_multiples $snps_only + #if $vcf_id.__str__ != '': + --id $vcf_id + #end if #if $cols.select_order == 'yes' : #if $chrom_col.__str__ != '': --chrom_col $chrom_col @@ -65,6 +68,9 @@ + + ^\S*$ +