Mercurial > repos > superyuan > refeditor
changeset 1:85bdf226b67b default tip
add vcf2genotypes and test-data
author | superyuan <shuaiyuan.emory@gmail.com> |
---|---|
date | Mon, 30 Jun 2014 10:49:54 -0400 |
parents | 9259f2939357 |
children | |
files | test-data/hg19test.fa test-data/test.fastq test-data/test.vcf tool_conf.xml tools/refeditor/vcf2genotypes.py tools/refeditor/vcf2genotypes.xml |
diffstat | 6 files changed, 109 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hg19test.fa Mon Jun 30 10:49:54 2014 -0400 @@ -0,0 +1,12 @@ +>chr1 +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +TCAGTGTCCTCCCATGGATGTTAATGTTCTCCATATGATGTCAGTGTCCT +CCATATGATGTCAGTGTCCTCCATATGACATCAATATCCTCCATATGATA +TCAATATCCTCTGTATTGATATTGATATTGATATTTGGAGGATATCAATA +TCCTCCAAATGATGTCAGTGTCCTCCATATGATGTCAATGTCCTCCATAT +GATGTCAATATCCTCCGTATGATGTCAATATCCTCCGTATGATGTCAATA +TCCTCCATATGATGTCAGTGTCCTCTGTATGACATCAATATCCTCCATAC +GATGCCCCTGTCCTTCATATGATGTCAGTGTCCTTTTGTGAGCACCAGTG +TCCTTTGTATGACATCAGTAGTCTCCCATGAATGTCACTGTCTTCCCATA +GATGTCAGTGTCCTCTccaaaagacaagcagaagctgttcatggaatgat +gtgggggaaccttccagaaagaggcaacatcatgtgctaaggtcccaggt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.fastq Mon Jun 30 10:49:54 2014 -0400 @@ -0,0 +1,4 @@ +@ERR000038.1 BGI-FC302WFAAXX_5_1_1204:1431 +CCTTCATATGATGTCAGAGTCCTCTGTATGACCTCA ++ +IIIIII$IIII(IIIIIIIIIIIII2@IIIII:I>&
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.vcf Mon Jun 30 10:49:54 2014 -0400 @@ -0,0 +1,8 @@ +##fileformat=VCFv4.0 +##fileDate=20110201 +##reference=NCBI37 +##source=Affymetrix_Axiom_DB +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA18501 NA18502 NA18504 NA18505 NA18507 NA18508 NA18516 NA18517 NA18522 NA18523 NA18852 NA18853 NA18855 NA18856 NA18858 NA18859 NA18861 NA18862 NA18870 NA18871 NA18912 NA18913 NA19092 NA19093 NA19098 NA19099 NA19101 NA19102 NA19116 NA19119 NA19127 NA19128 NA19130 NA19131 NA19137 NA19138 NA19140 NA19141 NA19143 NA19144 NA19152 NA19153 NA19159 NA19160 NA19171 NA19172 NA19192 NA19193 NA19200 NA19201 NA19203 NA19204 NA19206 NA19207 NA19209 NA19210 NA19222 NA19223 NA19238 NA19239 +1 305 rs5 C T . PASS . GT 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/0 0/1 0/1 0/0 +1 334 . A C . PASS . GT 0/0 0/0 ./. 0/0 ./. 0/0 ./. ./. 0/0 ./. 0/0 0/0 ./. 0/0 ./. 0/0 0/0 0/0 ./. ./. 0/0 ./. 0/0 0/0 ./. 0/0 ./. ./. ./. 0/0 0/0 ./. 0/0 0/0 0/0 ./. 0/0 0/0 0/0 ./. 0/0 ./. ./. 0/0 ./. 0/0 ./. 0/0 0/0 0/0 ./. ./. 0/0 0/0 0/0 ./. 0/0 0/0 0/1 0/0
--- a/tool_conf.xml Thu Jun 12 04:56:10 2014 -0400 +++ b/tool_conf.xml Mon Jun 30 10:49:54 2014 -0400 @@ -1,6 +1,7 @@ <?xml version="1.0"?> <toolbox> <section name="Reference Editor" id="refeditor"> + <tool file="refeditor/vcf2genotypes.xml" /> <tool file="refeditor/DiploidConstructor.xml" /> <tool file="refeditor/MappingConverter.xml" /> </section>
# Filename: vcf2genotypes.py
# Author: Shuai Yuan
# Version: 06/29/2012
#
# This script is a wrapper for vcf2genotypes
#
# vcf2genotypes is launched based on these inputs:
# -i input VCF file
# -p target individual
# -o output genotypes file
#
# The process exit status is the value returned by main(): the exit code of
# vcf2genotypes on a normal run, or one of the wrapper's own error codes.

import sys
import os
import re
import string
import subprocess
from tempfile import NamedTemporaryFile

_USAGE = "Usage: vcf2genotypes.py -i <input.vcf> -p <individual> -o <output>"


def getopts(argv):
    """Collect ``-flag value`` pairs from an argument list.

    Args:
        argv: list of command-line argument strings (program name excluded).

    Returns:
        dict mapping each flag (including its leading '-') to the argument
        that immediately follows it.  Arguments that are neither a flag nor
        the value of a flag are ignored; a repeated flag keeps its last value.

    Raises:
        IndexError: if the last argument is a flag with no value after it.
    """
    opts = {}
    pos = 0
    while pos < len(argv):
        arg = argv[pos]
        # startswith() instead of arg[0] so an empty-string argument is
        # skipped rather than raising IndexError.
        if arg.startswith('-'):
            # Deliberately unguarded: a trailing flag raises IndexError,
            # which main() reports as a usage error.
            opts[arg] = argv[pos + 1]
            pos += 2
        else:
            pos += 1
    return opts


def main(argv=None):
    """Validate the wrapper's options and run vcf2genotypes.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        The exit code of vcf2genotypes when it was started, otherwise one of
        the wrapper's own codes:
          2   malformed command line (a flag without a value)
          -1  no input VCF file (-i)
          -2  no target individual (-p)
          -6  no output file (-o)
          1   the output file could not be opened
          127 vcf2genotypes could not be started
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts = getopts(argv)
    except IndexError:
        sys.stderr.write(_USAGE + "\n")
        return 2

    # An empty value is treated like a missing one: it cannot name a file or
    # an individual.
    vcf = opts.get("-i")
    if not vcf:
        sys.stderr.write("No input VCF file specified.\n")
        return -1

    individual = opts.get("-p")
    if not individual:
        sys.stderr.write("No valid individual specified.\n")
        return -2

    outputfile = opts.get("-o")
    if not outputfile:
        sys.stderr.write("No output file specified.\n")
        return -6

    # All inputs have been specified at this point.

    try:
        out = open(outputfile, "w")
    except (IOError, OSError) as err:
        sys.stderr.write("Cannot open output file %s: %s\n" % (outputfile, err))
        return 1

    with out:
        try:
            # Argument list + no shell: the VCF path and the individual name
            # are passed verbatim, so spaces or shell metacharacters in them
            # cannot alter the command.  stdout goes to the output file;
            # stderr is inherited so the tool's diagnostics stay visible.
            return subprocess.call(["vcf2genotypes", vcf, individual],
                                   stdout=out)
        except OSError as err:
            sys.stderr.write("Failed to run vcf2genotypes: %s\n" % err)
            return 127


if __name__ == "__main__":
    sys.exit(main())
<tool id="vcf2genotypes1" name="vcf2genotypes" version="1.0.1">
  <!-- Galaxy tool definition for the vcf2genotypes.py wrapper script. -->
  <description>Extracts genotypes for a particular individual from VCF File</description>
  <!-- Every substituted value is single-quoted so that a path or name
       containing spaces reaches the wrapper as a single argument. -->
  <command interpreter="python">vcf2genotypes.py
      -i '$input'
      -p '$person'
      -o '$out_file1'
  </command>
  <inputs>
    <param format="vcf" name="input" type="data" label="Input VCF File"/>
    <!-- Free-text parameter: "format" only applies to data parameters, so it is not set here. -->
    <param name="person" type="text" value="" label="Target individual (e.g. NA19238)"/>
  </inputs>
  <outputs>
    <data format="genotypes" name="out_file1" metadata_source="input"/>
  </outputs>
  <help>
**What it does**

Runs ``vcf2genotypes`` on the input VCF file for the given target individual
and writes the result to the output genotypes dataset.

**Inputs**

- *Input VCF File*: the VCF dataset to read.
- *Target individual*: the individual whose genotypes are extracted (e.g. NA19238).
  </help>
</tool>