Mercurial > repos > saskia-hiltemann > cgatools_v17
diff tools/cgatools17/vcf_to_listVariants.py @ 1:3a2e0f376f26 draft
Minor change to tv2vcf.xml to allow for workflow automation
author | dgdekoning |
---|---|
date | Wed, 21 Oct 2015 10:09:15 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python

"""[License: GNU General Public License v3 (GPLv3)]

    This is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

    Documentation as defined by:
    <http://epydoc.sourceforge.net/manual-fields.html#fields-synonyms>
"""

import sys
import os
import os.path
import argparse
import textwrap
import datetime


def convert_vcf_to_listVariants(filename_in, filename_out):
    """Convert the single-base substitutions of a VCF file to a variant list.

    A tab-separated header line is written first, followed by one row per
    accepted record with the columns: variantId, chromosome, begin, end,
    varType, reference, alleleSeq, xRef.  ``begin`` is the VCF POS minus
    one and ``end`` is the VCF POS itself.  variantId is a counter that
    starts at 1 and only advances for rows that are actually written.

    Only records whose REF and ALT are both exactly one character long are
    written (as varType ``snp``).  Indels, multi-allelic records, records
    without an alternate allele (ALT ``.`` or ``*``), blank lines, header
    lines starting with ``#`` and records with fewer than five columns are
    skipped.

    @param filename_in: path of the VCF file to read
    @type filename_in: str
    @param filename_out: path of the file to write, or ``-`` for stdout
    @type filename_out: str
    @raise ValueError: if the POS column of an accepted record is not an
        integer
    """
    use_stdout = (filename_out == "-")
    fh_out = sys.stdout if use_stdout else open(filename_out, "w")

    try:
        fh_out.write(
            "variantId\tchromosome\tbegin\tend\tvarType"
            "\treference\talleleSeq\txRef\n"
        )

        variant_id = 1

        with open(filename_in, "r") as fh_in:
            for line in fh_in:
                stripped = line.strip()
                if not stripped or stripped.startswith("#"):
                    continue

                # Split the line without its line terminator so the last
                # column of a short record is not polluted by "\n"/"\r\n".
                fields = line.rstrip("\r\n").split("\t")
                if len(fields) < 5:
                    continue

                chrom, pos, xref, ref, alt = fields[:5]

                # A substitution needs a single base on BOTH sides; checking
                # ALT alone would report deletions (e.g. AT -> A) as SNPs.
                # "." and "*" are one character long but denote a missing
                # alternate allele, not a base.
                if len(ref) != 1 or len(alt) != 1 or alt in (".", "*"):
                    continue  # indels and other variant types: not converted

                row = (
                    str(variant_id),    # id
                    chrom,              # chr
                    str(int(pos) - 1),  # begin
                    pos,                # end
                    "snp",
                    ref,                # reference
                    alt,                # alleleSeq
                    xref,               # dbsnpid / annotation id
                )
                fh_out.write("\t".join(row) + "\n")
                variant_id += 1
    finally:
        # Never close sys.stdout; only close a file opened by this function.
        if not use_stdout:
            fh_out.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    # Required: without an input path there is nothing to convert.
    parser.add_argument("-i", "--input", required=True,
                        help="input file (VCF)")
    parser.add_argument("-o", "--output", default="-",
                        help="output filename; '-' for stdout")

    args = parser.parse_args()

    convert_vcf_to_listVariants(args.input, args.output)