diff tools/cgatools17/vcf_to_listVariants.py @ 1:3a2e0f376f26 draft

Minor change to tv2vcf.xml to allow for workflow automation
author dgdekoning
date Wed, 21 Oct 2015 10:09:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cgatools17/vcf_to_listVariants.py	Wed Oct 21 10:09:15 2015 -0400
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+
+"""[License: GNU General Public License v3 (GPLv3)]
+ 
+ This is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ Documentation as defined by:
+ <http://epydoc.sourceforge.net/manual-fields.html#fields-synonyms>
+"""
+
+import sys,os,os.path,argparse,textwrap,datetime
+
+def convert_vcf_to_listVariants(filename_in, filename_out):
+	"""Convert the single-base substitutions of a VCF file to a tab-separated variant list.
+
+	Writes a header, then one row per SNP: a running id starting at 1, chromosome,
+	begin (POS - 1), end (POS), the literal "snp", REF, ALT and the VCF ID column
+	as xRef. Comment lines, short lines and non-SNP records are skipped.
+
+	@param filename_in: path of the VCF file to read
+	@param filename_out: path of the output file, or "-" to write to stdout
+	"""
+	to_stdout = (filename_out == "-")
+	fh_out = sys.stdout if to_stdout else open(filename_out, "w")
+	try:
+		fh_out.write("variantId\tchromosome\tbegin\tend\tvarType\treference\talleleSeq\txRef\n")
+		i = 1
+		with open(filename_in, 'r') as fh_in:
+			for line in fh_in:
+				line_s = line.strip()
+				if (len(line_s) < 6) or line_s[0] == "#":
+					continue
+				# Drop only the line terminator so it cannot end up inside the last column.
+				params = line.rstrip("\r\n").split("\t")
+				if len(params) < 5:
+					continue  # malformed record: no ALT column
+				chrom, pos, xref, ref, alt = params[:5]
+				# A SNP needs a one-base REF *and* a one-base ALT; "." and "*" are not bases.
+				if len(ref) != 1 or len(alt) != 1 or alt in (".", "*"):
+					continue  # indels and multi-allelic sites are not converted
+				fh_out.write("\t".join([str(i), chrom, str(int(pos) - 1), pos, "snp", ref, alt, xref]) + "\n")
+				i += 1
+	finally:
+		if not to_stdout:  # never close sys.stdout
+			fh_out.close()
+
+if __name__ == "__main__":
+	# Command-line entry point: read the VCF given by -i and write the variant list to -o.
+	parser = argparse.ArgumentParser()
+	# Without an input path the converter would fail in open(None), so demand it up front.
+	parser.add_argument("-i","--input", help="input  file (VCF)",required=True)
+	parser.add_argument("-o","--output",help="output filename; '-' for stdout",default="-")
+	
+	args = parser.parse_args()
+	convert_vcf_to_listVariants(args.input,args.output)