view tools/cgatools17/vcf_to_listVariants.py @ 11:9c23244ced42 draft

planemo upload for repository https://bitbucket.org/EMCbioinf/galaxy-tool-shed-tools commit c475b4222a15cdadc6085865f4d13426249fec25-dirty
author yhoogstrate
date Wed, 11 Nov 2015 04:12:43 -0500
parents 3a2e0f376f26
children
line wrap: on
line source

#!/usr/bin/env python

"""[License: GNU General Public License v3 (GPLv3)]
 
 This is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 
 This is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.

 Documentation as defined by:
 <http://epydoc.sourceforge.net/manual-fields.html#fields-synonyms>
"""

import sys,os,os.path,argparse,textwrap,datetime

def convert_vcf_to_listVariants(filename_in, filename_out):
	"""Writes the single base substitutions found in a VCF file as a
	tab-delimited variant list.
	
	The output starts with the header line
	C{variantId, chromosome, begin, end, varType, reference, alleleSeq, xRef}
	(tab separated), followed by one row per accepted VCF record:
	
	 - variantId: running counter starting at 1
	 - chromosome: VCF column 1 (CHROM)
	 - begin: VCF column 2 (POS) minus one
	 - end: VCF column 2 (POS) as found in the input
	 - varType: always C{snp}
	 - reference: VCF column 4 (REF)
	 - alleleSeq: VCF column 5 (ALT)
	 - xRef: VCF column 3 (ID)
	
	A record is only accepted when both REF and ALT are exactly one
	character long and ALT is not one of the VCF placeholders C{.} or
	C{*}. All other records (indels, multi-allelic sites, ...) as well
	as blank lines, lines starting with C{#} and lines with fewer than
	five tab separated columns are skipped.
	
	@param filename_in: path of the VCF file to read
	@type  filename_in: str
	@param filename_out: path of the file to write, or C{"-"} to write to stdout
	@type  filename_out: str
	
	@raise ValueError: if the POS column of an accepted record is not an integer
	"""
	to_stdout = (filename_out == "-")
	fh_out = sys.stdout if to_stdout else open(filename_out, "w")
	
	# try/finally guarantees that a real output file is closed (and thus
	# flushed) even when reading or parsing the input fails half way.
	try:
		fh_out.write("variantId\tchromosome\tbegin\tend\tvarType\treference\talleleSeq\txRef\n")
		
		variant_id = 1
		
		with open(filename_in, 'r') as fh_in:
			for line in fh_in:
				line_s = line.strip()
				if (not line_s) or line_s.startswith("#"):
					continue
				
				# Only the line terminator is removed before splitting, so
				# it can never end up inside the last column.
				params = line.rstrip("\r\n").split("\t")
				if len(params) < 5:
					continue
				
				reference = params[3]
				allele = params[4]
				
				# A substitution of a single base requires one base on both
				# sides; only checking ALT would report a deletion such as
				# REF=AT, ALT=A as a snp.
				if len(reference) != 1 or len(allele) != 1 or allele in (".", "*"):
					continue # indel, multi-allelic or no alternative allele
				
				begin = int(params[1]) - 1
				
				fh_out.write("\t".join((
					str(variant_id),	# id
					params[0],			# chr
					str(begin),			# begin
					params[1],			# end
					"snp",				# varType
					reference,			# reference
					allele,				# alleleSeq
					params[2]			# dbsnpid / annotation id
				)) + "\n")
				variant_id += 1
	finally:
		# stdout is owned by the interpreter and must stay open
		if not to_stdout:
			fh_out.close()

if __name__ == "__main__":
	# Command line entry point: parse the file names and run the conversion.
	parser = argparse.ArgumentParser()
	
	# The input is mandatory: without it None would be passed to open() and
	# the script would end in a traceback instead of a usage message.
	parser.add_argument("-i","--input", help="input  file (VCF)",required=True)
	parser.add_argument("-o","--output",help="output filename; '-' for stdout",default="-")
	
	args = parser.parse_args()
	
	convert_vcf_to_listVariants(args.input,args.output)