Mercurial > repos > rreumerman > snptools
diff snpsplit.py @ 4:bd5692103d5b draft
Uploaded
author | rreumerman |
---|---|
date | Fri, 05 Apr 2013 05:00:40 -0400 |
parents | |
children |
line wrap: on
line diff
'''Split multi-base SNP/MNP records into single-base records.

This script takes a tab-delimited file containing position, ref base and
mut base, and splits any multicharacter ref or mut base entries into
separate lines, calculating the new positions.

Usage: snpsplit.py <input file> <output file>

Lines that do not start with a digit (headers, comments, blank lines) are
copied to the output unchanged.  Records whose ref and mut columns differ in
length (indels) are dropped, because only SNPs/MNPs can be split base by
base.
'''

import sys


def splitter(cells):
    '''Return the single-base output lines for one SNP/MNP record.

    cells -- sequence of at least three strings: position, ref bases and
             mut bases.  The caller guarantees that ref and mut have the
             same length.

    Each base where ref and mut differ yields one "position<TAB>ref<TAB>mut"
    line (newline-terminated), with the position advanced by the offset of
    the base inside the record.  Bases that are identical in ref and mut are
    omitted.

    Raises ValueError if the position column is not an integer.
    '''
    start = int(cells[0])
    return [
        '%d\t%s\t%s\n' % (start + offset, ref_base, mut_base)
        for offset, (ref_base, mut_base) in enumerate(zip(cells[1], cells[2]))
        if ref_base != mut_base
    ]


def split_snps(in_file, out_file):
    '''Copy records from in_file to out_file, one differing base per line.

    in_file  -- iterable of text lines (e.g. an open file).
    out_file -- object with write() and writelines() methods.

    Returns a tuple (lines_read, records_written).  records_written counts
    only the split SNP lines, not the header or passed-through lines.

    Raises ValueError, naming the offending line number, when a data row has
    fewer than three columns or a non-integer position.
    '''
    in_lines = out_lines = 0
    out_file.write("Position\tRef\tMut\n")
    for line in in_file:
        in_lines += 1

        # Anything that does not start with a digit is not a data row;
        # pass it through untouched.
        if not line[:1].isdigit():
            out_file.write(line)
            continue

        cells = line.rstrip().split('\t')
        if len(cells) < 3:
            raise ValueError(
                "line %d: expected 3 tab-separated columns "
                "(position, ref, mut) but found %d" % (in_lines, len(cells)))

        # Can only deal with SNPs/MNPs, not indels.
        if len(cells[1]) != len(cells[2]):
            continue

        try:
            records = splitter(cells)
        except ValueError:
            raise ValueError(
                "line %d: position '%s' is not an integer"
                % (in_lines, cells[0]))

        out_file.writelines(records)
        out_lines += len(records)

    return in_lines, out_lines


def main(argv):
    '''Command-line entry point; argv is the full sys.argv-style list.'''
    if len(argv) != 3:
        sys.exit("snpsplit takes exactly two arguments (input and output file), no more and no less")

    input_name = argv[1]
    output_name = argv[2]

    try:
        in_file = open(input_name)
    except IOError as e:
        sys.exit("Error trying to open '%s': %s" % (input_name, e.strerror))

    # The with-blocks guarantee both files are closed even if processing
    # fails part-way through.
    with in_file:
        try:
            out_file = open(output_name, 'w')
        except IOError as e:
            sys.exit("Error trying to open '%s': %s" % (output_name, e.strerror))

        with out_file:
            try:
                in_lines, out_lines = split_snps(in_file, out_file)
            except ValueError as e:
                sys.exit("Error in '%s': %s" % (input_name, e))

    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Lines read: %s" % in_lines)
    print("Lines printed: %s" % out_lines)


if __name__ == '__main__':
    main(sys.argv)