Mercurial > repos > rreumerman > snptools
view snpsplit.py @ 5:b6786c2247b1 draft
Uploaded
author | rreumerman |
---|---|
date | Fri, 05 Apr 2013 05:05:30 -0400 |
parents | bd5692103d5b |
children |
line wrap: on
line source
'''Split multi-base SNP/MNP rows of a tab-delimited file into single-base rows.

The input is expected to hold three tab-separated columns: position,
reference base(s) and mutated base(s).  Every row whose ref and mut entries
have the same length is expanded into one output row per *differing* base,
with the position advanced by the base's offset inside the entry.

Usage: snpsplit.py <input file> <output file>
'''

import sys


def splitter(cells):
    '''Return the single-base output rows for one parsed input row.

    cells -- sequence of strings: [position, ref bases, mut bases, ...];
             ref and mut are expected to have the same length.

    Returns a list of newline-terminated "position<TAB>ref<TAB>mut" strings,
    one for every offset at which ref and mut differ (possibly empty).
    Raises ValueError if a differing base is found and the position is not
    an integer.
    '''
    # int() is deliberately evaluated only for differing bases, so a row
    # without any difference never fails on a malformed position.
    return [
        "%d\t%s\t%s\n" % (int(cells[0]) + offset, ref, mut)
        for offset, (ref, mut) in enumerate(zip(cells[1], cells[2]))
        if ref != mut
    ]


def split_snps(in_file, out_file):
    '''Copy in_file to out_file, expanding every SNP/MNP row.

    in_file  -- iterable of text lines (e.g. an open file).
    out_file -- object with write() and writelines() (e.g. an open file).

    Returns a tuple (lines read, single-base rows written).  Lines that are
    passed through unchanged and the header are not counted as rows written.
    '''
    in_lines = out_lines = 0
    # NOTE(review): this header is written unconditionally and non-data lines
    # are passed through below, so an input that already has a header line
    # ends up with two -- confirm this is intended.
    out_file.write("Position\tRef\tMut\n")
    for line in in_file:
        in_lines += 1
        # Anything that does not start with a digit (headers, comments,
        # blank lines) is copied through untouched.
        if not line[:1].isdigit():
            out_file.write(line)
            continue
        cells = line.rstrip().split('\t')
        # Rows with missing columns cannot be split; skip them instead of
        # crashing with an IndexError.
        if len(cells) < 3:
            continue
        # Can only deal with SNPs/MNPs, not indels.
        if len(cells[1]) != len(cells[2]):
            continue
        rows = splitter(cells)
        out_file.writelines(rows)
        out_lines += len(rows)
    return in_lines, out_lines


def _open_or_exit(name, mode='r'):
    '''Open name in the given mode, or exit with a readable message.'''
    try:
        return open(name, mode)
    except IOError as e:
        sys.exit("Error trying to open '" + name + "': {1}".format(e.errno, e.strerror))


def main(argv=None):
    '''Command-line entry point; argv defaults to sys.argv.'''
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        sys.exit("snpsplit takes exactly two arguments (input and output file), no more and no less")
    input_name = argv[1]
    output_name = argv[2]

    # The input is opened first so that a missing input file never creates
    # or truncates the output file.  The with-blocks close both files even
    # when an error interrupts processing.
    with _open_or_exit(input_name) as in_file:
        with _open_or_exit(output_name, 'w') as out_file:
            in_lines, out_lines = split_snps(in_file, out_file)

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Lines read: %s" % in_lines)
    print("Lines printed: %s" % out_lines)


if __name__ == "__main__":
    main()