"""Split multi-base SNP/MNP records into one line per changed base.

Reads a tab-delimited file whose data lines contain a position, the reference
base(s) and the mutated base(s). Every data line whose reference and mutated
entries have the same length is expanded into one output line per differing
base, with the position advanced by that base's offset within the entry.

Lines that do not start with a digit are copied to the output unchanged.
Records whose two entries differ in length (indels) are dropped, because only
SNPs/MNPs can be split position by position.

Usage: snpsplit INPUT OUTPUT
"""

import sys


def splitter(cells, out_file):
    """Write one row per differing base of a record.

    cells    -- fields of one data line: position, reference bases, mutated
                bases (any further fields are ignored)
    out_file -- writable text file object that receives the rows

    Returns the number of rows written. Raises ValueError if the position
    field is not an integer.
    """
    start = int(cells[0])
    written = 0
    # zip() pairs the bases positionally and stops at the shorter entry, so a
    # length mismatch cannot raise IndexError here.
    for offset, (ref_base, mut_base) in enumerate(zip(cells[1], cells[2])):
        if ref_base == mut_base:
            continue
        out_file.write("%d\t%s\t%s\n" % (start + offset, ref_base, mut_base))
        written += 1
    return written


def split_file(in_file, out_file):
    """Copy in_file to out_file, splitting every SNP/MNP record.

    Returns a (lines_read, rows_written) tuple. Only rows produced by
    splitting are counted as written; the header and pass-through lines are
    not.
    """
    in_lines = out_lines = 0
    out_file.write("Position\tRef\tMut\n")
    for line in in_file:
        in_lines += 1
        # Anything that does not start with a digit (headers, comments, blank
        # lines) is passed through untouched.
        if not line[:1].isdigit():
            out_file.write(line)
            continue

        cells = line.rstrip().split('\t')
        # rstrip() also removes a trailing tab, so a record with an empty last
        # field arrives here with fewer than three cells; skip it rather than
        # crash. Can only deal with SNPs/MNPs, not indels.
        if len(cells) < 3 or len(cells[1]) != len(cells[2]):
            continue
        out_lines += splitter(cells, out_file)
    return in_lines, out_lines


def main(argv=None):
    """Run the command-line tool; argv defaults to sys.argv."""
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        sys.exit("snpsplit takes exactly two arguments (input and output file), "
                 "no more and no less")

    input_name = argv[1]
    output_name = argv[2]

    try:
        in_file = open(input_name)
    except IOError as e:
        sys.exit("Error trying to open '%s': %s" % (input_name, e.strerror))

    # The with-blocks guarantee both files are closed, including when opening
    # the output fails or a malformed record raises part-way through.
    with in_file:
        try:
            out_file = open(output_name, 'w')
        except IOError as e:
            sys.exit("Error trying to open '%s': %s" % (output_name, e.strerror))
        with out_file:
            in_lines, out_lines = split_file(in_file, out_file)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Lines read: %s" % in_lines)
    print("Lines printed: %s" % out_lines)


if __name__ == "__main__":
    main()