annotate tools/solid_tools/maq_cs_wrapper.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 #Guruprasad Ananda
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 #MAQ mapper for SOLiD colourspace-reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 import sys, os, zipfile, tempfile, subprocess
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 def stop_err( msg ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 sys.stderr.write( "%s\n" % msg )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 def __main__():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 out_fname = sys.argv[1].strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 out_f2 = open(sys.argv[2].strip(),'r+')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 ref_fname = sys.argv[3].strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 f3_read_fname = sys.argv[4].strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 f3_qual_fname = sys.argv[5].strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 paired = sys.argv[6]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 if paired == 'yes':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 r3_read_fname = sys.argv[7].strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 r3_qual_fname = sys.argv[8].strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 min_mapqual = int(sys.argv[9].strip())
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 max_mismatch = int(sys.argv[10].strip())
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 out_f3name = sys.argv[11].strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 subprocess_dict = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 ref_csfa = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 ref_bfa = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 ref_csbfa = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 cmd2_1 = 'maq fasta2csfa %s > %s 2>&1' %(ref_fname,ref_csfa.name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 cmd2_2 = 'maq fasta2bfa %s %s 2>&1' %(ref_csfa.name,ref_csbfa.name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 cmd2_3 = 'maq fasta2bfa %s %s 2>&1' %(ref_fname,ref_bfa.name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 os.system(cmd2_1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 os.system(cmd2_2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 os.system(cmd2_3)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 except Exception, erf:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 stop_err(str(erf)+"Error processing reference sequence")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 if paired == 'yes': #paired end reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 tmpf = tempfile.NamedTemporaryFile() #forward reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 tmpr = tempfile.NamedTemporaryFile() #reverse reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 tmps = tempfile.NamedTemporaryFile() #single reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 tmpffastq = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 tmprfastq = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 tmpsfastq = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 cmd1 = "solid2fastq_modified.pl 'yes' %s %s %s %s %s %s %s 2>&1" %(tmpf.name,tmpr.name,tmps.name,f3_read_fname,f3_qual_fname,r3_read_fname,r3_qual_fname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 os.system(cmd1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 os.system('gunzip -c %s >> %s' %(tmpf.name,tmpffastq.name))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 os.system('gunzip -c %s >> %s' %(tmpr.name,tmprfastq.name))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 os.system('gunzip -c %s >> %s' %(tmps.name,tmpsfastq.name))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 except Exception, eq:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 stop_err("Error converting data to fastq format." + str(eq))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 #make a temp directory where the split fastq files will be stored
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 split_dir = tempfile.mkdtemp()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 split_file_prefix_f = tempfile.mktemp(dir=split_dir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 split_file_prefix_r = tempfile.mktemp(dir=split_dir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 splitcmd_f = 'split -a 2 -l %d %s %s' %(32000000,tmpffastq.name,split_file_prefix_f) #32M lines correspond to 8M reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 splitcmd_r = 'split -a 2 -l %d %s %s' %(32000000,tmprfastq.name,split_file_prefix_r) #32M lines correspond to 8M reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 os.system(splitcmd_f)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 os.system(splitcmd_r)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 os.chdir(split_dir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 ii = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 for fastq in os.listdir(split_dir):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 if not fastq.startswith(split_file_prefix_f.split("/")[-1]):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 fastq_r = split_file_prefix_r + fastq.split(split_file_prefix_f.split("/")[-1])[1] #find the reverse strand fastq corresponding to formward strand fastq
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 tmpbfq_f = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 tmpbfq_r = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 cmd3 = 'maq fastq2bfq %s %s 2>&1; maq fastq2bfq %s %s 2>&1; maq map -c %s.csmap %s %s %s 1>/dev/null 2>&1; maq mapview %s.csmap > %s.txt' %(fastq,tmpbfq_f.name,fastq_r,tmpbfq_r.name,fastq,ref_csbfa.name,tmpbfq_f.name,tmpbfq_r.name,fastq,fastq)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 subprocess_dict['sp'+str(ii+1)] = subprocess.Popen([cmd3],shell=True,stdout=subprocess.PIPE)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 ii += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 all_done = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 for j,k in enumerate(subprocess_dict.keys()):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 if subprocess_dict['sp'+str(j+1)].wait() != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 err = subprocess_dict['sp'+str(j+1)].communicate()[1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 if err != None:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 stop_err("Mapping error: %s" %err)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 all_done = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 if all_done:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 cmdout = "for map in *.txt; do cat $map >> %s; done" %(out_fname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 os.system(cmdout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 tmpcsmap = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 cmd_cat_csmap = "for csmap in *.csmap; do cat $csmap >> %s; done" %(tmpcsmap.name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 os.system(cmd_cat_csmap)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 tmppileup = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 cmdpileup = "maq pileup -m %s -q %s %s %s > %s" %(max_mismatch,min_mapqual,ref_bfa.name,tmpcsmap.name,tmppileup.name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 os.system(cmdpileup)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 tmppileup.seek(0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 print >> out_f2, "#chr\tposition\tref_nt\tcoverage\tSNP_count\tA_count\tT_count\tG_count\tC_count"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 for line in file(tmppileup.name):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 elems = line.strip().split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 ref_nt = elems[2].capitalize()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 read_nt = elems[4]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 coverage = int(elems[3])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 a,t,g,c = 0,0,0,0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 ref_nt_count = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 for ch in read_nt:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 ch = ch.capitalize()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 if ch not in ['A','T','G','C',',','.']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 if ch in [',','.']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 ch = ref_nt
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 ref_nt_count += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 nt_ind = ['A','T','G','C'].index(ch)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 if nt_ind == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 a+=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 elif nt_ind == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 t+=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 elif nt_ind == 2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 g+=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 c+=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 except ValueError, we:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 print >>sys.stderr, we
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 print >> out_f2, "%s\t%s\t%s\t%s\t%s\t%s" %("\t".join(elems[:4]),coverage-ref_nt_count,a,t,g,c)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 except Exception, er2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 stop_err("Encountered error while mapping: %s" %(str(er2)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 else: #single end reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 tmpf = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 tmpfastq = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 cmd1 = "solid2fastq_modified.pl 'no' %s %s %s %s %s %s %s 2>&1" %(tmpf.name,None,None,f3_read_fname,f3_qual_fname,None,None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 os.system(cmd1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 os.system('gunzip -c %s >> %s' %(tmpf.name,tmpfastq.name))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 tmpf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 stop_err("Error converting data to fastq format.")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 #make a temp directory where the split fastq files will be stored
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 split_dir = tempfile.mkdtemp()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 split_file_prefix = tempfile.mktemp(dir=split_dir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 splitcmd = 'split -a 2 -l %d %s %s' %(32000000,tmpfastq.name,split_file_prefix) #32M lines correspond to 8M reads
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 os.system(splitcmd)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 os.chdir(split_dir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 for i,fastq in enumerate(os.listdir(split_dir)):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 tmpbfq = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 cmd3 = 'maq fastq2bfq %s %s 2>&1; maq map -c %s.csmap %s %s 1>/dev/null 2>&1; maq mapview %s.csmap > %s.txt' %(fastq,tmpbfq.name,fastq,ref_csbfa.name,tmpbfq.name,fastq,fastq)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 subprocess_dict['sp'+str(i+1)] = subprocess.Popen([cmd3],shell=True,stdout=subprocess.PIPE)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 all_done = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 for j,k in enumerate(subprocess_dict.keys()):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 if subprocess_dict['sp'+str(j+1)].wait() != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 err = subprocess_dict['sp'+str(j+1)].communicate()[1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 if err != None:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 stop_err("Mapping error: %s" %err)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 all_done = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 if all_done:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 cmdout = "for map in *.txt; do cat $map >> %s; done" %(out_fname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 os.system(cmdout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 tmpcsmap = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 cmd_cat_csmap = "for csmap in *.csmap; do cat $csmap >> %s; done" %(tmpcsmap.name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 os.system(cmd_cat_csmap)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 tmppileup = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 cmdpileup = "maq pileup -m %s -q %s %s %s > %s" %(max_mismatch,min_mapqual,ref_bfa.name,tmpcsmap.name,tmppileup.name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 os.system(cmdpileup)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 tmppileup.seek(0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 print >> out_f2, "#chr\tposition\tref_nt\tcoverage\tSNP_count\tA_count\tT_count\tG_count\tC_count"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 for line in file(tmppileup.name):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179 elems = line.strip().split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 ref_nt = elems[2].capitalize()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 read_nt = elems[4]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 coverage = int(elems[3])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 a,t,g,c = 0,0,0,0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 ref_nt_count = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 for ch in read_nt:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186 ch = ch.capitalize()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187 if ch not in ['A','T','G','C',',','.']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189 if ch in [',','.']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190 ch = ref_nt
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191 ref_nt_count += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193 nt_ind = ['A','T','G','C'].index(ch)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194 if nt_ind == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195 a+=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 elif nt_ind == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 t+=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 elif nt_ind == 2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199 g+=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201 c+=1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 print >> out_f2, "%s\t%s\t%s\t%s\t%s\t%s" %("\t".join(elems[:4]),coverage-ref_nt_count,a,t,g,c)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205 except Exception, er2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206 stop_err("Encountered error while mapping: %s" %(str(er2)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208 #Build custom track from pileup
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 chr_list=[]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
210 out_f2.seek(0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
211 fcov = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
212 fout_a = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
213 fout_t = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
214 fout_g = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215 fout_c = tempfile.NamedTemporaryFile()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
216 fcov.write('''track type=wiggle_0 name="Coverage track" description="Coverage track (from Galaxy)" color=0,0,0 visibility=2\n''')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
217 fout_a.write('''track type=wiggle_0 name="Track A" description="Track A (from Galaxy)" color=255,0,0 visibility=2\n''')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
218 fout_t.write('''track type=wiggle_0 name="Track T" description="Track T (from Galaxy)" color=0,255,0 visibility=2\n''')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
219 fout_g.write('''track type=wiggle_0 name="Track G" description="Track G (from Galaxy)" color=0,0,255 visibility=2\n''')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
220 fout_c.write('''track type=wiggle_0 name="Track C" description="Track C (from Galaxy)" color=255,0,255 visibility=2\n''')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
221
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
222 for line in out_f2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
223 if line.startswith("#"):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
224 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
225 elems = line.split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
226 chr = elems[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
227
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
228 if chr not in chr_list:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
229 chr_list.append(chr)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
230 if not (chr.startswith('chr') or chr.startswith('scaffold')):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
231 chr = 'chr'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
232 header = "variableStep chrom=%s" %(chr)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
233 fcov.write("%s\n" %(header))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
234 fout_a.write("%s\n" %(header))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
235 fout_t.write("%s\n" %(header))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
236 fout_g.write("%s\n" %(header))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
237 fout_c.write("%s\n" %(header))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
238 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
239 pos = int(elems[1])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
240 cov = int(elems[3])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
241 a = int(elems[5])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
242 t = int(elems[6])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
243 g = int(elems[7])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
244 c = int(elems[8])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
245 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
246 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
247 fcov.write("%s\t%s\n" %(pos,cov))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
248 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
249 a_freq = a*100./cov
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
250 t_freq = t*100./cov
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
251 g_freq = g*100./cov
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252 c_freq = c*100./cov
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
253 except ZeroDivisionError:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
254 a_freq=t_freq=g_freq=c_freq=0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
255 fout_a.write("%s\t%s\n" %(pos,a_freq))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
256 fout_t.write("%s\t%s\n" %(pos,t_freq))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
257 fout_g.write("%s\t%s\n" %(pos,g_freq))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
258 fout_c.write("%s\t%s\n" %(pos,c_freq))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
259
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
260 fcov.seek(0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
261 fout_a.seek(0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
262 fout_g.seek(0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
263 fout_t.seek(0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
264 fout_c.seek(0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
265 os.system("cat %s %s %s %s %s | cat > %s" %(fcov.name,fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_f3name))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
266
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
267 if __name__=="__main__":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
268 __main__()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
269
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
270