2
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os, sys, string, shutil, subprocess, tempfile, re
|
|
4
|
|
5 def cleanup(tmpdir):
|
|
6 # cleanup
|
|
7 try:
|
|
8 if os.path.exists( tmpdir ):
|
|
9 shutil.rmtree( tmpdir )
|
|
10 except Exception, e:
|
|
11 stop_err( 'Error cleaning. ' + str( e ) )
|
|
12
|
|
13 # In the unlikely event of a fire, please use the nearest emergency exit
|
|
14 def stop_err( msg ):
|
|
15 sys.stderr.write( '%s\n' % msg )
|
|
16 sys.exit()
|
|
17
|
|
18 def __main__():
|
|
19 tooldir = os.path.dirname(sys.argv[0])
|
|
20 executable = tooldir + "/jemultiplexer_embase_1.0.4_bundle.jar"
|
|
21 if not os.path.exists(executable):
|
|
22 stop_err( "The file \"%s\" was not found. " % ( executable ) )
|
|
23 mpxdata = sys.argv[1]
|
|
24 output1 = sys.argv[2]
|
|
25 output1id = sys.argv[3]
|
|
26 barcodes = sys.argv[4]
|
|
27 barcode_list = sys.argv[5]
|
|
28 newfilepath = sys.argv[6]
|
|
29 extension = sys.argv[7]
|
|
30 barlen = sys.argv[8]
|
|
31 qualityFormat = sys.argv[9]
|
|
32 maxMismatches = sys.argv[10]
|
|
33 minBaseQuality = sys.argv[11]
|
|
34 minMismatchingDelta = sys.argv[12]
|
|
35 xTrimLen = sys.argv[13]
|
|
36 zTrimLen = sys.argv[14]
|
|
37 clipBarcode = sys.argv[15]
|
|
38 addBarcodeToHeader = sys.argv[16]
|
|
39 gzipOutput = sys.argv[17]
|
|
40 barcodeDiagFile = sys.argv[18]
|
|
41 rChar = sys.argv[19]
|
|
42 barcodeReadPos = sys.argv[20]
|
|
43 barcodeForSampleMatching = sys.argv[21]
|
|
44 redundantBarcode = sys.argv[22]
|
|
45 strict = sys.argv[23]
|
|
46 MpxData2 = sys.argv[24]
|
|
47
|
|
48 ## create the tmpdir & co
|
|
49 tmpdir = newfilepath + "/demultiplex_" + output1id
|
|
50 oldnames=[]
|
|
51 stat = tmpdir+"/jemultiplexer_out_stats.txt" #the default output stat file name
|
|
52 try:
|
|
53 if not os.path.isdir(tmpdir):
|
|
54 os.mkdir(tmpdir)
|
|
55 except Exception, e:
|
|
56 stop_err( "Error creating directory \"%s\". %s" % ( tmpdir, str( e ) ) )
|
|
57
|
|
58 #output file extension
|
|
59 if gzipOutput == "true":
|
|
60 outputExtension=".gz"
|
|
61 else:
|
|
62 outputExtension=""
|
|
63
|
|
64 # Reconstructing the output file names as jemultiplexer writes them
|
|
65 if MpxData2 != "single":
|
|
66 oldnames.append('unassigned_1.txt' + outputExtension)
|
|
67 oldnames.append('unassigned_2.txt' + outputExtension)
|
|
68 else:
|
|
69 oldnames.append('unassigned_1.txt' + outputExtension)
|
|
70 if barcodeDiagFile=="true":
|
|
71 oldnames.append('barcode_match_report.txt')
|
|
72 if barcode_list == "none": # If a .bs file was given
|
|
73 bc = open(barcodes, "r")
|
|
74 bcw = open( tmpdir + "/barcodes.txt", "w" )
|
|
75 for line in bc.readlines():
|
|
76 l = line.split()
|
|
77 if l[0] != "":
|
|
78 if MpxData2 != "single":
|
|
79 oldnames.append(l[0]+'_'+l[1]+'_1.txt' + outputExtension)
|
|
80 oldnames.append(l[0]+'_'+l[1]+'_2.txt' + outputExtension)
|
|
81 bcw.write(l[0] + "\t" + l[1] + "\t" + l[0]+'_'+l[1]+'_1.txt' + outputExtension + "\t" + l[0]+'_'+l[1]+'_2.txt' + outputExtension + "\n")
|
|
82 else:
|
|
83 oldnames.append(l[0]+'_'+l[1]+'_1.txt' + outputExtension)
|
|
84 bcw.write(l[0] + "\t" + l[1] + "\t" + l[0]+'_'+l[1]+'_1.txt' + outputExtension + "\n")
|
|
85 bc.close()
|
|
86 bcw.close()
|
|
87 elif barcodes == "none": # If the text area was used
|
|
88 lines = barcode_list.split("__cr____cn__")
|
|
89 #bc = csv.writer( open( tmpdir + "/barcodes.txt", "w" ), delimiter = "\t" )
|
|
90 bc = open( tmpdir + "/barcodes.txt", "w" )
|
|
91 for l in lines:
|
|
92 l = l.replace("__tc__", " ").split()
|
|
93 if len(l) < 2:
|
|
94 continue
|
|
95 if l[0] != "":
|
|
96 if MpxData2 != "single":
|
|
97 oldnames.append(l[0]+'_'+l[1]+'_1.txt' + outputExtension)
|
|
98 oldnames.append(l[0]+'_'+l[1]+'_2.txt' + outputExtension)
|
|
99 bc.write(l[0] + "\t" + l[1] + "\t" + l[0]+'_'+l[1]+'_1.txt' + outputExtension + "\t" + l[0]+'_'+l[1]+'_2.txt' + outputExtension + "\n")
|
|
100 else:
|
|
101 oldnames.append(l[0]+'_'+l[1]+'_1.txt' + outputExtension)
|
|
102 bc.write(l[0] + "\t" + l[1] + "\t" + l[0]+'_'+l[1]+'_1.txt' + outputExtension + "\n")
|
|
103 bc.close()
|
|
104
|
|
105 ## Building the command line
|
|
106 cmd = "java -Xmx8g -jar " + executable
|
|
107 cmd+= " F1="
|
|
108 cmd+= mpxdata
|
|
109 ## if we have PE data
|
|
110 if MpxData2 != "single":
|
|
111 cmd+= " F2="
|
|
112 cmd+= MpxData2
|
|
113 cmd+= " BPOS="
|
|
114 cmd+= barcodeReadPos
|
|
115 cmd+= " BRED="
|
|
116 cmd+= redundantBarcode
|
|
117 cmd+= " BM="
|
|
118 cmd+= barcodeForSampleMatching
|
|
119 cmd+= " S="
|
|
120 cmd+= strict
|
|
121 cmd+= " BF="
|
|
122 cmd+= tmpdir + "/barcodes.txt"
|
|
123 cmd+= " OUTPUT_DIR=\""
|
|
124 cmd+= tmpdir
|
|
125 cmd+= "\""
|
|
126 cmd+= " BCLEN="
|
|
127 cmd+= barlen
|
|
128 cmd+= " QUALITY_FORMAT="
|
|
129 cmd+= qualityFormat
|
|
130 cmd+= " MAX_MISMATCHES="
|
|
131 cmd+= maxMismatches
|
|
132 cmd+= " MIN_BASE_QUALITY="
|
|
133 cmd+= minBaseQuality
|
|
134 cmd+= " MMD="
|
|
135 cmd+= minMismatchingDelta
|
|
136 cmd+= " XT="
|
|
137 cmd+= xTrimLen
|
|
138 cmd+= " ZT="
|
|
139 cmd+= zTrimLen
|
|
140 cmd+= " C="
|
|
141 cmd+= clipBarcode
|
|
142 cmd+= " ADD="
|
|
143 cmd+= addBarcodeToHeader
|
|
144 cmd+= " GZ="
|
|
145 cmd+= gzipOutput
|
|
146 if rChar=="2":
|
|
147 cmd+= " RCHAR="
|
|
148 cmd+= ":"
|
|
149 elif rChar=="3":
|
|
150 cmd+= " RCHAR="
|
|
151 cmd+= "_"
|
|
152 elif rChar=="4":
|
|
153 cmd+= " RCHAR="
|
|
154 cmd+= "-"
|
|
155 if barcodeDiagFile=="true":
|
|
156 cmd+= " BARCODE_DIAG_FILE="
|
|
157 cmd+= 'barcode_match_report.txt'
|
|
158
|
|
159
|
|
160 # Executing jemultiplexer
|
|
161 # status = os.system(cmd)
|
|
162 try:
|
|
163 tmperr = tempfile.NamedTemporaryFile( dir=tmpdir ).name
|
|
164 tmpout = tempfile.NamedTemporaryFile( dir=tmpdir ).name
|
|
165 tmp_stderr = open( tmperr, 'wb' )
|
|
166 tmp_stdout = open( tmpout, 'wb' )
|
|
167 proc = subprocess.Popen( args=cmd,
|
|
168 shell=True,
|
|
169 cwd=tmpdir,
|
|
170 stdout=tmp_stdout.fileno(),
|
|
171 stderr=tmp_stderr.fileno() )
|
|
172 returncode = proc.wait()
|
|
173 tmp_stderr.close()
|
|
174 tmp_stdout.close()
|
|
175 # get stderr, allowing for case where it's very large
|
|
176 tmp_stderr = open( tmperr, 'rb' )
|
|
177 stderr = ''
|
|
178 buffsize = 1048576
|
|
179 try:
|
|
180 while True:
|
|
181 stderr += tmp_stderr.read( buffsize )
|
|
182 if not stderr or len( stderr ) % buffsize != 0:
|
|
183 break
|
|
184 except OverflowError:
|
|
185 pass
|
|
186 tmp_stderr.close()
|
|
187 if returncode != 0:
|
|
188 raise Exception, stderr
|
|
189 except Exception, e:
|
|
190 #cleanup(tmpdir)
|
|
191 stop_err( 'Error demultiplexing sequence. ' + str( e ) )
|
|
192
|
|
193 # Creating the required paths for multiple outputs
|
|
194 newnames=[]
|
|
195 if os.path.isdir(tmpdir):
|
|
196 for f in oldnames:
|
|
197 tmpf = tmpdir+"/"+f
|
|
198 if os.path.isfile(tmpf):
|
|
199 # check the size
|
|
200 if os.path.getsize(tmpf) == 0:
|
|
201 stop_err( 'The output file: ' + f + ' is empty, there may be an error with your barcode file or settings.')
|
|
202 name = f
|
|
203 s = "primary_"
|
|
204 s+= output1id
|
|
205 s+= "_"
|
|
206 s+= string.replace(name, "_", "-")
|
|
207 s+= "_visible_"
|
|
208 if extension == "fastqillumina":
|
|
209 if gzipOutput == "true":
|
|
210 s+="gz"
|
|
211 else:
|
|
212 s+= extension
|
|
213 else:
|
|
214 if f=="barcode_match_report.txt":
|
|
215 s+="text"
|
|
216 else:
|
|
217 if gzipOutput == "true":
|
|
218 s+="gz"
|
|
219 else:
|
|
220 s+="fastqsanger"
|
|
221 newnames.append(newfilepath+"/"+s)
|
|
222 # Adding the appropriate prefixes to the old filenames
|
|
223 for i in range(len(oldnames)):
|
|
224 oldnames[i] = tmpdir+"/"+oldnames[i]
|
|
225
|
|
226 ### NUMSTAT rewriting ###
|
|
227 htmlout = open(output1, "w")
|
|
228 numstat = open(stat, "r")
|
|
229 htmlout.write("<html><head><title>numStat</title></head><body><!--\n")
|
|
230 for l in numstat.readlines():
|
|
231 htmlout.write(l)
|
|
232 numstat.seek(0)
|
|
233 htmlout.write("-->\n<h2>Please refresh your history to display the new datasets</h2>\n<h3>numStat</h3><table border=\"1\">\n")
|
|
234 htmlout.write("<tr><td>%s</td></tr>\n" % (cmd ))
|
|
235 for l in numstat.readlines():
|
|
236 l = l.split()
|
|
237 htmlout.write("<tr><td>%s</td><td>%s</td></tr>\n" % (l[0], l[1]) )
|
|
238 htmlout.write("</table></body></html>")
|
|
239 numstat.close()
|
|
240 htmlout.close()
|
|
241
|
|
242 #~ # Moving the first file (the mandatory output file defined in the xml (the txt stats))
|
|
243 #~ shutil.move(stat,output1)
|
|
244 #~
|
|
245 #~ # add a warning to the numStat
|
|
246 #~ statfh = open(stat,'a')
|
|
247 #~ statfh.write("\nRefresh you library to see the new DataSets.\n")
|
|
248 #~ statfh.close()
|
|
249
|
|
250 # Moving everything where it will be seen properly by Galaxy
|
|
251 for i in range(len(oldnames)):
|
|
252 shutil.move(oldnames[i],newnames[i])
|
|
253
|
|
254 # cleanup
|
|
255 cleanup(tmpdir)
|
|
256
|
|
257 # Ta-da!
|
|
258 if __name__=="__main__": __main__()
|