Mercurial > repos > artbio > repenrich
comparison RepEnrich.py @ 0:f6f0f1e5e940 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
author | artbio |
---|---|
date | Wed, 02 Aug 2017 05:17:29 -0400 |
parents | |
children | 15e3e29f310e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f6f0f1e5e940 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import csv | |
5 import os | |
6 import shlex | |
7 | |
8 import shutil | |
9 import subprocess | |
10 import sys | |
11 | |
12 import numpy | |
13 | |
14 | |
15 parser = argparse.ArgumentParser(description=''' | |
16 Part II: Conducting the alignments to the psuedogenomes. Before\ | |
17 doing this step you will require 1) a bamfile of the unique\ | |
18 alignments with index 2) a fastq file of the reads mapping to\ | |
19 more than one location. These files can be obtained using the\ | |
20 following bowtie options [EXAMPLE: bowtie -S -m 1\ | |
21 --max multimap.fastq mm9 mate1_reads.fastq] Once you have the\ | |
22 unique alignment bamfile and the reads mapping to more than one\ | |
23 location in a fastq file you can run this step. EXAMPLE: python\ | |
24 master_output.py\ | |
25 /users/nneretti/data/annotation/hg19/hg19_repeatmasker.txt\ | |
26 /users/nneretti/datasets/repeatmapping/POL3/Pol3_human/ | |
27 HeLa_InputChIPseq_Rep1 HeLa_InputChIPseq_Rep1\ | |
28 /users/nneretti/data/annotation/hg19/setup_folder\ | |
29 HeLa_InputChIPseq_Rep1_multimap.fastq\ | |
30 HeLa_InputChIPseq_Rep1.bam''') | |
31 parser.add_argument('--version', action='version', version='%(prog)s 0.1') | |
32 parser.add_argument('annotation_file', action='store', | |
33 metavar='annotation_file', | |
34 help='List RepeatMasker.org annotation file for your\ | |
35 organism. The file may be downloaded from the\ | |
36 RepeatMasker.org website. Example:\ | |
37 /data/annotation/hg19/hg19_repeatmasker.txt') | |
38 parser.add_argument('outputfolder', action='store', metavar='outputfolder', | |
39 help='List folder to contain results.\ | |
40 Example: /outputfolder') | |
41 parser.add_argument('outputprefix', action='store', metavar='outputprefix', | |
42 help='Enter prefix name for data.\ | |
43 Example: HeLa_InputChIPseq_Rep1') | |
44 parser.add_argument('setup_folder', action='store', metavar='setup_folder', | |
45 help='List folder that contains the repeat element\ | |
46 pseudogenomes.\ | |
47 Example: /data/annotation/hg19/setup_folder') | |
48 parser.add_argument('fastqfile', action='store', metavar='fastqfile', | |
49 help='Enter file for the fastq reads that map to multiple\ | |
50 locations. Example: /data/multimap.fastq') | |
51 parser.add_argument('alignment_bam', action='store', metavar='alignment_bam', | |
52 help='Enter bamfile output for reads that map uniquely.\ | |
53 Example /bamfiles/old.bam') | |
54 parser.add_argument('--pairedend', action='store', dest='pairedend', | |
55 default='FALSE', | |
56 help='Designate this option for paired-end sequencing.\ | |
57 Default FALSE change to TRUE') | |
58 parser.add_argument('--collapserepeat', action='store', dest='collapserepeat', | |
59 metavar='collapserepeat', default='Simple_repeat', | |
60 help='Designate this option to generate a collapsed repeat\ | |
61 type. Uncollapsed output is generated in addition to\ | |
62 collapsed repeat type. Simple_repeat is default to\ | |
63 simplify downstream analysis. You can change the\ | |
64 default to another repeat name to collapse a\ | |
65 seperate specific repeat instead or if the name of\ | |
66 Simple_repeat is different for your organism.\ | |
67 Default Simple_repeat') | |
68 parser.add_argument('--fastqfile2', action='store', dest='fastqfile2', | |
69 metavar='fastqfile2', default='none', | |
70 help='Enter fastqfile2 when using paired-end option.\ | |
71 Default none') | |
72 parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus', | |
73 default="1", type=int, | |
74 help='Enter available cpus per node. The more cpus the\ | |
75 faster RepEnrich performs. RepEnrich is designed to\ | |
76 only work on one node. Default: "1"') | |
77 parser.add_argument('--allcountmethod', action='store', dest='allcountmethod', | |
78 metavar='allcountmethod', default="FALSE", | |
79 help='By default the pipeline only outputs the fraction\ | |
80 count method. Consdidered to be the best way to\ | |
81 count multimapped reads. Changing this option will\ | |
82 include the unique count method, a conservative\ | |
83 count, and the total count method, a liberal\ | |
84 counting strategy. Our evaluation of simulated data\ | |
85 indicated fraction counting is best.\ | |
86 Default = FALSE, change to TRUE') | |
87 parser.add_argument('--is_bed', action='store', dest='is_bed', | |
88 metavar='is_bed', default='FALSE', | |
89 help='Is the annotation file a bed file.\ | |
90 This is also a compatible format. The file needs to\ | |
91 be a tab seperated bed with optional fields.\ | |
92 Ex. format: chr\tstart\tend\tName_element\tclass\ | |
93 \tfamily. The class and family should identical to\ | |
94 name_element if not applicable.\ | |
95 Default FALSE change to TRUE') | |
96 args = parser.parse_args() | |
97 | |
98 # parameters | |
99 annotation_file = args.annotation_file | |
100 outputfolder = args.outputfolder | |
101 outputfile_prefix = args.outputprefix | |
102 setup_folder = args.setup_folder | |
103 repeat_bed = setup_folder + os.path.sep + 'repnames.bed' | |
104 unique_mapper_bam = args.alignment_bam | |
105 fastqfile_1 = args.fastqfile | |
106 fastqfile_2 = args.fastqfile2 | |
107 cpus = args.cpus | |
108 b_opt = "-k1 -p " + str(1) + " --quiet" | |
109 simple_repeat = args.collapserepeat | |
110 paired_end = args.pairedend | |
111 allcountmethod = args.allcountmethod | |
112 is_bed = args.is_bed | |
113 | |
114 ############################################################################## | |
115 # check that the programs we need are available | |
116 try: | |
117 subprocess.call(shlex.split("coverageBed -h"), | |
118 stdout=open(os.devnull, 'wb'), | |
119 stderr=open(os.devnull, 'wb')) | |
120 subprocess.call(shlex.split("bowtie --version"), | |
121 stdout=open(os.devnull, 'wb'), | |
122 stderr=open(os.devnull, 'wb')) | |
123 except OSError: | |
124 print("Error: Bowtie or BEDTools not loaded") | |
125 raise | |
126 | |
127 ############################################################################## | |
128 # define a csv reader that reads space deliminated files | |
129 print('Preparing for analysis using RepEnrich...') | |
130 csv.field_size_limit(sys.maxsize) | |
131 | |
132 | |
133 def import_text(filename, separator): | |
134 for line in csv.reader(open(filename), delimiter=separator, | |
135 skipinitialspace=True): | |
136 if line: | |
137 yield line | |
138 | |
139 | |
140 ############################################################################## | |
141 # build dictionaries to convert repclass and rep families' | |
142 if is_bed == "FALSE": | |
143 repeatclass = {} | |
144 repeatfamily = {} | |
145 fin = import_text(annotation_file, ' ') | |
146 x = 0 | |
147 for line in fin: | |
148 if x > 2: | |
149 classfamily = [] | |
150 classfamily = line[10].split(os.path.sep) | |
151 line9 = line[9].replace("(", "_").replace( | |
152 ")", "_").replace("/", "_") | |
153 repeatclass[line9] = classfamily[0] | |
154 if len(classfamily) == 2: | |
155 repeatfamily[line9] = classfamily[1] | |
156 else: | |
157 repeatfamily[line9] = classfamily[0] | |
158 x += 1 | |
159 if is_bed == "TRUE": | |
160 repeatclass = {} | |
161 repeatfamily = {} | |
162 fin = open(annotation_file, 'r') | |
163 for line in fin: | |
164 line = line.strip('\n') | |
165 line = line.split('\t') | |
166 theclass = line[4] | |
167 thefamily = line[5] | |
168 line3 = line[3].replace("(", "_").replace(")", "_").replace("/", "_") | |
169 repeatclass[line3] = theclass | |
170 repeatfamily[line3] = thefamily | |
171 fin.close() | |
172 | |
173 ############################################################################## | |
174 # build list of repeats initializing dictionaries for downstream analysis' | |
175 fin = import_text(setup_folder + os.path.sep + 'repgenomes_key.txt', '\t') | |
176 repeat_key = {} | |
177 rev_repeat_key = {} | |
178 repeat_list = [] | |
179 reptotalcounts = {} | |
180 classfractionalcounts = {} | |
181 familyfractionalcounts = {} | |
182 classtotalcounts = {} | |
183 familytotalcounts = {} | |
184 reptotalcounts_simple = {} | |
185 fractionalcounts = {} | |
186 i = 0 | |
187 for line in fin: | |
188 reptotalcounts[line[0]] = 0 | |
189 fractionalcounts[line[0]] = 0 | |
190 if line[0] in repeatclass: | |
191 classtotalcounts[repeatclass[line[0]]] = 0 | |
192 classfractionalcounts[repeatclass[line[0]]] = 0 | |
193 if line[0] in repeatfamily: | |
194 familytotalcounts[repeatfamily[line[0]]] = 0 | |
195 familyfractionalcounts[repeatfamily[line[0]]] = 0 | |
196 if line[0] in repeatfamily: | |
197 if repeatfamily[line[0]] == simple_repeat: | |
198 reptotalcounts_simple[simple_repeat] = 0 | |
199 else: | |
200 reptotalcounts_simple[line[0]] = 0 | |
201 repeat_list.append(line[0]) | |
202 repeat_key[line[0]] = int(line[1]) | |
203 rev_repeat_key[int(line[1])] = line[0] | |
204 fin.close() | |
205 ############################################################################## | |
206 # map the repeats to the psuedogenomes: | |
207 if not os.path.exists(outputfolder): | |
208 os.mkdir(outputfolder) | |
209 ############################################################################## | |
210 # Conduct the regions sorting | |
211 print('Conducting region sorting on unique mapping reads....') | |
212 fileout = outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt' | |
213 with open(fileout, 'w') as stdout: | |
214 command = shlex.split("coverageBed -abam " + unique_mapper_bam + " -b " + | |
215 setup_folder + os.path.sep + 'repnames.bed') | |
216 p = subprocess.Popen(command, stdout=stdout) | |
217 p.communicate() | |
218 stdout.close() | |
219 filein = open(outputfolder + os.path.sep + outputfile_prefix | |
220 + '_regionsorter.txt', 'r') | |
221 counts = {} | |
222 sumofrepeatreads = 0 | |
223 for line in filein: | |
224 line = line.split('\t') | |
225 if not str(repeat_key[line[3]]) in counts: | |
226 counts[str(repeat_key[line[3]])] = 0 | |
227 counts[str(repeat_key[line[3]])] += int(line[4]) | |
228 sumofrepeatreads += int(line[4]) | |
229 print('Identified ' + str(sumofrepeatreads) + | |
230 'unique reads that mapped to repeats.') | |
231 ############################################################################## | |
232 if paired_end == 'TRUE': | |
233 if not os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'): | |
234 os.mkdir(outputfolder + os.path.sep + 'pair1_bowtie') | |
235 if not os.path.exists(outputfolder + os.path.sep + 'pair2_bowtie'): | |
236 os.mkdir(outputfolder + os.path.sep + 'pair2_bowtie') | |
237 folder_pair1 = outputfolder + os.path.sep + 'pair1_bowtie' | |
238 folder_pair2 = outputfolder + os.path.sep + 'pair2_bowtie' | |
239 ############################################################################## | |
240 print("Processing repeat psuedogenomes...") | |
241 ps = [] | |
242 psb = [] | |
243 ticker = 0 | |
244 for metagenome in repeat_list: | |
245 metagenomepath = setup_folder + os.path.sep + metagenome | |
246 file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie' | |
247 file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie' | |
248 with open(file1, 'w') as stdout: | |
249 command = shlex.split("bowtie " + b_opt + " " + | |
250 metagenomepath + " " + fastqfile_1) | |
251 p = subprocess.Popen(command, stdout=stdout) | |
252 with open(file2, 'w') as stdout: | |
253 command = shlex.split("bowtie " + b_opt + " " + | |
254 metagenomepath + " " + fastqfile_2) | |
255 pp = subprocess.Popen(command, stdout=stdout) | |
256 ps.append(p) | |
257 ticker += 1 | |
258 psb.append(pp) | |
259 ticker += 1 | |
260 if ticker == cpus: | |
261 for p in ps: | |
262 p.communicate() | |
263 for p in psb: | |
264 p.communicate() | |
265 ticker = 0 | |
266 psb = [] | |
267 ps = [] | |
268 if len(ps) > 0: | |
269 for p in ps: | |
270 p.communicate() | |
271 stdout.close() | |
272 | |
273 ############################################################################## | |
274 # combine the output from both read pairs: | |
275 print('sorting and combining the output for both read pairs...') | |
276 if not os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'): | |
277 os.mkdir(outputfolder + os.path.sep + 'sorted_bowtie') | |
278 sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie' | |
279 for metagenome in repeat_list: | |
280 file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie' | |
281 file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie' | |
282 fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie' | |
283 with open(fileout, 'w') as stdout: | |
284 p1 = subprocess.Popen(['cat', file1, file2], | |
285 stdout=subprocess.PIPE) | |
286 p2 = subprocess.Popen(['cut', '-f1', "-d "], stdin=p1.stdout, | |
287 stdout=subprocess.PIPE) | |
288 p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout, | |
289 stdout=subprocess.PIPE) | |
290 p4 = subprocess.Popen(['sort'], stdin=p3.stdout, | |
291 stdout=subprocess.PIPE) | |
292 p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout) | |
293 p5.communicate() | |
294 stdout.close() | |
295 print('completed ...') | |
296 ############################################################################## | |
297 if paired_end == 'FALSE': | |
298 if not os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'): | |
299 os.mkdir(outputfolder + os.path.sep + 'pair1_bowtie') | |
300 folder_pair1 = outputfolder + os.path.sep + 'pair1_bowtie' | |
301 ############################################################################## | |
302 ps = [] | |
303 ticker = 0 | |
304 print("Processing repeat psuedogenomes...") | |
305 for metagenome in repeat_list: | |
306 metagenomepath = setup_folder + os.path.sep + metagenome | |
307 file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie' | |
308 with open(file1, 'w') as stdout: | |
309 command = shlex.split("bowtie " + b_opt + " " + | |
310 metagenomepath + " " + fastqfile_1) | |
311 p = subprocess.Popen(command, stdout=stdout) | |
312 ps.append(p) | |
313 ticker += 1 | |
314 if ticker == cpus: | |
315 for p in ps: | |
316 p.communicate() | |
317 ticker = 0 | |
318 ps = [] | |
319 if len(ps) > 0: | |
320 for p in ps: | |
321 p.communicate() | |
322 stdout.close() | |
323 | |
324 ############################################################################## | |
325 # combine the output from both read pairs: | |
326 print('Sorting and combining the output for both read pairs....') | |
327 if not os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'): | |
328 os.mkdir(outputfolder + os.path.sep + 'sorted_bowtie') | |
329 sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie' | |
330 for metagenome in repeat_list: | |
331 file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie' | |
332 fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie' | |
333 with open(fileout, 'w') as stdout: | |
334 p1 = subprocess.Popen(['cat', file1], stdout=subprocess.PIPE) | |
335 p2 = subprocess.Popen(['cut', '-f1'], stdin=p1.stdout, | |
336 stdout=subprocess.PIPE) | |
337 p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout, | |
338 stdout=subprocess.PIPE) | |
339 p4 = subprocess.Popen(['sort'], stdin=p3.stdout, | |
340 stdout=subprocess.PIPE) | |
341 p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout) | |
342 p5.communicate() | |
343 stdout.close() | |
344 print('completed ...') | |
345 | |
346 ############################################################################## | |
347 # build a file of repeat keys for all reads | |
348 print('Writing and processing intermediate files...') | |
349 sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie' | |
350 readid = {} | |
351 sumofrepeatreads = 0 | |
352 for rep in repeat_list: | |
353 for data in import_text(sorted_bowtie + os.path.sep + | |
354 rep + '.bowtie', '\t'): | |
355 readid[data[0]] = '' | |
356 for rep in repeat_list: | |
357 for data in import_text(sorted_bowtie + os.path.sep | |
358 + rep + '.bowtie', '\t'): | |
359 readid[data[0]] += str(repeat_key[rep]) + str(',') | |
360 for subfamilies in readid.values(): | |
361 if subfamilies not in counts: | |
362 counts[subfamilies] = 0 | |
363 counts[subfamilies] += 1 | |
364 sumofrepeatreads += 1 | |
365 del readid | |
366 print('Identified ' + str(sumofrepeatreads) + | |
367 ' reads that mapped to repeats for unique and multimappers.') | |
368 | |
369 ############################################################################## | |
370 print("Conducting final calculations...") | |
371 | |
372 | |
373 def convert(x): | |
374 ''' | |
375 build a converter to numeric label for repeat and yield a combined list | |
376 of repnames seperated by backslash | |
377 ''' | |
378 x = x.strip(',') | |
379 x = x.split(',') | |
380 global repname | |
381 repname = "" | |
382 for i in x: | |
383 repname = repname + os.path.sep + rev_repeat_key[int(i)] | |
384 | |
385 | |
386 # building the total counts for repeat element enrichment... | |
387 for x in counts.keys(): | |
388 count = counts[x] | |
389 x = x.strip(',') | |
390 x = x.split(',') | |
391 for i in x: | |
392 reptotalcounts[rev_repeat_key[int(i)]] += int(count) | |
393 # building the fractional counts for repeat element enrichment... | |
394 for x in counts.keys(): | |
395 count = counts[x] | |
396 x = x.strip(',') | |
397 x = x.split(',') | |
398 splits = len(x) | |
399 for i in x: | |
400 fractionalcounts[rev_repeat_key[int(i)]] += float( | |
401 numpy.divide(float(count), float(splits))) | |
402 # building categorized table of repeat element enrichment... | |
403 repcounts = {} | |
404 repcounts['other'] = 0 | |
405 for key in counts.keys(): | |
406 convert(key) | |
407 repcounts[repname] = counts[key] | |
408 # building the total counts for class enrichment... | |
409 for key in reptotalcounts.keys(): | |
410 classtotalcounts[repeatclass[key]] += reptotalcounts[key] | |
411 # building total counts for family enrichment... | |
412 for key in reptotalcounts.keys(): | |
413 familytotalcounts[repeatfamily[key]] += reptotalcounts[key] | |
414 # building unique counts table' | |
415 repcounts2 = {} | |
416 for rep in repeat_list: | |
417 if "/" + rep in repcounts: | |
418 repcounts2[rep] = repcounts["/" + rep] | |
419 else: | |
420 repcounts2[rep] = 0 | |
421 # building the fractionalcounts counts for class enrichment... | |
422 for key in fractionalcounts.keys(): | |
423 classfractionalcounts[repeatclass[key]] += fractionalcounts[key] | |
424 # building fractional counts for family enrichment... | |
425 for key in fractionalcounts.keys(): | |
426 familyfractionalcounts[repeatfamily[key]] += fractionalcounts[key] | |
427 | |
428 ############################################################################## | |
429 print('Writing final output and removing intermediate files...') | |
430 # print output to file of the categorized counts and total overlapping counts: | |
431 if allcountmethod == "TRUE": | |
432 fout1 = open(outputfolder + os.path.sep + outputfile_prefix | |
433 + '_total_counts.txt', 'w') | |
434 for key in reptotalcounts.keys(): | |
435 fout1.write(str(key) + '\t' + repeatclass[key] + '\t' + | |
436 repeatfamily[key] + '\t' + str(reptotalcounts[key]) | |
437 + '\n') | |
438 fout2 = open(outputfolder + os.path.sep + outputfile_prefix | |
439 + '_class_total_counts.txt', 'w') | |
440 for key in classtotalcounts.keys(): | |
441 fout2.write(str(key) + '\t' + str(classtotalcounts[key]) + '\n') | |
442 fout3 = open(outputfolder + os.path.sep + outputfile_prefix | |
443 + '_family_total_counts.txt', 'w') | |
444 for key in familytotalcounts.keys(): | |
445 fout3.write(str(key) + '\t' + str(familytotalcounts[key]) + '\n') | |
446 fout4 = open(outputfolder + os.path.sep + outputfile_prefix + | |
447 '_unique_counts.txt', 'w') | |
448 for key in repcounts2.keys(): | |
449 fout4.write(str(key) + '\t' + repeatclass[key] + '\t' + | |
450 repeatfamily[key] + '\t' + str(repcounts2[key]) + '\n') | |
451 fout5 = open(outputfolder + os.path.sep + outputfile_prefix | |
452 + '_class_fraction_counts.txt', 'w') | |
453 for key in classfractionalcounts.keys(): | |
454 fout5.write(str(key) + '\t' + str(classfractionalcounts[key]) + '\n') | |
455 fout6 = open(outputfolder + os.path.sep + outputfile_prefix + | |
456 '_family_fraction_counts.txt', 'w') | |
457 for key in familyfractionalcounts.keys(): | |
458 fout6.write(str(key) + '\t' + str(familyfractionalcounts[key]) + '\n') | |
459 fout7 = open(outputfolder + os.path.sep + outputfile_prefix | |
460 + '_fraction_counts.txt', 'w') | |
461 for key in fractionalcounts.keys(): | |
462 fout7.write(str(key) + '\t' + repeatclass[key] + '\t' + | |
463 repeatfamily[key] + '\t' + str(int(fractionalcounts[key])) | |
464 + '\n') | |
465 fout1.close() | |
466 fout2.close() | |
467 fout3.close() | |
468 fout4.close() | |
469 fout5.close() | |
470 fout6.close() | |
471 fout7.close() | |
472 else: | |
473 fout1 = open(outputfolder + os.path.sep + outputfile_prefix + | |
474 '_class_fraction_counts.txt', 'w') | |
475 for key in classfractionalcounts.keys(): | |
476 fout1.write(str(key) + '\t' + str(classfractionalcounts[key]) + '\n') | |
477 fout2 = open(outputfolder + os.path.sep + outputfile_prefix + | |
478 '_family_fraction_counts.txt', 'w') | |
479 for key in familyfractionalcounts.keys(): | |
480 fout2.write(str(key) + '\t' + str(familyfractionalcounts[key]) + '\n') | |
481 fout3 = open(outputfolder + os.path.sep + outputfile_prefix + | |
482 '_fraction_counts.txt', 'w') | |
483 for key in fractionalcounts.keys(): | |
484 fout3.write(str(key) + '\t' + repeatclass[key] + '\t' + | |
485 repeatfamily[key] + '\t' + str(int(fractionalcounts[key])) | |
486 + '\n') | |
487 fout1.close() | |
488 fout2.close() | |
489 fout3.close() | |
490 ############################################################################## | |
491 # Remove Large intermediate files | |
492 if os.path.exists(outputfolder + os.path.sep + outputfile_prefix + | |
493 '_regionsorter.txt'): | |
494 os.remove(outputfolder + os.path.sep + outputfile_prefix + | |
495 '_regionsorter.txt') | |
496 if os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'): | |
497 shutil.rmtree(outputfolder + os.path.sep + 'pair1_bowtie') | |
498 if os.path.exists(outputfolder + os.path.sep + 'pair2_bowtie'): | |
499 shutil.rmtree(outputfolder + os.path.sep + 'pair2_bowtie') | |
500 if os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'): | |
501 shutil.rmtree(outputfolder + os.path.sep + 'sorted_bowtie') | |
502 print("... Done") |