RepEnrich.py @ 0:f6f0f1e5e940 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
| author | artbio |
|---|---|
| date | Wed, 02 Aug 2017 05:17:29 -0400 |
| parents | |
| children | 15e3e29f310e |
#!/usr/bin/env python

import argparse
import csv
import os
import shlex
import shutil
import subprocess
import sys

import numpy


parser = argparse.ArgumentParser(description='''
             Part II: Conducting the alignments to the pseudogenomes. Before\
             doing this step you will require: 1) a bamfile of the unique\
             alignments, with index; 2) a fastq file of the reads mapping to\
             more than one location. These files can be obtained using the\
             following bowtie options [EXAMPLE: bowtie -S -m 1\
             --max multimap.fastq mm9 mate1_reads.fastq]. Once you have the\
             unique alignment bamfile and the reads mapping to more than one\
             location in a fastq file, you can run this step. EXAMPLE: python\
             master_output.py\
             /users/nneretti/data/annotation/hg19/hg19_repeatmasker.txt\
             /users/nneretti/datasets/repeatmapping/POL3/Pol3_human/
             HeLa_InputChIPseq_Rep1 HeLa_InputChIPseq_Rep1\
             /users/nneretti/data/annotation/hg19/setup_folder\
             HeLa_InputChIPseq_Rep1_multimap.fastq\
             HeLa_InputChIPseq_Rep1.bam''')
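# Illustrative note (added, not part of the pipeline): the inputs described
# above are assumed to come from a prior bowtie 1 run along the lines of the
# example in the description, e.g.
#   bowtie -S -m 1 --max multimap.fastq mm9 mate1_reads.fastq > unique.sam
# with the SAM output then converted to a sorted, indexed BAM. The index name
# 'mm9' and the file names are placeholders, not requirements of this script.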
parser.add_argument('--version', action='version', version='%(prog)s 0.1')
parser.add_argument('annotation_file', action='store',
                    metavar='annotation_file',
                    help='RepeatMasker.org annotation file for your organism.\
                          The file may be downloaded from the\
                          RepeatMasker.org website. Example:\
                          /data/annotation/hg19/hg19_repeatmasker.txt')
parser.add_argument('outputfolder', action='store', metavar='outputfolder',
                    help='Folder that will contain the results.\
                          Example: /outputfolder')
parser.add_argument('outputprefix', action='store', metavar='outputprefix',
                    help='Prefix name for the data.\
                          Example: HeLa_InputChIPseq_Rep1')
parser.add_argument('setup_folder', action='store', metavar='setup_folder',
                    help='Folder that contains the repeat element\
                          pseudogenomes.\
                          Example: /data/annotation/hg19/setup_folder')
parser.add_argument('fastqfile', action='store', metavar='fastqfile',
                    help='Fastq file of the reads that map to multiple\
                          locations. Example: /data/multimap.fastq')
parser.add_argument('alignment_bam', action='store', metavar='alignment_bam',
                    help='Bam file of the reads that map uniquely.\
                          Example: /bamfiles/old.bam')
parser.add_argument('--pairedend', action='store', dest='pairedend',
                    default='FALSE',
                    help='Designate this option for paired-end sequencing.\
                          Default FALSE, change to TRUE')
parser.add_argument('--collapserepeat', action='store', dest='collapserepeat',
                    metavar='collapserepeat', default='Simple_repeat',
                    help='Designate this option to generate a collapsed repeat\
                          type. Uncollapsed output is generated in addition\
                          to the collapsed repeat type. Simple_repeat is the\
                          default, to simplify downstream analysis. You can\
                          change the default to another repeat name to\
                          collapse a separate specific repeat instead, or if\
                          the name of Simple_repeat is different for your\
                          organism. Default Simple_repeat')
parser.add_argument('--fastqfile2', action='store', dest='fastqfile2',
                    metavar='fastqfile2', default='none',
                    help='Enter fastqfile2 when using the paired-end option.\
                          Default none')
parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus',
                    default="1", type=int,
                    help='Enter available cpus per node. The more cpus, the\
                          faster RepEnrich performs. RepEnrich is designed to\
                          work on only one node. Default: "1"')
parser.add_argument('--allcountmethod', action='store', dest='allcountmethod',
                    metavar='allcountmethod', default="FALSE",
                    help='By default the pipeline only outputs the fraction\
                          count method, considered to be the best way to\
                          count multimapped reads. Changing this option will\
                          also include the unique count method (a\
                          conservative count) and the total count method (a\
                          liberal counting strategy). Our evaluation of\
                          simulated data indicated fraction counting is best.\
                          Default = FALSE, change to TRUE')
parser.add_argument('--is_bed', action='store', dest='is_bed',
                    metavar='is_bed', default='FALSE',
                    help='Whether the annotation file is a bed file; this is\
                          also a compatible format. The file needs to be a\
                          tab-separated bed with optional fields. Ex. format:\
                          chr\tstart\tend\tName_element\tclass\tfamily. The\
                          class and family should be identical to\
                          name_element if not applicable.\
                          Default FALSE, change to TRUE')
args = parser.parse_args()

# parameters
annotation_file = args.annotation_file
outputfolder = args.outputfolder
outputfile_prefix = args.outputprefix
setup_folder = args.setup_folder
repeat_bed = setup_folder + os.path.sep + 'repnames.bed'
unique_mapper_bam = args.alignment_bam
fastqfile_1 = args.fastqfile
fastqfile_2 = args.fastqfile2
cpus = args.cpus
b_opt = "-k1 -p " + str(1) + " --quiet"
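# Note on the bowtie options above (added comment): -k1 reports a single
# alignment per read, -p sets the number of bowtie threads (fixed at 1 here;
# the --cpus option instead controls how many bowtie processes run
# concurrently, see the ticker loops below), and --quiet suppresses bowtie's
# progress output.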
simple_repeat = args.collapserepeat
paired_end = args.pairedend
allcountmethod = args.allcountmethod
is_bed = args.is_bed

##############################################################################
# check that the programs we need are available
try:
    subprocess.call(shlex.split("coverageBed -h"),
                    stdout=open(os.devnull, 'wb'),
                    stderr=open(os.devnull, 'wb'))
    subprocess.call(shlex.split("bowtie --version"),
                    stdout=open(os.devnull, 'wb'),
                    stderr=open(os.devnull, 'wb'))
except OSError:
    print("Error: Bowtie or BEDTools not loaded")
    raise

##############################################################################
# define a csv reader that reads space-delimited files
print('Preparing for analysis using RepEnrich...')
csv.field_size_limit(sys.maxsize)


def import_text(filename, separator):
    for line in csv.reader(open(filename), delimiter=separator,
                           skipinitialspace=True):
        if line:
            yield line

##############################################################################
# build dictionaries to map each repeat name to its class and family
if is_bed == "FALSE":
    repeatclass = {}
    repeatfamily = {}
    fin = import_text(annotation_file, ' ')
    x = 0
    for line in fin:
        if x > 2:
            classfamily = []
            classfamily = line[10].split(os.path.sep)
            line9 = line[9].replace("(", "_").replace(
                ")", "_").replace("/", "_")
            repeatclass[line9] = classfamily[0]
            if len(classfamily) == 2:
                repeatfamily[line9] = classfamily[1]
            else:
                repeatfamily[line9] = classfamily[0]
        x += 1
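# Illustrative note (added): the parsing above assumes the standard
# RepeatMasker .out layout in which column 10 (line[9]) is the repeat name
# and column 11 (line[10]) is "class/family"; e.g. a row with
# line[9] == 'L1PA3' and line[10] == 'LINE/L1' yields
# repeatclass['L1PA3'] == 'LINE' and repeatfamily['L1PA3'] == 'L1'.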
if is_bed == "TRUE":
    repeatclass = {}
    repeatfamily = {}
    fin = open(annotation_file, 'r')
    for line in fin:
        line = line.strip('\n')
        line = line.split('\t')
        theclass = line[4]
        thefamily = line[5]
        line3 = line[3].replace("(", "_").replace(")", "_").replace("/", "_")
        repeatclass[line3] = theclass
        repeatfamily[line3] = thefamily
    fin.close()

##############################################################################
# build list of repeats, initializing dictionaries for downstream analysis
fin = import_text(setup_folder + os.path.sep + 'repgenomes_key.txt', '\t')
repeat_key = {}
rev_repeat_key = {}
repeat_list = []
reptotalcounts = {}
classfractionalcounts = {}
familyfractionalcounts = {}
classtotalcounts = {}
familytotalcounts = {}
reptotalcounts_simple = {}
fractionalcounts = {}
i = 0
for line in fin:
    reptotalcounts[line[0]] = 0
    fractionalcounts[line[0]] = 0
    if line[0] in repeatclass:
        classtotalcounts[repeatclass[line[0]]] = 0
        classfractionalcounts[repeatclass[line[0]]] = 0
    if line[0] in repeatfamily:
        familytotalcounts[repeatfamily[line[0]]] = 0
        familyfractionalcounts[repeatfamily[line[0]]] = 0
    if line[0] in repeatfamily:
        if repeatfamily[line[0]] == simple_repeat:
            reptotalcounts_simple[simple_repeat] = 0
    else:
        reptotalcounts_simple[line[0]] = 0
    repeat_list.append(line[0])
    repeat_key[line[0]] = int(line[1])
    rev_repeat_key[int(line[1])] = line[0]
fin.close()
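# Illustrative note (added): repgenomes_key.txt is assumed to be the
# two-column, tab-separated key written by the RepEnrich setup step, mapping
# each repeat pseudogenome name to an integer index, e.g.
#   L1PA3<TAB>0
#   AluY<TAB>1
# (repeat names here are placeholders).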
##############################################################################
# map the repeats to the pseudogenomes:
if not os.path.exists(outputfolder):
    os.mkdir(outputfolder)
##############################################################################
# Conduct the region sorting
print('Conducting region sorting on unique mapping reads....')
fileout = outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt'
with open(fileout, 'w') as stdout:
    command = shlex.split("coverageBed -abam " + unique_mapper_bam + " -b " +
                          setup_folder + os.path.sep + 'repnames.bed')
    p = subprocess.Popen(command, stdout=stdout)
    p.communicate()
    stdout.close()
filein = open(outputfolder + os.path.sep + outputfile_prefix
              + '_regionsorter.txt', 'r')
counts = {}
sumofrepeatreads = 0
for line in filein:
    line = line.split('\t')
    if not str(repeat_key[line[3]]) in counts:
        counts[str(repeat_key[line[3]])] = 0
    counts[str(repeat_key[line[3]])] += int(line[4])
    sumofrepeatreads += int(line[4])
print('Identified ' + str(sumofrepeatreads) +
      ' unique reads that mapped to repeats.')
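# Illustrative note (added): each line of the coverageBed output parsed above
# is assumed to carry the repnames.bed fields followed by the coverage
# columns, so line[3] is the repeat name and line[4] is the number of unique
# reads overlapping that interval; counts are keyed by the repeat's integer
# index from repgenomes_key.txt.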
##############################################################################
if paired_end == 'TRUE':
    if not os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'pair1_bowtie')
    if not os.path.exists(outputfolder + os.path.sep + 'pair2_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'pair2_bowtie')
    folder_pair1 = outputfolder + os.path.sep + 'pair1_bowtie'
    folder_pair2 = outputfolder + os.path.sep + 'pair2_bowtie'
##############################################################################
    print("Processing repeat pseudogenomes...")
    ps = []
    psb = []
    ticker = 0
    for metagenome in repeat_list:
        metagenomepath = setup_folder + os.path.sep + metagenome
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie'
        with open(file1, 'w') as stdout:
            command = shlex.split("bowtie " + b_opt + " " +
                                  metagenomepath + " " + fastqfile_1)
            p = subprocess.Popen(command, stdout=stdout)
        with open(file2, 'w') as stdout:
            command = shlex.split("bowtie " + b_opt + " " +
                                  metagenomepath + " " + fastqfile_2)
            pp = subprocess.Popen(command, stdout=stdout)
        ps.append(p)
        ticker += 1
        psb.append(pp)
        ticker += 1
        # wait for the current batch of bowtie processes once at least
        # `cpus` of them have been launched (>= rather than == so the batch
        # size is respected even though two processes are added per repeat)
        if ticker >= cpus:
            for p in ps:
                p.communicate()
            for p in psb:
                p.communicate()
            ticker = 0
            psb = []
            ps = []
    # wait for any processes left over from the last, partial batch
    if len(ps) > 0:
        for p in ps:
            p.communicate()
    if len(psb) > 0:
        for p in psb:
            p.communicate()
    stdout.close()

##############################################################################
    # combine the output from both read pairs:
    print('Sorting and combining the output for both read pairs...')
    if not os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'sorted_bowtie')
    sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
    for metagenome in repeat_list:
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie'
        fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie'
        with open(fileout, 'w') as stdout:
            p1 = subprocess.Popen(['cat', file1, file2],
                                  stdout=subprocess.PIPE)
            p2 = subprocess.Popen(['cut', '-f1', "-d "], stdin=p1.stdout,
                                  stdout=subprocess.PIPE)
            p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout,
                                  stdout=subprocess.PIPE)
            p4 = subprocess.Popen(['sort'], stdin=p3.stdout,
                                  stdout=subprocess.PIPE)
            p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout)
            p5.communicate()
        stdout.close()
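    # Illustrative note (added): the pipeline above is equivalent to the
    # shell command `cat file1 file2 | cut -f1 -d' ' | cut -f1 -d/ | sort |
    # uniq`; it keeps only the read name from each bowtie hit, strips any /1
    # or /2 mate suffix, and de-duplicates, leaving one line per read that
    # aligned to this pseudogenome.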
    print('completed ...')
##############################################################################
if paired_end == 'FALSE':
    if not os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'pair1_bowtie')
    folder_pair1 = outputfolder + os.path.sep + 'pair1_bowtie'
##############################################################################
    ps = []
    ticker = 0
    print("Processing repeat pseudogenomes...")
    for metagenome in repeat_list:
        metagenomepath = setup_folder + os.path.sep + metagenome
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        with open(file1, 'w') as stdout:
            command = shlex.split("bowtie " + b_opt + " " +
                                  metagenomepath + " " + fastqfile_1)
            p = subprocess.Popen(command, stdout=stdout)
        ps.append(p)
        ticker += 1
        # wait for the current batch once `cpus` bowtie processes are running
        if ticker >= cpus:
            for p in ps:
                p.communicate()
            ticker = 0
            ps = []
    # wait for any processes left over from the last, partial batch
    if len(ps) > 0:
        for p in ps:
            p.communicate()
    stdout.close()

##############################################################################
    # sort the bowtie output and collapse it to one line per read:
    print('Sorting and collapsing the bowtie output....')
    if not os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'sorted_bowtie')
    sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
    for metagenome in repeat_list:
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie'
        with open(fileout, 'w') as stdout:
            p1 = subprocess.Popen(['cat', file1], stdout=subprocess.PIPE)
            p2 = subprocess.Popen(['cut', '-f1'], stdin=p1.stdout,
                                  stdout=subprocess.PIPE)
            p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout,
                                  stdout=subprocess.PIPE)
            p4 = subprocess.Popen(['sort'], stdin=p3.stdout,
                                  stdout=subprocess.PIPE)
            p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout)
            p5.communicate()
        stdout.close()
    print('completed ...')

##############################################################################
# build a file of repeat keys for all reads
print('Writing and processing intermediate files...')
sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
readid = {}
sumofrepeatreads = 0
for rep in repeat_list:
    for data in import_text(sorted_bowtie + os.path.sep +
                            rep + '.bowtie', '\t'):
        readid[data[0]] = ''
for rep in repeat_list:
    for data in import_text(sorted_bowtie + os.path.sep
                            + rep + '.bowtie', '\t'):
        readid[data[0]] += str(repeat_key[rep]) + str(',')
for subfamilies in readid.values():
    if subfamilies not in counts:
        counts[subfamilies] = 0
    counts[subfamilies] += 1
    sumofrepeatreads += 1
del readid
print('Identified ' + str(sumofrepeatreads) +
      ' reads that mapped to repeats (unique and multimapping).')
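# Illustrative note (added): after this step each key of `counts` is a
# comma-terminated string of pseudogenome indices. A read aligning only to
# repeat index 3 contributes to the key '3,', while a read aligning to
# repeats 3 and 7 contributes to the key '3,7,'; the value is the number of
# reads sharing that exact combination.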

##############################################################################
print("Conducting final calculations...")


def convert(x):
    '''
    Convert a comma-separated string of numeric repeat indices into a
    combined repname string, with the repeat names joined by os.path.sep.
    '''
    x = x.strip(',')
    x = x.split(',')
    global repname
    repname = ""
    for i in x:
        repname = repname + os.path.sep + rev_repeat_key[int(i)]

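# Illustrative note (added): convert('3,7,') sets the global `repname` to
# something like '/RepA/RepB', where RepA and RepB are the repeat names that
# repgenomes_key.txt maps to indices 3 and 7 (names here are placeholders);
# os.path.sep is '/' on POSIX systems.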
# building the total counts for repeat element enrichment...
for x in counts.keys():
    count = counts[x]
    x = x.strip(',')
    x = x.split(',')
    for i in x:
        reptotalcounts[rev_repeat_key[int(i)]] += int(count)
# building the fractional counts for repeat element enrichment...
for x in counts.keys():
    count = counts[x]
    x = x.strip(',')
    x = x.split(',')
    splits = len(x)
    for i in x:
        fractionalcounts[rev_repeat_key[int(i)]] += float(
            numpy.divide(float(count), float(splits)))
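# Illustrative note (added): fractional counting splits each multimapping
# read evenly across the repeats it hits, e.g. the key '3,7,' with a count
# of 10 reads adds 10 to the total count of both repeats but only
# 10 / 2 = 5.0 to each repeat's fractional count.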
# building categorized table of repeat element enrichment...
repcounts = {}
repcounts['other'] = 0
for key in counts.keys():
    convert(key)
    repcounts[repname] = counts[key]
# building the total counts for class enrichment...
for key in reptotalcounts.keys():
    classtotalcounts[repeatclass[key]] += reptotalcounts[key]
# building total counts for family enrichment...
for key in reptotalcounts.keys():
    familytotalcounts[repeatfamily[key]] += reptotalcounts[key]
# building unique counts table
repcounts2 = {}
for rep in repeat_list:
    if "/" + rep in repcounts:
        repcounts2[rep] = repcounts["/" + rep]
    else:
        repcounts2[rep] = 0
# building the fractional counts for class enrichment...
for key in fractionalcounts.keys():
    classfractionalcounts[repeatclass[key]] += fractionalcounts[key]
# building fractional counts for family enrichment...
for key in fractionalcounts.keys():
    familyfractionalcounts[repeatfamily[key]] += fractionalcounts[key]

##############################################################################
print('Writing final output and removing intermediate files...')
# print output to file of the categorized counts and total overlapping counts:
if allcountmethod == "TRUE":
    fout1 = open(outputfolder + os.path.sep + outputfile_prefix
                 + '_total_counts.txt', 'w')
    for key in reptotalcounts.keys():
        fout1.write(str(key) + '\t' + repeatclass[key] + '\t' +
                    repeatfamily[key] + '\t' + str(reptotalcounts[key])
                    + '\n')
    fout2 = open(outputfolder + os.path.sep + outputfile_prefix
                 + '_class_total_counts.txt', 'w')
    for key in classtotalcounts.keys():
        fout2.write(str(key) + '\t' + str(classtotalcounts[key]) + '\n')
    fout3 = open(outputfolder + os.path.sep + outputfile_prefix
                 + '_family_total_counts.txt', 'w')
    for key in familytotalcounts.keys():
        fout3.write(str(key) + '\t' + str(familytotalcounts[key]) + '\n')
    fout4 = open(outputfolder + os.path.sep + outputfile_prefix +
                 '_unique_counts.txt', 'w')
    for key in repcounts2.keys():
        fout4.write(str(key) + '\t' + repeatclass[key] + '\t' +
                    repeatfamily[key] + '\t' + str(repcounts2[key]) + '\n')
    fout5 = open(outputfolder + os.path.sep + outputfile_prefix
                 + '_class_fraction_counts.txt', 'w')
    for key in classfractionalcounts.keys():
        fout5.write(str(key) + '\t' + str(classfractionalcounts[key]) + '\n')
    fout6 = open(outputfolder + os.path.sep + outputfile_prefix +
                 '_family_fraction_counts.txt', 'w')
    for key in familyfractionalcounts.keys():
        fout6.write(str(key) + '\t' + str(familyfractionalcounts[key]) + '\n')
    fout7 = open(outputfolder + os.path.sep + outputfile_prefix
                 + '_fraction_counts.txt', 'w')
    for key in fractionalcounts.keys():
        fout7.write(str(key) + '\t' + repeatclass[key] + '\t' +
                    repeatfamily[key] + '\t' + str(int(fractionalcounts[key]))
                    + '\n')
    fout1.close()
    fout2.close()
    fout3.close()
    fout4.close()
    fout5.close()
    fout6.close()
    fout7.close()
else:
    fout1 = open(outputfolder + os.path.sep + outputfile_prefix +
                 '_class_fraction_counts.txt', 'w')
    for key in classfractionalcounts.keys():
        fout1.write(str(key) + '\t' + str(classfractionalcounts[key]) + '\n')
    fout2 = open(outputfolder + os.path.sep + outputfile_prefix +
                 '_family_fraction_counts.txt', 'w')
    for key in familyfractionalcounts.keys():
        fout2.write(str(key) + '\t' + str(familyfractionalcounts[key]) + '\n')
    fout3 = open(outputfolder + os.path.sep + outputfile_prefix +
                 '_fraction_counts.txt', 'w')
    for key in fractionalcounts.keys():
        fout3.write(str(key) + '\t' + repeatclass[key] + '\t' +
                    repeatfamily[key] + '\t' + str(int(fractionalcounts[key]))
                    + '\n')
    fout1.close()
    fout2.close()
    fout3.close()
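# Illustrative note (added): the per-repeat *_fraction_counts.txt and
# *_unique_counts.txt tables written above are tab-separated with the columns
# repeat name, class, family, count; the *_class_* and *_family_* tables have
# two columns, the class or family name and its count.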
##############################################################################
# Remove large intermediate files
if os.path.exists(outputfolder + os.path.sep + outputfile_prefix +
                  '_regionsorter.txt'):
    os.remove(outputfolder + os.path.sep + outputfile_prefix +
              '_regionsorter.txt')
if os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'):
    shutil.rmtree(outputfolder + os.path.sep + 'pair1_bowtie')
if os.path.exists(outputfolder + os.path.sep + 'pair2_bowtie'):
    shutil.rmtree(outputfolder + os.path.sep + 'pair2_bowtie')
if os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'):
    shutil.rmtree(outputfolder + os.path.sep + 'sorted_bowtie')
print("... Done")
