comparison pima_report.py @ 31:a859de7cce94 draft

Uploaded
author greg
date Tue, 27 Jun 2023 13:39:14 +0000 (19 months ago)
parents 134a0879d0b6
children 163260afc1b1
comparison
equal deleted inserted replaced
30:134a0879d0b6 31:a859de7cce94
27 illumina_forward_read_file=None, illumina_reverse_read_file=None, kraken2_report_file=None, 27 illumina_forward_read_file=None, illumina_reverse_read_file=None, kraken2_report_file=None,
28 kraken2_version=None, lrn_risk_amr_file=None, lrn_risk_blacklist_file=None, lrn_risk_vf_file=None, 28 kraken2_version=None, lrn_risk_amr_file=None, lrn_risk_blacklist_file=None, lrn_risk_vf_file=None,
29 minimap2_version=None, mutation_regions_bed_file=None, mutation_regions_tsv_files=None, 29 minimap2_version=None, mutation_regions_bed_file=None, mutation_regions_tsv_files=None,
30 ont_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, read_type=None, 30 ont_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, read_type=None,
31 reference_insertions_file=None, samtools_version=None, varscan_version=None): 31 reference_insertions_file=None, samtools_version=None, varscan_version=None):
32 self.ofh = open("process_log.txt", "w")
33
34 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file))
35 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files))
36 self.ofh.write("analysis_name: %s\n" % str(analysis_name))
37 self.ofh.write("assembler_version: %s\n" % str(assembler_version))
38 self.ofh.write("assembly_fasta_file: %s\n" % str(assembly_fasta_file))
39 self.ofh.write("assembly_name: %s\n" % str(assembly_name))
40 self.ofh.write("bedtools_version: %s\n" % str(bedtools_version))
41 self.ofh.write("blastn_version: %s\n" % str(blastn_version))
42 self.ofh.write("circos_files: %s\n" % str(circos_files))
43 self.ofh.write("compute_sequence_length_file: %s\n" % str(compute_sequence_length_file))
44 self.ofh.write("contig_coverage_file: %s\n" % str(contig_coverage_file))
45 self.ofh.write("dbkey: %s\n" % str(dbkey))
46 self.ofh.write("dnadiff_snps_file: %s\n" % str(dnadiff_snps_file))
47 self.ofh.write("dnadiff_version: %s\n" % str(dnadiff_version))
48 self.ofh.write("errors_file: %s\n" % str(errors_file))
49 self.ofh.write("feature_bed_files: %s\n" % str(feature_bed_files))
50 self.ofh.write("feature_png_files: %s\n" % str(feature_png_files))
51 self.ofh.write("flye_assembly_info_file: %s\n" % str(flye_assembly_info_file))
52 self.ofh.write("gzipped: %s\n" % str(gzipped))
53 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file))
54 self.ofh.write("illumina_forward_read_file: %s\n" % str(illumina_forward_read_file))
55 self.ofh.write("illumina_reverse_read_file: %s\n" % str(illumina_reverse_read_file))
56 self.ofh.write("kraken2_report_file: %s\n" % str(kraken2_report_file))
57 self.ofh.write("kraken2_version: %s\n" % str(kraken2_version))
58 self.ofh.write("lrn_risk_amr_file: %s\n" % str(lrn_risk_amr_file))
59 self.ofh.write("lrn_risk_blacklist_file: %s\n" % str(lrn_risk_blacklist_file))
60 self.ofh.write("lrn_risk_vf_file: %s\n" % str(lrn_risk_vf_file))
61 self.ofh.write("minimap2_version: %s\n" % str(minimap2_version))
62 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file))
63 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files))
64 self.ofh.write("ont_file: %s\n" % str(ont_file))
65 self.ofh.write("pima_css: %s\n" % str(pima_css))
66 self.ofh.write("plasmids_file: %s\n" % str(plasmids_file))
67 self.ofh.write("quast_report_file: %s\n" % str(quast_report_file))
68 self.ofh.write("read_type: %s\n" % str(read_type))
69 self.ofh.write("reference_insertions_file: %s\n" % str(reference_insertions_file))
70 self.ofh.write("samtools_version: %s\n" % str(samtools_version))
71 self.ofh.write("varscan_version: %s\n" % str(varscan_version))
72 32
73 # General 33 # General
74 self.doc = None 34 self.doc = None
75 self.report_md = 'pima_report.md' 35 self.report_md = 'pima_report.md'
76 36
77 # Inputs 37 # Inputs
78 self.amr_deletions_file = amr_deletions_file 38 self.amr_deletions_file = amr_deletions_file
79 self.amr_matrix_files = amr_matrix_files 39 self.amr_matrix_files = amr_matrix_files
80 self.analysis_name = analysis_name.split('_')[0] 40 self.analysis_name = analysis_name.split('_')[0]
81 self.ofh.write("self.analysis_name: %s\n" % str(self.analysis_name))
82 if assembler_version is None: 41 if assembler_version is None:
83 self.assembler_version = 'assembler (version unknown)' 42 self.assembler_version = 'assembler (version unknown)'
84 else: 43 else:
85 if read_type == 'ont': 44 if read_type == 'ont':
86 # Assembler is flye. 45 # Assembler is flye.
215 self.ont_raw_fastq = None 174 self.ont_raw_fastq = None
216 175
217 # Actions 176 # Actions
218 self.did_guppy_ont_fast5 = False 177 self.did_guppy_ont_fast5 = False
219 self.did_qcat_ont_fastq = False 178 self.did_qcat_ont_fastq = False
220 self.ofh.write("self.read_type: %s\n" % str(self.read_type))
221 if self.read_type == 'ONT': 179 if self.read_type == 'ONT':
222 self.info_ont_fastq(self.ont_file) 180 self.info_ont_fastq(self.ont_file)
223 else: 181 else:
224 self.info_illumina_fastq([self.illumina_forward_read_file, self.illumina_reverse_read_file]) 182 self.info_illumina_fastq([self.illumina_forward_read_file, self.illumina_reverse_read_file])
225 self.load_contig_info() 183 self.load_contig_info()
226 184
227 def run_command(self, command): 185 def run_command(self, command):
228 self.ofh.write("\nXXXXXX In run_command, command:\n%s\n\n" % str(command))
229 try: 186 try:
230 return re.split('\\n', subprocess.check_output(command, shell=True).decode('utf-8')) 187 return re.split('\\n', subprocess.check_output(command, shell=True).decode('utf-8'))
231 except Exception: 188 except Exception:
232 message = 'Command %s failed: exiting...' % command 189 message = 'Command %s failed: exiting...' % command
233 sys.exit(message) 190 sys.exit(message)
234 191
235 def format_kmg(self, number, decimals=0): 192 def format_kmg(self, number, decimals=0):
236 self.ofh.write("\nXXXXXX In format_kmg, number:\n%s\n" % str(number))
237 self.ofh.write("XXXXXX In format_kmg, decimals:\n%s\n\n" % str(decimals))
238 if number == 0: 193 if number == 0:
239 return '0' 194 return '0'
240 magnitude_powers = [10**9, 10**6, 10**3, 1] 195 magnitude_powers = [10**9, 10**6, 10**3, 1]
241 magnitude_units = ['G', 'M', 'K', ''] 196 magnitude_units = ['G', 'M', 'K', '']
242 for i in range(len(magnitude_units)): 197 for i in range(len(magnitude_units)):
277 self.assembly = self.load_fasta(self.assembly_fasta_file) 232 self.assembly = self.load_fasta(self.assembly_fasta_file)
278 self.num_assembly_contigs = len(self.assembly) 233 self.num_assembly_contigs = len(self.assembly)
279 self.assembly_size = self.format_kmg(sum([len(x) for x in self.assembly]), decimals=1) 234 self.assembly_size = self.format_kmg(sum([len(x) for x in self.assembly]), decimals=1)
280 235
281 def info_illumina_fastq(self, illumina_read_files): 236 def info_illumina_fastq(self, illumina_read_files):
282 self.ofh.write("\nXXXXXX In info_illumina_fastq\n\n")
283 if self.gzipped: 237 if self.gzipped:
284 opener = 'gunzip -c' 238 opener = 'gunzip -c'
285 else: 239 else:
286 opener = 'cat' 240 opener = 'cat'
287 for fastq_file in illumina_read_files: 241 for fastq_file in illumina_read_files:
288 command = ' '.join([opener, 242 command = ' '.join([opener,
289 fastq_file, 243 fastq_file,
290 '| awk \'{getline;s += length($1);getline;getline;}END{print s/(NR/4)"\t"(NR/4)"\t"s}\'']) 244 '| awk \'{getline;s += length($1);getline;getline;}END{print s/(NR/4)"\t"(NR/4)"\t"s}\''])
291 output = self.run_command(command)
292 self.ofh.write("output:\n%s\n" % str(output))
293 self.ofh.write("re.split('\\t', self.run_command(command)[0]:\n%s\n" % str(re.split('\\t', self.run_command(command)[0])))
294 values = [] 245 values = []
295 for i in re.split('\\t', self.run_command(command)[0]): 246 for i in re.split('\\t', self.run_command(command)[0]):
296 if i == '': 247 if i == '':
297 values.append(float('nan')) 248 values.append(float('nan'))
298 else: 249 else:
299 values.append(float(i)) 250 values.append(float(i))
300 self.ofh.write("values:\n%s\n" % str(values))
301 self.ofh.write("values[0]:\n%s\n" % str(values[0]))
302 self.illumina_length_mean += values[0] 251 self.illumina_length_mean += values[0]
303 self.ofh.write("values[1]:\n%s\n" % str(values[1]))
304 self.illumina_read_count += int(values[1]) 252 self.illumina_read_count += int(values[1])
305 self.ofh.write("values[2]:\n%s\n" % str(values[2]))
306 self.illumina_bases += int(values[2]) 253 self.illumina_bases += int(values[2])
307 self.illumina_length_mean /= 2 254 self.illumina_length_mean /= 2
308 self.illumina_bases = self.format_kmg(self.illumina_bases, decimals=1) 255 self.illumina_bases = self.format_kmg(self.illumina_bases, decimals=1)
309 256
310 def start_doc(self): 257 def start_doc(self):
316 self.doc.new_line() 263 self.doc.new_line()
317 self.doc.new_line('<div style="page-break-after: always;"></div>') 264 self.doc.new_line('<div style="page-break-after: always;"></div>')
318 self.doc.new_line() 265 self.doc.new_line()
319 266
320 def add_run_information(self): 267 def add_run_information(self):
321 self.ofh.write("\nXXXXXX In add_run_information\n\n")
322 self.doc.new_line() 268 self.doc.new_line()
323 self.doc.new_header(1, 'Run information') 269 self.doc.new_header(1, 'Run information')
324 # Tables in md.utils are implemented as a wrapping function. 270 # Tables in md.utils are implemented as a wrapping function.
325 Table_list = [ 271 Table_list = [
326 "Category", 272 "Category",
343 # FIXME: the following doesn't work. 289 # FIXME: the following doesn't work.
344 # self.add_table_of_contents() 290 # self.add_table_of_contents()
345 self.doc.new_line() 291 self.doc.new_line()
346 292
347 def add_ont_library_information(self): 293 def add_ont_library_information(self):
348 self.ofh.write("\nXXXXXX In add_ont_library_information\n\n")
349 if self.ont_n50 is None: 294 if self.ont_n50 is None:
350 return 295 return
351 self.doc.new_line() 296 self.doc.new_line()
352 self.doc.new_header(2, 'ONT library statistics') 297 self.doc.new_header(2, 'ONT library statistics')
353 Table_List = [ 298 Table_List = [
368 ] 313 ]
369 self.doc.new_table(columns=2, rows=7, text=Table_List, text_align='left') 314 self.doc.new_table(columns=2, rows=7, text=Table_List, text_align='left')
370 self.doc.new_line() 315 self.doc.new_line()
371 316
372 def add_illumina_library_information(self): 317 def add_illumina_library_information(self):
373 self.ofh.write("\nXXXXXX In add_illumina_library_information\n\n")
374 if self.illumina_length_mean is None: 318 if self.illumina_length_mean is None:
375 return 319 return
376 self.doc.new_line() 320 self.doc.new_line()
377 self.doc.new_header(2, 'Illumina library statistics') 321 self.doc.new_header(2, 'Illumina library statistics')
378 Table_List = [ 322 Table_List = [
400 if small_contigs.shape[0] > 0: 344 if small_contigs.shape[0] > 0:
401 warning = 'Assembly produced {:d} small contigs ({:s}); assembly may include spurious sequences.'.format(small_contigs.shape[0], ', '.join(small_contigs['contig'])) 345 warning = 'Assembly produced {:d} small contigs ({:s}); assembly may include spurious sequences.'.format(small_contigs.shape[0], ', '.join(small_contigs['contig']))
402 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) 346 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning))
403 347
404 def add_assembly_information(self): 348 def add_assembly_information(self):
405 self.ofh.write("\nXXXXXX In add_assembly_information\n\n")
406 if self.assembly_fasta_file is None: 349 if self.assembly_fasta_file is None:
407 return 350 return
408 self.load_assembly() 351 self.load_assembly()
409 self.doc.new_line() 352 self.doc.new_line()
410 self.doc.new_header(2, 'Assembly statistics') 353 self.doc.new_header(2, 'Assembly statistics')
417 str(self.assembly_size), 360 str(self.assembly_size),
418 ] 361 ]
419 self.doc.new_table(columns=2, rows=3, text=Table_List, text_align='left') 362 self.doc.new_table(columns=2, rows=3, text=Table_List, text_align='left')
420 363
421 def info_ont_fastq(self, fastq_file): 364 def info_ont_fastq(self, fastq_file):
422 self.ofh.write("\nXXXXXX In info_ont_fastq, fastq_file:\n%s\n\n" % str(fastq_file))
423 opener = 'cat' 365 opener = 'cat'
424 if self.gzipped: 366 if self.gzipped:
425 opener = 'gunzip -c' 367 opener = 'gunzip -c'
426 else: 368 else:
427 opener = 'cat' 369 opener = 'cat'
467 return '<br>'.join(out) 409 return '<br>'.join(out)
468 else: 410 else:
469 return string 411 return string
470 412
471 def add_contig_info(self): 413 def add_contig_info(self):
472 self.ofh.write("\nXXXXXX In add_contig_info\n\n")
473 if self.contig_info is None or self.read_type not in self.contig_info.index: 414 if self.contig_info is None or self.read_type not in self.contig_info.index:
474 return 415 return
475 self.doc.new_line() 416 self.doc.new_line()
476 self.doc.new_header(2, 'Assembly coverage by ' + self.read_type) 417 self.doc.new_header(2, 'Assembly coverage by ' + self.read_type)
477 Table_List = ["Contig", "Length (bp)", "Coverage (X)"] 418 Table_List = ["Contig", "Length (bp)", "Coverage (X)"]
481 Table_List = Table_List + formatted.iloc[i, :].values.tolist() 422 Table_List = Table_List + formatted.iloc[i, :].values.tolist()
482 row_count = int(len(Table_List) / 3) 423 row_count = int(len(Table_List) / 3)
483 self.doc.new_table(columns=3, rows=row_count, text=Table_List, text_align='left') 424 self.doc.new_table(columns=3, rows=row_count, text=Table_List, text_align='left')
484 425
485 def add_assembly_notes(self): 426 def add_assembly_notes(self):
486 self.ofh.write("\nXXXXXX In add_assembly_notes\n\n")
487 if len(self.assembly_notes) == 0: 427 if len(self.assembly_notes) == 0:
488 return 428 return
489 self.doc.new_line() 429 self.doc.new_line()
490 self.doc.new_line('<div style="page-break-after: always;"></div>') 430 self.doc.new_line('<div style="page-break-after: always;"></div>')
491 self.doc.new_line() 431 self.doc.new_line()
492 self.doc.new_header(2, self.assembly_notes_title) 432 self.doc.new_header(2, self.assembly_notes_title)
493 for note in self.assembly_notes: 433 for note in self.assembly_notes:
494 self.doc.new_line(note) 434 self.doc.new_line(note)
495 435
496 def add_contamination(self): 436 def add_contamination(self):
497 self.ofh.write("\nXXXXXX In add_contamination\n\n")
498 if self.kraken2_report_file is None: 437 if self.kraken2_report_file is None:
499 return 438 return
500 # Read in the Kraken fractions and pull out the useful parts 439 # Read in the Kraken fractions and pull out the useful parts
501 kraken_fracs = pandas.read_csv(self.kraken2_report_file, delimiter='\t', header=None) 440 kraken_fracs = pandas.read_csv(self.kraken2_report_file, delimiter='\t', header=None)
502 kraken_fracs.index = kraken_fracs.iloc[:, 4].values 441 kraken_fracs.index = kraken_fracs.iloc[:, 4].values
520 self.methods[self.contamination_methods_title] = '' 459 self.methods[self.contamination_methods_title] = ''
521 method = '%s was used to assign the raw reads into taxa.' % self.kraken2_version.rstrip('report') 460 method = '%s was used to assign the raw reads into taxa.' % self.kraken2_version.rstrip('report')
522 self.methods[self.contamination_methods_title] = self.methods[self.contamination_methods_title].append(pandas.Series(method)) 461 self.methods[self.contamination_methods_title] = self.methods[self.contamination_methods_title].append(pandas.Series(method))
523 462
524 def add_alignment(self): 463 def add_alignment(self):
525 self.ofh.write("\nXXXXXX In add_alignment\n\n")
526 if self.quast_report_file is not None: 464 if self.quast_report_file is not None:
527 # Process quast values. 465 # Process quast values.
528 quast_report = pandas.read_csv(self.quast_report_file, header=0, index_col=0, sep='\t') 466 quast_report = pandas.read_csv(self.quast_report_file, header=0, index_col=0, sep='\t')
529 quast_mismatches = int(float(quast_report.loc['# mismatches per 100 kbp', :][0]) * (float(quast_report.loc['Total length (>= 0 bp)', :][0]) / 100000.)) 467 quast_mismatches = int(float(quast_report.loc['# mismatches per 100 kbp', :][0]) * (float(quast_report.loc['Total length (>= 0 bp)', :][0]) / 100000.))
530 quast_indels = int(float(quast_report.loc['# indels per 100 kbp', :][0]) * (float(quast_report.loc['Total length (>= 0 bp)', :][0]) / 100000.)) 468 quast_indels = int(float(quast_report.loc['# indels per 100 kbp', :][0]) * (float(quast_report.loc['Total length (>= 0 bp)', :][0]) / 100000.))
553 for circos_file in self.circos_files: 491 for circos_file in self.circos_files:
554 contig = os.path.basename(circos_file) 492 contig = os.path.basename(circos_file)
555 contig_title = 'Alignment to %s' % contig 493 contig_title = 'Alignment to %s' % contig
556 self.doc.new_line() 494 self.doc.new_line()
557 self.doc.new_header(level=3, title=contig_title) 495 self.doc.new_header(level=3, title=contig_title)
558 self.doc.new_line('Blue color indicates query sequences aligned to the reference sequence, which is shown in yellow') 496 self.doc.new_line('Blue indicates query sequences aligned to the reference sequence, yellow indicates no alignment')
559 self.doc.new_line(self.doc.new_inline_image(text='contig_title', path=os.path.abspath(circos_file))) 497 self.doc.new_line(self.doc.new_inline_image(text='contig_title', path=os.path.abspath(circos_file)))
560 self.doc.new_line('<div style="page-break-after: always;"></div>') 498 self.doc.new_line('<div style="page-break-after: always;"></div>')
561 self.doc.new_line() 499 self.doc.new_line()
562 if self.dbkey == 'ref_genome': 500 if self.dbkey == 'ref_genome':
563 headers = ["* Chromosome - NC_007530.2 Bacillus anthracis str. 'Ames Ancestor', complete sequence", 501 headers = ["* Chromosome - NC_007530.2 Bacillus anthracis str. 'Ames Ancestor', complete sequence",
567 self.methods[self.reference_genome_title] = self.methods[self.reference_genome_title].append(pandas.Series(method)) 505 self.methods[self.reference_genome_title] = self.methods[self.reference_genome_title].append(pandas.Series(method))
568 method = 'The genome assembly was aligned against the reference sequence using %s.' % self.dnadiff_version 506 method = 'The genome assembly was aligned against the reference sequence using %s.' % self.dnadiff_version
569 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method)) 507 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method))
570 508
571 def add_features(self): 509 def add_features(self):
572 self.ofh.write("\nXXXXXX In add_features\n\n")
573 if len(self.feature_bed_files) == 0: 510 if len(self.feature_bed_files) == 0:
574 return 511 return
575 for bbf in self.feature_bed_files: 512 for bbf in self.feature_bed_files:
576 if os.path.getsize(bbf) > 0: 513 if os.path.getsize(bbf) > 0:
577 best = pandas.read_csv(filepath_or_buffer=bbf, sep='\t', header=None) 514 best = pandas.read_csv(filepath_or_buffer=bbf, sep='\t', header=None)
578 self.feature_hits[os.path.basename(bbf)] = best 515 self.feature_hits[os.path.basename(bbf)] = best
579 if len(self.feature_hits) == 0: 516 if len(self.feature_hits) == 0:
580 return 517 return
581 self.ofh.write("self.feature_hits: %s\n" % str(self.feature_hits))
582 self.doc.new_line() 518 self.doc.new_line()
583 self.doc.new_header(level=2, title=self.feature_title) 519 self.doc.new_header(level=2, title=self.feature_title)
584 for feature_name in self.feature_hits.index.tolist(): 520 for feature_name in self.feature_hits.index.tolist():
585 self.ofh.write("feature_name: %s\n" % str(feature_name))
586 features = self.feature_hits[feature_name].copy() 521 features = self.feature_hits[feature_name].copy()
587 self.ofh.write("features: %s\n" % str(features))
588 if features.shape[0] == 0: 522 if features.shape[0] == 0:
589 continue 523 continue
590 features.iloc[:, 1] = features.iloc[:, 1].apply(lambda x: '{:,}'.format(x)) 524 features.iloc[:, 1] = features.iloc[:, 1].apply(lambda x: '{:,}'.format(x))
591 features.iloc[:, 2] = features.iloc[:, 2].apply(lambda x: '{:,}'.format(x)) 525 features.iloc[:, 2] = features.iloc[:, 2].apply(lambda x: '{:,}'.format(x))
592 self.doc.new_line() 526 self.doc.new_line()
593 self.doc.new_header(level=3, title=feature_name) 527 self.doc.new_header(level=3, title=feature_name)
594 if (features.shape[0] == 0): 528 if (features.shape[0] == 0):
595 continue 529 continue
596 for contig in pandas.unique(features.iloc[:, 0]): 530 for contig in pandas.unique(features.iloc[:, 0]):
597 self.ofh.write("contig: %s\n" % str(contig))
598 self.doc.new_line(contig) 531 self.doc.new_line(contig)
599 contig_features = features.loc[(features.iloc[:, 0] == contig), :] 532 contig_features = features.loc[(features.iloc[:, 0] == contig), :]
600 self.ofh.write("contig_features: %s\n" % str(contig_features))
601 Table_List = ['Start', 'Stop', 'Feature', 'Identity (%)', 'Strand'] 533 Table_List = ['Start', 'Stop', 'Feature', 'Identity (%)', 'Strand']
602 for i in range(contig_features.shape[0]): 534 for i in range(contig_features.shape[0]):
603 self.ofh.write("i: %s\n" % str(i))
604 feature = contig_features.iloc[i, :].copy(deep=True) 535 feature = contig_features.iloc[i, :].copy(deep=True)
605 self.ofh.write("feature: %s\n" % str(feature))
606 feature[4] = '{:.3f}'.format(feature[4]) 536 feature[4] = '{:.3f}'.format(feature[4])
607 self.ofh.write("feature[1:].values.tolist(): %s\n" % str(feature[1:].values.tolist()))
608 Table_List = Table_List + feature[1:].values.tolist() 537 Table_List = Table_List + feature[1:].values.tolist()
609 self.ofh.write("Table_List: %s\n" % str(Table_List))
610 row_count = int(len(Table_List) / 5) 538 row_count = int(len(Table_List) / 5)
611 self.ofh.write("row_count: %s\n" % str(row_count))
612 self.doc.new_line() 539 self.doc.new_line()
613 self.ofh.write("Before new_table, len(Table_List):: %s\n" % str(len(Table_List)))
614 self.doc.new_table(columns=5, rows=row_count, text=Table_List, text_align='left') 540 self.doc.new_table(columns=5, rows=row_count, text=Table_List, text_align='left')
615 blastn_version = 'The genome assembly was queried for features using %s.' % self.blastn_version 541 blastn_version = 'The genome assembly was queried for features using %s.' % self.blastn_version
616 bedtools_version = 'Feature hits were clustered using %s and the highest scoring hit for each cluster was reported.' % self.bedtools_version 542 bedtools_version = 'Feature hits were clustered using %s and the highest scoring hit for each cluster was reported.' % self.bedtools_version
617 method = '%s %s' % (blastn_version, bedtools_version) 543 method = '%s %s' % (blastn_version, bedtools_version)
618 self.methods[self.feature_methods_title] = self.methods[self.feature_methods_title].append(pandas.Series(method)) 544 self.methods[self.feature_methods_title] = self.methods[self.feature_methods_title].append(pandas.Series(method))
619 545
620 def add_feature_plots(self): 546 def add_feature_plots(self):
621 self.ofh.write("\nXXXXXX In add_feature_plots\n\n")
622 if len(self.feature_png_files) == 0: 547 if len(self.feature_png_files) == 0:
623 return 548 return
624 self.doc.new_line() 549 self.doc.new_line()
625 self.doc.new_header(level=2, title='Feature Plots') 550 self.doc.new_header(level=2, title='Feature Plots')
626 self.doc.new_paragraph('Only contigs with features are shown') 551 self.doc.new_paragraph('Only contigs with features are shown')
627 for feature_png_file in self.feature_png_files: 552 for feature_png_file in self.feature_png_files:
628 self.doc.new_line(self.doc.new_inline_image(text='Analysis', path=os.path.abspath(feature_png_file))) 553 self.doc.new_line(self.doc.new_inline_image(text='Analysis', path=os.path.abspath(feature_png_file)))
629 554
630 def add_mutations(self): 555 def add_mutations(self):
631 self.ofh.write("\nXXXXXX In add_mutations\n\n")
632 if len(self.mutation_regions_tsv_files) == 0: 556 if len(self.mutation_regions_tsv_files) == 0:
633 return 557 return
634 try: 558 try:
635 mutation_regions = pandas.read_csv(self.mutation_regions_bed_file, sep='\t', header=0, index_col=False) 559 mutation_regions = pandas.read_csv(self.mutation_regions_bed_file, sep='\t', header=0, index_col=False)
636 except Exception: 560 except Exception:
638 return 562 return
639 amr_mutations = pandas.Series(dtype=object) 563 amr_mutations = pandas.Series(dtype=object)
640 for region_i in range(mutation_regions.shape[0]): 564 for region_i in range(mutation_regions.shape[0]):
641 region = mutation_regions.iloc[region_i, :] 565 region = mutation_regions.iloc[region_i, :]
642 region_name = str(region['name']) 566 region_name = str(region['name'])
643 self.ofh.write("Processing mutations for region %s\n" % region_name)
644 region_mutations_tsv_name = '%s_mutations.tsv' % region_name 567 region_mutations_tsv_name = '%s_mutations.tsv' % region_name
645 if region_mutations_tsv_name not in self.mutation_regions_tsv_files: 568 if region_mutations_tsv_name not in self.mutation_regions_tsv_files:
646 continue 569 continue
647 region_mutations_tsv = self.mutation_regions_tsv_files[region_mutations_tsv_name] 570 region_mutations_tsv = self.mutation_regions_tsv_files[region_mutations_tsv_name]
648 try: 571 try:
690 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method)) 613 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method))
691 method = 'Mutations were identified using %s and %s.' % (self.samtools_version, self.varscan_version) 614 method = 'Mutations were identified using %s and %s.' % (self.samtools_version, self.varscan_version)
692 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method)) 615 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method))
693 616
694 def add_amr_matrix(self): 617 def add_amr_matrix(self):
695 self.ofh.write("\nXXXXXX In add_amr_matrix\n\n")
696 # Make sure that we have an AMR matrix to plot 618 # Make sure that we have an AMR matrix to plot
697 if len(self.amr_matrix_files) == 0: 619 if len(self.amr_matrix_files) == 0:
698 return 620 return
699 self.doc.new_line() 621 self.doc.new_line()
700 self.doc.new_header(level=2, title=self.amr_matrix_title) 622 self.doc.new_header(level=2, title=self.amr_matrix_title)
701 self.doc.new_line('AMR genes and mutations with their corresponding drugs') 623 amr_matrix_text = 'AMR genes and mutations with their corresponding drugs: dark blue indicates the presence of a gene/mutation, light blue indicates the absence of a gene/mutation'
624 self.doc.new_line(amr_matrix_text)
702 for amr_matrix_file in self.amr_matrix_files: 625 for amr_matrix_file in self.amr_matrix_files:
703 self.doc.new_line(self.doc.new_inline_image(text='AMR genes and mutations with their corresponding drugs', 626 self.doc.new_line(self.doc.new_inline_image(text=amr_matrix_text, path=os.path.abspath(amr_matrix_file)))
704 path=os.path.abspath(amr_matrix_file)))
705 627
706 def add_large_indels(self): 628 def add_large_indels(self):
707 self.ofh.write("\nXXXXXX In add_large_indels\n\n")
708 large_indels = pandas.Series(dtype='float64') 629 large_indels = pandas.Series(dtype='float64')
709 # Pull in insertions. 630 # Pull in insertions.
710 try: 631 try:
711 reference_insertions = pandas.read_csv(filepath_or_buffer=self.reference_insertions_file, sep='\t', header=None) 632 reference_insertions = pandas.read_csv(filepath_or_buffer=self.reference_insertions_file, sep='\t', header=None)
712 except Exception: 633 except Exception:
749 self.doc.new_line() 670 self.doc.new_line()
750 self.doc.new_line('<div style="page-break-after: always;"></div>') 671 self.doc.new_line('<div style="page-break-after: always;"></div>')
751 self.doc.new_line() 672 self.doc.new_line()
752 673
753 def add_lrn_risk_info(self): 674 def add_lrn_risk_info(self):
754 self.ofh.write("\nXXXXXX In add_lrn_risk_info\n\n")
755 if self.lrn_risk_amr_file is None and self.lrn_risk_blacklist_file is None and self.lrn_risk_vf_file is None: 675 if self.lrn_risk_amr_file is None and self.lrn_risk_blacklist_file is None and self.lrn_risk_vf_file is None:
756 return 676 return
757 if not os.path.isfile(self.lrn_risk_amr_file) and not os.path.isfile(self.lrn_risk_blacklist_file) and not os.path.isfile(self.lrn_risk_vf_file): 677 if not os.path.isfile(self.lrn_risk_amr_file) and not os.path.isfile(self.lrn_risk_blacklist_file) and not os.path.isfile(self.lrn_risk_vf_file):
758 return 678 return
759 if os.path.getsize(self.lrn_risk_amr_file) == 0 and os.path.getsize(self.lrn_risk_blacklist_file) == 0 and os.path.getsize(self.lrn_risk_vf_file) == 0: 679 if os.path.getsize(self.lrn_risk_amr_file) == 0 and os.path.getsize(self.lrn_risk_blacklist_file) == 0 and os.path.getsize(self.lrn_risk_vf_file) == 0:
830 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method)) 750 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method))
831 method = 'Plasmid-to-genome hits were resolved using the pChunks algorithm.' 751 method = 'Plasmid-to-genome hits were resolved using the pChunks algorithm.'
832 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method)) 752 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method))
833 753
834 def add_methods(self): 754 def add_methods(self):
835 self.ofh.write("\nXXXXXX In add_methods\n\n")
836 if len(self.methods) == 0: 755 if len(self.methods) == 0:
837 return 756 return
838 self.doc.new_line() 757 self.doc.new_line()
839 self.doc.new_header(level=2, title=self.methods_title) 758 self.doc.new_header(level=2, title=self.methods_title)
840 for methods_section in self.methods.index.tolist(): 759 for methods_section in self.methods.index.tolist():
845 self.doc.new_paragraph(' '.join(self.methods[methods_section])) 764 self.doc.new_paragraph(' '.join(self.methods[methods_section]))
846 self.doc.new_line('<div style="page-break-after: always;"></div>') 765 self.doc.new_line('<div style="page-break-after: always;"></div>')
847 self.doc.new_line() 766 self.doc.new_line()
848 767
849 def add_summary(self): 768 def add_summary(self):
850 self.ofh.write("\nXXXXXX In add_summary\n\n")
851 # Add summary title 769 # Add summary title
852 self.doc.new_header(level=1, title=self.summary_title) 770 self.doc.new_header(level=1, title=self.summary_title)
853 # First section of Summary 771 # First section of Summary
854 self.doc.new_header(level=1, title='CDC Advisory') 772 self.doc.new_header(level=1, title='CDC Advisory')
855 self.doc.new_paragraph(CDC_ADVISORY) 773 self.doc.new_paragraph(CDC_ADVISORY)
891 text = text.replace("]--##", "") 809 text = text.replace("]--##", "")
892 self.doc.file_data_text = text 810 self.doc.file_data_text = text
893 self.doc.create_md_file() 811 self.doc.create_md_file()
894 812
895 def make_report(self): 813 def make_report(self):
896 self.ofh.write("\nXXXXXX In make_report\n\n")
897 self.start_doc() 814 self.start_doc()
898 self.add_summary() 815 self.add_summary()
899 self.add_contamination() 816 self.add_contamination()
900 self.add_alignment() 817 self.add_alignment()
901 self.add_features() 818 self.add_features()
910 self.make_tex() 827 self.make_tex()
911 # It took me quite a long time to find out that the value of the -t 828 # It took me quite a long time to find out that the value of the -t
912 # (implied) argument in the following command must be 'html' instead of 829 # (implied) argument in the following command must be 'html' instead of
913 # the more logical 'pdf'. see the answer from snsn in this thread: 830 # the more logical 'pdf'. see the answer from snsn in this thread:
914 # https://github.com/jessicategner/pypandoc/issues/186 831 # https://github.com/jessicategner/pypandoc/issues/186
915 self.ofh.write("\nXXXXX In make_report, calling pypandoc.convert_file...\n\n")
916 pypandoc.convert_file(self.report_md, 832 pypandoc.convert_file(self.report_md,
917 'html', 833 'html',
918 extra_args=['--pdf-engine=weasyprint', '-V', '-css=%s' % self.pima_css], 834 extra_args=['--pdf-engine=weasyprint', '-V', '-css=%s' % self.pima_css],
919 outputfile='pima_report.pdf') 835 outputfile='pima_report.pdf')
920 self.ofh.close()
921 836
922 837
923 parser = argparse.ArgumentParser() 838 parser = argparse.ArgumentParser()
924 839
925 parser.add_argument('--amr_deletions_file', action='store', dest='amr_deletions_file', help='AMR deletions BED file') 840 parser.add_argument('--amr_deletions_file', action='store', dest='amr_deletions_file', help='AMR deletions BED file')