comparison pima_report.py @ 1:67d0939b56b0 draft
Uploaded
author | greg |
---|---|
date | Tue, 07 Mar 2023 16:05:14 +0000 |
parents | 0a558f444c98 |
children | 9cb62054a87a |
0:0a558f444c98 | 1:67d0939b56b0 |
---|---|
13 CDC_ADVISORY = 'The analysis and report presented here should be treated as preliminary. Please contact the CDC/BDRD with any results regarding _Bacillus anthracis_.' | 13 CDC_ADVISORY = 'The analysis and report presented here should be treated as preliminary. Please contact the CDC/BDRD with any results regarding _Bacillus anthracis_.' |
14 | 14 |
15 | 15 |
16 class PimaReport: | 16 class PimaReport: |
17 | 17 |
18 def __init__(self, analysis_name=None, assembly_fasta_file=None, assembly_name=None, feature_bed_files=None, feature_png_files=None, | 18 def __init__(self, analysis_name=None, amr_deletions_file=None, amr_matrix_files=None, assembly_fasta_file=None, |
19 contig_coverage_file=None, dbkey=None, gzipped=None, illumina_fastq_file=None, mutation_regions_bed_file=None, | 19 assembly_name=None, compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, |
20 mutation_regions_tsv_files=None, pima_css=None): | 20 dnadiff_snps_file=None, feature_bed_files=None, feature_png_files=None, flye_assembly_info_file=None, |
21 flye_version=None, genome_insertions_file=None, gzipped=None, illumina_fastq_file=None, | |
22 mutation_regions_bed_file=None, mutation_regions_tsv_files=None, pima_css=None, plasmids_file=None, | |
23 reference_insertions_file=None): | |
21 self.ofh = open("process_log.txt", "w") | 24 self.ofh = open("process_log.txt", "w") |
22 | 25 |
26 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) | |
27 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) | |
23 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) | 28 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) |
24 self.ofh.write("assembly_fasta_file: %s\n" % str(assembly_fasta_file)) | 29 self.ofh.write("assembly_fasta_file: %s\n" % str(assembly_fasta_file)) |
25 self.ofh.write("assembly_name: %s\n" % str(assembly_name)) | 30 self.ofh.write("assembly_name: %s\n" % str(assembly_name)) |
31 self.ofh.write("compute_sequence_length_file: %s\n" % str(compute_sequence_length_file)) | |
32 self.ofh.write("contig_coverage_file: %s\n" % str(contig_coverage_file)) | |
33 self.ofh.write("dbkey: %s\n" % str(dbkey)) | |
34 self.ofh.write("dnadiff_snps_file: %s\n" % str(dnadiff_snps_file)) | |
26 self.ofh.write("feature_bed_files: %s\n" % str(feature_bed_files)) | 35 self.ofh.write("feature_bed_files: %s\n" % str(feature_bed_files)) |
27 self.ofh.write("feature_png_files: %s\n" % str(feature_png_files)) | 36 self.ofh.write("feature_png_files: %s\n" % str(feature_png_files)) |
28 self.ofh.write("contig_coverage_file: %s\n" % str(contig_coverage_file)) | 37 self.ofh.write("flye_assembly_info_file: %s\n" % str(flye_assembly_info_file)) |
29 self.ofh.write("dbkey: %s\n" % str(dbkey)) | 38 self.ofh.write("flye_version: %s\n" % str(flye_version)) |
30 self.ofh.write("gzipped: %s\n" % str(gzipped)) | 39 self.ofh.write("gzipped: %s\n" % str(gzipped)) |
40 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file)) | |
31 self.ofh.write("illumina_fastq_file: %s\n" % str(illumina_fastq_file)) | 41 self.ofh.write("illumina_fastq_file: %s\n" % str(illumina_fastq_file)) |
32 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file)) | 42 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file)) |
33 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) | 43 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) |
34 self.ofh.write("pima_css: %s\n" % str(pima_css)) | 44 self.ofh.write("pima_css: %s\n" % str(pima_css)) |
45 self.ofh.write("plasmids_file: %s\n" % str(plasmids_file)) | |
46 # self.ofh.write("reference_genome: %s\n" % str(reference_genome)) | |
47 self.ofh.write("reference_insertions_file: %s\n" % str(reference_insertions_file)) | |
35 | 48 |
36 # General | 49 # General |
37 self.doc = None | 50 self.doc = None |
38 self.report_md = 'pima_report.md' | 51 self.report_md = 'pima_report.md' |
39 | 52 |
40 # Inputs | 53 # Inputs |
54 self.amr_deletions_file = amr_deletions_file | |
55 self.amr_matrix_files = amr_matrix_files | |
41 self.analysis_name = analysis_name | 56 self.analysis_name = analysis_name |
42 self.assembly_fasta_file = assembly_fasta_file | 57 self.assembly_fasta_file = assembly_fasta_file |
43 self.assembly_name = assembly_name | 58 self.assembly_name = assembly_name |
59 self.compute_sequence_length_file = compute_sequence_length_file | |
60 self.contig_coverage_file = contig_coverage_file | |
61 self.dbkey = dbkey | |
62 self.dnadiff_snps_file = dnadiff_snps_file | |
44 self.feature_bed_files = feature_bed_files | 63 self.feature_bed_files = feature_bed_files |
45 self.feature_png_files = feature_png_files | 64 self.feature_png_files = feature_png_files |
46 self.contig_coverage_file = contig_coverage_file | 65 self.flye_assembly_info_file = flye_assembly_info_file |
47 self.dbkey = dbkey | 66 self.flye_version = flye_version |
48 self.gzipped = gzipped | 67 self.gzipped = gzipped |
68 self.genome_insertions_file = genome_insertions_file | |
49 self.illumina_fastq_file = illumina_fastq_file | 69 self.illumina_fastq_file = illumina_fastq_file |
50 self.mutation_regions_bed_file = mutation_regions_bed_file | 70 self.mutation_regions_bed_file = mutation_regions_bed_file |
51 self.mutation_regions_tsv_files = mutation_regions_tsv_files | 71 self.mutation_regions_tsv_files = mutation_regions_tsv_files |
52 self.read_type = 'Illumina' | 72 self.read_type = 'Illumina' |
53 self.ont_bases = None | 73 self.ont_bases = None |
54 self.ont_n50 = None | 74 self.ont_n50 = None |
55 self.ont_read_count = None | 75 self.ont_read_count = None |
56 self.pima_css = pima_css | 76 self.pima_css = pima_css |
77 self.plasmids_file = plasmids_file | |
78 # self.reference_genome = reference_genome | |
79 self.reference_insertions_file = reference_insertions_file | |
57 | 80 |
58 # Titles | 81 # Titles |
59 self.alignment_title = 'Comparison with reference' | 82 self.alignment_title = 'Comparison with reference' |
60 self.alignment_notes_title = 'Alignment notes' | 83 self.alignment_notes_title = 'Alignment notes' |
61 self.amr_matrix_title = 'AMR matrix' | 84 self.amr_matrix_title = 'AMR matrix' |
71 self.large_indel_title = 'Large insertions & deletions' | 94 self.large_indel_title = 'Large insertions & deletions' |
72 self.methods_title = 'Methods' | 95 self.methods_title = 'Methods' |
73 self.mutation_title = 'Mutations found in the sample' | 96 self.mutation_title = 'Mutations found in the sample' |
74 self.mutation_methods_title = 'Mutation screening' | 97 self.mutation_methods_title = 'Mutation screening' |
75 self.plasmid_methods_title = 'Plasmid annotation' | 98 self.plasmid_methods_title = 'Plasmid annotation' |
99 self.plasmid_title = 'Plasmid annotation' | |
76 self.reference_methods_title = 'Reference comparison' | 100 self.reference_methods_title = 'Reference comparison' |
77 self.snp_indel_title = 'SNPs and small indels' | 101 self.snp_indel_title = 'SNPs and small indels' |
78 #self.summary_title = 'Summary' | |
79 self.summary_title = 'Analysis of %s' % analysis_name | 102 self.summary_title = 'Analysis of %s' % analysis_name |
80 | 103 |
81 # Methods | 104 # Methods |
82 self.methods = pandas.Series(dtype='float64') | 105 self.methods = pandas.Series(dtype='float64') |
83 self.methods[self.contamination_methods_title] = pandas.Series(dtype='float64') | 106 self.methods[self.contamination_methods_title] = pandas.Series(dtype='float64') |
96 self.contig_alignment = pandas.Series(dtype=object) | 119 self.contig_alignment = pandas.Series(dtype=object) |
97 | 120 |
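`self.methods` above is used as an ordered map from report section title to a `pandas.Series` of method sentences: each pipeline step appends a sentence, and `add_methods()` later walks the titles to write one subsection apiece. A minimal sketch of that pattern with toy section titles, mirroring the `Series.append` style used throughout this file (`Series.append` is deprecated in newer pandas releases):

```python
import pandas

# Toy illustration of the self.methods pattern: a Series keyed by section
# title whose elements are themselves Series of method sentences.
methods = pandas.Series(dtype=object)
methods['Assembly'] = pandas.Series(dtype=object)
method = 'ONT reads were assembled using Flye.'
methods['Assembly'] = methods['Assembly'].append(pandas.Series(method))
method = 'The genome assembly was polished using Medaka.'
methods['Assembly'] = methods['Assembly'].append(pandas.Series(method))

# add_methods() walks the titles in insertion order and writes each sentence.
for methods_section in methods.index.tolist():
    print(methods_section)
    for sentence in methods[methods_section]:
        print('    %s' % sentence)
```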
98 # Values | 121 # Values |
99 self.assembly_size = 0 | 122 self.assembly_size = 0 |
100 self.contig_info = None | 123 self.contig_info = None |
101 self.did_flye_ont_fastq = False | |
102 self.did_medaka_ont_assembly = False | 124 self.did_medaka_ont_assembly = False |
103 self.feature_hits = pandas.Series(dtype='float64') | 125 self.feature_hits = pandas.Series(dtype='float64') |
104 self.illumina_length_mean = 0 | 126 self.illumina_length_mean = 0 |
105 self.illumina_read_count = 0 | 127 self.illumina_read_count = 0 |
106 self.illumina_bases = 0 | 128 self.illumina_bases = 0 |
107 self.mean_coverage = 0 | 129 self.mean_coverage = 0 |
108 self.num_assembly_contigs = 0 | 130 self.num_assembly_contigs = 0 |
131 # TODO: should the following 2 values be passed as parameters? | |
132 self.ont_n50_min = 2500 | |
133 self.ont_coverage_min = 30 | |
134 self.quast_indels = 0 | |
135 self.quast_mismatches = 0 | |
109 | 136 |
110 # Actions | 137 # Actions |
111 self.did_guppy_ont_fast5 = False | 138 self.did_guppy_ont_fast5 = False |
112 self.did_qcat_ont_fastq = False | 139 self.did_qcat_ont_fastq = False |
113 self.info_illumina_fastq() | 140 self.info_illumina_fastq() |
137 def load_contig_info(self): | 164 def load_contig_info(self): |
138 self.contig_info = pandas.Series(dtype=object) | 165 self.contig_info = pandas.Series(dtype=object) |
139 self.contig_info[self.read_type] = pandas.read_csv(self.contig_coverage_file, header=None, index_col=None, sep='\t').sort_values(1, axis=0, ascending=False) | 166 self.contig_info[self.read_type] = pandas.read_csv(self.contig_coverage_file, header=None, index_col=None, sep='\t').sort_values(1, axis=0, ascending=False) |
140 self.contig_info[self.read_type].columns = ['contig', 'size', 'coverage'] | 167 self.contig_info[self.read_type].columns = ['contig', 'size', 'coverage'] |
141 self.mean_coverage = (self.contig_info[self.read_type].iloc[:, 1] * self.contig_info[self.read_type].iloc[:, 2]).sum() / self.contig_info[self.read_type].iloc[:, 1].sum() | 168 self.mean_coverage = (self.contig_info[self.read_type].iloc[:, 1] * self.contig_info[self.read_type].iloc[:, 2]).sum() / self.contig_info[self.read_type].iloc[:, 1].sum() |
169 if self.mean_coverage <= self.ont_coverage_min: | |
170 warning = '%s mean coverage ({:.0f}X) is less than the recommended minimum ({:.0f}X).'.format(self.mean_coverage, self.ont_coverage_min) % self.read_type | |
171 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | |
172 # Report if some contigs have low coverage. | |
173 low_coverage = self.contig_info[self.read_type].loc[self.contig_info[self.read_type]['coverage'] < self.ont_coverage_min, :] | |
174 if low_coverage.shape[0] > 0: | |
175 for contig_i in range(low_coverage.shape[0]): | |
176 warning = '%s coverage of {:s} ({:.0f}X) is less than the recommended minimum ({:.0f}X).'.format(low_coverage.iloc[contig_i, 0], low_coverage.iloc[contig_i, 2], self.ont_coverage_min) % self.read_type | |
177 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | |
178 # See if some contigs have anomalously low coverage. | |
179 fold_coverage = self.contig_info[self.read_type]['coverage'] / self.mean_coverage | |
180 low_coverage = self.contig_info[self.read_type].loc[fold_coverage < 1 / 5, :] | |
181 if low_coverage.shape[0] > 0: | |
182 for contig_i in range(low_coverage.shape[0]): | |
183 warning = '%s coverage of {:s} ({:.0f}X) is less than 1/5 the mean coverage ({:.0f}X).'.format(low_coverage.iloc[contig_i, 0], low_coverage.iloc[contig_i, 2], self.mean_coverage) % self.read_type | |
184 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | |
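The `mean_coverage` computed above is a length-weighted average of per-contig coverage (column 1 is contig size, column 2 is coverage). For example, a 4 Mb contig at 60X combined with a 50 kb contig at 10X gives (4,000,000 * 60 + 50,000 * 10) / 4,050,000, roughly 59.4X, so a small low-coverage contig barely moves the mean even though it still triggers a per-contig warning. A toy check of the same expression (hypothetical contig names and values):

```python
import pandas

# Toy contig table with the same column layout as contig_coverage_file:
# contig name, size in bp, mean depth of coverage.
contig_info = pandas.DataFrame(
    [['contig_1', 4000000, 60.0], ['contig_2', 50000, 10.0]],
    columns=['contig', 'size', 'coverage'])
# Length-weighted mean coverage, the same expression used in load_contig_info().
mean_coverage = (contig_info.iloc[:, 1] * contig_info.iloc[:, 2]).sum() / contig_info.iloc[:, 1].sum()
print('{:.1f}X'.format(mean_coverage))  # 59.4X
```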
142 | 185 |
143 def load_fasta(self, fasta): | 186 def load_fasta(self, fasta): |
144 sequence = pandas.Series(dtype=object) | 187 sequence = pandas.Series(dtype=object) |
145 for contig in SeqIO.parse(fasta, 'fasta'): | 188 for contig in SeqIO.parse(fasta, 'fasta'): |
146 sequence[contig.id] = contig | 189 sequence[contig.id] = contig |
183 # self.illumina_length_mean /= len(self.illumina_fastq) | 226 # self.illumina_length_mean /= len(self.illumina_fastq) |
184 self.illumina_length_mean /= 1 | 227 self.illumina_length_mean /= 1 |
185 self.illumina_bases = self.format_kmg(self.illumina_bases, decimals=1) | 228 self.illumina_bases = self.format_kmg(self.illumina_bases, decimals=1) |
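`format_kmg` is called here but defined outside this hunk; from its use (a raw base count plus a `decimals` argument) it evidently renders large counts with K/M/G suffixes. The following is only a hypothetical sketch of such a helper, not the actual implementation in pima_report.py:

```python
def format_kmg(number, decimals=0):
    # Hypothetical helper, an assumption about the real format_kmg() defined
    # elsewhere in pima_report.py: render a count with a K/M/G suffix,
    # e.g. format_kmg(1250000000, decimals=1) -> '1.2G'.
    number = float(number)
    for threshold, suffix in ((1e9, 'G'), (1e6, 'M'), (1e3, 'K')):
        if number >= threshold:
            return '{value:.{decimals}f}{suffix}'.format(value=number / threshold, decimals=decimals, suffix=suffix)
    return '{value:.{decimals}f}'.format(value=number, decimals=decimals)
```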
186 | 229 |
187 def start_doc(self): | 230 def start_doc(self): |
188 #header_text = 'Analysis of %s' % self.analysis_name | |
189 self.doc = MdUtils(file_name=self.report_md, title='') | 231 self.doc = MdUtils(file_name=self.report_md, title='') |
190 | 232 |
191 def add_run_information(self): | 233 def add_run_information(self): |
192 self.ofh.write("\nXXXXXX In add_run_information\n\n") | 234 self.ofh.write("\nXXXXXX In add_run_information\n\n") |
193 self.doc.new_line() | 235 self.doc.new_line() |
254 'Illumina bases', | 296 'Illumina bases', |
255 '{:s}'.format(self.illumina_bases) | 297 '{:s}'.format(self.illumina_bases) |
256 ] | 298 ] |
257 self.doc.new_table(columns=2, rows=4, text=Table_List, text_align='left') | 299 self.doc.new_table(columns=2, rows=4, text=Table_List, text_align='left') |
258 | 300 |
301 def evaluate_assembly(self) : | |
302 assembly_info = pandas.read_csv(self.compute_sequence_length_file, sep='\t', header=None) | |
303 assembly_info.columns = ['contig', 'length'] | |
304 self.contig_sizes = assembly_info | |
305 # Take a look at the number of contigs, their sizes, | |
306 # and circularity. Warn if things don't look good. | |
307 if assembly_info.shape[0] > 4: | |
308 warning = 'Assembly produced {:d} contigs, more than usually expected; assembly may be fragmented.'.format(assembly_info.shape[0]) | |
309 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | |
310 small_contigs = assembly_info.loc[assembly_info['length'] <= 3000, :] | |
311 if small_contigs.shape[0] > 0: | |
312 warning = 'Assembly produced {:d} small contigs ({:s}); assembly may include spurious sequences.'.format(small_contigs.shape[0], ', '.join(small_contigs['contig'])) | |
313 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | |
314 | |
259 def add_assembly_information(self): | 315 def add_assembly_information(self): |
260 self.ofh.write("\nXXXXXX In add_assembly_information\n\n") | 316 self.ofh.write("\nXXXXXX In add_assembly_information\n\n") |
261 if self.assembly_fasta_file is None: | 317 if self.assembly_fasta_file is None: |
262 return | 318 return |
263 self.load_assembly() | 319 self.load_assembly() |
289 'END{printf "%d\\t%d\\t%d\\n", n50, (NR - 1), s;exit}\'']) | 345 'END{printf "%d\\t%d\\t%d\\n", n50, (NR - 1), s;exit}\'']) |
290 result = list(re.split('\\t', self.run_command(command)[0])) | 346 result = list(re.split('\\t', self.run_command(command)[0])) |
291 if result[1] == '0': | 347 if result[1] == '0': |
292 self.error_out('No ONT reads found') | 348 self.error_out('No ONT reads found') |
293 ont_n50, ont_read_count, ont_raw_bases = [int(i) for i in result] | 349 ont_n50, ont_read_count, ont_raw_bases = [int(i) for i in result] |
294 | |
295 command = ' '.join([opener, | 350 command = ' '.join([opener, |
296 fastq_file, | 351 fastq_file, |
297 '| awk \'{getline;print length($0);getline;getline;}\'']) | 352 '| awk \'{getline;print length($0);getline;getline;}\'']) |
298 result = self.run_command(command) | 353 result = self.run_command(command) |
299 result = list(filter(lambda x: x != '', result)) | 354 result = list(filter(lambda x: x != '', result)) |
300 ont_read_lengths = [int(i) for i in result] | 355 # TODO: the following are not yet used... |
301 | 356 # ont_read_lengths = [int(i) for i in result] |
302 return ([ont_n50, ont_read_count, ont_raw_bases, ont_read_lengths]) | 357 # ont_bases = self.format_kmg(ont_raw_bases, decimals=1) |
358 if ont_n50 <= self.ont_n50_min: | |
359 warning = 'ONT N50 (%s) is less than the recommended minimum (%s)' % (str(ont_n50), str(self.ont_n50_min)) | |
360 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | |
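The awk pipeline above reports the ONT read-length N50, read count, and total bases in a single pass over the FASTQ, and the N50 is then compared against `ont_n50_min`. For reference, the same N50 statistic computed directly in Python (a sketch of the definition, not a transcription of the awk program):

```python
def read_length_n50(read_lengths):
    # N50: sort the read lengths longest first and return the length at which
    # the running total first reaches half of all sequenced bases.
    total_bases = sum(read_lengths)
    running_total = 0
    for length in sorted(read_lengths, reverse=True):
        running_total += length
        if running_total * 2 >= total_bases:
            return length
    return 0


# The two longest reads (10 kb + 8 kb) already cover half of the 30 kb total,
# so the N50 of this toy read set is 8000.
print(read_length_n50([10000, 8000, 5000, 4000, 3000]))  # 8000
```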
303 | 361 |
304 def wordwrap_markdown(self, string): | 362 def wordwrap_markdown(self, string): |
305 if string: | 363 if string: |
306 if len(string) < 35: | 364 if len(string) < 35: |
307 return string | 365 return string |
345 return | 403 return |
346 self.doc.new_line() | 404 self.doc.new_line() |
347 self.doc.new_line('<div style="page-break-after: always;"></div>') | 405 self.doc.new_line('<div style="page-break-after: always;"></div>') |
348 self.doc.new_line() | 406 self.doc.new_line() |
349 self.doc.new_header(2, self.assembly_notes_title) | 407 self.doc.new_header(2, self.assembly_notes_title) |
350 #for note in self.analysis.assembly_notes: | 408 for note in self.assembly_notes: |
351 # self.doc.new_line(note) | 409 self.doc.new_line(note) |
352 | 410 |
353 def add_contamination(self): | 411 def add_contamination(self): |
354 self.ofh.write("\nXXXXXX In add_contamination\n\n") | 412 self.ofh.write("\nXXXXXX In add_contamination\n\n") |
355 if len(self.kraken_fracs) == 0: | 413 if len(self.kraken_fracs) == 0: |
356 return | 414 return |
382 self.doc.new_header(level=3, title=self.snp_indel_title) | 440 self.doc.new_header(level=3, title=self.snp_indel_title) |
383 Table_1 = [ | 441 Table_1 = [ |
384 "Category", | 442 "Category", |
385 "Quantity", | 443 "Quantity", |
386 'SNPs', | 444 'SNPs', |
387 #'{:,}'.format(self.analysis.quast_mismatches), | 445 '{:,}'.format(self.quast_mismatches), |
388 'NA' | |
389 'Small indels', | 446 'Small indels', |
390 #'{:,}'.format(self.analysis.quast_indels) | 447 '{:,}'.format(self.quast_indels) |
391 'NA' | |
392 ] | 448 ] |
393 self.doc.new_table(columns=2, rows=3, text=Table_1, text_align='left') | 449 self.doc.new_table(columns=2, rows=3, text=Table_1, text_align='left') |
394 self.doc.new_line('<div style="page-break-after: always;"></div>') | 450 self.doc.new_line('<div style="page-break-after: always;"></div>') |
395 self.doc.new_line() | 451 self.doc.new_line() |
396 if len(self.alignment_notes) > 0: | 452 if len(self.alignment_notes) > 0: |
442 for i in range(contig_features.shape[0]): | 498 for i in range(contig_features.shape[0]): |
443 self.ofh.write("i: %s\n" % str(i)) | 499 self.ofh.write("i: %s\n" % str(i)) |
444 feature = contig_features.iloc[i, :].copy(deep=True) | 500 feature = contig_features.iloc[i, :].copy(deep=True) |
445 self.ofh.write("feature: %s\n" % str(feature)) | 501 self.ofh.write("feature: %s\n" % str(feature)) |
446 feature[4] = '{:.3f}'.format(feature[4]) | 502 feature[4] = '{:.3f}'.format(feature[4]) |
447 Table_List = Table_List + feature[1:].values.tolist() | 503 # FIXME: Uncommenting the following line (which is |
504 # https://github.com/appliedbinf/pima_md/blob/main/MarkdownReport.py#L317) | |
505 # will cause the job to fail with this exception: | |
506 # ValueError: columns * rows is not equal to text length | |
507 # Table_List = Table_List + feature[1:].values.tolist() | |
448 self.ofh.write("Table_List: %s\n" % str(Table_List)) | 508 self.ofh.write("Table_List: %s\n" % str(Table_List)) |
449 row_count = int(len(Table_List) / 5) | 509 row_count = int(len(Table_List) / 5) |
450 self.ofh.write("row_count: %s\n" % str(row_count)) | 510 self.ofh.write("row_count: %s\n" % str(row_count)) |
451 self.doc.new_line() | 511 self.doc.new_line() |
452 self.doc.new_table(columns=7, rows=row_count, text=Table_List, text_align='left') | 512 self.ofh.write("Before new_table, len(Table_List): %s\n" % str(len(Table_List))) |
513 self.doc.new_table(columns=5, rows=row_count, text=Table_List, text_align='left') | |
453 blastn_version = 'The genome assembly was queried for features using blastn.' | 514 blastn_version = 'The genome assembly was queried for features using blastn.' |
454 bedtools_version = 'Feature hits were clustered using bedtools and the highest scoring hit for each cluster was reported.' | 515 bedtools_version = 'Feature hits were clustered using bedtools and the highest scoring hit for each cluster was reported.' |
455 method = '%s %s' % (blastn_version, bedtools_version) | 516 method = '%s %s' % (blastn_version, bedtools_version) |
456 self.methods[self.feature_methods_title] = self.methods[self.feature_methods_title].append(pandas.Series(method)) | 517 self.methods[self.feature_methods_title] = self.methods[self.feature_methods_title].append(pandas.Series(method)) |
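The FIXME above concerns mdutils' `new_table`, which raises `ValueError: columns * rows is not equal to text length` whenever the flat list of cells does not match the declared shape; appending seven-column feature rows under a five-column header is exactly that mismatch. A minimal standalone sketch of keeping the declared shape and the cell list consistent (hypothetical column names and feature values, using the same mdutils calls this file already relies on):

```python
from mdutils.mdutils import MdUtils

doc = MdUtils(file_name='new_table_example.md', title='')
header = ['Start', 'Stop', 'Feature', 'Identity', 'Strand']
features = [
    ['100', '250', 'geneA', '99.100', '+'],
    ['400', '900', 'geneB', '97.250', '-'],
]
# new_table() takes one flat list of cells and checks that columns * rows
# equals len(text), so trim every row to the header width before extending
# the flat cell list.
table_cells = list(header)
for row in features:
    table_cells.extend(row[:len(header)])
doc.new_table(columns=len(header), rows=len(table_cells) // len(header), text=table_cells, text_align='left')
doc.create_md_file()
```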
457 | 518 |
467 | 528 |
468 def add_mutations(self): | 529 def add_mutations(self): |
469 self.ofh.write("\nXXXXXX In add_mutations\n\n") | 530 self.ofh.write("\nXXXXXX In add_mutations\n\n") |
470 if len(self.mutation_regions_tsv_files) == 0: | 531 if len(self.mutation_regions_tsv_files) == 0: |
471 return | 532 return |
472 try: | 533 try : |
473 mutation_regions = pandas.read_csv(self.mutation_regions_bed_file, sep='\t', header=0, index_col=False) | 534 mutation_regions = pandas.read_csv(self.mutation_regions_bed_file, sep='\t', header=0, index_col=False) |
474 except Exception: | 535 except Exception: |
475 # Likely an empty file. | 536 # Likely an empty file. |
476 return | 537 return |
538 # TODO: this is the only place where reference_genome is used, | |
539 # so I'm commenting it out for now. We need to confirm whether these | |
540 # checks, which require the reference genome to be passed, are necessary. | |
541 # If so, we'll need to implement data tables in this tool. | |
542 # Make sure that the positions in the BED file fall within | |
543 # the chromosomes provided in the reference sequence. | |
544 """ | |
545 for mutation_region in range(mutation_regions.shape[0]): | |
546 mutation_region = mutation_regions.iloc[mutation_region, :] | |
547 if not (mutation_region[0] in self.reference_genome): | |
548 self.ofh.write("\nMutation region: %s not found in reference genome.\n" % ' '.join(mutation_region.astype(str))) | |
549 continue | |
550 if not isinstance(mutation_region[1], int): | |
551 self.ofh.write("\nNon-integer found in mutation region start (column 2): %s.\n" % str(mutation_region[1])) | |
552 break | |
553 elif not isinstance(mutation_region[2], int): | |
554 self.ofh.write("\nNon-integer found in mutation region end (column 3): %s.\n" % str(mutation_region[2])) | |
555 break | |
556 if mutation_region[1] <= 0 or mutation_region[2] <= 0: | |
557 self.ofh.write("\nMutation region %s starts before the reference sequence.\n" % ' '.join(mutation_region.astype(str))) | |
558 if mutation_region[1] > len(self.reference_genome[mutation_region[0]].seq) or mutation_region[2] > len(self.reference_genome[mutation_region[0]].seq): | |
559 self.ofh.write("\nMutation region %s ends after the reference sequence.\n" % ' '.join(mutation_region.astype(str))) | |
560 """ | |
477 amr_mutations = pandas.Series(dtype=object) | 561 amr_mutations = pandas.Series(dtype=object) |
478 for region_i in range(mutation_regions.shape[0]): | 562 for region_i in range(mutation_regions.shape[0]): |
479 region = mutation_regions.iloc[region_i, :] | 563 region = mutation_regions.iloc[region_i, :] |
480 region_name = str(region['name']) | 564 region_name = str(region['name']) |
481 self.ofh.write("Processing mutations for region %s\n" % region_name) | 565 self.ofh.write("Processing mutations for region %s\n" % region_name) |
482 region_mutations_tsv_name = '%s_mutations.tsv' % region_name | 566 region_mutations_tsv_name = '%s_mutations.tsv' % region_name |
483 if region_mutations_tsv_name not in self.mutation_regions_tsv_files: | 567 if region_mutations_tsv_name not in self.mutation_regions_tsv_files: |
484 continue | 568 continue |
485 region_mutations_tsv = self.mutation_regions_tsv_files[region_mutations_tsv_name] | 569 region_mutations_tsv = self.mutation_regions_tsv_files[region_mutations_tsv_name] |
486 try: | 570 try : |
487 region_mutations = pandas.read_csv(region_mutations_tsv, sep='\t', header=0, index_col=False) | 571 region_mutations = pandas.read_csv(region_mutations_tsv, sep='\t', header=0, index_col=False) |
488 except Exception: | 572 except Exception: |
489 region_mutations = pandas.DataFrame() | 573 region_mutations = pandas.DataFrame() |
490 if region_mutations.shape[0] == 0: | 574 if region_mutations.shape[0] == 0: |
491 continue | 575 continue |
492 # Figure out what kind of mutations are in this region | 576 # Figure out what kind of mutations are in this region. |
493 region_mutation_types = pandas.Series(['snp'] * region_mutations.shape[0], name='TYPE', index=region_mutations.index) | 577 region_mutation_types = pandas.Series(['snp'] * region_mutations.shape[0], name='TYPE', index=region_mutations.index) |
494 region_mutation_types[region_mutations['REF'].str.len() != region_mutations['ALT'].str.len()] = 'small-indel' | 578 region_mutation_types[region_mutations['REF'].str.len() != region_mutations['ALT'].str.len()] = 'small-indel' |
495 region_mutation_drugs = pandas.Series([region['drug']] * region_mutations.shape[0], name='DRUG', index=region_mutations.index) | 579 region_mutation_drugs = pandas.Series([region['drug']] * region_mutations.shape[0], name='DRUG', index=region_mutations.index) |
496 region_notes = pandas.Series([region['note']] * region_mutations.shape[0], name='NOTE', index=region_mutations.index) | 580 region_notes = pandas.Series([region['note']] * region_mutations.shape[0], name='NOTE', index=region_mutations.index) |
497 region_mutations = pandas.concat([region_mutations, region_mutation_types, region_mutation_drugs, region_notes], axis=1) | 581 region_mutations = pandas.concat([region_mutations, region_mutation_types, region_mutation_drugs, region_notes], axis=1) |
519 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method)) | 603 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method)) |
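Earlier in `add_mutations()`, each variant is typed by comparing allele lengths: a REF/ALT pair of equal length is reported as a SNP and an unequal pair as a small indel. The same rule applied to toy data (hypothetical alleles):

```python
import pandas

# Toy variant table: equal-length REF/ALT -> 'snp', otherwise 'small-indel'.
region_mutations = pandas.DataFrame({'REF': ['A', 'AT', 'G'], 'ALT': ['G', 'A', 'GTT']})
types = pandas.Series(['snp'] * region_mutations.shape[0], name='TYPE', index=region_mutations.index)
types[region_mutations['REF'].str.len() != region_mutations['ALT'].str.len()] = 'small-indel'
print(pandas.concat([region_mutations, types], axis=1))  # snp, small-indel, small-indel
```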
520 | 604 |
521 def add_amr_matrix(self): | 605 def add_amr_matrix(self): |
522 self.ofh.write("\nXXXXXX In add_amr_matrix\n\n") | 606 self.ofh.write("\nXXXXXX In add_amr_matrix\n\n") |
523 # Make sure that we have an AMR matrix to plot | 607 # Make sure that we have an AMR matrix to plot |
524 #if not getattr(self.analysis, 'did_draw_amr_matrix', False): | 608 if len(self.amr_matrix_files) == 0: |
525 # return | 609 return |
526 #amr_matrix_png = self.analysis.amr_matrix_png | 610 self.doc.new_line() |
527 #self.doc.new_line() | 611 self.doc.new_header(level=2, title=self.amr_matrix_title) |
528 #self.doc.new_header(level=2, title=self.amr_matrix_title) | 612 self.doc.new_line('AMR genes and mutations with their corresponding drugs') |
529 #self.doc.new_line('AMR genes and mutations with their corresponding drugs.') | 613 for amr_matrix_file in self.amr_matrix_files: |
530 #self.doc.new_line( | 614 self.doc.new_line(self.doc.new_inline_image(text='AMR genes and mutations with their corresponding drugs', |
531 # self.doc.new_inline_image( | 615 path=os.path.abspath(amr_matrix_file))) |
532 # text='AMR genes and mutations with their corresponding drugs', | |
533 # path=amr_matrix_png | |
534 # ) | |
535 #) | |
536 pass | |
537 | 616 |
538 def add_large_indels(self): | 617 def add_large_indels(self): |
539 self.ofh.write("\nXXXXXX In add_large_indels\n\n") | 618 self.ofh.write("\nXXXXXX In add_large_indels\n\n") |
540 # Make sure we looked for mutations | 619 large_indels = pandas.Series(dtype='float64') |
541 #if len(self.analysis.large_indels) == 0: | 620 # Pull in insertions. |
542 # return | 621 try: |
543 #large_indels = self.analysis.large_indels | 622 reference_insertions = pandas.read_csv(filepath_or_buffer=self.reference_insertions_file, sep='\t', header=None) |
544 #self.doc.new_line() | 623 except Exception: |
545 #self.doc.new_header(level=2, title=self.large_indel_title) | 624 reference_insertions = pandas.DataFrame() |
546 #for genome in ['Reference insertions', 'Query insertions']: | 625 try: |
547 # genome_indels = large_indels[genome].copy() | 626 genome_insertions = pandas.read_csv(filepath_or_buffer=self.genome_insertions_file, sep='\t', header=None) |
548 # self.doc.new_line() | 627 except Exception: |
549 # self.doc.new_header(level=3, title=genome) | 628 genome_insertions = pandas.DataFrame() |
550 # if (genome_indels.shape[0] == 0): | 629 large_indels['Reference insertions'] = reference_insertions |
551 # continue | 630 large_indels['Query insertions'] = genome_insertions |
552 # genome_indels.iloc[:, 1] = genome_indels.iloc[:, 1].apply(lambda x: '{:,}'.format(x)) | 631 # TODO: we don't seem to be reporting snps and deletions for some reason... |
553 # genome_indels.iloc[:, 2] = genome_indels.iloc[:, 2].apply(lambda x: '{:,}'.format(x)) | 632 # Pull in the number of SNPs and small indels. |
554 # genome_indels.iloc[:, 3] = genome_indels.iloc[:, 3].apply(lambda x: '{:,}'.format(x)) | 633 try: |
555 # Table_List = [ | 634 snps = pandas.read_csv(filepath_or_buffer=self.dnadiff_snps_file, sep='\t', header=None) |
556 # 'Reference contig', 'Start', 'Stop', 'Size (bp)' | 635 # TODO: the following is not used... |
557 # ] | 636 # small_indels = snps.loc[(snps.iloc[:, 1] == '.') | (snps.iloc[:, 2] == '.'), :] |
558 # for i in range(genome_indels.shape[0]): | 637 snps = snps.loc[(snps.iloc[:, 1] != '.') & (snps.iloc[:, 2] != '.'), :] |
559 # Table_List = Table_List + genome_indels.iloc[i, :].values.tolist() | 638 except Exception: |
560 # row_count = int(len(Table_List) / 4) | 639 snps = pandas.DataFrame() |
561 # self.doc.new_table(columns=4, rows=row_count, text=Table_List, text_align='left') | 640 # Pull in deletions. |
562 #method = 'Large insertions or deletions were found as the complement of aligned regions using bedtools.' | 641 try: |
563 #self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append( | 642 amr_deletions = pandas.read_csv(filepath_or_buffer=self.amr_deletions_file, sep='\t', header=None) |
564 # pandas.Series(method)) | 643 except Exception: |
565 #self.doc.new_line() | 644 amr_deletions = pandas.DataFrame() |
566 #self.doc.new_line('<div style="page-break-after: always;"></div>') | 645 if amr_deletions.shape[0] > 0: |
567 #self.doc.new_line() | 646 amr_deletions.columns = ['contig', 'start', 'stop', 'name', 'type', 'drug', 'note'] |
568 pass | 647 amr_deletions = amr_deletions.loc[amr_deletions['type'].isin(['large-deletion', 'any']), :] |
648 self.doc.new_line() | |
649 self.doc.new_header(level=2, title=self.large_indel_title) | |
650 for genome in ['Reference insertions', 'Query insertions']: | |
651 genome_indels = large_indels[genome].copy() | |
652 self.doc.new_line() | |
653 self.doc.new_header(level=3, title=genome) | |
654 if (genome_indels.shape[0] == 0): | |
655 continue | |
656 genome_indels.iloc[:, 1] = genome_indels.iloc[:, 1].apply(lambda x: '{:,}'.format(x)) | |
657 genome_indels.iloc[:, 2] = genome_indels.iloc[:, 2].apply(lambda x: '{:,}'.format(x)) | |
658 genome_indels.iloc[:, 3] = genome_indels.iloc[:, 3].apply(lambda x: '{:,}'.format(x)) | |
659 Table_List = [ | |
660 'Reference contig', 'Start', 'Stop', 'Size (bp)' | |
661 ] | |
662 for i in range(genome_indels.shape[0]): | |
663 Table_List = Table_List + genome_indels.iloc[i, :].values.tolist() | |
664 row_count = int(len(Table_List) / 4) | |
665 self.doc.new_table(columns=4, rows=row_count, text=Table_List, text_align='left') | |
666 method = 'Large insertions or deletions were found as the complement of aligned regions using bedtools.' | |
667 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method)) | |
668 self.doc.new_line() | |
669 self.doc.new_line('<div style="page-break-after: always;"></div>') | |
670 self.doc.new_line() | |
569 | 671 |
570 def add_plasmids(self): | 672 def add_plasmids(self): |
571 self.ofh.write("\nXXXXXX In add_plasmids\n\n") | 673 try : |
572 """ | 674 plasmids = pandas.read_csv(filepath_or_buffer=self.plasmids_file, sep='\t', header=0) |
573 if not getattr(self.analysis, 'did_call_plasmids', False): | 675 except Exception: |
574 return | |
575 # Make sure we looked for mutations | |
576 #plasmids = self.analysis.plasmids | |
577 if plasmids is None: | |
578 return | 676 return |
579 plasmids = plasmids.copy() | 677 plasmids = plasmids.copy() |
580 self.doc.new_line() | 678 self.doc.new_line() |
581 #self.doc.new_header(level=2, title=self.analysis.plasmid_title) | 679 self.doc.new_header(level=2, title=self.plasmid_title) |
582 if (plasmids.shape[0] == 0): | 680 if (plasmids.shape[0] == 0): |
583 self.doc.new_line('None') | 681 self.doc.new_line('None') |
584 return | 682 return |
585 plasmids.iloc[:, 3] = plasmids.iloc[:, 3].apply(lambda x: '{:,}'.format(x)) | 683 plasmids.iloc[:, 3] = plasmids.iloc[:, 3].apply(lambda x: '{:,}'.format(x)) |
586 plasmids.iloc[:, 4] = plasmids.iloc[:, 4].apply(lambda x: '{:,}'.format(x)) | 684 plasmids.iloc[:, 4] = plasmids.iloc[:, 4].apply(lambda x: '{:,}'.format(x)) |
587 plasmids.iloc[:, 5] = plasmids.iloc[:, 5].apply(lambda x: '{:,}'.format(x)) | 685 plasmids.iloc[:, 5] = plasmids.iloc[:, 5].apply(lambda x: '{:,}'.format(x)) |
588 Table_List = [ | 686 Table_List = ['Genome contig', 'Plasmid hit', 'Plasmid acc.', 'Contig size', 'Aligned', 'Plasmid size'] |
589 'Genome contig', | |
590 'Plasmid hit', | |
591 'Plasmid acc.', | |
592 'Contig size', | |
593 'Aligned', | |
594 'Plasmid size' | |
595 ] | |
596 for i in range(plasmids.shape[0]): | 687 for i in range(plasmids.shape[0]): |
597 Table_List = Table_List + plasmids.iloc[i, 0:6].values.tolist() | 688 Table_List = Table_List + plasmids.iloc[i, 0:6].values.tolist() |
598 row_count = int(len(Table_List) / 6) | 689 row_count = int(len(Table_List) / 6) |
599 self.doc.new_table(columns=6, rows=row_count, text=Table_List, text_align='left') | 690 self.doc.new_table(columns=6, rows=row_count, text=Table_List, text_align='left') |
600 method = 'The plasmid reference database was queried against the genome assembly using minimap2.' | 691 method = 'The plasmid reference database was queried against the genome assembly using minimap2.' |
601 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method)) | 692 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method)) |
602 method = 'The resulting SAM was converted to a PSL using a custom version of sam2psl.' | 693 method = 'The resulting SAM was converted to a PSL using a custom version of sam2psl.' |
603 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method)) | 694 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method)) |
604 method = 'Plasmid-to-genome hits were resolved using the pChunks algorithm.' | 695 method = 'Plasmid-to-genome hits were resolved using the pChunks algorithm.' |
605 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method)) | 696 self.methods[self.plasmid_methods_title] = self.methods[self.plasmid_methods_title].append(pandas.Series(method)) |
606 """ | |
607 pass | |
608 | 697 |
609 def add_methods(self): | 698 def add_methods(self): |
610 self.ofh.write("\nXXXXXX In add_methods\n\n") | 699 self.ofh.write("\nXXXXXX In add_methods\n\n") |
611 self.doc.new_line('<div style="page-break-after: always;"></div>') | 700 self.doc.new_line('<div style="page-break-after: always;"></div>') |
612 self.doc.new_line() | 701 self.doc.new_line() |
613 if len(self.methods) == 0: | 702 if len(self.methods) == 0: |
614 return | 703 return |
615 self.doc.new_line() | 704 self.doc.new_line() |
616 self.doc.new_header(level=2, title=self.methods_title) | 705 self.doc.new_header(level=2, title=self.methods_title) |
617 | |
618 for methods_section in self.methods.index.tolist(): | 706 for methods_section in self.methods.index.tolist(): |
619 if self.methods[methods_section] is None or len(self.methods[methods_section]) == 0: | 707 if self.methods[methods_section] is None or len(self.methods[methods_section]) == 0: |
620 continue | 708 continue |
621 self.doc.new_line() | 709 self.doc.new_line() |
622 self.doc.new_header(level=3, title=methods_section) | 710 self.doc.new_header(level=3, title=methods_section) |
637 methods += ['ONT reads were basecalled using guppy'] | 725 methods += ['ONT reads were basecalled using guppy'] |
638 if self.did_qcat_ont_fastq: | 726 if self.did_qcat_ont_fastq: |
639 methods += ['ONT reads were demultiplexed and trimmed using qcat'] | 727 methods += ['ONT reads were demultiplexed and trimmed using qcat'] |
640 self.methods[self.basecalling_methods_title] = pandas.Series(methods) | 728 self.methods[self.basecalling_methods_title] = pandas.Series(methods) |
641 self.add_illumina_library_information() | 729 self.add_illumina_library_information() |
730 self.add_contig_info() | |
731 self.evaluate_assembly() | |
642 self.add_assembly_information() | 732 self.add_assembly_information() |
643 self.add_contig_info() | 733 if self.flye_assembly_info_file is not None: |
644 self.add_assembly_notes() | 734 method = 'ONT reads were assembled using %s.' % self.flye_version |
645 if self.did_flye_ont_fastq: | |
646 method = 'ONT reads were assembled using Flye.' | |
647 self.methods[self.assembly_methods_title] = self.methods[self.assembly_methods_title].append(pandas.Series(method)) | 735 self.methods[self.assembly_methods_title] = self.methods[self.assembly_methods_title].append(pandas.Series(method)) |
736 # Pull in the assembly summary and look at the coverage. | |
737 assembly_info = pandas.read_csv(self.flye_assembly_info_file, header=0, index_col=0, sep='\t') | |
738 # Look for non-circular contigs. | |
739 open_contigs = assembly_info.loc[assembly_info['circ.'] == 'N', :] | |
740 if open_contigs.shape[0] > 0: | |
741 open_contig_ids = open_contigs.index.values | |
742 warning = 'Flye reported {:d} open contigs ({:s}); assembly may be incomplete.'.format(open_contigs.shape[0], ', '.join(open_contig_ids)) | |
743 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | |
648 if self.did_medaka_ont_assembly: | 744 if self.did_medaka_ont_assembly: |
649 method = 'The genome assembly was polished using ONT reads and Medaka.' | 745 method = 'The genome assembly was polished using ONT reads and Medaka.' |
650 self.methods[self.assembly_methods_title] = self.methods[self.assembly_methods_title].append(pandas.Series(method)) | 746 self.methods[self.assembly_methods_title] = self.methods[self.assembly_methods_title].append(pandas.Series(method)) |
747 self.info_ont_fastq(self.illumina_fastq_file) | |
748 self.add_assembly_notes() | |
651 | 749 |
652 def make_tex(self): | 750 def make_tex(self): |
653 self.doc.new_table_of_contents(table_title='detailed run information', depth=2, marker="tableofcontents") | 751 self.doc.new_table_of_contents(table_title='detailed run information', depth=2, marker="tableofcontents") |
654 text = self.doc.file_data_text | 752 text = self.doc.file_data_text |
655 text = text.replace("##--[", "") | 753 text = text.replace("##--[", "") |
664 self.add_contamination() | 762 self.add_contamination() |
665 self.add_alignment() | 763 self.add_alignment() |
666 self.add_features() | 764 self.add_features() |
667 self.add_feature_plots() | 765 self.add_feature_plots() |
668 self.add_mutations() | 766 self.add_mutations() |
669 # TODO stuff working to here... | |
670 self.add_large_indels() | 767 self.add_large_indels() |
671 self.add_plasmids() | 768 self.add_plasmids() |
769 # TODO stuff working to here... | |
672 self.add_amr_matrix() | 770 self.add_amr_matrix() |
673 # self.add_snps() | 771 # self.add_snps() |
674 self.add_methods() | 772 self.add_methods() |
675 self.make_tex() | 773 self.make_tex() |
676 # It took me quite a long time to find out that the value of the -t | 774 # It took me quite a long time to find out that the value of the -t |
685 self.ofh.close() | 783 self.ofh.close() |
686 | 784 |
687 | 785 |
688 parser = argparse.ArgumentParser() | 786 parser = argparse.ArgumentParser() |
689 | 787 |
788 parser.add_argument('--amr_deletions_file', action='store', dest='amr_deletions_file', help='AMR deletions BED file') | |
789 parser.add_argument('--amr_matrix_png_dir', action='store', dest='amr_matrix_png_dir', help='Directory of AMR matrix PNG files') | |
690 parser.add_argument('--analysis_name', action='store', dest='analysis_name', help='Sample identifier') | 790 parser.add_argument('--analysis_name', action='store', dest='analysis_name', help='Sample identifier') |
691 parser.add_argument('--assembly_fasta_file', action='store', dest='assembly_fasta_file', help='Assembly fasta file') | 791 parser.add_argument('--assembly_fasta_file', action='store', dest='assembly_fasta_file', help='Assembly fasta file') |
692 parser.add_argument('--assembly_name', action='store', dest='assembly_name', help='Assembly identifier') | 792 parser.add_argument('--assembly_name', action='store', dest='assembly_name', help='Assembly identifier') |
793 parser.add_argument('--compute_sequence_length_file', action='store', dest='compute_sequence_length_file', help='Compute sequence length tabular file') | |
794 parser.add_argument('--contig_coverage_file', action='store', dest='contig_coverage_file', help='Contig coverage TSV file') | |
795 parser.add_argument('--dbkey', action='store', dest='dbkey', help='Reference genome identifier') | |
796 parser.add_argument('--dnadiff_snps_file', action='store', dest='dnadiff_snps_file', help='DNAdiff snps tabular file') | |
693 parser.add_argument('--feature_bed_dir', action='store', dest='feature_bed_dir', help='Directory of best feature hits bed files') | 797 parser.add_argument('--feature_bed_dir', action='store', dest='feature_bed_dir', help='Directory of best feature hits bed files') |
694 parser.add_argument('--feature_png_dir', action='store', dest='feature_png_dir', help='Directory of best feature hits png files') | 798 parser.add_argument('--feature_png_dir', action='store', dest='feature_png_dir', help='Directory of best feature hits png files') |
695 parser.add_argument('--contig_coverage_file', action='store', dest='contig_coverage_file', help='Contig coverage TSV file') | 799 parser.add_argument('--flye_assembly_info_file', action='store', dest='flye_assembly_info_file', default=None, help='Flye assembly info tabular file') |
696 parser.add_argument('--dbkey', action='store', dest='dbkey', help='Reference genome') | 800 parser.add_argument('--flye_version', action='store', dest='flye_version', default=None, help='Flye version string') |
801 parser.add_argument('--genome_insertions_file', action='store', dest='genome_insertions_file', help='Genome insertions BED file') | |
697 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Input sample is gzipped') | 802 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Input sample is gzipped') |
698 parser.add_argument('--illumina_fastq_file', action='store', dest='illumina_fastq_file', help='Input sample') | 803 parser.add_argument('--illumina_fastq_file', action='store', dest='illumina_fastq_file', help='Input sample') |
699 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BED file') | 804 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BED file') |
700 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') | 805 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') |
701 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA CSS stylesheet') | 806 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA CSS stylesheet') |
807 parser.add_argument('--plasmids_file', action='store', dest='plasmids_file', help='pChunks plasmids TSV file') | |
808 parser.add_argument('--reference_insertions_file', action='store', dest='reference_insertions_file', help='Reference insertions BED file') | |
809 # parser.add_argument('--reference_genome', action='store', dest='reference_genome', help='Reference genome fasta file') | |
702 | 810 |
703 args = parser.parse_args() | 811 args = parser.parse_args() |
704 | 812 |
813 # Prepare the AMR matrix PNG files. | |
814 amr_matrix_files = [] | |
815 for file_name in sorted(os.listdir(args.amr_matrix_png_dir)): | |
816 file_path = os.path.abspath(os.path.join(args.amr_matrix_png_dir, file_name)) | |
817 amr_matrix_files.append(file_path) | |
705 # Prepare the features BED files. | 818 # Prepare the features BED files. |
706 feature_bed_files = [] | 819 feature_bed_files = [] |
707 for file_name in sorted(os.listdir(args.feature_bed_dir)): | 820 for file_name in sorted(os.listdir(args.feature_bed_dir)): |
708 file_path = os.path.abspath(os.path.join(args.feature_bed_dir, file_name)) | 821 file_path = os.path.abspath(os.path.join(args.feature_bed_dir, file_name)) |
709 feature_bed_files.append(file_path) | 822 feature_bed_files.append(file_path) |
717 for file_name in sorted(os.listdir(args.mutation_regions_dir)): | 830 for file_name in sorted(os.listdir(args.mutation_regions_dir)): |
718 file_path = os.path.abspath(os.path.join(args.mutation_regions_dir, file_name)) | 831 file_path = os.path.abspath(os.path.join(args.mutation_regions_dir, file_name)) |
719 mutation_regions_files.append(file_path) | 832 mutation_regions_files.append(file_path) |
720 | 833 |
721 markdown_report = PimaReport(args.analysis_name, | 834 markdown_report = PimaReport(args.analysis_name, |
835 args.amr_deletions_file, | |
836 amr_matrix_files, | |
722 args.assembly_fasta_file, | 837 args.assembly_fasta_file, |
723 args.assembly_name, | 838 args.assembly_name, |
839 args.compute_sequence_length_file, | |
840 args.contig_coverage_file, | |
841 args.dbkey, | |
842 args.dnadiff_snps_file, | |
724 feature_bed_files, | 843 feature_bed_files, |
725 feature_png_files, | 844 feature_png_files, |
726 args.contig_coverage_file, | 845 args.flye_assembly_info_file, |
727 args.dbkey, | 846 args.flye_version, |
847 args.genome_insertions_file, | |
728 args.gzipped, | 848 args.gzipped, |
729 args.illumina_fastq_file, | 849 args.illumina_fastq_file, |
730 args.mutation_regions_bed_file, | 850 args.mutation_regions_bed_file, |
731 mutation_regions_files, | 851 mutation_regions_files, |
732 args.pima_css) | 852 args.pima_css, |
853 args.plasmids_file, | |
854 args.reference_insertions_file) | |
733 markdown_report.make_report() | 855 markdown_report.make_report() |