Mercurial > repos > greg > pima_report
comparison pima_report.py @ 21:667b253329c6 draft
Uploaded
author | greg |
---|---|
date | Thu, 13 Apr 2023 17:13:40 +0000 |
parents | 4fe8c35cd176 |
children | 13a9c8ecd30e |
comparison
equal
deleted
inserted
replaced
20:4fe8c35cd176 | 21:667b253329c6 |
---|---|
15 CDC_ADVISORY = 'The analysis and report presented here should be treated as preliminary. Please contact the CDC/BDRD with any results regarding _Bacillus anthracis_.' | 15 CDC_ADVISORY = 'The analysis and report presented here should be treated as preliminary. Please contact the CDC/BDRD with any results regarding _Bacillus anthracis_.' |
16 | 16 |
17 | 17 |
18 class PimaReport: | 18 class PimaReport: |
19 | 19 |
20 def __init__(self, analysis_name=None, amr_deletions_file=None, amr_matrix_files=None, assembly_fasta_file=None, | 20 def __init__(self, analysis_name=None, amr_deletions_file=None, amr_matrix_files=None, assembler_version=None, |
21 assembly_name=None, bedtools_version=None, blastn_version=None, circos_files=None, | 21 assembly_fasta_file=None, assembly_name=None, bedtools_version=None, blastn_version=None, |
22 compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, dnadiff_snps_file=None, | 22 circos_files=None, compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, |
23 dnadiff_version=None, errors_file=None, feature_bed_files=None, feature_png_files=None, | 23 dnadiff_snps_file=None, dnadiff_version=None, errors_file=None, fastq_file=None, feature_bed_files=None, |
24 flye_assembly_info_file=None, flye_version=None, genome_insertions_file=None, gzipped=None, | 24 feature_png_files=None, flye_assembly_info_file=None, genome_insertions_file=None, gzipped=None, |
25 kraken2_report_file=None, kraken2_version=None, minimap2_version=None, mutation_regions_bed_file=None, | 25 kraken2_report_file=None, kraken2_version=None, minimap2_version=None, mutation_regions_bed_file=None, |
26 mutation_regions_tsv_files=None, ont_fastq_file=None, pima_css=None, plasmids_file=None, | 26 mutation_regions_tsv_files=None, pima_css=None, plasmids_file=None, quast_report_file=None, |
27 quast_report_file=None, read_type=None, reference_insertions_file=None, samtools_version=None, | 27 read_type=None, reference_insertions_file=None, samtools_version=None, varscan_version=None): |
28 varscan_version=None): | |
29 self.ofh = open("process_log.txt", "w") | 28 self.ofh = open("process_log.txt", "w") |
30 | 29 |
31 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) | 30 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) |
32 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) | 31 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) |
33 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) | 32 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) |
33 self.ofh.write("assembler_version: %s\n" % str(assembler_version)) | |
34 self.ofh.write("assembly_fasta_file: %s\n" % str(assembly_fasta_file)) | 34 self.ofh.write("assembly_fasta_file: %s\n" % str(assembly_fasta_file)) |
35 self.ofh.write("assembly_name: %s\n" % str(assembly_name)) | 35 self.ofh.write("assembly_name: %s\n" % str(assembly_name)) |
36 self.ofh.write("bedtools_version: %s\n" % str(bedtools_version)) | 36 self.ofh.write("bedtools_version: %s\n" % str(bedtools_version)) |
37 self.ofh.write("blastn_version: %s\n" % str(blastn_version)) | 37 self.ofh.write("blastn_version: %s\n" % str(blastn_version)) |
38 self.ofh.write("circos_files: %s\n" % str(circos_files)) | 38 self.ofh.write("circos_files: %s\n" % str(circos_files)) |
40 self.ofh.write("contig_coverage_file: %s\n" % str(contig_coverage_file)) | 40 self.ofh.write("contig_coverage_file: %s\n" % str(contig_coverage_file)) |
41 self.ofh.write("dbkey: %s\n" % str(dbkey)) | 41 self.ofh.write("dbkey: %s\n" % str(dbkey)) |
42 self.ofh.write("dnadiff_snps_file: %s\n" % str(dnadiff_snps_file)) | 42 self.ofh.write("dnadiff_snps_file: %s\n" % str(dnadiff_snps_file)) |
43 self.ofh.write("dnadiff_version: %s\n" % str(dnadiff_version)) | 43 self.ofh.write("dnadiff_version: %s\n" % str(dnadiff_version)) |
44 self.ofh.write("errors_file: %s\n" % str(errors_file)) | 44 self.ofh.write("errors_file: %s\n" % str(errors_file)) |
45 self.ofh.write("fastq_file: %s\n" % str(fastq_file)) | |
45 self.ofh.write("feature_bed_files: %s\n" % str(feature_bed_files)) | 46 self.ofh.write("feature_bed_files: %s\n" % str(feature_bed_files)) |
46 self.ofh.write("feature_png_files: %s\n" % str(feature_png_files)) | 47 self.ofh.write("feature_png_files: %s\n" % str(feature_png_files)) |
47 self.ofh.write("flye_assembly_info_file: %s\n" % str(flye_assembly_info_file)) | 48 self.ofh.write("flye_assembly_info_file: %s\n" % str(flye_assembly_info_file)) |
48 self.ofh.write("flye_version: %s\n" % str(flye_version)) | |
49 self.ofh.write("gzipped: %s\n" % str(gzipped)) | 49 self.ofh.write("gzipped: %s\n" % str(gzipped)) |
50 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file)) | 50 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file)) |
51 self.ofh.write("kraken2_report_file: %s\n" % str(kraken2_report_file)) | 51 self.ofh.write("kraken2_report_file: %s\n" % str(kraken2_report_file)) |
52 self.ofh.write("kraken2_version: %s\n" % str(kraken2_version)) | 52 self.ofh.write("kraken2_version: %s\n" % str(kraken2_version)) |
53 self.ofh.write("minimap2_version: %s\n" % str(minimap2_version)) | 53 self.ofh.write("minimap2_version: %s\n" % str(minimap2_version)) |
54 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file)) | 54 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file)) |
55 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) | 55 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) |
56 self.ofh.write("ont_fastq_file: %s\n" % str(ont_fastq_file)) | |
57 self.ofh.write("pima_css: %s\n" % str(pima_css)) | 56 self.ofh.write("pima_css: %s\n" % str(pima_css)) |
58 self.ofh.write("plasmids_file: %s\n" % str(plasmids_file)) | 57 self.ofh.write("plasmids_file: %s\n" % str(plasmids_file)) |
59 self.ofh.write("quast_report_file: %s\n" % str(quast_report_file)) | 58 self.ofh.write("quast_report_file: %s\n" % str(quast_report_file)) |
60 self.ofh.write("read_type: %s\n" % str(read_type)) | 59 self.ofh.write("read_type: %s\n" % str(read_type)) |
61 self.ofh.write("reference_insertions_file: %s\n" % str(reference_insertions_file)) | 60 self.ofh.write("reference_insertions_file: %s\n" % str(reference_insertions_file)) |
69 # Inputs | 68 # Inputs |
70 self.amr_deletions_file = amr_deletions_file | 69 self.amr_deletions_file = amr_deletions_file |
71 self.amr_matrix_files = amr_matrix_files | 70 self.amr_matrix_files = amr_matrix_files |
72 self.analysis_name = analysis_name.split('_')[0] | 71 self.analysis_name = analysis_name.split('_')[0] |
73 self.ofh.write("self.analysis_name: %s\n" % str(self.analysis_name)) | 72 self.ofh.write("self.analysis_name: %s\n" % str(self.analysis_name)) |
73 if assembler_version is None: | |
74 self.assembler_version = 'assembler (version unknown)' | |
75 else: | |
76 if read_type == 'ont': | |
77 # Assembler is flye. | |
78 assembler_version = assembler_version.rstrip(' _assembly info_') | |
79 else: | |
80 # Assembler is spades. | |
81 assembler_version = assembler_version.rstrip(' _contigs') | |
82 self.assembler_version = re.sub('_', '.', assembler_version) | |
74 self.assembly_fasta_file = assembly_fasta_file | 83 self.assembly_fasta_file = assembly_fasta_file |
75 self.assembly_name = re.sub('_', '.', assembly_name.rstrip(' _consensus_')) | 84 self.assembly_name = re.sub('_', '.', assembly_name.rstrip(' _consensus_')) |
76 if bedtools_version is None: | 85 if bedtools_version is None: |
77 self.bedtools_version = 'bedtools (version unknown)' | 86 self.bedtools_version = 'bedtools (version unknown)' |
78 else: | 87 else: |
92 self.dnadiff_version = re.sub('_', '.', dnadiff_version.rstrip(' _snps_')) | 101 self.dnadiff_version = re.sub('_', '.', dnadiff_version.rstrip(' _snps_')) |
93 self.errors_file = errors_file | 102 self.errors_file = errors_file |
94 self.feature_bed_files = feature_bed_files | 103 self.feature_bed_files = feature_bed_files |
95 self.feature_png_files = feature_png_files | 104 self.feature_png_files = feature_png_files |
96 self.flye_assembly_info_file = flye_assembly_info_file | 105 self.flye_assembly_info_file = flye_assembly_info_file |
97 if flye_version is None: | |
98 self.flye_version = 'flye (version unknown)' | |
99 else: | |
100 self.flye_version = re.sub('_', '.', flye_version.rstrip(' _assembly info_')) | |
101 self.gzipped = gzipped | 106 self.gzipped = gzipped |
102 self.genome_insertions_file = genome_insertions_file | 107 self.genome_insertions_file = genome_insertions_file |
103 self.kraken2_report_file = kraken2_report_file | 108 self.kraken2_report_file = kraken2_report_file |
104 if kraken2_version is None: | 109 if kraken2_version is None: |
105 self.kraken2_version = 'kraken2 (version unknown)' | 110 self.kraken2_version = 'kraken2 (version unknown)' |
144 self.mutation_errors_title = 'Errors finding mutations in the sample' | 149 self.mutation_errors_title = 'Errors finding mutations in the sample' |
145 self.mutation_title = 'Mutations found in the sample' | 150 self.mutation_title = 'Mutations found in the sample' |
146 self.mutation_methods_title = 'Mutation screening' | 151 self.mutation_methods_title = 'Mutation screening' |
147 self.plasmid_methods_title = 'Plasmid annotation' | 152 self.plasmid_methods_title = 'Plasmid annotation' |
148 self.plasmid_title = 'Plasmid annotation' | 153 self.plasmid_title = 'Plasmid annotation' |
154 self.reference_genome_title = 'Reference genome' | |
149 self.reference_methods_title = 'Reference comparison' | 155 self.reference_methods_title = 'Reference comparison' |
150 self.snp_indel_title = 'SNPs and small indels' | 156 self.snp_indel_title = 'SNPs and small indels' |
151 self.summary_title = 'Summary' | 157 self.summary_title = 'Summary' |
152 | 158 |
153 # Methods | 159 # Methods |
154 self.methods = pandas.Series(dtype='float64') | 160 self.methods = pandas.Series(dtype='float64') |
155 self.methods[self.contamination_methods_title] = pandas.Series(dtype='float64') | 161 self.methods[self.contamination_methods_title] = pandas.Series(dtype='float64') |
156 self.methods[self.assembly_methods_title] = pandas.Series(dtype='float64') | 162 self.methods[self.assembly_methods_title] = pandas.Series(dtype='float64') |
163 self.methods[self.reference_genome_title] = pandas.Series(dtype='float64') | |
157 self.methods[self.reference_methods_title] = pandas.Series(dtype='float64') | 164 self.methods[self.reference_methods_title] = pandas.Series(dtype='float64') |
158 self.methods[self.mutation_methods_title] = pandas.Series(dtype='float64') | 165 self.methods[self.mutation_methods_title] = pandas.Series(dtype='float64') |
159 self.methods[self.feature_methods_title] = pandas.Series(dtype='float64') | 166 self.methods[self.feature_methods_title] = pandas.Series(dtype='float64') |
160 self.methods[self.plasmid_methods_title] = pandas.Series(dtype='float64') | 167 self.methods[self.plasmid_methods_title] = pandas.Series(dtype='float64') |
161 | 168 |
167 # Values | 174 # Values |
168 self.assembly_size = 0 | 175 self.assembly_size = 0 |
169 self.contig_info = None | 176 self.contig_info = None |
170 self.did_medaka_ont_assembly = False | 177 self.did_medaka_ont_assembly = False |
171 self.feature_hits = pandas.Series(dtype='float64') | 178 self.feature_hits = pandas.Series(dtype='float64') |
172 self.illumina_fastq_file = None | |
173 self.illumina_length_mean = None | 179 self.illumina_length_mean = None |
174 self.illumina_read_count = None | 180 self.illumina_read_count = None |
175 self.illumina_bases = None | 181 self.illumina_bases = None |
176 self.ont_bases = None | 182 self.ont_bases = None |
177 # TODO: should the following be passed as a parameter? | 183 # TODO: should the following be passed as a parameter? |
178 self.ont_coverage_min = 30 | 184 self.ont_coverage_min = 30 |
179 self.ont_fast5 = None | 185 self.ont_fast5 = None |
180 self.ont_fastq_file = ont_fastq_file | 186 self.fastq_file = fastq_file |
181 self.ont_n50 = None | 187 self.ont_n50 = None |
182 # TODO: should the following be passed as a parameter? | 188 # TODO: should the following be passed as a parameter? |
183 self.ont_n50_min = 2500 | 189 self.ont_n50_min = 2500 |
184 self.ont_raw_fastq = self.analysis_name | 190 if self.read_type == 'ONT': |
191 self.ont_raw_fastq = self.analysis_name | |
192 self.illumina_fastq = None | |
193 else: | |
194 self.ont_raw_fastq = None | |
195 self.illumina_fastq = self.analysis_name | |
185 self.ont_read_count = None | 196 self.ont_read_count = None |
186 | 197 |
187 # Actions | 198 # Actions |
188 self.did_guppy_ont_fast5 = False | 199 self.did_guppy_ont_fast5 = False |
189 self.did_qcat_ont_fastq = False | 200 self.did_qcat_ont_fastq = False |
190 self.info_ont_fastq(self.ont_fastq_file) | 201 self.ofh.write("self.read_type: %s\n" % str(self.read_type)) |
191 self.info_illumina_fastq() | 202 if self.read_type == 'ONT': |
203 self.info_ont_fastq(self.fastq_file) | |
204 else: | |
205 self.info_illumina_fastq(self.fastq_file) | |
192 self.load_contig_info() | 206 self.load_contig_info() |
193 | 207 |
194 def run_command(self, command): | 208 def run_command(self, command): |
195 self.ofh.write("\nXXXXXX In run_command, command:\n%s\n\n" % str(command)) | 209 self.ofh.write("\nXXXXXX In run_command, command:\n%s\n\n" % str(command)) |
196 try: | 210 try: |
245 self.num_assembly_contigs = len(self.assembly) | 259 self.num_assembly_contigs = len(self.assembly) |
246 for i in self.assembly: | 260 for i in self.assembly: |
247 self.assembly_size += len(i.seq) | 261 self.assembly_size += len(i.seq) |
248 self.assembly_size = self.format_kmg(self.assembly_size, decimals=1) | 262 self.assembly_size = self.format_kmg(self.assembly_size, decimals=1) |
249 | 263 |
250 def info_illumina_fastq(self): | 264 def info_illumina_fastq(self, fastq_file): |
251 self.ofh.write("\nXXXXXX In info_illumina_fastq\n\n") | 265 self.ofh.write("\nXXXXXX In info_illumina_fastq\n\n") |
252 if self.illumina_length_mean is None: | 266 if self.illumina_length_mean is None: |
253 return | 267 return |
254 if self.gzipped: | 268 if self.gzipped: |
255 opener = 'gunzip -c' | 269 opener = 'gunzip -c' |
256 else: | 270 else: |
257 opener = 'cat' | 271 opener = 'cat' |
258 command = ' '.join([opener, | 272 command = ' '.join([opener, |
259 self.ont_fastq_file, | 273 fastq_file, |
260 '| awk \'{getline;s += length($1);getline;getline;}END{print s/(NR/4)"\t"(NR/4)"\t"s}\'']) | 274 '| awk \'{getline;s += length($1);getline;getline;}END{print s/(NR/4)"\t"(NR/4)"\t"s}\'']) |
261 output = self.run_command(command) | 275 output = self.run_command(command) |
262 self.ofh.write("output:\n%s\n" % str(output)) | 276 self.ofh.write("output:\n%s\n" % str(output)) |
263 self.ofh.write("re.split('\\t', self.run_command(command)[0]:\n%s\n" % str(re.split('\\t', self.run_command(command)[0]))) | 277 self.ofh.write("re.split('\\t', self.run_command(command)[0]:\n%s\n" % str(re.split('\\t', self.run_command(command)[0]))) |
264 values = [] | 278 values = [] |
272 self.illumina_length_mean += values[0] | 286 self.illumina_length_mean += values[0] |
273 self.ofh.write("values[1]:\n%s\n" % str(values[1])) | 287 self.ofh.write("values[1]:\n%s\n" % str(values[1])) |
274 self.illumina_read_count += int(values[1]) | 288 self.illumina_read_count += int(values[1]) |
275 self.ofh.write("values[2]:\n%s\n" % str(values[2])) | 289 self.ofh.write("values[2]:\n%s\n" % str(values[2])) |
276 self.illumina_bases += int(values[2]) | 290 self.illumina_bases += int(values[2]) |
277 # The original PIMA code inserts self.illumina_fastq_file into | |
278 # a list for no apparent reason. We don't do that here. | |
279 # self.illumina_length_mean /= len(self.illumina_fastq_file) | |
280 self.illumina_length_mean /= 1 | 291 self.illumina_length_mean /= 1 |
281 self.illumina_bases = self.format_kmg(self.illumina_bases, decimals=1) | 292 self.illumina_bases = self.format_kmg(self.illumina_bases, decimals=1) |
282 | 293 |
283 def start_doc(self): | 294 def start_doc(self): |
284 header_text = 'Analysis of ' + self.analysis_name | 295 header_text = 'Analysis of ' + self.analysis_name |
303 "ONT FAST5", | 314 "ONT FAST5", |
304 self.wordwrap_markdown(self.ont_fast5), | 315 self.wordwrap_markdown(self.ont_fast5), |
305 "ONT FASTQ", | 316 "ONT FASTQ", |
306 self.wordwrap_markdown(self.ont_raw_fastq), | 317 self.wordwrap_markdown(self.ont_raw_fastq), |
307 "Illumina FASTQ", | 318 "Illumina FASTQ", |
308 "N/A", | 319 self.wordwrap_markdown(self.illumina_fastq), |
309 "Assembly", | 320 "Assembly", |
310 self.wordwrap_markdown(self.assembly_name), | 321 self.wordwrap_markdown(self.assembly_name), |
311 "Reference", | 322 "Reference", |
312 self.wordwrap_markdown(self.dbkey), | 323 self.wordwrap_markdown(self.dbkey), |
313 ] | 324 ] |
405 '| awk \'BEGIN{bp = 0;f = 0}', | 416 '| awk \'BEGIN{bp = 0;f = 0}', |
406 '{if(NR == 1){sub(/+/, "", $1);s=$1}else{bp += $1;if(bp > s / 2 && f == 0){n50 = $1;f = 1}}}', | 417 '{if(NR == 1){sub(/+/, "", $1);s=$1}else{bp += $1;if(bp > s / 2 && f == 0){n50 = $1;f = 1}}}', |
407 'END{printf "%d\\t%d\\t%d\\n", n50, (NR - 1), s;exit}\'']) | 418 'END{printf "%d\\t%d\\t%d\\n", n50, (NR - 1), s;exit}\'']) |
408 result = list(re.split('\\t', self.run_command(command)[0])) | 419 result = list(re.split('\\t', self.run_command(command)[0])) |
409 if result[1] == '0': | 420 if result[1] == '0': |
410 self.error_out('No ONT reads found') | 421 warning = 'No ONT reads found' |
422 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | |
411 self.ont_n50, self.ont_read_count, ont_raw_bases = [int(i) for i in result] | 423 self.ont_n50, self.ont_read_count, ont_raw_bases = [int(i) for i in result] |
412 command = ' '.join([opener, | 424 command = ' '.join([opener, |
413 fastq_file, | 425 fastq_file, |
414 '| awk \'{getline;print length($0);getline;getline;}\'']) | 426 '| awk \'{getline;print length($0);getline;getline;}\'']) |
415 result = self.run_command(command) | 427 result = self.run_command(command) |
528 for circos_file in self.circos_files: | 540 for circos_file in self.circos_files: |
529 contig = os.path.basename(circos_file) | 541 contig = os.path.basename(circos_file) |
530 contig_title = 'Alignment to %s' % contig | 542 contig_title = 'Alignment to %s' % contig |
531 self.doc.new_line() | 543 self.doc.new_line() |
532 self.doc.new_header(level=3, title=contig_title) | 544 self.doc.new_header(level=3, title=contig_title) |
533 self.doc.new_line('Blue indicates aligned sequences (to the reference) and yellow indicates missing sequences') | |
534 self.doc.new_line(self.doc.new_inline_image(text='contig_title', path=os.path.abspath(circos_file))) | 545 self.doc.new_line(self.doc.new_inline_image(text='contig_title', path=os.path.abspath(circos_file))) |
535 self.doc.new_line('<div style="page-break-after: always;"></div>') | 546 self.doc.new_line('<div style="page-break-after: always;"></div>') |
536 self.doc.new_line() | 547 self.doc.new_line() |
537 method = 'The genome assembly was aligned against the reference sequencing using dnadiff version %s.' % self.dnadiff_version | 548 if self.dbkey == 'ref_genome': |
549 headers = ["* Chromosome - NC_007530.2 Bacillus anthracis str. 'Ames Ancestor', complete sequence", | |
550 "* pXO1 - NC_007322.2 Bacillus anthracis str. 'Ames Ancestor' plasmid pXO1, complete sequence", | |
551 "* pXO2 - NC_007323.3 Bacillus anthracis str. 'Ames Ancestor' plasmid pXO2, complete sequence"] | |
552 method = '\n'.join(headers) | |
553 self.methods[self.reference_genome_title] = self.methods[self.reference_genome_title].append(pandas.Series(method)) | |
554 method = 'The genome assembly was aligned against the reference sequence using %s.' % self.dnadiff_version | |
538 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method)) | 555 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method)) |
539 | 556 |
540 def add_features(self): | 557 def add_features(self): |
541 self.ofh.write("\nXXXXXX In add_features\n\n") | 558 self.ofh.write("\nXXXXXX In add_features\n\n") |
542 if len(self.feature_bed_files) == 0: | 559 if len(self.feature_bed_files) == 0: |
778 self.methods[self.basecalling_methods_title] = pandas.Series(methods) | 795 self.methods[self.basecalling_methods_title] = pandas.Series(methods) |
779 # self.add_illumina_library_information() | 796 # self.add_illumina_library_information() |
780 self.add_contig_info() | 797 self.add_contig_info() |
781 self.evaluate_assembly() | 798 self.evaluate_assembly() |
782 self.add_assembly_information() | 799 self.add_assembly_information() |
783 if self.flye_assembly_info_file is not None: | 800 if self.assembler_version is not None: |
784 method = 'ONT reads were assembled using %s' % self.flye_version.rstrip('assembly info') | 801 if self.read_type == 'ONT': |
785 self.methods[self.assembly_methods_title] = self.methods[self.assembly_methods_title].append(pandas.Series(method)) | 802 method = 'ONT reads were assembled using %s' % self.assembler_version |
786 # Pull in the assembly summary and look at the coverage. | 803 self.methods[self.assembly_methods_title] = self.methods[self.assembly_methods_title].append(pandas.Series(method)) |
787 assembly_info = pandas.read_csv(self.flye_assembly_info_file, header=0, index_col=0, sep='\t') | 804 # Pull in the assembly summary and look at the coverage. |
788 # Look for non-circular contigs. | 805 assembly_info = pandas.read_csv(self.flye_assembly_info_file, header=0, index_col=0, sep='\t') |
789 open_contigs = assembly_info.loc[assembly_info['circ.'] == 'N', :] | 806 # Look for non-circular contigs. |
790 if open_contigs.shape[0] > 0: | 807 open_contigs = assembly_info.loc[assembly_info['circ.'] == 'N', :] |
791 open_contig_ids = open_contigs.index.values | 808 if open_contigs.shape[0] > 0: |
792 warning = 'Flye reported {:d} open contigs ({:s}); assembly may be incomplete.'.format(open_contigs.shape[0], ', '.join(open_contig_ids)) | 809 open_contig_ids = open_contigs.index.values |
793 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | 810 warning = 'Flye reported {:d} open contigs ({:s}); assembly may be incomplete.'.format(open_contigs.shape[0], ', '.join(open_contig_ids)) |
811 self.assembly_notes = self.assembly_notes.append(pandas.Series(warning)) | |
812 else: | |
813 method = 'Illumina reads were assembled using %s' % self.assembler_version | |
794 if self.did_medaka_ont_assembly: | 814 if self.did_medaka_ont_assembly: |
795 method = 'the genome assembly was polished using ont reads and medaka.' | 815 method = 'the genome assembly was polished using ont reads and medaka.' |
796 self.methods[self.assembly_methods_title] = self.methods[self.assembly_methods_title].append(pandas.series(method)) | 816 self.methods[self.assembly_methods_title] = self.methods[self.assembly_methods_title].append(pandas.series(method)) |
797 self.add_assembly_notes() | 817 self.add_assembly_notes() |
798 | 818 |
834 parser = argparse.ArgumentParser() | 854 parser = argparse.ArgumentParser() |
835 | 855 |
836 parser.add_argument('--amr_deletions_file', action='store', dest='amr_deletions_file', help='AMR deletions BED file') | 856 parser.add_argument('--amr_deletions_file', action='store', dest='amr_deletions_file', help='AMR deletions BED file') |
837 parser.add_argument('--amr_matrix_png_dir', action='store', dest='amr_matrix_png_dir', help='Directory of AMR matrix PNG files') | 857 parser.add_argument('--amr_matrix_png_dir', action='store', dest='amr_matrix_png_dir', help='Directory of AMR matrix PNG files') |
838 parser.add_argument('--analysis_name', action='store', dest='analysis_name', help='Sample identifier') | 858 parser.add_argument('--analysis_name', action='store', dest='analysis_name', help='Sample identifier') |
859 parser.add_argument('--assembler_version', action='store', dest='assembler_version', default=None, help='Assembler version string') | |
839 parser.add_argument('--assembly_fasta_file', action='store', dest='assembly_fasta_file', help='Assembly fasta file') | 860 parser.add_argument('--assembly_fasta_file', action='store', dest='assembly_fasta_file', help='Assembly fasta file') |
840 parser.add_argument('--assembly_name', action='store', dest='assembly_name', help='Assembly identifier') | 861 parser.add_argument('--assembly_name', action='store', dest='assembly_name', help='Assembly identifier') |
841 parser.add_argument('--bedtools_version', action='store', dest='bedtools_version', default=None, help='Bedtools version string') | 862 parser.add_argument('--bedtools_version', action='store', dest='bedtools_version', default=None, help='Bedtools version string') |
842 parser.add_argument('--blastn_version', action='store', dest='blastn_version', default=None, help='Blastn version string') | 863 parser.add_argument('--blastn_version', action='store', dest='blastn_version', default=None, help='Blastn version string') |
843 parser.add_argument('--circos_png_dir', action='store', dest='circos_png_dir', help='Directory of circos PNG files') | 864 parser.add_argument('--circos_png_dir', action='store', dest='circos_png_dir', help='Directory of circos PNG files') |
845 parser.add_argument('--contig_coverage_file', action='store', dest='contig_coverage_file', help='Contig coverage TSV file') | 866 parser.add_argument('--contig_coverage_file', action='store', dest='contig_coverage_file', help='Contig coverage TSV file') |
846 parser.add_argument('--dbkey', action='store', dest='dbkey', help='Reference genome identifier') | 867 parser.add_argument('--dbkey', action='store', dest='dbkey', help='Reference genome identifier') |
847 parser.add_argument('--dnadiff_snps_file', action='store', dest='dnadiff_snps_file', help='DNAdiff snps tabular file') | 868 parser.add_argument('--dnadiff_snps_file', action='store', dest='dnadiff_snps_file', help='DNAdiff snps tabular file') |
848 parser.add_argument('--dnadiff_version', action='store', dest='dnadiff_version', default=None, help='DNAdiff version string') | 869 parser.add_argument('--dnadiff_version', action='store', dest='dnadiff_version', default=None, help='DNAdiff version string') |
849 parser.add_argument('--errors_file', action='store', dest='errors_file', default=None, help='AMR mutations errors encountered txt file') | 870 parser.add_argument('--errors_file', action='store', dest='errors_file', default=None, help='AMR mutations errors encountered txt file') |
871 parser.add_argument('--fastq_file', action='store', dest='fastq_file', help='Input sample') | |
850 parser.add_argument('--feature_bed_dir', action='store', dest='feature_bed_dir', help='Directory of best feature hits bed files') | 872 parser.add_argument('--feature_bed_dir', action='store', dest='feature_bed_dir', help='Directory of best feature hits bed files') |
851 parser.add_argument('--feature_png_dir', action='store', dest='feature_png_dir', help='Directory of best feature hits png files') | 873 parser.add_argument('--feature_png_dir', action='store', dest='feature_png_dir', help='Directory of best feature hits png files') |
852 parser.add_argument('--flye_assembly_info_file', action='store', dest='flye_assembly_info_file', default=None, help='Flye assembly info tabular file') | 874 parser.add_argument('--flye_assembly_info_file', action='store', dest='flye_assembly_info_file', default=None, help='Flye assembly info tabular file') |
853 parser.add_argument('--flye_version', action='store', dest='flye_version', default=None, help='Flye version string') | |
854 parser.add_argument('--genome_insertions_file', action='store', dest='genome_insertions_file', help='Genome insertions BED file') | 875 parser.add_argument('--genome_insertions_file', action='store', dest='genome_insertions_file', help='Genome insertions BED file') |
855 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Input sample is gzipped') | 876 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Input sample is gzipped') |
856 parser.add_argument('--ont_fastq_file', action='store', dest='ont_fastq_file', help='Input sample') | |
857 parser.add_argument('--kraken2_report_file', action='store', dest='kraken2_report_file', default=None, help='kraken2 report file') | 877 parser.add_argument('--kraken2_report_file', action='store', dest='kraken2_report_file', default=None, help='kraken2 report file') |
858 parser.add_argument('--kraken2_version', action='store', dest='kraken2_version', default=None, help='kraken2 version string') | 878 parser.add_argument('--kraken2_version', action='store', dest='kraken2_version', default=None, help='kraken2 version string') |
859 parser.add_argument('--minimap2_version', action='store', dest='minimap2_version', default=None, help='minimap2 version string') | 879 parser.add_argument('--minimap2_version', action='store', dest='minimap2_version', default=None, help='minimap2 version string') |
860 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file') | 880 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file') |
861 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') | 881 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') |
896 mutation_regions_files.append(file_path) | 916 mutation_regions_files.append(file_path) |
897 | 917 |
898 markdown_report = PimaReport(args.analysis_name, | 918 markdown_report = PimaReport(args.analysis_name, |
899 args.amr_deletions_file, | 919 args.amr_deletions_file, |
900 amr_matrix_files, | 920 amr_matrix_files, |
921 args.assembler_version, | |
901 args.assembly_fasta_file, | 922 args.assembly_fasta_file, |
902 args.assembly_name, | 923 args.assembly_name, |
903 args.bedtools_version, | 924 args.bedtools_version, |
904 args.blastn_version, | 925 args.blastn_version, |
905 circos_files, | 926 circos_files, |
907 args.contig_coverage_file, | 928 args.contig_coverage_file, |
908 args.dbkey, | 929 args.dbkey, |
909 args.dnadiff_snps_file, | 930 args.dnadiff_snps_file, |
910 args.dnadiff_version, | 931 args.dnadiff_version, |
911 args.errors_file, | 932 args.errors_file, |
933 args.fastq_file, | |
912 feature_bed_files, | 934 feature_bed_files, |
913 feature_png_files, | 935 feature_png_files, |
914 args.flye_assembly_info_file, | 936 args.flye_assembly_info_file, |
915 args.flye_version, | |
916 args.genome_insertions_file, | 937 args.genome_insertions_file, |
917 args.gzipped, | 938 args.gzipped, |
918 args.kraken2_report_file, | 939 args.kraken2_report_file, |
919 args.kraken2_version, | 940 args.kraken2_version, |
920 args.minimap2_version, | 941 args.minimap2_version, |
921 args.mutation_regions_bed_file, | 942 args.mutation_regions_bed_file, |
922 mutation_regions_files, | 943 mutation_regions_files, |
923 args.ont_fastq_file, | |
924 args.pima_css, | 944 args.pima_css, |
925 args.plasmids_file, | 945 args.plasmids_file, |
926 args.quast_report_file, | 946 args.quast_report_file, |
927 args.read_type, | 947 args.read_type, |
928 args.reference_insertions_file, | 948 args.reference_insertions_file, |