Mercurial > repos > greg > pima_report
comparison pima_report.py @ 18:e948214a9e3c draft
Uploaded
| author | greg |
|---|---|
| date | Wed, 22 Mar 2023 13:07:22 +0000 |
| parents | b4ed9f55de13 |
| children | c509e6819795 |
comparison
equal
deleted
inserted
replaced
| 17:b4ed9f55de13 | 18:e948214a9e3c |
|---|---|
| 18 class PimaReport: | 18 class PimaReport: |
| 19 | 19 |
| 20 def __init__(self, analysis_name=None, amr_deletions_file=None, amr_matrix_files=None, assembly_fasta_file=None, | 20 def __init__(self, analysis_name=None, amr_deletions_file=None, amr_matrix_files=None, assembly_fasta_file=None, |
| 21 assembly_name=None, bedtools_version=None, blastn_version=None, circos_files=None, | 21 assembly_name=None, bedtools_version=None, blastn_version=None, circos_files=None, |
| 22 compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, dnadiff_snps_file=None, | 22 compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, dnadiff_snps_file=None, |
| 23 dnadiff_version=None, feature_bed_files=None, feature_png_files=None, flye_assembly_info_file=None, | 23 dnadiff_version=None, errors_file=None, feature_bed_files=None, feature_png_files=None, |
| 24 flye_version=None, genome_insertions_file=None, gzipped=None, kraken2_report_file=None, | 24 flye_assembly_info_file=None, flye_version=None, genome_insertions_file=None, gzipped=None, |
| 25 kraken2_version=None, minimap2_version=None, mutation_regions_bed_file=None, mutation_regions_tsv_files=None, | 25 kraken2_report_file=None, kraken2_version=None, minimap2_version=None, mutation_regions_bed_file=None, |
| 26 ont_fastq_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, reference_insertions_file=None, | 26 mutation_regions_tsv_files=None, ont_fastq_file=None, pima_css=None, plasmids_file=None, |
| 27 samtools_version=None, varscan_version=None): | 27 quast_report_file=None, read_type=None, reference_insertions_file=None, samtools_version=None, |
| 28 varscan_version=None): | |
| 28 self.ofh = open("process_log.txt", "w") | 29 self.ofh = open("process_log.txt", "w") |
| 29 | 30 |
| 30 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) | 31 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) |
| 31 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) | 32 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) |
| 32 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) | 33 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) |
| 38 self.ofh.write("compute_sequence_length_file: %s\n" % str(compute_sequence_length_file)) | 39 self.ofh.write("compute_sequence_length_file: %s\n" % str(compute_sequence_length_file)) |
| 39 self.ofh.write("contig_coverage_file: %s\n" % str(contig_coverage_file)) | 40 self.ofh.write("contig_coverage_file: %s\n" % str(contig_coverage_file)) |
| 40 self.ofh.write("dbkey: %s\n" % str(dbkey)) | 41 self.ofh.write("dbkey: %s\n" % str(dbkey)) |
| 41 self.ofh.write("dnadiff_snps_file: %s\n" % str(dnadiff_snps_file)) | 42 self.ofh.write("dnadiff_snps_file: %s\n" % str(dnadiff_snps_file)) |
| 42 self.ofh.write("dnadiff_version: %s\n" % str(dnadiff_version)) | 43 self.ofh.write("dnadiff_version: %s\n" % str(dnadiff_version)) |
| 44 self.ofh.write("errors_file: %s\n" % str(errors_file)) | |
| 43 self.ofh.write("feature_bed_files: %s\n" % str(feature_bed_files)) | 45 self.ofh.write("feature_bed_files: %s\n" % str(feature_bed_files)) |
| 44 self.ofh.write("feature_png_files: %s\n" % str(feature_png_files)) | 46 self.ofh.write("feature_png_files: %s\n" % str(feature_png_files)) |
| 45 self.ofh.write("flye_assembly_info_file: %s\n" % str(flye_assembly_info_file)) | 47 self.ofh.write("flye_assembly_info_file: %s\n" % str(flye_assembly_info_file)) |
| 46 self.ofh.write("flye_version: %s\n" % str(flye_version)) | 48 self.ofh.write("flye_version: %s\n" % str(flye_version)) |
| 47 self.ofh.write("gzipped: %s\n" % str(gzipped)) | 49 self.ofh.write("gzipped: %s\n" % str(gzipped)) |
| 53 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) | 55 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) |
| 54 self.ofh.write("ont_fastq_file: %s\n" % str(ont_fastq_file)) | 56 self.ofh.write("ont_fastq_file: %s\n" % str(ont_fastq_file)) |
| 55 self.ofh.write("pima_css: %s\n" % str(pima_css)) | 57 self.ofh.write("pima_css: %s\n" % str(pima_css)) |
| 56 self.ofh.write("plasmids_file: %s\n" % str(plasmids_file)) | 58 self.ofh.write("plasmids_file: %s\n" % str(plasmids_file)) |
| 57 self.ofh.write("quast_report_file: %s\n" % str(quast_report_file)) | 59 self.ofh.write("quast_report_file: %s\n" % str(quast_report_file)) |
| 60 self.ofh.write("read_type: %s\n" % str(read_type)) | |
| 58 self.ofh.write("reference_insertions_file: %s\n" % str(reference_insertions_file)) | 61 self.ofh.write("reference_insertions_file: %s\n" % str(reference_insertions_file)) |
| 59 self.ofh.write("samtools_version: %s\n" % str(samtools_version)) | 62 self.ofh.write("samtools_version: %s\n" % str(samtools_version)) |
| 60 self.ofh.write("varscan_version: %s\n" % str(varscan_version)) | 63 self.ofh.write("varscan_version: %s\n" % str(varscan_version)) |
| 61 | 64 |
| 62 # General | 65 # General |
| 85 self.dnadiff_snps_file = dnadiff_snps_file | 88 self.dnadiff_snps_file = dnadiff_snps_file |
| 86 if dnadiff_version is None: | 89 if dnadiff_version is None: |
| 87 self.dnadiff_version = 'dnadiff (version unknown)' | 90 self.dnadiff_version = 'dnadiff (version unknown)' |
| 88 else: | 91 else: |
| 89 self.dnadiff_version = re.sub('_', '.', dnadiff_version.rstrip(' _snps_')) | 92 self.dnadiff_version = re.sub('_', '.', dnadiff_version.rstrip(' _snps_')) |
| 93 self.errors_file = errors_file | |
| 90 self.feature_bed_files = feature_bed_files | 94 self.feature_bed_files = feature_bed_files |
| 91 self.feature_png_files = feature_png_files | 95 self.feature_png_files = feature_png_files |
| 92 self.flye_assembly_info_file = flye_assembly_info_file | 96 self.flye_assembly_info_file = flye_assembly_info_file |
| 93 if flye_version is None: | 97 if flye_version is None: |
| 94 self.flye_version = 'flye (version unknown)' | 98 self.flye_version = 'flye (version unknown)' |
| 108 self.mutation_regions_bed_file = mutation_regions_bed_file | 112 self.mutation_regions_bed_file = mutation_regions_bed_file |
| 109 self.mutation_regions_tsv_files = mutation_regions_tsv_files | 113 self.mutation_regions_tsv_files = mutation_regions_tsv_files |
| 110 self.pima_css = pima_css | 114 self.pima_css = pima_css |
| 111 self.plasmids_file = plasmids_file | 115 self.plasmids_file = plasmids_file |
| 112 self.quast_report_file = quast_report_file | 116 self.quast_report_file = quast_report_file |
| 113 self.read_type = 'ONT' | 117 self.read_type = read_type.upper() |
| 114 self.reference_insertions_file = reference_insertions_file | 118 self.reference_insertions_file = reference_insertions_file |
| 115 self.reference_insertions_file = reference_insertions_file | 119 self.reference_insertions_file = reference_insertions_file |
| 116 if samtools_version is None: | 120 if samtools_version is None: |
| 117 self.samtools_version = 'samtools (version unknown)' | 121 self.samtools_version = 'samtools (version unknown)' |
| 118 else: | 122 else: |
| 135 self.feature_title = 'Features found in the assembly' | 139 self.feature_title = 'Features found in the assembly' |
| 136 self.feature_methods_title = 'Feature annotation' | 140 self.feature_methods_title = 'Feature annotation' |
| 137 self.feature_plot_title = 'Feature annotation plots' | 141 self.feature_plot_title = 'Feature annotation plots' |
| 138 self.large_indel_title = 'Large insertions & deletions' | 142 self.large_indel_title = 'Large insertions & deletions' |
| 139 self.methods_title = 'Methods' | 143 self.methods_title = 'Methods' |
| 144 self.mutation_errors_title = 'Errors finding mutations in the sample' | |
| 140 self.mutation_title = 'Mutations found in the sample' | 145 self.mutation_title = 'Mutations found in the sample' |
| 141 self.mutation_methods_title = 'Mutation screening' | 146 self.mutation_methods_title = 'Mutation screening' |
| 142 self.plasmid_methods_title = 'Plasmid annotation' | 147 self.plasmid_methods_title = 'Plasmid annotation' |
| 143 self.plasmid_title = 'Plasmid annotation' | 148 self.plasmid_title = 'Plasmid annotation' |
| 144 self.reference_methods_title = 'Reference comparison' | 149 self.reference_methods_title = 'Reference comparison' |
| 597 try: | 602 try: |
| 598 mutation_regions = pandas.read_csv(self.mutation_regions_bed_file, sep='\t', header=0, index_col=False) | 603 mutation_regions = pandas.read_csv(self.mutation_regions_bed_file, sep='\t', header=0, index_col=False) |
| 599 except Exception: | 604 except Exception: |
| 600 # Likely an empty file. | 605 # Likely an empty file. |
| 601 return | 606 return |
| 602 # TODO: this is the only place where reference_genome is used, | |
| 603 # so I'm commenting it out for now. We need to confirm if these | |
| 604 # errors that require the reference genmoe being passed are necessary. | |
| 605 # If so, we'll need to implement data tables in this tool. | |
| 606 # Make sure that the positions in the BED file fall within | |
| 607 # the chromosomes provided in the reference sequence. | |
| 608 """ | |
| 609 for mutation_region in range(mutation_regions.shape[0]): | |
| 610 mutation_region = mutation_regions.iloc[mutation_region, :] | |
| 611 if not (mutation_region[0] in self.reference_genome): | |
| 612 self.ofh.write("\nMutation region: %s not found in reference genome.\n" % ' '.join(mutation_region.astype(str))) | |
| 613 continue | |
| 614 if not isinstance(mutation_region[1], int): | |
| 615 self.ofh.write("\nNon-integer found in mutation region start (column 2): %s.\n" % str(mutation_region[1])) | |
| 616 break | |
| 617 elif not isinstance(mutation_region[2], int): | |
| 618 self.ofh.write("\nNon-integer found in mutation region start (column 3): %s.\n" % str(mutation_region[2])) | |
| 619 break | |
| 620 if mutation_region[1] <= 0 or mutation_region[2] <= 0: | |
| 621 self.ofh.write("\nMutation region %s starts before the reference sequence.\n" % ' '.join(mutation_region.astype(str))) | |
| 622 if mutation_region[1] > len(self.reference_genome[mutation_region[0]].seq) or mutation_region[2] > len(self.reference_genome[mutation_region[0]].seq): | |
| 623 self.ofh.write("\nMutation region %s ends after the reference sequence.\n" % ' '.join(mutation_region.astype(str))) | |
| 624 """ | |
| 625 amr_mutations = pandas.Series(dtype=object) | 607 amr_mutations = pandas.Series(dtype=object) |
| 626 for region_i in range(mutation_regions.shape[0]): | 608 for region_i in range(mutation_regions.shape[0]): |
| 627 region = mutation_regions.iloc[region_i, :] | 609 region = mutation_regions.iloc[region_i, :] |
| 628 region_name = str(region['name']) | 610 region_name = str(region['name']) |
| 629 self.ofh.write("Processing mutations for region %s\n" % region_name) | 611 self.ofh.write("Processing mutations for region %s\n" % region_name) |
| 660 Table_List = ['Reference contig', 'Position', 'Reference', 'Alternate', 'Drug', 'Note'] | 642 Table_List = ['Reference contig', 'Position', 'Reference', 'Alternate', 'Drug', 'Note'] |
| 661 for i in range(region_mutations.shape[0]): | 643 for i in range(region_mutations.shape[0]): |
| 662 Table_List = Table_List + region_mutations.iloc[i, [0, 1, 3, 4, 5, 6]].values.tolist() | 644 Table_List = Table_List + region_mutations.iloc[i, [0, 1, 3, 4, 5, 6]].values.tolist() |
| 663 row_count = int(len(Table_List) / 6) | 645 row_count = int(len(Table_List) / 6) |
| 664 self.doc.new_table(columns=6, rows=row_count, text=Table_List, text_align='left') | 646 self.doc.new_table(columns=6, rows=row_count, text=Table_List, text_align='left') |
| 647 if os.path.getsize(self.errors_file) > 0: | |
| 648 # Report the errors encountered when attempting | |
| 649 # to find mutations in the sample. | |
| 650 self.doc.new_line() | |
| 651 self.doc.new_header(level=2, title=self.mutation_errors_title) | |
| 652 with open(self.errors_file, 'r') as efh: | |
| 653 for i, line in enumerate(efh): | |
| 654 line = line.strip() | |
| 655 if line: | |
| 656 self.doc.new_line('* %s' % line) | |
| 665 method = '%s reads were mapped to the reference sequence using %s.' % (self.read_type, self.minimap2_version) | 657 method = '%s reads were mapped to the reference sequence using %s.' % (self.read_type, self.minimap2_version) |
| 666 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method)) | 658 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method)) |
| 667 method = 'Mutations were identified using %s and %s.' % (self.samtools_version, self.varscan_version) | 659 method = 'Mutations were identified using %s and %s.' % (self.samtools_version, self.varscan_version) |
| 668 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method)) | 660 self.methods[self.mutation_methods_title] = self.methods[self.mutation_methods_title].append(pandas.Series(method)) |
| 669 | 661 |
| 701 if amr_deletions.shape[0] > 0: | 693 if amr_deletions.shape[0] > 0: |
| 702 amr_deletions.columns = ['contig', 'start', 'stop', 'name', 'type', 'drug', 'note'] | 694 amr_deletions.columns = ['contig', 'start', 'stop', 'name', 'type', 'drug', 'note'] |
| 703 amr_deletions = amr_deletions.loc[amr_deletions['type'].isin(['large-deletion', 'any']), :] | 695 amr_deletions = amr_deletions.loc[amr_deletions['type'].isin(['large-deletion', 'any']), :] |
| 704 self.doc.new_line() | 696 self.doc.new_line() |
| 705 self.doc.new_header(level=2, title=self.large_indel_title) | 697 self.doc.new_header(level=2, title=self.large_indel_title) |
| 698 self.doc.new_line('This section is informative only when your idolates were identified as *Bacillus anthracis* strains') | |
| 706 for genome in ['Reference insertions', 'Query insertions']: | 699 for genome in ['Reference insertions', 'Query insertions']: |
| 707 genome_indels = large_indels[genome].copy() | 700 genome_indels = large_indels[genome].copy() |
| 708 self.doc.new_line() | 701 self.doc.new_line() |
| 709 self.doc.new_header(level=3, title=genome) | 702 self.doc.new_header(level=3, title=genome) |
| 710 if (genome_indels.shape[0] == 0): | 703 if (genome_indels.shape[0] == 0): |
| 850 parser.add_argument('--compute_sequence_length_file', action='store', dest='compute_sequence_length_file', help='Comnpute sequence length tabular file') | 843 parser.add_argument('--compute_sequence_length_file', action='store', dest='compute_sequence_length_file', help='Comnpute sequence length tabular file') |
| 851 parser.add_argument('--contig_coverage_file', action='store', dest='contig_coverage_file', help='Contig coverage TSV file') | 844 parser.add_argument('--contig_coverage_file', action='store', dest='contig_coverage_file', help='Contig coverage TSV file') |
| 852 parser.add_argument('--dbkey', action='store', dest='dbkey', help='Reference genome identifier') | 845 parser.add_argument('--dbkey', action='store', dest='dbkey', help='Reference genome identifier') |
| 853 parser.add_argument('--dnadiff_snps_file', action='store', dest='dnadiff_snps_file', help='DNAdiff snps tabular file') | 846 parser.add_argument('--dnadiff_snps_file', action='store', dest='dnadiff_snps_file', help='DNAdiff snps tabular file') |
| 854 parser.add_argument('--dnadiff_version', action='store', dest='dnadiff_version', default=None, help='DNAdiff version string') | 847 parser.add_argument('--dnadiff_version', action='store', dest='dnadiff_version', default=None, help='DNAdiff version string') |
| 848 parser.add_argument('--errors_file', action='store', dest='errors_file', default=None, help='AMR mutations errors encountered txt file') | |
| 855 parser.add_argument('--feature_bed_dir', action='store', dest='feature_bed_dir', help='Directory of best feature hits bed files') | 849 parser.add_argument('--feature_bed_dir', action='store', dest='feature_bed_dir', help='Directory of best feature hits bed files') |
| 856 parser.add_argument('--feature_png_dir', action='store', dest='feature_png_dir', help='Directory of best feature hits png files') | 850 parser.add_argument('--feature_png_dir', action='store', dest='feature_png_dir', help='Directory of best feature hits png files') |
| 857 parser.add_argument('--flye_assembly_info_file', action='store', dest='flye_assembly_info_file', default=None, help='Flye assembly info tabular file') | 851 parser.add_argument('--flye_assembly_info_file', action='store', dest='flye_assembly_info_file', default=None, help='Flye assembly info tabular file') |
| 858 parser.add_argument('--flye_version', action='store', dest='flye_version', default=None, help='Flye version string') | 852 parser.add_argument('--flye_version', action='store', dest='flye_version', default=None, help='Flye version string') |
| 859 parser.add_argument('--genome_insertions_file', action='store', dest='genome_insertions_file', help='Genome insertions BED file') | 853 parser.add_argument('--genome_insertions_file', action='store', dest='genome_insertions_file', help='Genome insertions BED file') |
| 865 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file') | 859 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file') |
| 866 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') | 860 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') |
| 867 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA css stypesheet') | 861 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA css stypesheet') |
| 868 parser.add_argument('--plasmids_file', action='store', dest='plasmids_file', help='pChunks plasmids TSV file') | 862 parser.add_argument('--plasmids_file', action='store', dest='plasmids_file', help='pChunks plasmids TSV file') |
| 869 parser.add_argument('--quast_report_file', action='store', dest='quast_report_file', help='Quast report tabular file') | 863 parser.add_argument('--quast_report_file', action='store', dest='quast_report_file', help='Quast report tabular file') |
| 864 parser.add_argument('--read_type', action='store', dest='read_type', help='Sample read type (ONT or Illumina)') | |
| 870 parser.add_argument('--reference_insertions_file', action='store', dest='reference_insertions_file', help='Reference insertions BED file') | 865 parser.add_argument('--reference_insertions_file', action='store', dest='reference_insertions_file', help='Reference insertions BED file') |
| 871 parser.add_argument('--samtools_version', action='store', dest='samtools_version', default=None, help='Samtools version string') | 866 parser.add_argument('--samtools_version', action='store', dest='samtools_version', default=None, help='Samtools version string') |
| 872 parser.add_argument('--varscan_version', action='store', dest='varscan_version', default=None, help='Varscan version string') | 867 parser.add_argument('--varscan_version', action='store', dest='varscan_version', default=None, help='Varscan version string') |
| 873 | 868 |
| 874 args = parser.parse_args() | 869 args = parser.parse_args() |
| 910 args.compute_sequence_length_file, | 905 args.compute_sequence_length_file, |
| 911 args.contig_coverage_file, | 906 args.contig_coverage_file, |
| 912 args.dbkey, | 907 args.dbkey, |
| 913 args.dnadiff_snps_file, | 908 args.dnadiff_snps_file, |
| 914 args.dnadiff_version, | 909 args.dnadiff_version, |
| 910 args.errors_file, | |
| 915 feature_bed_files, | 911 feature_bed_files, |
| 916 feature_png_files, | 912 feature_png_files, |
| 917 args.flye_assembly_info_file, | 913 args.flye_assembly_info_file, |
| 918 args.flye_version, | 914 args.flye_version, |
| 919 args.genome_insertions_file, | 915 args.genome_insertions_file, |
| 925 mutation_regions_files, | 921 mutation_regions_files, |
| 926 args.ont_fastq_file, | 922 args.ont_fastq_file, |
| 927 args.pima_css, | 923 args.pima_css, |
| 928 args.plasmids_file, | 924 args.plasmids_file, |
| 929 args.quast_report_file, | 925 args.quast_report_file, |
| 926 args.read_type, | |
| 930 args.reference_insertions_file, | 927 args.reference_insertions_file, |
| 931 args.samtools_version, | 928 args.samtools_version, |
| 932 args.varscan_version) | 929 args.varscan_version) |
| 933 markdown_report.make_report() | 930 markdown_report.make_report() |
