Mercurial > repos > greg > pima_report
comparison pima_report.py @ 28:27485e70ed2b draft
Uploaded
author | greg |
---|---|
date | Fri, 28 Apr 2023 19:40:58 +0000 |
parents | ddc056cf16bf |
children | 134a0879d0b6 |
comparison
equal
deleted
inserted
replaced
27:ddc056cf16bf | 28:27485e70ed2b |
---|---|
1 #!/usr/bin/env python | |
2 | |
1 import argparse | 3 import argparse |
2 import os | 4 import os |
3 import pandas | 5 import pandas |
4 import pypandoc | 6 import pypandoc |
5 import re | 7 import re |
21 assembly_fasta_file=None, assembly_name=None, bedtools_version=None, blastn_version=None, | 23 assembly_fasta_file=None, assembly_name=None, bedtools_version=None, blastn_version=None, |
22 circos_files=None, compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, | 24 circos_files=None, compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, |
23 dnadiff_snps_file=None, dnadiff_version=None, errors_file=None, feature_bed_files=None, | 25 dnadiff_snps_file=None, dnadiff_version=None, errors_file=None, feature_bed_files=None, |
24 feature_png_files=None, flye_assembly_info_file=None, genome_insertions_file=None, gzipped=None, | 26 feature_png_files=None, flye_assembly_info_file=None, genome_insertions_file=None, gzipped=None, |
25 illumina_forward_read_file=None, illumina_reverse_read_file=None, kraken2_report_file=None, | 27 illumina_forward_read_file=None, illumina_reverse_read_file=None, kraken2_report_file=None, |
26 kraken2_version=None, minimap2_version=None, mutation_regions_bed_file=None, | 28 kraken2_version=None, lrn_risk_amr_file=None, lrn_risk_blacklist_file=None, lrn_risk_vf_file=None, |
27 mutation_regions_tsv_files=None, ont_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, | 29 minimap2_version=None, mutation_regions_bed_file=None, mutation_regions_tsv_files=None, |
28 read_type=None, reference_insertions_file=None, samtools_version=None, varscan_version=None): | 30 ont_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, read_type=None, |
31 reference_insertions_file=None, samtools_version=None, varscan_version=None): | |
29 self.ofh = open("process_log.txt", "w") | 32 self.ofh = open("process_log.txt", "w") |
30 | 33 |
31 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) | 34 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) |
32 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) | 35 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) |
33 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) | 36 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) |
50 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file)) | 53 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file)) |
51 self.ofh.write("illumina_forward_read_file: %s\n" % str(illumina_forward_read_file)) | 54 self.ofh.write("illumina_forward_read_file: %s\n" % str(illumina_forward_read_file)) |
52 self.ofh.write("illumina_reverse_read_file: %s\n" % str(illumina_reverse_read_file)) | 55 self.ofh.write("illumina_reverse_read_file: %s\n" % str(illumina_reverse_read_file)) |
53 self.ofh.write("kraken2_report_file: %s\n" % str(kraken2_report_file)) | 56 self.ofh.write("kraken2_report_file: %s\n" % str(kraken2_report_file)) |
54 self.ofh.write("kraken2_version: %s\n" % str(kraken2_version)) | 57 self.ofh.write("kraken2_version: %s\n" % str(kraken2_version)) |
58 self.ofh.write("lrn_risk_amr_file: %s\n" % str(lrn_risk_amr_file)) | |
59 self.ofh.write("lrn_risk_blacklist_file: %s\n" % str(lrn_risk_blacklist_file)) | |
60 self.ofh.write("lrn_risk_vf_file: %s\n" % str(lrn_risk_vf_file)) | |
55 self.ofh.write("minimap2_version: %s\n" % str(minimap2_version)) | 61 self.ofh.write("minimap2_version: %s\n" % str(minimap2_version)) |
56 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file)) | 62 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file)) |
57 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) | 63 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) |
58 self.ofh.write("ont_file: %s\n" % str(ont_file)) | 64 self.ofh.write("ont_file: %s\n" % str(ont_file)) |
59 self.ofh.write("pima_css: %s\n" % str(pima_css)) | 65 self.ofh.write("pima_css: %s\n" % str(pima_css)) |
113 self.kraken2_report_file = kraken2_report_file | 119 self.kraken2_report_file = kraken2_report_file |
114 if kraken2_version is None: | 120 if kraken2_version is None: |
115 self.kraken2_version = 'kraken2 (version unknown)' | 121 self.kraken2_version = 'kraken2 (version unknown)' |
116 else: | 122 else: |
117 self.kraken2_version = re.sub('_', '.', kraken2_version.rstrip(' _report_')) | 123 self.kraken2_version = re.sub('_', '.', kraken2_version.rstrip(' _report_')) |
124 self.lrn_risk_amr_file = lrn_risk_amr_file | |
125 self.lrn_risk_blacklist_file = lrn_risk_blacklist_file | |
126 self.lrn_risk_vf_file = lrn_risk_vf_file | |
118 if minimap2_version is None: | 127 if minimap2_version is None: |
119 self.minimap2_version = 'minimap2 (version unknown)' | 128 self.minimap2_version = 'minimap2 (version unknown)' |
120 else: | 129 else: |
121 self.minimap2_version = re.sub('_', '.', minimap2_version) | 130 self.minimap2_version = re.sub('_', '.', minimap2_version) |
122 self.mutation_regions_bed_file = mutation_regions_bed_file | 131 self.mutation_regions_bed_file = mutation_regions_bed_file |
148 self.contig_alignment_title = 'Alignment vs. reference contigs' | 157 self.contig_alignment_title = 'Alignment vs. reference contigs' |
149 self.feature_title = 'Features found in the assembly' | 158 self.feature_title = 'Features found in the assembly' |
150 self.feature_methods_title = 'Feature annotation' | 159 self.feature_methods_title = 'Feature annotation' |
151 self.feature_plot_title = 'Feature annotation plots' | 160 self.feature_plot_title = 'Feature annotation plots' |
152 self.large_indel_title = 'Large insertions & deletions' | 161 self.large_indel_title = 'Large insertions & deletions' |
162 self.lrn_risk_title = 'LRNRisk isolate classification' | |
153 self.methods_title = 'Methods' | 163 self.methods_title = 'Methods' |
154 self.mutation_errors_title = 'Errors finding mutations in the sample' | 164 self.mutation_errors_title = 'Errors finding mutations in the sample' |
155 self.mutation_title = 'Mutations found in the sample' | 165 self.mutation_title = 'Mutations found in the sample' |
156 self.mutation_methods_title = 'Mutation screening' | 166 self.mutation_methods_title = 'Mutation screening' |
157 self.plasmid_methods_title = 'Plasmid annotation' | 167 self.plasmid_methods_title = 'Plasmid annotation' |
735 row_count = int(len(Table_List) / 4) | 745 row_count = int(len(Table_List) / 4) |
736 self.doc.new_table(columns=4, rows=row_count, text=Table_List, text_align='left') | 746 self.doc.new_table(columns=4, rows=row_count, text=Table_List, text_align='left') |
737 method = 'Large insertions or deletions were found as the complement of aligned regions using %s.' % self.bedtools_version | 747 method = 'Large insertions or deletions were found as the complement of aligned regions using %s.' % self.bedtools_version |
738 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method)) | 748 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method)) |
739 self.doc.new_line() | 749 self.doc.new_line() |
750 self.doc.new_line('<div style="page-break-after: always;"></div>') | |
751 self.doc.new_line() | |
752 | |
753 def add_lrn_risk_info(self): | |
754 self.ofh.write("\nXXXXXX In add_lrn_risk_info\n\n") | |
755 if self.lrn_risk_amr_file is None and self.lrn_risk_blacklist_file is None and self.lrn_risk_vf_file is None: | |
756 return | |
757 self.doc.new_line() | |
758 self.doc.new_header(level=2, title=self.lrn_risk_title) | |
759 # Process self.lrn_risk_amr_file. | |
760 try: | |
761 lrn_risk_amr = pandas.read_csv(filepath_or_buffer=self.lrn_risk_amr_file, sep='\t', header=0) | |
762 except Exception: | |
763 lrn_risk_amr = pandas.DataFrame() | |
764 if lrn_risk_amr.shape[0] > 0: | |
765 self.doc.new_line() | |
766 self.doc.new_header(level=2, title="AMR Determinant Distribution") | |
767 self.doc.new_line() | |
768 Table_List = ["Gene", "Contig", "% Identity", "% Coverage", "E-Value", "Annotation", "Comparison to Publicly Available Genomes"] | |
769 for index, row in lrn_risk_amr.iterrows(): | |
770 Table_List = Table_List + row.tolist() | |
771 row_count = int(len(Table_List) / 7) | |
772 self.doc.new_table(columns=7, rows=row_count, text=Table_List, text_align='left') | |
773 # Process self.lrn_risk_blacklist_file. | |
774 try: | |
775 lrn_risk_blacklist = pandas.read_csv(filepath_or_buffer=self.lrn_risk_blacklist_file, sep='\t', header=0) | |
776 except Exception: | |
777 lrn_risk_blacklist = pandas.DataFrame() | |
778 if lrn_risk_blacklist.shape[0] > 0: | |
779 self.doc.new_line() | |
780 self.doc.new_header(level=2, title="Blacklisted High-risk Virulence Factors") | |
781 self.doc.new_line() | |
782 Table_List = ["Blacklisted Gene", "Reason", "Risk Category"] | |
783 for index, row in lrn_risk_blacklist.iterrows(): | |
784 Table_List = Table_List + row.tolist() | |
785 row_count = int(len(Table_List) / 3) | |
786 self.doc.new_table(columns=3, rows=row_count, text=Table_List, text_align='left') | |
787 # Process self.lrn_risk_vf_file. | |
788 try: | |
789 lrn_risk_vf = pandas.read_csv(filepath_or_buffer=self.lrn_risk_vf_file, sep='\t', header=0) | |
790 except Exception: | |
791 lrn_risk_vf = pandas.DataFrame() | |
792 if lrn_risk_vf.shape[0] > 0: | |
793 self.doc.new_line() | |
794 self.doc.new_header(level=2, title="Virulence Factor Distribution") | |
795 self.doc.new_line() | |
796 Table_List = ["Gene", "Contig", "% Identity", "% Coverage", "E-Value", "Annotation", "Comparison to Publicly Available Genomes"] | |
797 for index, row in lrn_risk_vf.iterrows(): | |
798 Table_List = Table_List + row.tolist() | |
799 row_count = int(len(Table_List) / 7) | |
800 self.doc.new_table(columns=7, rows=row_count, text=Table_List, text_align='left') | |
740 self.doc.new_line('<div style="page-break-after: always;"></div>') | 801 self.doc.new_line('<div style="page-break-after: always;"></div>') |
741 self.doc.new_line() | 802 self.doc.new_line() |
742 | 803 |
743 def add_plasmids(self): | 804 def add_plasmids(self): |
744 try: | 805 try: |
837 self.add_feature_plots() | 898 self.add_feature_plots() |
838 self.add_mutations() | 899 self.add_mutations() |
839 self.add_large_indels() | 900 self.add_large_indels() |
840 self.add_plasmids() | 901 self.add_plasmids() |
841 self.add_amr_matrix() | 902 self.add_amr_matrix() |
903 self.add_lrn_risk_info() | |
842 # self.add_snps() | 904 # self.add_snps() |
843 self.add_methods() | 905 self.add_methods() |
844 self.make_tex() | 906 self.make_tex() |
845 # It took me quite a long time to find out that the value of the -t | 907 # It took me quite a long time to find out that the value of the -t |
846 # (implied) argument in the following command must be 'html' instead of | 908 # (implied) argument in the following command must be 'html' instead of |
878 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Sample(s) is/are gzipped') | 940 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Sample(s) is/are gzipped') |
879 parser.add_argument('--illumina_forward_read_file', action='store', dest='illumina_forward_read_file', help='Illumina forward read file') | 941 parser.add_argument('--illumina_forward_read_file', action='store', dest='illumina_forward_read_file', help='Illumina forward read file') |
880 parser.add_argument('--illumina_reverse_read_file', action='store', dest='illumina_reverse_read_file', help='Illumina reverse read file') | 942 parser.add_argument('--illumina_reverse_read_file', action='store', dest='illumina_reverse_read_file', help='Illumina reverse read file') |
881 parser.add_argument('--kraken2_report_file', action='store', dest='kraken2_report_file', default=None, help='kraken2 report file') | 943 parser.add_argument('--kraken2_report_file', action='store', dest='kraken2_report_file', default=None, help='kraken2 report file') |
882 parser.add_argument('--kraken2_version', action='store', dest='kraken2_version', default=None, help='kraken2 version string') | 944 parser.add_argument('--kraken2_version', action='store', dest='kraken2_version', default=None, help='kraken2 version string') |
945 parser.add_argument('--lrn_risk_amr_file', action='store', dest='lrn_risk_amr_file', default=None, help='LRN RISK AMR TSV file') | |
946 parser.add_argument('--lrn_risk_blacklist_file', action='store', dest='lrn_risk_blacklist_file', default=None, help='LRN RISK blacklist TSV file') | |
947 parser.add_argument('--lrn_risk_vf_file', action='store', dest='lrn_risk_vf_file', default=None, help='LRN RISK virulence factors TSV file') | |
883 parser.add_argument('--minimap2_version', action='store', dest='minimap2_version', default=None, help='minimap2 version string') | 948 parser.add_argument('--minimap2_version', action='store', dest='minimap2_version', default=None, help='minimap2 version string') |
884 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file') | 949 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file') |
885 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') | 950 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') |
886 parser.add_argument('--ont_file', action='store', dest='ont_file', help='ONT single read file') | 951 parser.add_argument('--ont_file', action='store', dest='ont_file', help='ONT single read file') |
887 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA css stypesheet') | 952 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA css stypesheet') |
942 args.gzipped, | 1007 args.gzipped, |
943 args.illumina_forward_read_file, | 1008 args.illumina_forward_read_file, |
944 args.illumina_reverse_read_file, | 1009 args.illumina_reverse_read_file, |
945 args.kraken2_report_file, | 1010 args.kraken2_report_file, |
946 args.kraken2_version, | 1011 args.kraken2_version, |
1012 args.lrn_risk_amr_file, | |
1013 args.lrn_risk_blacklist_file, | |
1014 args.lrn_risk_vf_file, | |
947 args.minimap2_version, | 1015 args.minimap2_version, |
948 args.mutation_regions_bed_file, | 1016 args.mutation_regions_bed_file, |
949 mutation_regions_files, | 1017 mutation_regions_files, |
950 args.ont_file, | 1018 args.ont_file, |
951 args.pima_css, | 1019 args.pima_css, |