comparison pima_report.py @ 28:27485e70ed2b draft

Uploaded
author greg
date Fri, 28 Apr 2023 19:40:58 +0000
parents ddc056cf16bf
children 134a0879d0b6
comparison
equal deleted inserted replaced
27:ddc056cf16bf 28:27485e70ed2b
1 #!/usr/bin/env python
2
1 import argparse 3 import argparse
2 import os 4 import os
3 import pandas 5 import pandas
4 import pypandoc 6 import pypandoc
5 import re 7 import re
21 assembly_fasta_file=None, assembly_name=None, bedtools_version=None, blastn_version=None, 23 assembly_fasta_file=None, assembly_name=None, bedtools_version=None, blastn_version=None,
22 circos_files=None, compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, 24 circos_files=None, compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None,
23 dnadiff_snps_file=None, dnadiff_version=None, errors_file=None, feature_bed_files=None, 25 dnadiff_snps_file=None, dnadiff_version=None, errors_file=None, feature_bed_files=None,
24 feature_png_files=None, flye_assembly_info_file=None, genome_insertions_file=None, gzipped=None, 26 feature_png_files=None, flye_assembly_info_file=None, genome_insertions_file=None, gzipped=None,
25 illumina_forward_read_file=None, illumina_reverse_read_file=None, kraken2_report_file=None, 27 illumina_forward_read_file=None, illumina_reverse_read_file=None, kraken2_report_file=None,
26 kraken2_version=None, minimap2_version=None, mutation_regions_bed_file=None, 28 kraken2_version=None, lrn_risk_amr_file=None, lrn_risk_blacklist_file=None, lrn_risk_vf_file=None,
27 mutation_regions_tsv_files=None, ont_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, 29 minimap2_version=None, mutation_regions_bed_file=None, mutation_regions_tsv_files=None,
28 read_type=None, reference_insertions_file=None, samtools_version=None, varscan_version=None): 30 ont_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, read_type=None,
31 reference_insertions_file=None, samtools_version=None, varscan_version=None):
29 self.ofh = open("process_log.txt", "w") 32 self.ofh = open("process_log.txt", "w")
30 33
31 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) 34 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file))
32 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) 35 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files))
33 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) 36 self.ofh.write("analysis_name: %s\n" % str(analysis_name))
50 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file)) 53 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file))
51 self.ofh.write("illumina_forward_read_file: %s\n" % str(illumina_forward_read_file)) 54 self.ofh.write("illumina_forward_read_file: %s\n" % str(illumina_forward_read_file))
52 self.ofh.write("illumina_reverse_read_file: %s\n" % str(illumina_reverse_read_file)) 55 self.ofh.write("illumina_reverse_read_file: %s\n" % str(illumina_reverse_read_file))
53 self.ofh.write("kraken2_report_file: %s\n" % str(kraken2_report_file)) 56 self.ofh.write("kraken2_report_file: %s\n" % str(kraken2_report_file))
54 self.ofh.write("kraken2_version: %s\n" % str(kraken2_version)) 57 self.ofh.write("kraken2_version: %s\n" % str(kraken2_version))
58 self.ofh.write("lrn_risk_amr_file: %s\n" % str(lrn_risk_amr_file))
59 self.ofh.write("lrn_risk_blacklist_file: %s\n" % str(lrn_risk_blacklist_file))
60 self.ofh.write("lrn_risk_vf_file: %s\n" % str(lrn_risk_vf_file))
55 self.ofh.write("minimap2_version: %s\n" % str(minimap2_version)) 61 self.ofh.write("minimap2_version: %s\n" % str(minimap2_version))
56 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file)) 62 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file))
57 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) 63 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files))
58 self.ofh.write("ont_file: %s\n" % str(ont_file)) 64 self.ofh.write("ont_file: %s\n" % str(ont_file))
59 self.ofh.write("pima_css: %s\n" % str(pima_css)) 65 self.ofh.write("pima_css: %s\n" % str(pima_css))
113 self.kraken2_report_file = kraken2_report_file 119 self.kraken2_report_file = kraken2_report_file
114 if kraken2_version is None: 120 if kraken2_version is None:
115 self.kraken2_version = 'kraken2 (version unknown)' 121 self.kraken2_version = 'kraken2 (version unknown)'
116 else: 122 else:
117 self.kraken2_version = re.sub('_', '.', kraken2_version.rstrip(' _report_')) 123 self.kraken2_version = re.sub('_', '.', kraken2_version.rstrip(' _report_'))
124 self.lrn_risk_amr_file = lrn_risk_amr_file
125 self.lrn_risk_blacklist_file = lrn_risk_blacklist_file
126 self.lrn_risk_vf_file = lrn_risk_vf_file
118 if minimap2_version is None: 127 if minimap2_version is None:
119 self.minimap2_version = 'minimap2 (version unknown)' 128 self.minimap2_version = 'minimap2 (version unknown)'
120 else: 129 else:
121 self.minimap2_version = re.sub('_', '.', minimap2_version) 130 self.minimap2_version = re.sub('_', '.', minimap2_version)
122 self.mutation_regions_bed_file = mutation_regions_bed_file 131 self.mutation_regions_bed_file = mutation_regions_bed_file
148 self.contig_alignment_title = 'Alignment vs. reference contigs' 157 self.contig_alignment_title = 'Alignment vs. reference contigs'
149 self.feature_title = 'Features found in the assembly' 158 self.feature_title = 'Features found in the assembly'
150 self.feature_methods_title = 'Feature annotation' 159 self.feature_methods_title = 'Feature annotation'
151 self.feature_plot_title = 'Feature annotation plots' 160 self.feature_plot_title = 'Feature annotation plots'
152 self.large_indel_title = 'Large insertions & deletions' 161 self.large_indel_title = 'Large insertions & deletions'
162 self.lrn_risk_title = 'LRNRisk isolate classification'
153 self.methods_title = 'Methods' 163 self.methods_title = 'Methods'
154 self.mutation_errors_title = 'Errors finding mutations in the sample' 164 self.mutation_errors_title = 'Errors finding mutations in the sample'
155 self.mutation_title = 'Mutations found in the sample' 165 self.mutation_title = 'Mutations found in the sample'
156 self.mutation_methods_title = 'Mutation screening' 166 self.mutation_methods_title = 'Mutation screening'
157 self.plasmid_methods_title = 'Plasmid annotation' 167 self.plasmid_methods_title = 'Plasmid annotation'
735 row_count = int(len(Table_List) / 4) 745 row_count = int(len(Table_List) / 4)
736 self.doc.new_table(columns=4, rows=row_count, text=Table_List, text_align='left') 746 self.doc.new_table(columns=4, rows=row_count, text=Table_List, text_align='left')
737 method = 'Large insertions or deletions were found as the complement of aligned regions using %s.' % self.bedtools_version 747 method = 'Large insertions or deletions were found as the complement of aligned regions using %s.' % self.bedtools_version
738 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method)) 748 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method))
739 self.doc.new_line() 749 self.doc.new_line()
750 self.doc.new_line('<div style="page-break-after: always;"></div>')
751 self.doc.new_line()
752
753 def add_lrn_risk_info(self):
754 self.ofh.write("\nXXXXXX In add_lrn_risk_info\n\n")
755 if self.lrn_risk_amr_file is None and self.lrn_risk_blacklist_file is None and self.lrn_risk_vf_file is None:
756 return
757 self.doc.new_line()
758 self.doc.new_header(level=2, title=self.lrn_risk_title)
759 # Process self.lrn_risk_amr_file.
760 try:
761 lrn_risk_amr = pandas.read_csv(filepath_or_buffer=self.lrn_risk_amr_file, sep='\t', header=0)
762 except Exception:
763 lrn_risk_amr = pandas.DataFrame()
764 if lrn_risk_amr.shape[0] > 0:
765 self.doc.new_line()
766 self.doc.new_header(level=2, title="AMR Determinant Distribution")
767 self.doc.new_line()
768 Table_List = ["Gene", "Contig", "% Identity", "% Coverage", "E-Value", "Annotation", "Comparison to Publicly Available Genomes"]
769 for index, row in lrn_risk_amr.iterrows():
770 Table_List = Table_List + row.tolist()
771 row_count = int(len(Table_List) / 7)
772 self.doc.new_table(columns=7, rows=row_count, text=Table_List, text_align='left')
773 # Process self.lrn_risk_blacklist_file.
774 try:
775 lrn_risk_blacklist = pandas.read_csv(filepath_or_buffer=self.lrn_risk_blacklist_file, sep='\t', header=0)
776 except Exception:
777 lrn_risk_blacklist = pandas.DataFrame()
778 if lrn_risk_blacklist.shape[0] > 0:
779 self.doc.new_line()
780 self.doc.new_header(level=2, title="Blacklisted High-risk Virulence Factors")
781 self.doc.new_line()
782 Table_List = ["Blacklisted Gene", "Reason", "Risk Category"]
783 for index, row in lrn_risk_blacklist.iterrows():
784 Table_List = Table_List + row.tolist()
785 row_count = int(len(Table_List) / 3)
786 self.doc.new_table(columns=3, rows=row_count, text=Table_List, text_align='left')
787 # Process self.lrn_risk_vf_file.
788 try:
789 lrn_risk_vf = pandas.read_csv(filepath_or_buffer=self.lrn_risk_vf_file, sep='\t', header=0)
790 except Exception:
791 lrn_risk_vf = pandas.DataFrame()
792 if lrn_risk_vf.shape[0] > 0:
793 self.doc.new_line()
794 self.doc.new_header(level=2, title="Virulence Factor Distribution")
795 self.doc.new_line()
796 Table_List = ["Gene", "Contig", "% Identity", "% Coverage", "E-Value", "Annotation", "Comparison to Publicly Available Genomes"]
797 for index, row in lrn_risk_vf.iterrows():
798 Table_List = Table_List + row.tolist()
799 row_count = int(len(Table_List) / 7)
800 self.doc.new_table(columns=7, rows=row_count, text=Table_List, text_align='left')
740 self.doc.new_line('<div style="page-break-after: always;"></div>') 801 self.doc.new_line('<div style="page-break-after: always;"></div>')
741 self.doc.new_line() 802 self.doc.new_line()
742 803
743 def add_plasmids(self): 804 def add_plasmids(self):
744 try: 805 try:
837 self.add_feature_plots() 898 self.add_feature_plots()
838 self.add_mutations() 899 self.add_mutations()
839 self.add_large_indels() 900 self.add_large_indels()
840 self.add_plasmids() 901 self.add_plasmids()
841 self.add_amr_matrix() 902 self.add_amr_matrix()
903 self.add_lrn_risk_info()
842 # self.add_snps() 904 # self.add_snps()
843 self.add_methods() 905 self.add_methods()
844 self.make_tex() 906 self.make_tex()
845 # It took me quite a long time to find out that the value of the -t 907 # It took me quite a long time to find out that the value of the -t
846 # (implied) argument in the following command must be 'html' instead of 908 # (implied) argument in the following command must be 'html' instead of
878 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Sample(s) is/are gzipped') 940 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Sample(s) is/are gzipped')
879 parser.add_argument('--illumina_forward_read_file', action='store', dest='illumina_forward_read_file', help='Illumina forward read file') 941 parser.add_argument('--illumina_forward_read_file', action='store', dest='illumina_forward_read_file', help='Illumina forward read file')
880 parser.add_argument('--illumina_reverse_read_file', action='store', dest='illumina_reverse_read_file', help='Illumina reverse read file') 942 parser.add_argument('--illumina_reverse_read_file', action='store', dest='illumina_reverse_read_file', help='Illumina reverse read file')
881 parser.add_argument('--kraken2_report_file', action='store', dest='kraken2_report_file', default=None, help='kraken2 report file') 943 parser.add_argument('--kraken2_report_file', action='store', dest='kraken2_report_file', default=None, help='kraken2 report file')
882 parser.add_argument('--kraken2_version', action='store', dest='kraken2_version', default=None, help='kraken2 version string') 944 parser.add_argument('--kraken2_version', action='store', dest='kraken2_version', default=None, help='kraken2 version string')
945 parser.add_argument('--lrn_risk_amr_file', action='store', dest='lrn_risk_amr_file', default=None, help='LRN RISK AMR TSV file')
946 parser.add_argument('--lrn_risk_blacklist_file', action='store', dest='lrn_risk_blacklist_file', default=None, help='LRN RISK blacklist TSV file')
947 parser.add_argument('--lrn_risk_vf_file', action='store', dest='lrn_risk_vf_file', default=None, help='LRN RISK virulence factors TSV file')
883 parser.add_argument('--minimap2_version', action='store', dest='minimap2_version', default=None, help='minimap2 version string') 948 parser.add_argument('--minimap2_version', action='store', dest='minimap2_version', default=None, help='minimap2 version string')
884 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file') 949 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file')
885 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') 950 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files')
886 parser.add_argument('--ont_file', action='store', dest='ont_file', help='ONT single read file') 951 parser.add_argument('--ont_file', action='store', dest='ont_file', help='ONT single read file')
887 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA css stypesheet') 952 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA css stypesheet')
942 args.gzipped, 1007 args.gzipped,
943 args.illumina_forward_read_file, 1008 args.illumina_forward_read_file,
944 args.illumina_reverse_read_file, 1009 args.illumina_reverse_read_file,
945 args.kraken2_report_file, 1010 args.kraken2_report_file,
946 args.kraken2_version, 1011 args.kraken2_version,
1012 args.lrn_risk_amr_file,
1013 args.lrn_risk_blacklist_file,
1014 args.lrn_risk_vf_file,
947 args.minimap2_version, 1015 args.minimap2_version,
948 args.mutation_regions_bed_file, 1016 args.mutation_regions_bed_file,
949 mutation_regions_files, 1017 mutation_regions_files,
950 args.ont_file, 1018 args.ont_file,
951 args.pima_css, 1019 args.pima_css,