Mercurial > repos > greg > pima_report
comparison pima_report.py @ 28:27485e70ed2b draft
Uploaded
| author | greg |
|---|---|
| date | Fri, 28 Apr 2023 19:40:58 +0000 |
| parents | ddc056cf16bf |
| children | 134a0879d0b6 |
comparison
equal
deleted
inserted
replaced
| 27:ddc056cf16bf | 28:27485e70ed2b |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 1 import argparse | 3 import argparse |
| 2 import os | 4 import os |
| 3 import pandas | 5 import pandas |
| 4 import pypandoc | 6 import pypandoc |
| 5 import re | 7 import re |
| 21 assembly_fasta_file=None, assembly_name=None, bedtools_version=None, blastn_version=None, | 23 assembly_fasta_file=None, assembly_name=None, bedtools_version=None, blastn_version=None, |
| 22 circos_files=None, compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, | 24 circos_files=None, compute_sequence_length_file=None, contig_coverage_file=None, dbkey=None, |
| 23 dnadiff_snps_file=None, dnadiff_version=None, errors_file=None, feature_bed_files=None, | 25 dnadiff_snps_file=None, dnadiff_version=None, errors_file=None, feature_bed_files=None, |
| 24 feature_png_files=None, flye_assembly_info_file=None, genome_insertions_file=None, gzipped=None, | 26 feature_png_files=None, flye_assembly_info_file=None, genome_insertions_file=None, gzipped=None, |
| 25 illumina_forward_read_file=None, illumina_reverse_read_file=None, kraken2_report_file=None, | 27 illumina_forward_read_file=None, illumina_reverse_read_file=None, kraken2_report_file=None, |
| 26 kraken2_version=None, minimap2_version=None, mutation_regions_bed_file=None, | 28 kraken2_version=None, lrn_risk_amr_file=None, lrn_risk_blacklist_file=None, lrn_risk_vf_file=None, |
| 27 mutation_regions_tsv_files=None, ont_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, | 29 minimap2_version=None, mutation_regions_bed_file=None, mutation_regions_tsv_files=None, |
| 28 read_type=None, reference_insertions_file=None, samtools_version=None, varscan_version=None): | 30 ont_file=None, pima_css=None, plasmids_file=None, quast_report_file=None, read_type=None, |
| 31 reference_insertions_file=None, samtools_version=None, varscan_version=None): | |
| 29 self.ofh = open("process_log.txt", "w") | 32 self.ofh = open("process_log.txt", "w") |
| 30 | 33 |
| 31 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) | 34 self.ofh.write("amr_deletions_file: %s\n" % str(amr_deletions_file)) |
| 32 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) | 35 self.ofh.write("amr_matrix_files: %s\n" % str(amr_matrix_files)) |
| 33 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) | 36 self.ofh.write("analysis_name: %s\n" % str(analysis_name)) |
| 50 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file)) | 53 self.ofh.write("genome_insertions_file: %s\n" % str(genome_insertions_file)) |
| 51 self.ofh.write("illumina_forward_read_file: %s\n" % str(illumina_forward_read_file)) | 54 self.ofh.write("illumina_forward_read_file: %s\n" % str(illumina_forward_read_file)) |
| 52 self.ofh.write("illumina_reverse_read_file: %s\n" % str(illumina_reverse_read_file)) | 55 self.ofh.write("illumina_reverse_read_file: %s\n" % str(illumina_reverse_read_file)) |
| 53 self.ofh.write("kraken2_report_file: %s\n" % str(kraken2_report_file)) | 56 self.ofh.write("kraken2_report_file: %s\n" % str(kraken2_report_file)) |
| 54 self.ofh.write("kraken2_version: %s\n" % str(kraken2_version)) | 57 self.ofh.write("kraken2_version: %s\n" % str(kraken2_version)) |
| 58 self.ofh.write("lrn_risk_amr_file: %s\n" % str(lrn_risk_amr_file)) | |
| 59 self.ofh.write("lrn_risk_blacklist_file: %s\n" % str(lrn_risk_blacklist_file)) | |
| 60 self.ofh.write("lrn_risk_vf_file: %s\n" % str(lrn_risk_vf_file)) | |
| 55 self.ofh.write("minimap2_version: %s\n" % str(minimap2_version)) | 61 self.ofh.write("minimap2_version: %s\n" % str(minimap2_version)) |
| 56 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file)) | 62 self.ofh.write("mutation_regions_bed_file: %s\n" % str(mutation_regions_bed_file)) |
| 57 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) | 63 self.ofh.write("mutation_regions_tsv_files: %s\n" % str(mutation_regions_tsv_files)) |
| 58 self.ofh.write("ont_file: %s\n" % str(ont_file)) | 64 self.ofh.write("ont_file: %s\n" % str(ont_file)) |
| 59 self.ofh.write("pima_css: %s\n" % str(pima_css)) | 65 self.ofh.write("pima_css: %s\n" % str(pima_css)) |
| 113 self.kraken2_report_file = kraken2_report_file | 119 self.kraken2_report_file = kraken2_report_file |
| 114 if kraken2_version is None: | 120 if kraken2_version is None: |
| 115 self.kraken2_version = 'kraken2 (version unknown)' | 121 self.kraken2_version = 'kraken2 (version unknown)' |
| 116 else: | 122 else: |
| 117 self.kraken2_version = re.sub('_', '.', kraken2_version.rstrip(' _report_')) | 123 self.kraken2_version = re.sub('_', '.', kraken2_version.rstrip(' _report_')) |
| 124 self.lrn_risk_amr_file = lrn_risk_amr_file | |
| 125 self.lrn_risk_blacklist_file = lrn_risk_blacklist_file | |
| 126 self.lrn_risk_vf_file = lrn_risk_vf_file | |
| 118 if minimap2_version is None: | 127 if minimap2_version is None: |
| 119 self.minimap2_version = 'minimap2 (version unknown)' | 128 self.minimap2_version = 'minimap2 (version unknown)' |
| 120 else: | 129 else: |
| 121 self.minimap2_version = re.sub('_', '.', minimap2_version) | 130 self.minimap2_version = re.sub('_', '.', minimap2_version) |
| 122 self.mutation_regions_bed_file = mutation_regions_bed_file | 131 self.mutation_regions_bed_file = mutation_regions_bed_file |
| 148 self.contig_alignment_title = 'Alignment vs. reference contigs' | 157 self.contig_alignment_title = 'Alignment vs. reference contigs' |
| 149 self.feature_title = 'Features found in the assembly' | 158 self.feature_title = 'Features found in the assembly' |
| 150 self.feature_methods_title = 'Feature annotation' | 159 self.feature_methods_title = 'Feature annotation' |
| 151 self.feature_plot_title = 'Feature annotation plots' | 160 self.feature_plot_title = 'Feature annotation plots' |
| 152 self.large_indel_title = 'Large insertions & deletions' | 161 self.large_indel_title = 'Large insertions & deletions' |
| 162 self.lrn_risk_title = 'LRNRisk isolate classification' | |
| 153 self.methods_title = 'Methods' | 163 self.methods_title = 'Methods' |
| 154 self.mutation_errors_title = 'Errors finding mutations in the sample' | 164 self.mutation_errors_title = 'Errors finding mutations in the sample' |
| 155 self.mutation_title = 'Mutations found in the sample' | 165 self.mutation_title = 'Mutations found in the sample' |
| 156 self.mutation_methods_title = 'Mutation screening' | 166 self.mutation_methods_title = 'Mutation screening' |
| 157 self.plasmid_methods_title = 'Plasmid annotation' | 167 self.plasmid_methods_title = 'Plasmid annotation' |
| 735 row_count = int(len(Table_List) / 4) | 745 row_count = int(len(Table_List) / 4) |
| 736 self.doc.new_table(columns=4, rows=row_count, text=Table_List, text_align='left') | 746 self.doc.new_table(columns=4, rows=row_count, text=Table_List, text_align='left') |
| 737 method = 'Large insertions or deletions were found as the complement of aligned regions using %s.' % self.bedtools_version | 747 method = 'Large insertions or deletions were found as the complement of aligned regions using %s.' % self.bedtools_version |
| 738 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method)) | 748 self.methods[self.reference_methods_title] = self.methods[self.reference_methods_title].append(pandas.Series(method)) |
| 739 self.doc.new_line() | 749 self.doc.new_line() |
| 750 self.doc.new_line('<div style="page-break-after: always;"></div>') | |
| 751 self.doc.new_line() | |
| 752 | |
| 753 def add_lrn_risk_info(self): | |
| 754 self.ofh.write("\nXXXXXX In add_lrn_risk_info\n\n") | |
| 755 if self.lrn_risk_amr_file is None and self.lrn_risk_blacklist_file is None and self.lrn_risk_vf_file is None: | |
| 756 return | |
| 757 self.doc.new_line() | |
| 758 self.doc.new_header(level=2, title=self.lrn_risk_title) | |
| 759 # Process self.lrn_risk_amr_file. | |
| 760 try: | |
| 761 lrn_risk_amr = pandas.read_csv(filepath_or_buffer=self.lrn_risk_amr_file, sep='\t', header=0) | |
| 762 except Exception: | |
| 763 lrn_risk_amr = pandas.DataFrame() | |
| 764 if lrn_risk_amr.shape[0] > 0: | |
| 765 self.doc.new_line() | |
| 766 self.doc.new_header(level=2, title="AMR Determinant Distribution") | |
| 767 self.doc.new_line() | |
| 768 Table_List = ["Gene", "Contig", "% Identity", "% Coverage", "E-Value", "Annotation", "Comparison to Publicly Available Genomes"] | |
| 769 for index, row in lrn_risk_amr.iterrows(): | |
| 770 Table_List = Table_List + row.tolist() | |
| 771 row_count = int(len(Table_List) / 7) | |
| 772 self.doc.new_table(columns=7, rows=row_count, text=Table_List, text_align='left') | |
| 773 # Process self.lrn_risk_blacklist_file. | |
| 774 try: | |
| 775 lrn_risk_blacklist = pandas.read_csv(filepath_or_buffer=self.lrn_risk_blacklist_file, sep='\t', header=0) | |
| 776 except Exception: | |
| 777 lrn_risk_blacklist = pandas.DataFrame() | |
| 778 if lrn_risk_blacklist.shape[0] > 0: | |
| 779 self.doc.new_line() | |
| 780 self.doc.new_header(level=2, title="Blacklisted High-risk Virulence Factors") | |
| 781 self.doc.new_line() | |
| 782 Table_List = ["Blacklisted Gene", "Reason", "Risk Category"] | |
| 783 for index, row in lrn_risk_blacklist.iterrows(): | |
| 784 Table_List = Table_List + row.tolist() | |
| 785 row_count = int(len(Table_List) / 3) | |
| 786 self.doc.new_table(columns=3, rows=row_count, text=Table_List, text_align='left') | |
| 787 # Process self.lrn_risk_vf_file. | |
| 788 try: | |
| 789 lrn_risk_vf = pandas.read_csv(filepath_or_buffer=self.lrn_risk_vf_file, sep='\t', header=0) | |
| 790 except Exception: | |
| 791 lrn_risk_vf = pandas.DataFrame() | |
| 792 if lrn_risk_vf.shape[0] > 0: | |
| 793 self.doc.new_line() | |
| 794 self.doc.new_header(level=2, title="Virulence Factor Distribution") | |
| 795 self.doc.new_line() | |
| 796 Table_List = ["Gene", "Contig", "% Identity", "% Coverage", "E-Value", "Annotation", "Comparison to Publicly Available Genomes"] | |
| 797 for index, row in lrn_risk_vf.iterrows(): | |
| 798 Table_List = Table_List + row.tolist() | |
| 799 row_count = int(len(Table_List) / 7) | |
| 800 self.doc.new_table(columns=7, rows=row_count, text=Table_List, text_align='left') | |
| 740 self.doc.new_line('<div style="page-break-after: always;"></div>') | 801 self.doc.new_line('<div style="page-break-after: always;"></div>') |
| 741 self.doc.new_line() | 802 self.doc.new_line() |
| 742 | 803 |
| 743 def add_plasmids(self): | 804 def add_plasmids(self): |
| 744 try: | 805 try: |
| 837 self.add_feature_plots() | 898 self.add_feature_plots() |
| 838 self.add_mutations() | 899 self.add_mutations() |
| 839 self.add_large_indels() | 900 self.add_large_indels() |
| 840 self.add_plasmids() | 901 self.add_plasmids() |
| 841 self.add_amr_matrix() | 902 self.add_amr_matrix() |
| 903 self.add_lrn_risk_info() | |
| 842 # self.add_snps() | 904 # self.add_snps() |
| 843 self.add_methods() | 905 self.add_methods() |
| 844 self.make_tex() | 906 self.make_tex() |
| 845 # It took me quite a long time to find out that the value of the -t | 907 # It took me quite a long time to find out that the value of the -t |
| 846 # (implied) argument in the following command must be 'html' instead of | 908 # (implied) argument in the following command must be 'html' instead of |
| 878 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Sample(s) is/are gzipped') | 940 parser.add_argument('--gzipped', action='store_true', dest='gzipped', default=False, help='Sample(s) is/are gzipped') |
| 879 parser.add_argument('--illumina_forward_read_file', action='store', dest='illumina_forward_read_file', help='Illumina forward read file') | 941 parser.add_argument('--illumina_forward_read_file', action='store', dest='illumina_forward_read_file', help='Illumina forward read file') |
| 880 parser.add_argument('--illumina_reverse_read_file', action='store', dest='illumina_reverse_read_file', help='Illumina reverse read file') | 942 parser.add_argument('--illumina_reverse_read_file', action='store', dest='illumina_reverse_read_file', help='Illumina reverse read file') |
| 881 parser.add_argument('--kraken2_report_file', action='store', dest='kraken2_report_file', default=None, help='kraken2 report file') | 943 parser.add_argument('--kraken2_report_file', action='store', dest='kraken2_report_file', default=None, help='kraken2 report file') |
| 882 parser.add_argument('--kraken2_version', action='store', dest='kraken2_version', default=None, help='kraken2 version string') | 944 parser.add_argument('--kraken2_version', action='store', dest='kraken2_version', default=None, help='kraken2 version string') |
| 945 parser.add_argument('--lrn_risk_amr_file', action='store', dest='lrn_risk_amr_file', default=None, help='LRN RISK AMR TSV file') | |
| 946 parser.add_argument('--lrn_risk_blacklist_file', action='store', dest='lrn_risk_blacklist_file', default=None, help='LRN RISK blacklist TSV file') | |
| 947 parser.add_argument('--lrn_risk_vf_file', action='store', dest='lrn_risk_vf_file', default=None, help='LRN RISK virulence factors TSV file') | |
| 883 parser.add_argument('--minimap2_version', action='store', dest='minimap2_version', default=None, help='minimap2 version string') | 948 parser.add_argument('--minimap2_version', action='store', dest='minimap2_version', default=None, help='minimap2 version string') |
| 884 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file') | 949 parser.add_argument('--mutation_regions_bed_file', action='store', dest='mutation_regions_bed_file', help='AMR mutation regions BRD file') |
| 885 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') | 950 parser.add_argument('--mutation_regions_dir', action='store', dest='mutation_regions_dir', help='Directory of mutation regions TSV files') |
| 886 parser.add_argument('--ont_file', action='store', dest='ont_file', help='ONT single read file') | 951 parser.add_argument('--ont_file', action='store', dest='ont_file', help='ONT single read file') |
| 887 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA css stypesheet') | 952 parser.add_argument('--pima_css', action='store', dest='pima_css', help='PIMA css stypesheet') |
| 942 args.gzipped, | 1007 args.gzipped, |
| 943 args.illumina_forward_read_file, | 1008 args.illumina_forward_read_file, |
| 944 args.illumina_reverse_read_file, | 1009 args.illumina_reverse_read_file, |
| 945 args.kraken2_report_file, | 1010 args.kraken2_report_file, |
| 946 args.kraken2_version, | 1011 args.kraken2_version, |
| 1012 args.lrn_risk_amr_file, | |
| 1013 args.lrn_risk_blacklist_file, | |
| 1014 args.lrn_risk_vf_file, | |
| 947 args.minimap2_version, | 1015 args.minimap2_version, |
| 948 args.mutation_regions_bed_file, | 1016 args.mutation_regions_bed_file, |
| 949 mutation_regions_files, | 1017 mutation_regions_files, |
| 950 args.ont_file, | 1018 args.ont_file, |
| 951 args.pima_css, | 1019 args.pima_css, |
