comparison EURL_VTEC_WGS_PT.py @ 0:965517909457 draft

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Wed, 22 Jan 2020 08:41:44 -0500
parents
children 0cbed1c0a762
comparison
equal deleted inserted replaced
-1:000000000000 0:965517909457
1 # -*- coding: utf-8 -*-
2 """
3 ############################################################################
4 # Istituto Superiore di Sanita'
5 # European Union Reference Laboratory (EU-RL) for Escherichia coli, including Verotoxigenic E. coli (VTEC)
6 # Developer: Arnold Knijn arnold.knijn@iss.it
7 ############################################################################
8 """
9
10 import argparse
11 import sys
12 import os
13 import HTML
14 import datetime
15 import fileinput
16
17 BASE_URL = 'https://galaxytrakr.org'
18 TOOL_DIR = os.path.dirname(os.path.abspath(__file__))
19
20 def insertFile(filename, report):
21 with open(filename) as html_in:
22 for line in html_in:
23 report.write(line)
24
25 def insertFileAsTable(filename, report, hasheader=False, tabclass="table table-rep"):
26 with open(filename) as table_in:
27 table_data = [[str(col) for col in row.split('\t')] for row in table_in]
28 insertTable(table_data, report, hasheader, tabclass)
29
30 def insertTable(table_data, report, hasheader=False, tabclass="table table-rep"):
31 if hasheader:
32 htmlcode = HTML.table(table_data[1:], attribs={'class':tabclass}, header_row=table_data[0])
33 else:
34 htmlcode = HTML.table(table_data, attribs={'class':tabclass})
35 report.write(htmlcode)
36
37 def openFileAsTable(filename):
38 with open(filename) as table_in:
39 table_data = [[str(col) for col in row.split('\t')] for row in table_in]
40 return table_data
41
42 def __main__():
43 parser = argparse.ArgumentParser()
44 parser.add_argument('--serotyping', dest='serotyping', help='perform serotyping', action='store_true')
45 parser.add_argument('--virulotyping', dest='virulotyping', help='perform virulotyping', action='store_true')
46 parser.add_argument('--shigatoxintyping', dest='shigatoxintyping', help='perform shigatoxintyping', action='store_true')
47 parser.add_argument('-1', '--input1', dest='input1', help='forward or single-end reads file in Sanger FASTQ format')
48 parser.add_argument('--input1_ext', dest='input1_ext', help='extension of forward or single-end reads file in Sanger FASTQ format')
49 parser.add_argument('--input1_name', dest='input1_name', help='name of forward or single-end reads file in Sanger FASTQ format')
50 parser.add_argument('-2', '--input2', dest='input2', help='reverse reads file in Sanger FASTQ format')
51 parser.add_argument('--input2_ext', dest='input2_ext', help='extension of reverse reads file in Sanger FASTQ format')
52 parser.add_argument('--input2_name', dest='input2_name', help='name of reverse reads file in Sanger FASTQ format')
53 parser.add_argument('--html1', dest='html1', help='html FASTQC file')
54 parser.add_argument('--html1_id', dest='html1_id', help='html FASTQC file id')
55 parser.add_argument('--html1_path', dest='html1_path', help='html FASTQC file path')
56 parser.add_argument('--text1', dest='text1', help='text FASTQC file')
57 parser.add_argument('--html2', dest='html2', help='html FASTQC file')
58 parser.add_argument('--html2_id', dest='html2_id', help='html FASTQC file id')
59 parser.add_argument('--html2_path', dest='html2_path', help='html FASTQC file path')
60 parser.add_argument('--text2', dest='text2', help='text FASTQC file')
61 parser.add_argument('--trimmed1', dest='trimmed1', help='trimmed forward FASTQ file')
62 parser.add_argument('--trimmed2', dest='trimmed2', help='trimmed reverse FASTQ file')
63 parser.add_argument('--trimmedunpaired', dest='trimmedunpaired', help='trimmed unpaired FASTQ file')
64 parser.add_argument('--log', dest='logfile', help='log file')
65 parser.add_argument('--virulotyper', dest='virulotyper', help='Virulotyping Mapping reads')
66 parser.add_argument('--virulotyper_id', dest='virulotyper_id', help='Virulotyping Mapping reads id')
67 parser.add_argument('--blastn_STX', dest='blastn_STX', help='Blastn for Shiga toxin')
68 parser.add_argument('--mlstsevenloci', dest='mlstsevenloci', help='Multi Locus Alleles table')
69 parser.add_argument('--spades_log', dest='spades_log', help='SPAdes log')
70 parser.add_argument('--blastn_O', dest='blastn_O', help='Blastn for O')
71 parser.add_argument('--blastn_H', dest='blastn_H', help='Blastn for H')
72 parser.add_argument('--output', dest='output', help='output report html file')
73 args = parser.parse_args()
74
75 os.system("printf 'EURL VTEC WGS PT v2.3\n\nTool versions\n=============\n' > " + args.logfile)
76 if args.input2:
77 # FASTQC
78 os.system("python " + TOOL_DIR + "/scripts/rgFastQC.py -i " + args.input1 + " -d " + args.html1_path + " -o " + args.html1 + " -t " + args.text1 + " -f " + args.input1_ext + " -j " + args.input1_name + " -e " + "fastqc")
79 os.system("rm -r " + args.html1_path)
80 os.system("python " + TOOL_DIR + "/scripts/rgFastQC.py -i " + args.input2 + " -d " + args.html2_path + " -o " + args.html2 + " -t " + args.text2 + " -f " + args.input2_ext + " -j " + args.input2_name + " -e " + "fastqc")
81 os.system("rm -r " + args.html2_path)
82 os.system("fastqc -v >> " + args.logfile)
83 # TRIMMING
84 os.system("python " + TOOL_DIR + "/scripts/fastq_positional_quality_trimming.py -1 " + args.input1 + " --maxlt 300 --lt 17 --rt 0 --minqt 25 --avgqt 27.0 --minlf -1 --trimmed1 " + args.trimmed1 + " --log trimming_logfile -2 " + args.input2 + " --trimmed2 " + args.trimmed2 + " --trimmedunpaired " + args.trimmedunpaired)
85 os.system("ln -s " + args.trimmed1 + " input_1.fq")
86 os.system("ln -s " + args.trimmed2 + " input_2.fq")
87 os.system("printf '\nfastq_positional_quality_trimming v1.0\n' >> " + args.logfile)
88 os.system("printf 'parameters: maxlt=300, lt=17, rt=0, minqt=25, avgqt=27.0, minlf=-1\n' >> " + args.logfile)
89 if args.virulotyping:
90 # VIRULOTYPER
91 os.system("perl " + TOOL_DIR + "/scripts/patho_typing_pt.pl 'python " + TOOL_DIR + "/scripts/patho_typing.py -s Escherichia coli -f " + args.input1 + " " + args.input2 + " -o output_dir -j 1 --minGeneCoverage 90 --minGeneIdentity 90 --minGeneDepth 15'")
92 os.system("cat pathotyper_rep_tot_tab > " + args.virulotyper)
93 os.system("printf '\n\nViruloTyper\n===========\npatho_typing v0.3\n' >> " + args.logfile)
94 os.system("printf 'parameters: minGeneCoverage=90, minGeneIdentity=90, minGeneDepth=15\n\n' >> " + args.logfile)
95 os.system("cat " + TOOL_DIR + "/data/ViruloTyping_db.txt >> " + args.logfile)
96 if args.shigatoxintyping:
97 # SHIGATOXIN FILTERING
98 os.system(TOOL_DIR + "/scripts/duk -m filtered1STX.fq -k 23 " + TOOL_DIR + "/data/stx.fa input_1.fq")
99 os.system(TOOL_DIR + "/scripts/duk -m filtered2STX.fq -k 23 " + TOOL_DIR + "/data/stx.fa input_2.fq")
100 os.system(TOOL_DIR + "/scripts/fastq_pair filtered1STX.fq filtered2STX.fq")
101 os.system(TOOL_DIR + "/scripts/fastq_pair filtered1STX.fq.single.fq input_2.fq")
102 os.system(TOOL_DIR + "/scripts/fastq_pair filtered2STX.fq.single.fq input_1.fq")
103 os.system("cat filtered1STX.fq.paired.fq > filtered1STX_paired.fq")
104 os.system("cat filtered1STX.fq.single.fq.paired.fq >> filtered1STX_paired.fq")
105 os.system("cat input_1.fq.paired.fq >> filtered1STX_paired.fq")
106 os.system("cat filtered2STX.fq.paired.fq > filtered2STX_paired.fq")
107 os.system("cat input_2.fq.paired.fq >> filtered2STX_paired.fq")
108 os.system("cat filtered2STX.fq.single.fq.paired.fq >> filtered2STX_paired.fq")
109 os.system("printf '\n\nShigatoxinTyper\n===============\nduk v20110303\n' >> " + args.logfile)
110 os.system("printf 'parameters: k=23\n\n' >> " + args.logfile)
111 # SHIGATOXIN ASSEMBLY: SPADES
112 os.system("perl " + TOOL_DIR + "/scripts/spades.pl spades_contigs_stx spades_contig_stats_stx spades_scaffolds_stx spades_scaffold_stats_stx spades_log_stx NODE spades.py --disable-gzip-output --careful -t \${GALAXY_SLOTS:-16} -k '21,33,55' --pe1-ff --pe1-1 fastq:filtered1STX_paired.fq --pe1-2 fastq:filtered2STX_paired.fq")
113 os.system("spades.py -v >> " + args.logfile)
114 os.system("printf 'parameters: careful, k=21,33,55, pe1-ff, pe1-1, pe1-2\n\n' >> " + args.logfile)
115 # SHIGATOXIN NCBI BLAST+ blastn
116 os.system("blastn -query 'spades_contigs_stx' -db '" + TOOL_DIR + "/data/stx' -task blastn -evalue 0.001 -out '" + args.blastn_STX + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0")
117 os.system("blastn -version >> " + args.logfile)
118 os.system("printf 'parameters: evalue=0.001, strand=both, dust=yes, max_target_seqs=10, perc_identity=95.0\n\n' >> " + args.logfile)
119 os.system("cat " + TOOL_DIR + "/data/ShigatoxinTyping_db.txt >> " + args.logfile)
120 # SHIGATOXINTYPER: FILTER, CUT AND CONCATENATE BLASTN OUTPUT
121 os.system("echo 'sseqid\tpident\tlength\tpositive' > blastn_shigatoxin_fct")
122 os.system("awk -F '\t' '($3>99 && $4>1200) { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_STX + " > blastn_shigatoxin_fc")
123 shigatoxin_typing = openFileAsTable("blastn_shigatoxin_fc")
124 os.system("cat blastn_shigatoxin_fc >> blastn_shigatoxin_fct")
125 if os.stat('blastn_shigatoxin_fc').st_size == 0:
126 os.system("echo '-\t-\t-\t-' >> blastn_shigatoxin_fct")
127 # SEQUENCETYPER
128 os.system("mentalist call --output_votes -o 'mentalist_out' --db '/afs/galaxy/tool-data/mentalist_databases/escherichia_coli_pubmlst_k31_2018-10-09/escherichia_coli_pubmlst_k31_m023_2018-10-09.jld' -1 input_1.fq -2 input_2.fq")
129 os.system("mv mentalist_out.byvote " + args.mlstsevenloci)
130 sequence_typing = openFileAsTable(args.mlstsevenloci)
131 sequence_qc = openFileAsTable("mentalist_out.coverage.txt")
132 os.system("printf '\n\nSequenceTyper\n=============\n' >> " + args.logfile)
133 os.system("mentalist -v | grep MentaLiST >> " + args.logfile)
134 os.system("printf '\n' >> " + args.logfile)
135 os.system("cat " + TOOL_DIR + "/data/SequenceTyping_db.txt >> " + args.logfile)
136 if args.serotyping:
137 # SEROTYPE FILTERING
138 os.system(TOOL_DIR + "/scripts/duk -m filteredO1.fq -k 23 " + TOOL_DIR + "/data/O_type.fsa input_1.fq")
139 os.system(TOOL_DIR + "/scripts/duk -m filteredH1.fq -k 23 " + TOOL_DIR + "/data/H_type.fsa input_1.fq")
140 os.system("cat filteredO1.fq > filteredOH1.fq")
141 os.system("cat filteredH1.fq >> filteredOH1.fq")
142 os.system(TOOL_DIR + "/scripts/duk -m filteredO2.fq -k 23 " + TOOL_DIR + "/data/O_type.fsa input_2.fq")
143 os.system(TOOL_DIR + "/scripts/duk -m filteredH2.fq -k 23 " + TOOL_DIR + "/data/H_type.fsa input_2.fq")
144 os.system("cat filteredO2.fq > filteredOH2.fq")
145 os.system("cat filteredH2.fq >> filteredOH2.fq")
146 os.system(TOOL_DIR + "/scripts/fastq_pair filteredOH1.fq filteredOH2.fq")
147 os.system(TOOL_DIR + "/scripts/fastq_pair filteredOH1.fq.single.fq input_2.fq")
148 os.system(TOOL_DIR + "/scripts/fastq_pair filteredOH2.fq.single.fq input_1.fq")
149 os.system("cat filteredOH1.fq.paired.fq > filteredOH1_paired.fq")
150 os.system("cat filteredOH1.fq.single.fq.paired.fq >> filteredOH1_paired.fq")
151 os.system("cat input_1.fq.paired.fq >> filteredOH1_paired.fq")
152 os.system("cat filteredOH2.fq.paired.fq > filteredOH2_paired.fq")
153 os.system("cat input_2.fq.paired.fq >> filteredOH2_paired.fq")
154 os.system("cat filteredOH2.fq.single.fq.paired.fq >> filteredOH2_paired.fq")
155 os.system("printf '\n\nSeroTyper\n=========\nduk v20110303\n' >> " + args.logfile)
156 os.system("printf 'parameters: k=23\n\n' >> " + args.logfile)
157 # SEROTYPE ASSEMBLY: SPADES
158 os.system("perl " + TOOL_DIR + "/scripts/spades.pl spades_contigs_oh spades_contig_stats_oh spades_scaffolds_oh spades_scaffold_stats_oh " + args.spades_log + " NODE spades.py --disable-gzip-output --careful -t \${GALAXY_SLOTS:-16} -k '21,33,55' --pe1-ff --pe1-1 fastq:filteredOH1_paired.fq --pe1-2 fastq:filteredOH2_paired.fq")
159 os.system("spades.py -v >> " + args.logfile)
160 os.system("printf 'parameters: careful, k=21,33,55, pe1-ff, pe1-1, pe1-2\n\n' >> " + args.logfile)
161 # SEROTYPE NCBI BLAST+ blastn
162 os.system("blastn -query 'spades_contigs_oh' -db '" + TOOL_DIR + "/data/O_type' -task blastn -evalue 0.001 -out '" + args.blastn_O + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0")
163 os.system("blastn -query 'spades_contigs_oh' -db '" + TOOL_DIR + "/data/H_type' -task blastn -evalue 0.001 -out '" + args.blastn_H + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0 -ungapped")
164 os.system("blastn -version >> " + args.logfile)
165 os.system("printf 'parameters: evalue=0.001, strand=both, dust=yes, max_target_seqs=10, perc_identity=95.0\n\n' >> " + args.logfile)
166 os.system("cat " + TOOL_DIR + "/data/SeroTyping_db.txt >> " + args.logfile)
167 # SEROTYPER: FILTER, CUT AND CONCATENATE BLASTN OUTPUT
168 os.system("echo 'sseqid\tpident\tlength\tpositive' > blastn_OH_fc")
169 os.system("awk -F '\t' '$4>800 { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_O + " | sort -nrk 2 -nrk 3 > blastn_O_fc")
170 sero_typing_o = openFileAsTable("blastn_O_fc")
171 os.system("cat blastn_O_fc >> blastn_OH_fc")
172 os.system("awk -F '\t' '$4>800 { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_H + " | sort -nrk 2 -nrk 3 > blastn_H_fc")
173 sero_typing_h = openFileAsTable("blastn_H_fc")
174 os.system("cat blastn_H_fc >> blastn_OH_fc")
175 if os.stat('blastn_O_fc').st_size == 0 and os.stat('blastn_H_fc').st_size == 0:
176 os.system("echo '-\t-\t-\t-' >> blastn_OH_fc")
177 else:
178 # FASTQC
179 os.system("python " + TOOL_DIR + "/scripts/rgFastQC.py -i " + args.input1 + " -d " + args.html1_path + " -o " + args.html1 + " -t " + args.text1 + " -f " + args.input1_ext + " -j " + args.input1_name + " -e " + "fastqc")
180 os.system("rm -r " + args.html1_path)
181 os.system("fastqc -v >> " + args.logfile)
182 # TRIMMING
183 os.system("python " + TOOL_DIR + "/scripts/fastq_positional_quality_trimming.py -1 " + args.input1 + " --maxlt 360 --lt 10 --rt 0 --minqt 25 --avgqt 27.0 --minlf 50 --trimmed1 " + args.trimmed1 + " --log trimming_logfile")
184 os.system("ln -s " + args.trimmed1 + " input_1.fq")
185 os.system("echo 'fastq_positional_quality_trimming v1.0' >> " + args.logfile)
186 os.system("printf 'parameters: maxlt=360, lt=10, rt=0, minqt=25, avgqt=27.0, minlf=50\n' >> " + args.logfile)
187 if args.virulotyping:
188 # VIRULOTYPER
189 os.system("perl " + TOOL_DIR + "/scripts/patho_typing_pt.pl 'python " + TOOL_DIR + "/scripts/patho_typing.py -s Escherichia coli -f " + args.input1 + " -o output_dir -j 1 --minGeneCoverage 90 --minGeneIdentity 90 --minGeneDepth 15'")
190 os.system("cat pathotyper_rep_tot_tab > " + args.virulotyper)
191 os.system("printf '\n\nViruloTyper\n===========\npatho_typing v0.3\n' >> " + args.logfile)
192 os.system("printf 'parameters: minGeneCoverage=90, minGeneIdentity=90, minGeneDepth=15\n\n' >> " + args.logfile)
193 os.system("cat " + TOOL_DIR + "/data/ViruloTyping_db.txt >> " + args.logfile)
194 if args.shigatoxintyping:
195 # SHIGATOXIN FILTERING
196 os.system(TOOL_DIR + "/scripts/duk -m filtered1STX.fq -k 23 " + TOOL_DIR + "/data/stx.fa input_1.fq")
197 os.system("printf '\n\nShigatoxinTyper\n===============\nduk v20110303\n' >> " + args.logfile)
198 os.system("printf 'parameters: k=23\n\n' >> " + args.logfile)
199 # SHIGATOXIN ASSEMBLY: SPADES
200 os.system("perl " + TOOL_DIR + "/scripts/spades.pl spades_contigs_stx spades_contig_stats_stx spades_scaffolds_stx spades_scaffold_stats_stx spades_log_stx NODE spades.py --disable-gzip-output --careful -t \${GALAXY_SLOTS:-16} -k '21,33,55' --iontorrent --pe1-ff --pe1-s fastq:filtered1STX.fq")
201 os.system("spades.py -v >> " + args.logfile)
202 os.system("printf 'parameters: careful, k=21,33,55, iontorrent, pe1-ff, pe1-s\n\n' >> " + args.logfile)
203 # SHIGATOXIN NCBI BLAST+ blastn
204 os.system("blastn -query 'spades_contigs_stx' -db '" + TOOL_DIR + "/data/stx' -task blastn -evalue 0.001 -out '" + args.blastn_STX + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0")
205 os.system("blastn -version >> " + args.logfile)
206 os.system("printf 'parameters: evalue=0.001, strand=both, dust=yes, max_target_seqs=10, perc_identity=95.0\n\n' >> " + args.logfile)
207 os.system("cat " + TOOL_DIR + "/data/ShigatoxinTyping_db.txt >> " + args.logfile)
208 # SHIGATOXINTYPER: FILTER, CUT AND CONCATENATE BLASTN OUTPUT
209 os.system("echo 'sseqid\tpident\tlength\tpositive' > blastn_shigatoxin_fct")
210 os.system("awk -F '\t' '($3>99 && $4>1200) { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_STX + " > blastn_shigatoxin_fc")
211 shigatoxin_typing = openFileAsTable("blastn_shigatoxin_fc")
212 os.system("cat blastn_shigatoxin_fc >> blastn_shigatoxin_fct")
213 if os.stat('blastn_shigatoxin_fc').st_size == 0:
214 os.system("echo '-\t-\t-\t-' >> blastn_shigatoxin_fct")
215 # SEQUENCETYPER
216 os.system("mentalist call --output_votes -o 'mentalist_out' --db '/afs/galaxy/tool-data/mentalist_databases/escherichia_coli_pubmlst_k31_2018-10-09/escherichia_coli_pubmlst_k31_m023_2018-10-09.jld' -1 input_1.fq")
217 os.system("mv mentalist_out.byvote " + args.mlstsevenloci)
218 sequence_typing = openFileAsTable(args.mlstsevenloci)
219 sequence_qc = openFileAsTable("mentalist_out.coverage.txt")
220 os.system("printf '\n\nSequenceTyper\n=============\n' >> " + args.logfile)
221 os.system("mentalist -v | grep MentaLiST >> " + args.logfile)
222 os.system("printf '\n' >> " + args.logfile)
223 os.system("cat " + TOOL_DIR + "/data/SequenceTyping_db.txt >> " + args.logfile)
224 if args.serotyping:
225 # SEROTYPE FILTERING
226 os.system(TOOL_DIR + "/scripts/duk -m filteredO1.fq -k 23 " + TOOL_DIR + "/data/O_type.fsa input_1.fq")
227 os.system(TOOL_DIR + "/scripts/duk -m filteredH1.fq -k 23 " + TOOL_DIR + "/data/H_type.fsa input_1.fq")
228 os.system("cat filteredO1.fq > filteredOH1.fq")
229 os.system("cat filteredH1.fq >> filteredOH1.fq")
230 os.system("printf '\n\nSeroTyper\n=========\nduk v20110303\n' >> " + args.logfile)
231 os.system("printf 'parameters: k=23\n\n' >> " + args.logfile)
232 # SEROTYPE ASSEMBLY: SPADES
233 os.system("perl " + TOOL_DIR + "/scripts/spades.pl spades_contigs_oh spades_contig_stats_oh spades_scaffolds_oh spades_scaffold_stats_oh " + args.spades_log + " NODE spades.py --disable-gzip-output --careful -t \${GALAXY_SLOTS:-16} -k '21,33,55' --iontorrent --pe1-ff --pe1-s fastq:filteredOH1.fq")
234 os.system("spades.py -v >> " + args.logfile)
235 os.system("printf 'parameters: careful, k=21,33,55, iontorrent, pe1-ff, pe1-s\n\n' >> " + args.logfile)
236 # SEROTYPE NCBI BLAST+ blastn
237 os.system("blastn -query 'spades_contigs_oh' -db '" + TOOL_DIR + "/data/O_type' -task blastn -evalue 0.001 -out '" + args.blastn_O + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0")
238 os.system("blastn -query 'spades_contigs_oh' -db '" + TOOL_DIR + "/data/H_type' -task blastn -evalue 0.001 -out '" + args.blastn_H + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0 -ungapped")
239 os.system("blastn -version >> " + args.logfile)
240 os.system("printf 'parameters: evalue=0.001, strand=both, dust=yes, max_target_seqs=10, perc_identity=95.0\n\n' >> " + args.logfile)
241 os.system("cat " + TOOL_DIR + "/data/SeroTyping_db.txt >> " + args.logfile)
242 # SEROTYPER: FILTER, CUT AND CONCATENATE BLASTN OUTPUT
243 os.system("echo 'sseqid\tpident\tlength\tpositive' > blastn_OH_fc")
244 os.system("awk -F '\t' '$4>800 { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_O + " | sort -nrk 2 -nrk 3 > blastn_O_fc")
245 sero_typing_o = openFileAsTable("blastn_O_fc")
246 os.system("cat blastn_O_fc >> blastn_OH_fc")
247 os.system("awk -F '\t' '$4>800 { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_H + " | sort -nrk 2 -nrk 3 > blastn_H_fc")
248 sero_typing_h = openFileAsTable("blastn_H_fc")
249 os.system("cat blastn_H_fc >> blastn_OH_fc")
250 if os.stat('blastn_O_fc').st_size == 0 and os.stat('blastn_H_fc').st_size == 0:
251 os.system("echo '-\t-\t-\t-' >> blastn_OH_fc")
252 try:
253 report = open(args.output, 'w')
254 # write head html
255 insertFile(TOOL_DIR + "/report_head.html", report)
256 report.write("<td><h1>EURL VTEC WGS PT</h1><h2>Report for %s</h2>%s</td>" % (args.input1_name, datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")))
257 insertFile(TOOL_DIR + "/report_head2.html", report)
258 # write results
259 report.write("<h3>Summary</h3>\n")
260 if args.serotyping:
261 report.write("<p>Serotype: ")
262 if len(sero_typing_o) == 0:
263 report.write("O?")
264 else:
265 report.write("%s" % sero_typing_o[0][0][sero_typing_o[0][0].rfind("O"):])
266 if len(sero_typing_h) == 0:
267 report.write(":H?")
268 else:
269 report.write(":%s" % sero_typing_h[0][0][sero_typing_h[0][0].rfind("H"):])
270 report.write("</p>\n")
271 report.write("<p>Sequence type: ")
272 if len(sequence_typing) < 2:
273 report.write("Sequence typing failed")
274 elif sequence_typing[1][1] == "failed":
275 report.write("Sequence typing failed")
276 else:
277 report.write("ST%s" % sequence_typing[1][8])
278 report.write("</p>\n")
279 if args.virulotyping:
280 os.system("sort " + args.virulotyper + " | awk '/eae_|stx1._|stx2._|ehxa_/ && $2>50 && !seen[substr($1, 1, index($1, \"_\")-2)]++ { printf(\"%s%s\",sep,substr($1, 1, index($1, \"_\")-1));sep=\", \" }END{print \"\"}' > virulotyper_rep")
281 for line in fileinput.input("virulotyper_rep", inplace=True):
282 print line.replace("1a", "1"),
283 for line in fileinput.input("virulotyper_rep", inplace=True):
284 print line.replace("2a", "2"),
285 for line in fileinput.input("virulotyper_rep", inplace=True):
286 print line.replace("1b", "1"),
287 for line in fileinput.input("virulotyper_rep", inplace=True):
288 print line.replace("2b", "2"),
289 report.write("<p>Virulotypes: ")
290 insertFile("virulotyper_rep", report)
291 report.write("</p>\n")
292 if args.shigatoxintyping:
293 report.write("<p>Stx Subtypes: ")
294 if len(shigatoxin_typing) == 0:
295 report.write("No subtype match found")
296 else:
297 shigatoxin_subtype = ""
298 shigatoxin_types = openFileAsTable(TOOL_DIR + "/data/stx_subtypes")
299 for subtype in shigatoxin_typing:
300 blast_pident_100 = float(subtype[1]) == 100
301 if (blast_pident_100):
302 for item in shigatoxin_types:
303 if item[0] == subtype[0]:
304 shigatoxin_subtype = item[1] + " " + shigatoxin_subtype
305 if len(shigatoxin_subtype) == 0:
306 shigatoxin_subtype = "No complete subtype match found"
307 report.write("%s" % shigatoxin_subtype)
308 report.write("</p>\n")
309 # Quality Check
310 disclaimer = False
311 sequence_qc_cov = 0
312 for x in range(7):
313 if float(sequence_qc[x+1][2]) < 1:
314 disclaimer = True
315 sequence_qc_cov = sequence_qc_cov + float(sequence_qc[x+1][3])
316 if sequence_qc_cov < 210:
317 disclaimer = True
318 if disclaimer:
319 report.write("<p style='font-weight:bold;color:red'>Disclaimer: The data analysed do not fulfill minimum quality parameters, please consider repeating the sequencing.</p>\n")
320 report.write("<hr/><h3>Raw data quality check</h3>\n")
321 if args.input2:
322 report.write("<p>FASTQC result forward: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html1_id))
323 report.write("<p>FASTQC result reverse: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html2_id))
324 else:
325 report.write("<p>FASTQC result: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html1_id))
326 if args.serotyping:
327 report.write("<br/><hr/><h3>Serotyping</h3>\n")
328 insertFileAsTable("blastn_OH_fc", report, True)
329 report.write("<br/><hr/><h3>Multi Locus Sequence Typing</h3>\n")
330 if len(sequence_typing) > 1:
331 insertTable(sequence_typing, report, True)
332 if args.virulotyping:
333 report.write("<br/><hr/><h3>Virulotyping</h3>\n")
334 report.write("<p>This table is filtered for results with >90%% gene coverage, unfiltered results can be found <a href='%s/datasets/%s/display/?preview=True'>here</a></p>\n" % (BASE_URL, args.virulotyper_id))
335 insertFileAsTable("pathotyper_rep_tab", report, True, "table table-cross")
336 if args.shigatoxintyping:
337 report.write("<br/><hr/><h3>Shiga toxin typing</h3>\n")
338 insertFileAsTable("blastn_shigatoxin_fct", report, True)
339 # write tail html
340 insertFile(TOOL_DIR + "/report_tail.html", report)
341 finally:
342 report.close()
343
344 if __name__ == "__main__":
345 __main__()
346
347