Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
comparison EURL_VTEC_WGS_PT.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Tue, 28 Jan 2020 10:42:31 -0500 |
parents | 965517909457 |
children |
comparison
equal
deleted
inserted
replaced
2:6837f733b4aa | 3:0cbed1c0a762 |
---|---|
1 #!/usr/bin/env python3 | |
1 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
2 """ | 3 """ |
3 ############################################################################ | 4 ############################################################################ |
4 # Istituto Superiore di Sanita' | 5 # Istituto Superiore di Sanita' |
5 # European Union Reference Laboratory (EU-RL) for Escherichia coli, including Verotoxigenic E. coli (VTEC) | 6 # European Union Reference Laboratory (EU-RL) for Escherichia coli, including Verotoxigenic E. coli (VTEC) |
42 def __main__(): | 43 def __main__(): |
43 parser = argparse.ArgumentParser() | 44 parser = argparse.ArgumentParser() |
44 parser.add_argument('--serotyping', dest='serotyping', help='perform serotyping', action='store_true') | 45 parser.add_argument('--serotyping', dest='serotyping', help='perform serotyping', action='store_true') |
45 parser.add_argument('--virulotyping', dest='virulotyping', help='perform virulotyping', action='store_true') | 46 parser.add_argument('--virulotyping', dest='virulotyping', help='perform virulotyping', action='store_true') |
46 parser.add_argument('--shigatoxintyping', dest='shigatoxintyping', help='perform shigatoxintyping', action='store_true') | 47 parser.add_argument('--shigatoxintyping', dest='shigatoxintyping', help='perform shigatoxintyping', action='store_true') |
48 parser.add_argument('--amrtyping', dest='amrtyping', help='perform amrtyping', action='store_true') | |
47 parser.add_argument('-1', '--input1', dest='input1', help='forward or single-end reads file in Sanger FASTQ format') | 49 parser.add_argument('-1', '--input1', dest='input1', help='forward or single-end reads file in Sanger FASTQ format') |
48 parser.add_argument('--input1_ext', dest='input1_ext', help='extension of forward or single-end reads file in Sanger FASTQ format') | 50 parser.add_argument('--input1_ext', dest='input1_ext', help='extension of forward or single-end reads file in Sanger FASTQ format') |
49 parser.add_argument('--input1_name', dest='input1_name', help='name of forward or single-end reads file in Sanger FASTQ format') | 51 parser.add_argument('--input1_name', dest='input1_name', help='name of forward or single-end reads file in Sanger FASTQ format') |
50 parser.add_argument('-2', '--input2', dest='input2', help='reverse reads file in Sanger FASTQ format') | 52 parser.add_argument('-2', '--input2', dest='input2', help='reverse reads file in Sanger FASTQ format') |
51 parser.add_argument('--input2_ext', dest='input2_ext', help='extension of reverse reads file in Sanger FASTQ format') | 53 parser.add_argument('--input2_ext', dest='input2_ext', help='extension of reverse reads file in Sanger FASTQ format') |
56 parser.add_argument('--text1', dest='text1', help='text FASTQC file') | 58 parser.add_argument('--text1', dest='text1', help='text FASTQC file') |
57 parser.add_argument('--html2', dest='html2', help='html FASTQC file') | 59 parser.add_argument('--html2', dest='html2', help='html FASTQC file') |
58 parser.add_argument('--html2_id', dest='html2_id', help='html FASTQC file id') | 60 parser.add_argument('--html2_id', dest='html2_id', help='html FASTQC file id') |
59 parser.add_argument('--html2_path', dest='html2_path', help='html FASTQC file path') | 61 parser.add_argument('--html2_path', dest='html2_path', help='html FASTQC file path') |
60 parser.add_argument('--text2', dest='text2', help='text FASTQC file') | 62 parser.add_argument('--text2', dest='text2', help='text FASTQC file') |
61 parser.add_argument('--trimmed1', dest='trimmed1', help='trimmed forward FASTQ file') | |
62 parser.add_argument('--trimmed2', dest='trimmed2', help='trimmed reverse FASTQ file') | |
63 parser.add_argument('--trimmedunpaired', dest='trimmedunpaired', help='trimmed unpaired FASTQ file') | |
64 parser.add_argument('--log', dest='logfile', help='log file') | 63 parser.add_argument('--log', dest='logfile', help='log file') |
65 parser.add_argument('--virulotyper', dest='virulotyper', help='Virulotyping Mapping reads') | 64 parser.add_argument('--virulotyper', dest='virulotyper', help='Virulotyping Mapping reads') |
66 parser.add_argument('--virulotyper_id', dest='virulotyper_id', help='Virulotyping Mapping reads id') | 65 parser.add_argument('--virulotyper_id', dest='virulotyper_id', help='Virulotyping Mapping reads id') |
67 parser.add_argument('--blastn_STX', dest='blastn_STX', help='Blastn for Shiga toxin') | 66 parser.add_argument('--stx', dest='stx', help='Shiga toxin') |
68 parser.add_argument('--mlstsevenloci', dest='mlstsevenloci', help='Multi Locus Alleles table') | 67 parser.add_argument('--mlstsevenloci', dest='mlstsevenloci', help='Multi Locus Alleles table') |
69 parser.add_argument('--spades_log', dest='spades_log', help='SPAdes log') | 68 parser.add_argument('--amr', dest='amr', help='SPAdes log') |
70 parser.add_argument('--blastn_O', dest='blastn_O', help='Blastn for O') | 69 parser.add_argument('--amr_id', dest='amr_id', help='AMR file id') |
71 parser.add_argument('--blastn_H', dest='blastn_H', help='Blastn for H') | 70 parser.add_argument('--antigen_O', dest='antigen_O', help='Antigen for O') |
71 parser.add_argument('--antigen_H', dest='antigen_H', help='Antigen for H') | |
72 parser.add_argument('--output', dest='output', help='output report html file') | 72 parser.add_argument('--output', dest='output', help='output report html file') |
73 args = parser.parse_args() | 73 args = parser.parse_args() |
74 | 74 |
75 os.system("printf 'EURL VTEC WGS PT v2.3\n\nTool versions\n=============\n' > " + args.logfile) | 75 log = open(args.logfile, 'w') |
76 log.write("EURL VTEC WGS PT v3.0\n\nTool versions\n=============\n") | |
76 if args.input2: | 77 if args.input2: |
77 # FASTQC | 78 # FASTQC |
78 os.system("python " + TOOL_DIR + "/scripts/rgFastQC.py -i " + args.input1 + " -d " + args.html1_path + " -o " + args.html1 + " -t " + args.text1 + " -f " + args.input1_ext + " -j " + args.input1_name + " -e " + "fastqc") | 79 os.system("python " + TOOL_DIR + "/scripts/rgFastQC.py -i " + args.input1 + " -d " + args.html1_path + " -o " + args.html1 + " -t " + args.text1 + " -f " + args.input1_ext + " -j " + args.input1_name + " -e " + "fastqc") |
79 os.system("rm -r " + args.html1_path) | 80 os.system("rm -r " + args.html1_path) |
80 os.system("python " + TOOL_DIR + "/scripts/rgFastQC.py -i " + args.input2 + " -d " + args.html2_path + " -o " + args.html2 + " -t " + args.text2 + " -f " + args.input2_ext + " -j " + args.input2_name + " -e " + "fastqc") | 81 os.system("python " + TOOL_DIR + "/scripts/rgFastQC.py -i " + args.input2 + " -d " + args.html2_path + " -o " + args.html2 + " -t " + args.text2 + " -f " + args.input2_ext + " -j " + args.input2_name + " -e " + "fastqc") |
81 os.system("rm -r " + args.html2_path) | 82 os.system("rm -r " + args.html2_path) |
82 os.system("fastqc -v >> " + args.logfile) | 83 log.write(os.popen("fastqc -v").read()) |
83 # TRIMMING | 84 # TRIMMING |
84 os.system("python " + TOOL_DIR + "/scripts/fastq_positional_quality_trimming.py -1 " + args.input1 + " --maxlt 300 --lt 17 --rt 0 --minqt 25 --avgqt 27.0 --minlf -1 --trimmed1 " + args.trimmed1 + " --log trimming_logfile -2 " + args.input2 + " --trimmed2 " + args.trimmed2 + " --trimmedunpaired " + args.trimmedunpaired) | 85 os.system("python " + TOOL_DIR + "/scripts/fastq_positional_quality_trimming.py -1 " + args.input1 + " --maxlt 300 --lt 17 --rt 0 --minqt 25 --avgqt 27.0 --minlf -1 --trimmed1 input_t1.fq --log trimming_logfile -2 " + args.input2 + " --trimmed2 input_t2.fq --trimmedunpaired trimmedunpaired") |
85 os.system("ln -s " + args.trimmed1 + " input_1.fq") | 86 log.write("\nfastq_positional_quality_trimming v1.0\n") |
86 os.system("ln -s " + args.trimmed2 + " input_2.fq") | 87 log.write("parameters: maxlt=300, lt=17, rt=0, minqt=25, avgqt=27.0, minlf=-1\n") |
87 os.system("printf '\nfastq_positional_quality_trimming v1.0\n' >> " + args.logfile) | 88 if args.shigatoxintyping or args.serotyping: |
88 os.system("printf 'parameters: maxlt=300, lt=17, rt=0, minqt=25, avgqt=27.0, minlf=-1\n' >> " + args.logfile) | 89 # ASSEMBLY |
90 os.system("perl " + TOOL_DIR + "/scripts/spades.pl contigs.fa spades_contig_stats spades_scaffolds spades_scaffold_stats spades_log NODE spades.py --disable-gzip-output --isolate -t \${GALAXY_SLOTS:-16} --pe1-ff --pe1-1 fastq:input_t1.fq --pe1-2 fastq:input_t2.fq") | |
91 log.write(os.popen("spades.py -v").read()) | |
92 log.write("parameters: --isolate, pe1-ff, pe1-1, pe1-2\n\n") | |
89 if args.virulotyping: | 93 if args.virulotyping: |
90 # VIRULOTYPER | 94 # VIRULOTYPER |
91 os.system("perl " + TOOL_DIR + "/scripts/patho_typing_pt.pl 'python " + TOOL_DIR + "/scripts/patho_typing.py -s Escherichia coli -f " + args.input1 + " " + args.input2 + " -o output_dir -j 1 --minGeneCoverage 90 --minGeneIdentity 90 --minGeneDepth 15'") | 95 os.system("perl " + TOOL_DIR + "/scripts/patho_typing.pl 'python " + TOOL_DIR + "/scripts/patho_typing.py -s Escherichia coli -f " + args.input1 + " " + args.input2 + " -o output_dir -j 1 --minGeneCoverage 90 --minGeneIdentity 90 --minGeneDepth 15'") |
92 os.system("cat pathotyper_rep_tot_tab > " + args.virulotyper) | 96 os.system("cat pathotyper_rep_tot_tab > " + args.virulotyper) |
93 os.system("printf '\n\nViruloTyper\n===========\npatho_typing v0.3\n' >> " + args.logfile) | 97 log.write("\n\nViruloTyper\n===========\npatho_typing v1.0\n") |
94 os.system("printf 'parameters: minGeneCoverage=90, minGeneIdentity=90, minGeneDepth=15\n\n' >> " + args.logfile) | 98 log.write("parameters: minGeneCoverage=90, minGeneIdentity=90, minGeneDepth=15\n\n") |
95 os.system("cat " + TOOL_DIR + "/data/ViruloTyping_db.txt >> " + args.logfile) | 99 log.write(os.popen("cat " + TOOL_DIR + "/data/ViruloTyping_db.txt").read()) |
96 if args.shigatoxintyping: | |
97 # SHIGATOXIN FILTERING | |
98 os.system(TOOL_DIR + "/scripts/duk -m filtered1STX.fq -k 23 " + TOOL_DIR + "/data/stx.fa input_1.fq") | |
99 os.system(TOOL_DIR + "/scripts/duk -m filtered2STX.fq -k 23 " + TOOL_DIR + "/data/stx.fa input_2.fq") | |
100 os.system(TOOL_DIR + "/scripts/fastq_pair filtered1STX.fq filtered2STX.fq") | |
101 os.system(TOOL_DIR + "/scripts/fastq_pair filtered1STX.fq.single.fq input_2.fq") | |
102 os.system(TOOL_DIR + "/scripts/fastq_pair filtered2STX.fq.single.fq input_1.fq") | |
103 os.system("cat filtered1STX.fq.paired.fq > filtered1STX_paired.fq") | |
104 os.system("cat filtered1STX.fq.single.fq.paired.fq >> filtered1STX_paired.fq") | |
105 os.system("cat input_1.fq.paired.fq >> filtered1STX_paired.fq") | |
106 os.system("cat filtered2STX.fq.paired.fq > filtered2STX_paired.fq") | |
107 os.system("cat input_2.fq.paired.fq >> filtered2STX_paired.fq") | |
108 os.system("cat filtered2STX.fq.single.fq.paired.fq >> filtered2STX_paired.fq") | |
109 os.system("printf '\n\nShigatoxinTyper\n===============\nduk v20110303\n' >> " + args.logfile) | |
110 os.system("printf 'parameters: k=23\n\n' >> " + args.logfile) | |
111 # SHIGATOXIN ASSEMBLY: SPADES | |
112 os.system("perl " + TOOL_DIR + "/scripts/spades.pl spades_contigs_stx spades_contig_stats_stx spades_scaffolds_stx spades_scaffold_stats_stx spades_log_stx NODE spades.py --disable-gzip-output --careful -t \${GALAXY_SLOTS:-16} -k '21,33,55' --pe1-ff --pe1-1 fastq:filtered1STX_paired.fq --pe1-2 fastq:filtered2STX_paired.fq") | |
113 os.system("spades.py -v >> " + args.logfile) | |
114 os.system("printf 'parameters: careful, k=21,33,55, pe1-ff, pe1-1, pe1-2\n\n' >> " + args.logfile) | |
115 # SHIGATOXIN NCBI BLAST+ blastn | |
116 os.system("blastn -query 'spades_contigs_stx' -db '" + TOOL_DIR + "/data/stx' -task blastn -evalue 0.001 -out '" + args.blastn_STX + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0") | |
117 os.system("blastn -version >> " + args.logfile) | |
118 os.system("printf 'parameters: evalue=0.001, strand=both, dust=yes, max_target_seqs=10, perc_identity=95.0\n\n' >> " + args.logfile) | |
119 os.system("cat " + TOOL_DIR + "/data/ShigatoxinTyping_db.txt >> " + args.logfile) | |
120 # SHIGATOXINTYPER: FILTER, CUT AND CONCATENATE BLASTN OUTPUT | |
121 os.system("echo 'sseqid\tpident\tlength\tpositive' > blastn_shigatoxin_fct") | |
122 os.system("awk -F '\t' '($3>99 && $4>1200) { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_STX + " > blastn_shigatoxin_fc") | |
123 shigatoxin_typing = openFileAsTable("blastn_shigatoxin_fc") | |
124 os.system("cat blastn_shigatoxin_fc >> blastn_shigatoxin_fct") | |
125 if os.stat('blastn_shigatoxin_fc').st_size == 0: | |
126 os.system("echo '-\t-\t-\t-' >> blastn_shigatoxin_fct") | |
127 # SEQUENCETYPER | |
128 os.system("mentalist call --output_votes -o 'mentalist_out' --db '/afs/galaxy/tool-data/mentalist_databases/escherichia_coli_pubmlst_k31_2018-10-09/escherichia_coli_pubmlst_k31_m023_2018-10-09.jld' -1 input_1.fq -2 input_2.fq") | |
129 os.system("mv mentalist_out.byvote " + args.mlstsevenloci) | |
130 sequence_typing = openFileAsTable(args.mlstsevenloci) | |
131 sequence_qc = openFileAsTable("mentalist_out.coverage.txt") | |
132 os.system("printf '\n\nSequenceTyper\n=============\n' >> " + args.logfile) | |
133 os.system("mentalist -v | grep MentaLiST >> " + args.logfile) | |
134 os.system("printf '\n' >> " + args.logfile) | |
135 os.system("cat " + TOOL_DIR + "/data/SequenceTyping_db.txt >> " + args.logfile) | |
136 if args.serotyping: | |
137 # SEROTYPE FILTERING | |
138 os.system(TOOL_DIR + "/scripts/duk -m filteredO1.fq -k 23 " + TOOL_DIR + "/data/O_type.fsa input_1.fq") | |
139 os.system(TOOL_DIR + "/scripts/duk -m filteredH1.fq -k 23 " + TOOL_DIR + "/data/H_type.fsa input_1.fq") | |
140 os.system("cat filteredO1.fq > filteredOH1.fq") | |
141 os.system("cat filteredH1.fq >> filteredOH1.fq") | |
142 os.system(TOOL_DIR + "/scripts/duk -m filteredO2.fq -k 23 " + TOOL_DIR + "/data/O_type.fsa input_2.fq") | |
143 os.system(TOOL_DIR + "/scripts/duk -m filteredH2.fq -k 23 " + TOOL_DIR + "/data/H_type.fsa input_2.fq") | |
144 os.system("cat filteredO2.fq > filteredOH2.fq") | |
145 os.system("cat filteredH2.fq >> filteredOH2.fq") | |
146 os.system(TOOL_DIR + "/scripts/fastq_pair filteredOH1.fq filteredOH2.fq") | |
147 os.system(TOOL_DIR + "/scripts/fastq_pair filteredOH1.fq.single.fq input_2.fq") | |
148 os.system(TOOL_DIR + "/scripts/fastq_pair filteredOH2.fq.single.fq input_1.fq") | |
149 os.system("cat filteredOH1.fq.paired.fq > filteredOH1_paired.fq") | |
150 os.system("cat filteredOH1.fq.single.fq.paired.fq >> filteredOH1_paired.fq") | |
151 os.system("cat input_1.fq.paired.fq >> filteredOH1_paired.fq") | |
152 os.system("cat filteredOH2.fq.paired.fq > filteredOH2_paired.fq") | |
153 os.system("cat input_2.fq.paired.fq >> filteredOH2_paired.fq") | |
154 os.system("cat filteredOH2.fq.single.fq.paired.fq >> filteredOH2_paired.fq") | |
155 os.system("printf '\n\nSeroTyper\n=========\nduk v20110303\n' >> " + args.logfile) | |
156 os.system("printf 'parameters: k=23\n\n' >> " + args.logfile) | |
157 # SEROTYPE ASSEMBLY: SPADES | |
158 os.system("perl " + TOOL_DIR + "/scripts/spades.pl spades_contigs_oh spades_contig_stats_oh spades_scaffolds_oh spades_scaffold_stats_oh " + args.spades_log + " NODE spades.py --disable-gzip-output --careful -t \${GALAXY_SLOTS:-16} -k '21,33,55' --pe1-ff --pe1-1 fastq:filteredOH1_paired.fq --pe1-2 fastq:filteredOH2_paired.fq") | |
159 os.system("spades.py -v >> " + args.logfile) | |
160 os.system("printf 'parameters: careful, k=21,33,55, pe1-ff, pe1-1, pe1-2\n\n' >> " + args.logfile) | |
161 # SEROTYPE NCBI BLAST+ blastn | |
162 os.system("blastn -query 'spades_contigs_oh' -db '" + TOOL_DIR + "/data/O_type' -task blastn -evalue 0.001 -out '" + args.blastn_O + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0") | |
163 os.system("blastn -query 'spades_contigs_oh' -db '" + TOOL_DIR + "/data/H_type' -task blastn -evalue 0.001 -out '" + args.blastn_H + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0 -ungapped") | |
164 os.system("blastn -version >> " + args.logfile) | |
165 os.system("printf 'parameters: evalue=0.001, strand=both, dust=yes, max_target_seqs=10, perc_identity=95.0\n\n' >> " + args.logfile) | |
166 os.system("cat " + TOOL_DIR + "/data/SeroTyping_db.txt >> " + args.logfile) | |
167 # SEROTYPER: FILTER, CUT AND CONCATENATE BLASTN OUTPUT | |
168 os.system("echo 'sseqid\tpident\tlength\tpositive' > blastn_OH_fc") | |
169 os.system("awk -F '\t' '$4>800 { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_O + " | sort -nrk 2 -nrk 3 > blastn_O_fc") | |
170 sero_typing_o = openFileAsTable("blastn_O_fc") | |
171 os.system("cat blastn_O_fc >> blastn_OH_fc") | |
172 os.system("awk -F '\t' '$4>800 { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_H + " | sort -nrk 2 -nrk 3 > blastn_H_fc") | |
173 sero_typing_h = openFileAsTable("blastn_H_fc") | |
174 os.system("cat blastn_H_fc >> blastn_OH_fc") | |
175 if os.stat('blastn_O_fc').st_size == 0 and os.stat('blastn_H_fc').st_size == 0: | |
176 os.system("echo '-\t-\t-\t-' >> blastn_OH_fc") | |
177 else: | 100 else: |
178 # FASTQC | 101 # FASTQC |
179 os.system("python " + TOOL_DIR + "/scripts/rgFastQC.py -i " + args.input1 + " -d " + args.html1_path + " -o " + args.html1 + " -t " + args.text1 + " -f " + args.input1_ext + " -j " + args.input1_name + " -e " + "fastqc") | 102 os.system("python " + TOOL_DIR + "/scripts/rgFastQC.py -i " + args.input1 + " -d " + args.html1_path + " -o " + args.html1 + " -t " + args.text1 + " -f " + args.input1_ext + " -j " + args.input1_name + " -e " + "fastqc") |
180 os.system("rm -r " + args.html1_path) | 103 os.system("rm -r " + args.html1_path) |
181 os.system("fastqc -v >> " + args.logfile) | 104 log.write(os.popen("fastqc -v").read()) |
182 # TRIMMING | 105 # TRIMMING |
183 os.system("python " + TOOL_DIR + "/scripts/fastq_positional_quality_trimming.py -1 " + args.input1 + " --maxlt 360 --lt 10 --rt 0 --minqt 25 --avgqt 27.0 --minlf 50 --trimmed1 " + args.trimmed1 + " --log trimming_logfile") | 106 os.system("python " + TOOL_DIR + "/scripts/fastq_positional_quality_trimming.py -1 " + args.input1 + " --maxlt 360 --lt 10 --rt 0 --minqt 25 --avgqt 27.0 --minlf 50 --trimmed1 input_t1.fq --log trimming_logfile") |
184 os.system("ln -s " + args.trimmed1 + " input_1.fq") | 107 log.write("\nfastq_positional_quality_trimming v1.0\n") |
185 os.system("echo 'fastq_positional_quality_trimming v1.0' >> " + args.logfile) | 108 log.write("parameters: maxlt=360, lt=10, rt=0, minqt=25, avgqt=27.0, minlf=50\n") |
186 os.system("printf 'parameters: maxlt=360, lt=10, rt=0, minqt=25, avgqt=27.0, minlf=50\n' >> " + args.logfile) | 109 if args.shigatoxintyping or args.serotyping: |
110 # ASSEMBLY | |
111 os.system("perl " + TOOL_DIR + "/scripts/spades.pl contigs.fa spades_contig_stats spades_scaffolds spades_scaffold_stats spades_log NODE spades.py --disable-gzip-output --isolate -t ${GALAXY_SLOTS:-16} --iontorrent -s fastq:input_t1.fq") | |
112 log.write(os.popen("spades.py -v").read()) | |
113 log.write("parameters: --isolate, --iontorrent\n\n") | |
187 if args.virulotyping: | 114 if args.virulotyping: |
188 # VIRULOTYPER | 115 # VIRULOTYPER |
189 os.system("perl " + TOOL_DIR + "/scripts/patho_typing_pt.pl 'python " + TOOL_DIR + "/scripts/patho_typing.py -s Escherichia coli -f " + args.input1 + " -o output_dir -j 1 --minGeneCoverage 90 --minGeneIdentity 90 --minGeneDepth 15'") | 116 os.system("perl " + TOOL_DIR + "/scripts/patho_typing.pl 'python " + TOOL_DIR + "/scripts/patho_typing.py -s Escherichia coli -f " + args.input1 + " -o output_dir -j 1 --minGeneCoverage 90 --minGeneIdentity 90 --minGeneDepth 15'") |
190 os.system("cat pathotyper_rep_tot_tab > " + args.virulotyper) | 117 os.system("cat pathotyper_rep_tot_tab > " + args.virulotyper) |
191 os.system("printf '\n\nViruloTyper\n===========\npatho_typing v0.3\n' >> " + args.logfile) | 118 log.write("\n\nViruloTyper\n===========\npatho_typing v1.0\n") |
192 os.system("printf 'parameters: minGeneCoverage=90, minGeneIdentity=90, minGeneDepth=15\n\n' >> " + args.logfile) | 119 log.write("parameters: minGeneCoverage=90, minGeneIdentity=90, minGeneDepth=15\n\n") |
193 os.system("cat " + TOOL_DIR + "/data/ViruloTyping_db.txt >> " + args.logfile) | 120 log.write(os.popen("cat " + TOOL_DIR + "/data/ViruloTyping_db.txt").read()) |
194 if args.shigatoxintyping: | 121 # SEQUENCETYPER |
195 # SHIGATOXIN FILTERING | 122 if args.input2: |
196 os.system(TOOL_DIR + "/scripts/duk -m filtered1STX.fq -k 23 " + TOOL_DIR + "/data/stx.fa input_1.fq") | 123 os.system("mentalist call --output_votes -o 'mentalist_out' --db '" + TOOL_DIR + "/data/escherichia_coli_pubmlst_k31_2018-10-09/escherichia_coli_pubmlst_k31_m023_2018-10-09.jld' -1 input_t1.fq -2 input_t2.fq") |
197 os.system("printf '\n\nShigatoxinTyper\n===============\nduk v20110303\n' >> " + args.logfile) | 124 else: |
198 os.system("printf 'parameters: k=23\n\n' >> " + args.logfile) | 125 os.system("mentalist call --output_votes -o 'mentalist_out' --db '" + TOOL_DIR + "/data/escherichia_coli_pubmlst_k31_2018-10-09/escherichia_coli_pubmlst_k31_m023_2018-10-09.jld' -1 input_t1.fq") |
199 # SHIGATOXIN ASSEMBLY: SPADES | 126 os.system("mv mentalist_out.byvote " + args.mlstsevenloci) |
200 os.system("perl " + TOOL_DIR + "/scripts/spades.pl spades_contigs_stx spades_contig_stats_stx spades_scaffolds_stx spades_scaffold_stats_stx spades_log_stx NODE spades.py --disable-gzip-output --careful -t \${GALAXY_SLOTS:-16} -k '21,33,55' --iontorrent --pe1-ff --pe1-s fastq:filtered1STX.fq") | 127 sequence_typing = openFileAsTable(args.mlstsevenloci) |
201 os.system("spades.py -v >> " + args.logfile) | 128 sequence_qc = openFileAsTable("mentalist_out.coverage.txt") |
202 os.system("printf 'parameters: careful, k=21,33,55, iontorrent, pe1-ff, pe1-s\n\n' >> " + args.logfile) | 129 log.write("\n\nSequenceTyper\n=============\n") |
203 # SHIGATOXIN NCBI BLAST+ blastn | 130 log.write(os.popen("mentalist -v | grep MentaLiST").read()) |
204 os.system("blastn -query 'spades_contigs_stx' -db '" + TOOL_DIR + "/data/stx' -task blastn -evalue 0.001 -out '" + args.blastn_STX + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0") | 131 log.write("\n") |
205 os.system("blastn -version >> " + args.logfile) | 132 log.write(os.popen("cat " + TOOL_DIR + "/data/SequenceTyping_db.txt").read()) |
206 os.system("printf 'parameters: evalue=0.001, strand=both, dust=yes, max_target_seqs=10, perc_identity=95.0\n\n' >> " + args.logfile) | 133 if args.shigatoxintyping: |
207 os.system("cat " + TOOL_DIR + "/data/ShigatoxinTyping_db.txt >> " + args.logfile) | 134 # SHIGATOXIN TYPER |
208 # SHIGATOXINTYPER: FILTER, CUT AND CONCATENATE BLASTN OUTPUT | 135 os.system("echo 'sseqid\tpident\tlength\tpositive' > mmseqs_shigatoxin_fct") |
209 os.system("echo 'sseqid\tpident\tlength\tpositive' > blastn_shigatoxin_fct") | 136 os.system("mmseqs easy-search --search-type 3 --format-output query,target,pident,alnlen,tlen contigs.fa " + TOOL_DIR + "/data/stxDB mmseqs_STX /tmp") |
210 os.system("awk -F '\t' '($3>99 && $4>1200) { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_STX + " > blastn_shigatoxin_fc") | 137 os.system(TOOL_DIR + "/scripts/extract_shigatoxin.sh") |
211 shigatoxin_typing = openFileAsTable("blastn_shigatoxin_fc") | 138 os.system("cat mmseqs_shigatoxin_fc >> mmseqs_shigatoxin_fct") |
212 os.system("cat blastn_shigatoxin_fc >> blastn_shigatoxin_fct") | 139 shigatoxin_typing = openFileAsTable("mmseqs_shigatoxin_fc") |
213 if os.stat('blastn_shigatoxin_fc').st_size == 0: | 140 log.write("\n\nShigatoxinTyper\n=============\n") |
214 os.system("echo '-\t-\t-\t-' >> blastn_shigatoxin_fct") | 141 log.write(os.popen("cat " + TOOL_DIR + "/data/ShigatoxinTyping_db.txt").read()) |
215 # SEQUENCETYPER | 142 if args.serotyping: |
216 os.system("mentalist call --output_votes -o 'mentalist_out' --db '/afs/galaxy/tool-data/mentalist_databases/escherichia_coli_pubmlst_k31_2018-10-09/escherichia_coli_pubmlst_k31_m023_2018-10-09.jld' -1 input_1.fq") | 143 # SEROTYPER |
217 os.system("mv mentalist_out.byvote " + args.mlstsevenloci) | 144 os.system("echo 'sseqid\tpident\tlength\tpositive' > mmseqs_OH_fc") |
218 sequence_typing = openFileAsTable(args.mlstsevenloci) | 145 # SEROTYPER O |
219 sequence_qc = openFileAsTable("mentalist_out.coverage.txt") | 146 os.system("mmseqs easy-search --search-type 3 --format-output target,pident,alnlen,tlen contigs.fa " + TOOL_DIR + "/data/O_typeDB mmseqs_O /tmp") |
220 os.system("printf '\n\nSequenceTyper\n=============\n' >> " + args.logfile) | 147 os.system("awk -F '\t' '($3>800 && $4>800) { print $1 FS $2 FS $3 FS $4 }' mmseqs_O | sort -nrk 2 -nrk 3 > mmseqs_O_fc") |
221 os.system("mentalist -v | grep MentaLiST >> " + args.logfile) | 148 sero_typing_o = openFileAsTable("mmseqs_O_fc") |
222 os.system("printf '\n' >> " + args.logfile) | 149 os.system("cat mmseqs_O_fc >> mmseqs_OH_fc") |
223 os.system("cat " + TOOL_DIR + "/data/SequenceTyping_db.txt >> " + args.logfile) | 150 # SEROTYPER H |
224 if args.serotyping: | 151 os.system("mmseqs easy-search --search-type 3 --format-output target,pident,alnlen,tlen contigs.fa " + TOOL_DIR + "/data/H_typeDB mmseqs_H /tmp") |
225 # SEROTYPE FILTERING | 152 os.system("awk -F '\t' '($3>800 && $4>800) { print $1 FS $2 FS $3 FS $4 }' mmseqs_H | sort -nrk 2 -nrk 3 > mmseqs_H_fc") |
226 os.system(TOOL_DIR + "/scripts/duk -m filteredO1.fq -k 23 " + TOOL_DIR + "/data/O_type.fsa input_1.fq") | 153 sero_typing_h = openFileAsTable("mmseqs_H_fc") |
227 os.system(TOOL_DIR + "/scripts/duk -m filteredH1.fq -k 23 " + TOOL_DIR + "/data/H_type.fsa input_1.fq") | 154 os.system("cat mmseqs_H_fc >> mmseqs_OH_fc") |
228 os.system("cat filteredO1.fq > filteredOH1.fq") | 155 if os.stat('mmseqs_O_fc').st_size == 0 and os.stat('mmseqs_H_fc').st_size == 0: |
229 os.system("cat filteredH1.fq >> filteredOH1.fq") | 156 os.system("echo '-\t-\t-\t-' >> mmseqs_OH_fc") |
230 os.system("printf '\n\nSeroTyper\n=========\nduk v20110303\n' >> " + args.logfile) | 157 log.write("\n\nSeroTyper\n=============\n") |
231 os.system("printf 'parameters: k=23\n\n' >> " + args.logfile) | 158 log.write(os.popen("cat " + TOOL_DIR + "/data/SeroTyping_db.txt").read()) |
232 # SEROTYPE ASSEMBLY: SPADES | 159 if args.amrtyping: |
233 os.system("perl " + TOOL_DIR + "/scripts/spades.pl spades_contigs_oh spades_contig_stats_oh spades_scaffolds_oh spades_scaffold_stats_oh " + args.spades_log + " NODE spades.py --disable-gzip-output --careful -t \${GALAXY_SLOTS:-16} -k '21,33,55' --iontorrent --pe1-ff --pe1-s fastq:filteredOH1.fq") | 160 # AMRGENES |
234 os.system("spades.py -v >> " + args.logfile) | 161 os.system("amrfinder --threads 4 --database " + TOOL_DIR + "/data/amrfinder -n contigs.fa -O Escherichia -o " + args.amr) |
235 os.system("printf 'parameters: careful, k=21,33,55, iontorrent, pe1-ff, pe1-s\n\n' >> " + args.logfile) | 162 # REPORT |
236 # SEROTYPE NCBI BLAST+ blastn | |
237 os.system("blastn -query 'spades_contigs_oh' -db '" + TOOL_DIR + "/data/O_type' -task blastn -evalue 0.001 -out '" + args.blastn_O + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0") | |
238 os.system("blastn -query 'spades_contigs_oh' -db '" + TOOL_DIR + "/data/H_type' -task blastn -evalue 0.001 -out '" + args.blastn_H + "' -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads ${GALAXY_SLOTS:-8} -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0 -ungapped") | |
239 os.system("blastn -version >> " + args.logfile) | |
240 os.system("printf 'parameters: evalue=0.001, strand=both, dust=yes, max_target_seqs=10, perc_identity=95.0\n\n' >> " + args.logfile) | |
241 os.system("cat " + TOOL_DIR + "/data/SeroTyping_db.txt >> " + args.logfile) | |
242 # SEROTYPER: FILTER, CUT AND CONCATENATE BLASTN OUTPUT | |
243 os.system("echo 'sseqid\tpident\tlength\tpositive' > blastn_OH_fc") | |
244 os.system("awk -F '\t' '$4>800 { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_O + " | sort -nrk 2 -nrk 3 > blastn_O_fc") | |
245 sero_typing_o = openFileAsTable("blastn_O_fc") | |
246 os.system("cat blastn_O_fc >> blastn_OH_fc") | |
247 os.system("awk -F '\t' '$4>800 { print $2 FS $3 FS $4 FS $16 }' " + args.blastn_H + " | sort -nrk 2 -nrk 3 > blastn_H_fc") | |
248 sero_typing_h = openFileAsTable("blastn_H_fc") | |
249 os.system("cat blastn_H_fc >> blastn_OH_fc") | |
250 if os.stat('blastn_O_fc').st_size == 0 and os.stat('blastn_H_fc').st_size == 0: | |
251 os.system("echo '-\t-\t-\t-' >> blastn_OH_fc") | |
252 try: | 163 try: |
253 report = open(args.output, 'w') | 164 report = open(args.output, 'w') |
254 # write head html | 165 # write head html |
255 insertFile(TOOL_DIR + "/report_head.html", report) | 166 insertFile(TOOL_DIR + "/report_head.html", report) |
256 report.write("<td><h1>EURL VTEC WGS PT</h1><h2>Report for %s</h2>%s</td>" % (args.input1_name, datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC"))) | 167 report.write("<td><h1>EURL VTEC WGS PT</h1><h2>Report for %s</h2>%s</td>" % (args.input1_name, datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC"))) |
277 report.write("ST%s" % sequence_typing[1][8]) | 188 report.write("ST%s" % sequence_typing[1][8]) |
278 report.write("</p>\n") | 189 report.write("</p>\n") |
279 if args.virulotyping: | 190 if args.virulotyping: |
280 os.system("sort " + args.virulotyper + " | awk '/eae_|stx1._|stx2._|ehxa_/ && $2>50 && !seen[substr($1, 1, index($1, \"_\")-2)]++ { printf(\"%s%s\",sep,substr($1, 1, index($1, \"_\")-1));sep=\", \" }END{print \"\"}' > virulotyper_rep") | 191 os.system("sort " + args.virulotyper + " | awk '/eae_|stx1._|stx2._|ehxa_/ && $2>50 && !seen[substr($1, 1, index($1, \"_\")-2)]++ { printf(\"%s%s\",sep,substr($1, 1, index($1, \"_\")-1));sep=\", \" }END{print \"\"}' > virulotyper_rep") |
281 for line in fileinput.input("virulotyper_rep", inplace=True): | 192 for line in fileinput.input("virulotyper_rep", inplace=True): |
282 print line.replace("1a", "1"), | 193 print(line.replace("1a", "1"),) |
283 for line in fileinput.input("virulotyper_rep", inplace=True): | 194 for line in fileinput.input("virulotyper_rep", inplace=True): |
284 print line.replace("2a", "2"), | 195 print(line.replace("2a", "2"),) |
285 for line in fileinput.input("virulotyper_rep", inplace=True): | 196 for line in fileinput.input("virulotyper_rep", inplace=True): |
286 print line.replace("1b", "1"), | 197 print(line.replace("1b", "1"),) |
287 for line in fileinput.input("virulotyper_rep", inplace=True): | 198 for line in fileinput.input("virulotyper_rep", inplace=True): |
288 print line.replace("2b", "2"), | 199 print(line.replace("2b", "2"),) |
289 report.write("<p>Virulotypes: ") | 200 report.write("<p>Virulotypes: ") |
290 insertFile("virulotyper_rep", report) | 201 insertFile("virulotyper_rep", report) |
291 report.write("</p>\n") | 202 report.write("</p>\n") |
292 if args.shigatoxintyping: | 203 if args.shigatoxintyping: |
293 report.write("<p>Stx Subtypes: ") | 204 report.write("<p>Stx Subtypes: ") |
323 report.write("<p>FASTQC result reverse: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html2_id)) | 234 report.write("<p>FASTQC result reverse: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html2_id)) |
324 else: | 235 else: |
325 report.write("<p>FASTQC result: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html1_id)) | 236 report.write("<p>FASTQC result: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.html1_id)) |
326 if args.serotyping: | 237 if args.serotyping: |
327 report.write("<br/><hr/><h3>Serotyping</h3>\n") | 238 report.write("<br/><hr/><h3>Serotyping</h3>\n") |
328 insertFileAsTable("blastn_OH_fc", report, True) | 239 insertFileAsTable("mmseqs_OH_fc", report, True) |
329 report.write("<br/><hr/><h3>Multi Locus Sequence Typing</h3>\n") | 240 report.write("<br/><hr/><h3>Multi Locus Sequence Typing</h3>\n") |
330 if len(sequence_typing) > 1: | 241 if len(sequence_typing) > 1: |
331 insertTable(sequence_typing, report, True) | 242 insertTable(sequence_typing, report, True) |
332 if args.virulotyping: | 243 if args.virulotyping: |
333 report.write("<br/><hr/><h3>Virulotyping</h3>\n") | 244 report.write("<br/><hr/><h3>Virulotyping</h3>\n") |
334 report.write("<p>This table is filtered for results with >90%% gene coverage, unfiltered results can be found <a href='%s/datasets/%s/display/?preview=True'>here</a></p>\n" % (BASE_URL, args.virulotyper_id)) | 245 report.write("<p>This table is filtered for results with >90%% gene coverage, unfiltered results can be found <a href='%s/datasets/%s/display/?preview=True'>here</a></p>\n" % (BASE_URL, args.virulotyper_id)) |
335 insertFileAsTable("pathotyper_rep_tab", report, True, "table table-cross") | 246 insertFileAsTable("pathotyper_rep_tab", report, True, "table table-cross") |
336 if args.shigatoxintyping: | 247 if args.shigatoxintyping: |
337 report.write("<br/><hr/><h3>Shiga toxin typing</h3>\n") | 248 report.write("<br/><hr/><h3>Shiga toxin typing</h3>\n") |
338 insertFileAsTable("blastn_shigatoxin_fct", report, True) | 249 insertFileAsTable("mmseqs_shigatoxin_fct", report, True) |
250 if args.amrtyping: | |
251 report.write("<br/><hr/><h3>AMR typing</h3>\n") | |
252 report.write("<p>AMR result: <a href='%s/datasets/%s/display/?preview=True'>Webpage</a></p>\n" % (BASE_URL, args.amr_id)) | |
339 # write tail html | 253 # write tail html |
340 insertFile(TOOL_DIR + "/report_tail.html", report) | 254 insertFile(TOOL_DIR + "/report_tail.html", report) |
341 finally: | 255 finally: |
342 report.close() | 256 report.close() |
343 | 257 |
344 if __name__ == "__main__": | 258 if __name__ == "__main__": |
345 __main__() | 259 __main__() |
346 | |
347 |