<tool id="microrunqc" name="microrunqc" version="1.0.0">
    <!--
        Galaxy wrapper that assembles paired reads with skesa, maps the reads
        back onto the assembly with bwa, types the assembly with mlst, runs the
        bundled fastq-scan wrapper on the reads, and merges the intermediate
        files into a single QC summary (sum_qc.txt).
    -->
    <requirements>
        <requirement type="package" version="2.4.0">skesa</requirement>
        <requirement type="package" version="2.23.0">mlst</requirement>
        <requirement type="package" version="0.7.17">bwa</requirement>
        <requirement type="package" version="1.0.1">fastq-scan</requirement>
    </requirements>

    <command detect_errors="exit_code"><![CDATA[
        ## Dataset and collection names are user supplied. Every character that
        ## is not a word character, '-' or '.' is replaced with '_' before the
        ## name is used to build file names on the shell command line.
        #import re

        ## fqscan tells run_fastq_scan.py whether the reads are plain text or gzipped.
        #set fqscan = "text"

        skesa

        #if $jobtype.select == "fastq_fr"
            #set outname = re.sub('[^\w\-.]', '_', str($jobtype.fastq1.name))
            #set bwalist = str($jobtype.fastq1) + " " + str($jobtype.fastq2)
            --fastq $jobtype.fastq1,$jobtype.fastq2
            #if $jobtype.fastq1.is_of_type("fastq.gz")
                #set fqscan = "gz"
            #else if $jobtype.fastq1.is_of_type("fastqsanger.gz")
                #set fqscan = "gz"
            #end if
        #else if $jobtype.select == "fastq_pair"
            #set outname = re.sub('[^\w\-.]', '_', str($jobtype.coll.name))
            #set bwalist = str($jobtype.coll.forward) + " " + str($jobtype.coll.reverse)
            --fastq $jobtype.coll.forward,$jobtype.coll.reverse
            #if $jobtype.coll.forward.is_of_type("fastq.gz")
                #set fqscan = "gz"
            #else if $jobtype.coll.forward.is_of_type("fastqsanger.gz")
                #set fqscan = "gz"
            #end if
        #end if

        ## num_cores is reused below for bwa and mlst; it stays at 1 unless the
        ## advanced "cores" option is filled in.
        #set num_cores = 1

        #if $options.select == "basic"
            --cores $num_cores
            --memory 8
        #else if $options.select == "advanced"
            #if $options.cores
                #set num_cores = $options.cores
                --cores $options.cores
            #end if
            #if $options.memory
                --memory $options.memory
            #end if
            #if $options.hash_count
                --hash_count
            #end if
            #if $options.estimated_kmers
                --estimated_kmers $options.estimated_kmers
            #end if
            #if $options.skip
                --skip_bloom_filter
            #end if
            #if $options.kmer
                --kmer $options.kmer
            #end if
            #if $options.min_count
                --min_count $options.min_count
            #end if
            #if $options.max_kmer_count
                --max_kmer_count $options.max_kmer_count
            #end if
            #if $options.vector_percent
                --vector_percent $options.vector_percent
            #end if
            #if $options.insert_size
                --insert_size $options.insert_size
            #end if
            #if $options.steps
                --steps $options.steps
            #end if
            #if $options.fraction
                --fraction $options.fraction
            #end if
            #if $options.max_snp_len
                --max_snp_len $options.max_snp_len
            #end if
            #if $options.min_contig
                --min_contig $options.min_contig
            #end if
            #if $options.allow_snps
                --allow_snps
            #end if
        #end if

        > '${outname}.fasta'

        ## Steps are chained with && so that a failing step stops the job and
        ## its exit code is what detect_errors="exit_code" evaluates.
        && bwa index '${outname}.fasta'
        && bwa mem -t $num_cores '${outname}.fasta' ${bwalist} | python $__tool_directory__/median_size.py > insert.median

        && mlst --nopath --threads $num_cores
        #if $options.select == "advanced"
            #if $options.minid
                --minid $options.minid
            #end if
            #if $options.mincov
                --mincov $options.mincov
            #end if
            #if $options.minscore
                --minscore $options.minscore
            #end if
        #end if
        '${outname}.fasta' > '${outname}.mlst.tsv'

        && python $__tool_directory__/run_fastq_scan.py --fastq ${bwalist} --out fq_out.tab --type ${fqscan}

        && python $__tool_directory__/sum_mlst.py --fasta '${outname}.fasta' --mlst '${outname}.mlst.tsv' --med insert.median --fqscan fq_out.tab --out sum_qc.txt
    ]]></command>

    <inputs>
        <conditional name="jobtype">
            <param name="select" type="select" label="Select Input">
                <option value="fastq_fr">Forward and Reverse FASTQ</option>
                <option value="fastq_pair">Paired FASTQ Collection</option>
            </param>
            <when value="fastq_fr">
                <param name="fastq1" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Forward FASTQ" />
                <param name="fastq2" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Reverse FASTQ" />
            </when>
            <when value="fastq_pair">
                <param name="coll" label="Paired FASTQ" type="data_collection" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" collection_type="paired" />
            </when>
        </conditional>

        <conditional name="options">
            <param name="select" type="select" label="Options Type">
                <option value="basic">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="advanced">
                <!-- skesa options; an empty field leaves the flag off the command line -->
                <param name="cores" optional="true" type="integer" label="Number of cores to use (Default=16)" value=""/>
                <param name="memory" optional="true" type="integer" label="Memory available (Default=32GB)" value=""/>
                <param name="hash_count" optional="true" type="boolean" label="hash counter"/>
                <param name="estimated_kmers" optional="true" type="integer" label="Estimated number of unique kmers for bloom filter (Default=100)" value=""/>
                <param name="skip" optional="true" type="boolean" label="skip bloom filter, use estimate kmers as the hash"/>
                <param name="kmer" optional="true" type="integer" label="Minimal kmer length for assembly (Default=21)" value=""/>
                <param name="min_count" optional="true" type="integer" label="Minimal count for kmers retained for comparing alternate choices" value=""/>
                <param name="max_kmer_count" optional="true" type="integer" label="Minimum acceptable average count for estimating the maximal kmer length in reads" value=""/>
                <param name="vector_percent" optional="true" type="float" label="Count for vectors as a fraction of the read number (0-1,1=disabled)" value="">
                    <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
                </param>
                <param name="insert_size" optional="true" type="integer" label="Expected insert size for paired reads" value=""/>
                <param name="steps" optional="true" type="integer" label="Number of assembly iterations from minimal to maximal kmer length in reads (Default=11)" value=""/>
                <param name="fraction" optional="true" type="float" label="Maximum noise to signal ratio acceptable for extension (Default=0.1)" value="">
                    <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
                </param>
                <param name="max_snp_len" optional="true" type="integer" label="Maximal snp length (Default=150)" value=""/>
                <param name="min_contig" optional="true" type="integer" label="Minimal contig length reported in output (Default=200)" value=""/>
                <param name="allow_snps" optional="true" type="boolean" label="Turn on SNP discovery (Default=false)"/>
                <!-- mlst options; all three are percentages or scores bounded to 0-100 -->
                <param name="mincov" type="integer" label="Minimum DNA %coverage" value="10" min="0" max="100" help="Minimum DNA %coverage to report partial allele at all (default 10, must be between 0-100)" optional="true" />
                <param name="minid" type="integer" label="Minimum DNA %identity" value="95" min="0" max="100" help="Minimum DNA %identity of full allele to consider 'similar' (default 95, must be between 0-100)" optional="true" />
                <param name="minscore" type="integer" label="Minimum score to match scheme" value="50" min="0" max="100" help="Minimum score out of 100 to match a scheme" optional="true" />
            </when>
            <when value="basic"/>
        </conditional>
    </inputs>

    <outputs>
        <!--
            NOTE(review): from_work_dir is documented as a path relative to the
            job working directory. Confirm that the wildcard patterns on the
            first two outputs resolve on the target Galaxy release. The QC
            summary is always written as sum_qc.txt, so it is named exactly.
        -->
        <data format="fasta" name="results.skesa.fasta" label="${tool.name} on ${on_string}: Contigs" from_work_dir="*.fasta"/>
        <data format="tabular" name="results.mlst.tsv" label="${tool.name} on ${on_string}: MLST" from_work_dir="*.mlst.tsv"/>
        <data format="tabular" name="qc_results.tsv" label="${tool.name} on ${on_string}: QC Summary" from_work_dir="sum_qc.txt"/>
    </outputs>

    <help><![CDATA[
**What it does**

microrunqc runs a short quality-control workflow on one pair of FASTQ files
(two datasets, or one paired collection):

1. ``skesa`` assembles the reads into contigs.
2. ``bwa index`` / ``bwa mem`` map the reads back onto the contigs; the
   alignments are piped into the bundled ``median_size.py`` helper, which
   writes ``insert.median``.
3. ``mlst`` scans the contigs against PubMLST typing schemes.
4. The bundled ``run_fastq_scan.py`` helper collects read statistics.
5. The bundled ``sum_mlst.py`` helper merges the results of the previous
   steps into a single summary.

**Options**

*Basic* runs skesa with ``--cores 1 --memory 8`` and leaves every other
setting at the program default. *Advanced* exposes the skesa assembly
parameters and the mlst ``--minid``, ``--mincov`` and ``--minscore``
thresholds; fields left empty are not passed to the programs.

**Outputs**

- Contigs: the skesa assembly (FASTA)
- MLST: the mlst result table (tabular)
- QC Summary: the merged summary written by ``sum_mlst.py``
    ]]></help>

    <citations>
        <citation type="bibtex">
@misc{ncbi_skesa,
  title={skesa: SKESA is a de-novo sequence read assembler for cultured single isolate genomes
         based on DeBruijn graphs. It uses conservative heuristics and is designed to
         create breaks at repeat regions in the genome. This leads to excellent sequence
         quality but not necessarily a large N50 statistic. It is a multi-threaded
         application that scales well with the number of processors. For different runs
         with the same inputs, including the order of reads, the order and orientation
         of contigs in the output is deterministic.},
  url={https://github.com/ncbi/ngs-tools/tree/master/tools/skesa/},
  author={National Center for Biotechnology Information}
}</citation>

        <citation type="bibtex">
@UNPUBLISHED{Seemann2016,
  author = "Seemann T",
  title = "MLST: Scan contig files against PubMLST typing schemes",
  year = "2016",
  url = {https://github.com/tseemann/mlst}
}</citation>
    </citations>
</tool>