comparison microrunqc.xml @ 0:a53acd38d77e draft

Uploaded
author estrain
date Tue, 24 Mar 2020 08:54:42 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a53acd38d77e
1 <tool id="microrunqc" name="microrunqc" version="0.0.1">
2 <!-- Conda packages for the job environment: skesa, bwa, mlst and fastq-scan are the tools the command section runs; numpy is presumably needed by the bundled Python helper scripts (not visible here, confirm). -->
3 <requirements>
4 <requirement type="package" version="2.3.0">skesa</requirement>
5 <requirement type="package" version="2.19.0">mlst</requirement>
6 <requirement type="package" version="0.7.17">bwa</requirement>
7 <requirement type="package" version="1.15.4">numpy</requirement>
8 <requirement type="package" version="0.4.2">fastq-scan</requirement>
9 </requirements>
10 <!-- Pipeline: SKESA assembly, BWA-MEM mapping of the reads back to the contigs (median insert size), mlst typing, fastq-scan read statistics, then one combined QC summary. -->
11 <command detect_errors="exit_code"><![CDATA[
12 #import re
13 skesa
14 ## Work-file names derive from user-editable dataset/collection names; every character outside [A-Za-z0-9_.-] becomes "_" so the name is safe to use unquoted in the shell.
15 #set fqscan = "text"
16 #if $jobtype.select == "fastq_fr"
17 #set outname = re.sub('[^A-Za-z0-9_.-]', '_', str($jobtype.fastq1.name))
18 #set bwalist = str($jobtype.fastq1) + " " + str($jobtype.fastq2)
19 --fastq $jobtype.fastq1,$jobtype.fastq2
20 #if $jobtype.fastq1.is_of_type("fastq.gz")
21 #set fqscan = "gz"
22 #else if $jobtype.fastq1.is_of_type("fastqsanger.gz")
23 #set fqscan = "gz"
24 #end if
25 #else if $jobtype.select == "fastq_pair"
26 #set outname = re.sub('[^A-Za-z0-9_.-]', '_', str($jobtype.coll.name))
27 #set bwalist = str($jobtype.coll.forward) + " " + str($jobtype.coll.reverse)
28 --fastq $jobtype.coll.forward,$jobtype.coll.reverse
29 #if $jobtype.coll.forward.is_of_type("fastq.gz")
30 #set fqscan = "gz"
31 #else if $jobtype.coll.forward.is_of_type("fastqsanger.gz")
32 #set fqscan = "gz"
33 #end if
34 #end if
35 ## num_cores is reused for bwa and mlst below; it stays 1 unless the advanced options supply a core count.
36 #set num_cores = 1
37
38 #if $options.select =="basic"
39 --cores $num_cores
40 --memory 8
41 #else if $options.select=="advanced"
42 #if $options.cores
43 #set num_cores = $options.cores
44 --cores $options.cores
45 #end if
46 #if $options.memory
47 --memory $options.memory
48 #end if
49 #if $options.hash_count
50 --hash_count
51 #end if
52 #if $options.estimated_kmers
53 --estimated_kmers $options.estimated_kmers
54 #end if
55 #if $options.skip
56 --skip_bloom_filter
57 #end if
58 #if $options.kmer
59 --kmer $options.kmer
60 #end if
61 #if $options.min_count
62 --min_count $options.min_count
63 #end if
64 #if $options.max_kmer_count
65 --max_kmer_count $options.max_kmer_count
66 #end if
67 #if str($options.vector_percent) != ""
68 --vector_percent $options.vector_percent
69 #end if
70 #if $options.insert_size
71 --insert_size $options.insert_size
72 #end if
73 #if $options.steps
74 --steps $options.steps
75 #end if
76 #if str($options.fraction) != ""
77 --fraction $options.fraction
78 #end if
79 #if $options.max_snp_len
80 --max_snp_len $options.max_snp_len
81 #end if
82 #if $options.min_contig
83 --min_contig $options.min_contig
84 #end if
85 #if $options.allow_snps
86 --allow_snps
87 #end if
88 #end if
89 ## The skesa invocation ends here: its standard output is captured as the assembly FASTA.
90 > ${outname}.fasta;
91 ## Map the reads back to the assembly; median_size.py consumes the bwa mem output on stdin and its result is stored in insert.median.
92 bwa index ${outname}.fasta;
93 bwa mem -t $num_cores ${outname}.fasta ${bwalist} | python $__tool_directory__/median_size.py > insert.median;
94 ## MLST typing of the assembly; the thresholds are compared as strings so that an explicit 0 is still passed through.
95 mlst --nopath --threads $num_cores
96 #if $options.select=="advanced"
97 #if str($options.minid) != ""
98 --minid $options.minid
99 #end if
100 #if str($options.mincov) != ""
101 --mincov $options.mincov
102 #end if
103 #if str($options.minscore) != ""
104 --minscore $options.minscore
105 #end if
106 #end if
107 ${outname}.fasta > ${outname}.mlst.tsv;
108 ## Read statistics; fqscan is "gz" when the forward reads have a gzip datatype and "text" otherwise.
109 python $__tool_directory__/run_fastq_scan.py --fastq ${bwalist} --out fq_out.tab --type ${fqscan};
110 ## Combine the assembly, MLST, insert-size and read-statistics results into sum_qc.txt.
111 python $__tool_directory__/sum_mlst.py --fasta ${outname}.fasta --mlst ${outname}.mlst.tsv --med insert.median --fqscan fq_out.tab --out sum_qc.txt
112
113 ]]></command>
114 <inputs>
115 <conditional name="jobtype">
116 <param name="select" type="select" label="Select Input">
117 <option value="fastq_fr">Forward and Reverse FASTQ</option>
118 <option value="fastq_pair">Paired FASTQ Collection</option>
119 </param>
120 <when value="fastq_fr">
121 <param name="fastq1" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Forward FASTQ" />
122 <param name="fastq2" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Reverse FASTQ" />
123 </when>
124 <when value="fastq_pair">
125 <param name="coll" label="Paired FASTQ" type="data_collection" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" collection_type="paired" />
126 </when>
127 </conditional>
128 <!-- Advanced mode exposes the SKESA assembly options and the mlst thresholds; basic mode defines no parameters of its own. -->
129 <conditional name="options">
130 <param name="select" type="select" label="Options Type">
131 <option value="basic">Basic</option>
132 <option value="advanced">Advanced</option>
133 </param>
134 <when value="advanced">
135 <param name="cores" optional="true" type="integer" label="Number of cores to use (Default=16)" value=""/>
136 <param name="memory" optional="true" type="integer" label="Memory available (Default=32GB)" value=""/>
137 <param name="hash_count" optional="true" type="boolean" label="hash counter"/>
138 <param name="estimated_kmers" optional="true" type="integer" label="Estimated number of unique kmers for bloom filter (Default=100)" value=""/>
139 <param name="skip" optional="true" type="boolean" label="skip bloom filter, use estimate kmers as the hash"/>
140 <param name="kmer" optional="true" type="integer" label="Minimal kmer length for assembly (Default=21)" value=""/>
141 <param name="min_count" optional="true" type="integer" label="Minimal count for kmers retained for comparing alternate choices" value=""/>
142 <param name="max_kmer_count" optional="true" type="integer" label="Minimum acceptable average count for estimating the maximal kmer length in reads" value=""/>
143 <param name="vector_percent" optional="true" type="float" label="Count for vectors as a fraction of the read number (0-1,1=disabled)" value="">
144 <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
145 </param>
146 <param name="insert_size" optional="true" type="integer" label="Expected insert size for paired reads" value=""/>
147 <param name="steps" optional="true" type="integer" label="Number of assembly iterations from minimal to maximal kmer length in reads (Default=11)" value=""/>
148 <param name="fraction" optional="true" type="float" label="Maximum noise to signal ratio acceptable for extension (Default=0.1)" value="">
149 <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/>
150 </param>
151 <param name="max_snp_len" optional="true" type="integer" label="Maximal snp length (Default=150)" value=""/>
152 <param name="min_contig" optional="true" type="integer" label="Minimal contig length reported in output (Default=200)" value=""/>
153 <param name="allow_snps" optional="true" type="boolean" label="Turn on SNP discovery (Default=false)"/>
154 <param name="mincov" type="integer" label="Minimum DNA %coverage" value="10" min="0" max="100" help="Minimum DNA %coverage to report partial allele at all (default 10, must be between 0-100)" optional="true" />
155 <param name="minid" type="integer" label="Minimum DNA %identity" value="95" min="0" max="100" help="Minimum DNA %identity of full allele to consider 'similar' (default 95, must be between 0-100)" optional="true" />
156 <param name="minscore" type="integer" label="Minimum score to match scheme" value="50" min="0" max="100" help="Minimum score out of 100 to match a scheme" optional="true" />
157 </when>
158 <when value="basic"/>
159 </conditional>
160 </inputs>
161 <outputs> <!-- Contigs and MLST work files are named after the input, so they are collected by pattern; the QC summary is always written as sum_qc.txt. -->
162 <data format="fasta" name="results.skesa.fasta" label="${tool.name} on ${on_string}: Contigs" from_work_dir="*.fasta"/>
163 <data format="tabular" name="results.mlst.tsv" label="${tool.name} on ${on_string}: MLST" from_work_dir="*.mlst.tsv"/>
164 <data format="tabular" name="qc_results.tsv" label="${tool.name} on ${on_string}: QC summary" from_work_dir="sum_qc.txt"/>
165 </outputs>
166
167 <help><![CDATA[
168 microrunqc assembles paired-end FASTQ reads with SKESA, maps the reads back to the resulting contigs with BWA-MEM to compute a median insert size, types the assembly with mlst, collects read statistics with fastq-scan, and combines these results into a single QC summary table.
169 ]]></help>
170 <citations>
171 <citation type="bibtex">
172 @misc{pope_dashnow_zobel_holt_raven_schultz_inouye_tomita_2014,
173 title={skesa: SKESA is a de-novo sequence read assembler for cultured single isolate genomes
174 based on DeBruijn graphs. It uses conservative heuristics and is designed to
175 create breaks at repeat regions in the genome. This leads to excellent sequence
176 quality but not necessarily a large N50 statistic. It is a multi-threaded
177 application that scales well with the number of processors. For different runs
178 with the same inputs, including the order of reads, the order and orientation
179 of contigs in the output is deterministic. },
180 url={https://github.com/ncbi/ngs-tools/tree/master/tools/skesa/},
181 author={National Center for Biotechnology Information },
182 }</citation>
183 <!-- Citation for the mlst typing step. -->
184 <citation type="bibtex">
185 @UNPUBLISHED{Seemann2016,
186 author = "Seemann T",
187 title = "MLST: Scan contig files against PubMLST typing schemes",
188 year = "2016",
189 url = {https://github.com/tseemann/mlst}
190 }</citation>
191 </citations>
192 </tool>