Mercurial > repos > estrain > microrunqc
comparison microrunqc.xml @ 0:a53acd38d77e draft
Uploaded
author | estrain |
---|---|
date | Tue, 24 Mar 2020 08:54:42 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a53acd38d77e |
---|---|
1 <tool id="microrunqc" name="microrunqc" version="0.0.1"> | |
2 | |
3 <requirements> <!-- Pinned packages; skesa, bwa and mlst are invoked directly by the command section, the others are presumably used by the bundled Python helper scripts (confirm) --> | |
4 <requirement type="package" version="2.3.0">skesa</requirement> | |
5 <requirement type="package" version="2.19.0">mlst</requirement> | |
6 <requirement type="package" version="0.7.17">bwa</requirement> | |
7 <requirement type="package" version="1.15.4">numpy</requirement> | |
8 <requirement type="package" version="0.4.2">fastq-scan</requirement> | |
9 </requirements> | |
10 | |
11 <command detect_errors="exit_code"><![CDATA[ | |
12 ## Step 1: assemble the reads with skesa; the contigs are redirected to outname.fasta | |
13 skesa | |
14 ## fqscan is passed to run_fastq_scan.py as --type: gz for gzipped FASTQ datatypes, text otherwise | |
15 #set fqscan = "text" | |
16 #if $jobtype.select == "fastq_fr" | |
17 #set outname = $jobtype.fastq1.name | |
18 #set bwalist = str($jobtype.fastq1) + " " + str($jobtype.fastq2) | |
19 --fastq $jobtype.fastq1,$jobtype.fastq2 | |
20 #if $jobtype.fastq1.is_of_type("fastq.gz") | |
21 #set fqscan = "gz" | |
22 #else if $jobtype.fastq1.is_of_type("fastqsanger.gz") | |
23 #set fqscan = "gz" | |
24 #end if | |
25 #else if $jobtype.select == "fastq_pair" | |
26 #set outname = $jobtype.coll.name | |
27 #set bwalist = str($jobtype.coll.forward) + " " + str($jobtype.coll.reverse) | |
28 --fastq $jobtype.coll.forward,$jobtype.coll.reverse | |
29 #if $jobtype.coll.forward.is_of_type("fastq.gz") | |
30 #set fqscan = "gz" | |
31 #else if $jobtype.coll.forward.is_of_type("fastqsanger.gz") | |
32 #set fqscan = "gz" | |
33 #end if | |
34 #end if | |
35 ## num_cores is reused for bwa and mlst below; it stays 1 unless the advanced cores option is set | |
36 #set num_cores = 1 | |
37 | |
38 #if $options.select =="basic" | |
39 --cores $num_cores | |
40 --memory 8 | |
41 #else if $options.select=="advanced" | |
42 #if $options.cores | |
43 #set num_cores = $options.cores | |
44 --cores $options.cores | |
45 #end if | |
46 #if $options.memory | |
47 --memory $options.memory | |
48 #end if | |
49 #if $options.hash_count | |
50 --hash_count | |
51 #end if | |
52 #if $options.estimated_kmers | |
53 --estimated_kmers $options.estimated_kmers | |
54 #end if | |
55 #if $options.skip | |
56 --skip_bloom_filter | |
57 #end if | |
58 #if $options.kmer | |
59 --kmer $options.kmer | |
60 #end if | |
61 #if $options.min_count | |
62 --min_count $options.min_count | |
63 #end if | |
64 #if $options.max_kmer_count | |
65 --max_kmer_count $options.max_kmer_count | |
66 #end if | |
67 #if $options.vector_percent | |
68 --vector_percent $options.vector_percent | |
69 #end if | |
70 #if $options.insert_size | |
71 --insert_size $options.insert_size | |
72 #end if | |
73 #if $options.steps | |
74 --steps $options.steps | |
75 #end if | |
76 #if $options.fraction | |
77 --fraction $options.fraction | |
78 #end if | |
79 #if $options.max_snp_len | |
80 --max_snp_len $options.max_snp_len | |
81 #end if | |
82 #if $options.min_contig | |
83 --min_contig $options.min_contig | |
84 #end if | |
85 #if $options.allow_snps | |
86 --allow_snps | |
87 #end if | |
88 #end if | |
89 ## NOTE(review): outname is taken from the dataset or collection name and is used unquoted in the shell lines below - confirm names cannot contain spaces or shell metacharacters | |
90 > ${outname}.fasta; | |
91 ## Step 2: index the assembly and map the reads back with bwa mem; median_size.py reads the alignments and its output is saved as insert.median | |
92 bwa index ${outname}.fasta; | |
93 bwa mem -t $num_cores ${outname}.fasta ${bwalist} | python $__tool_directory__/median_size.py > insert.median; | |
94 ## Step 3: run mlst on the assembled contigs; the thresholds are only passed in advanced mode | |
95 mlst --nopath --threads $num_cores | |
96 #if $options.select=="advanced" | |
97 #if $options.minid | |
98 --minid $options.minid | |
99 #end if | |
100 #if $options.mincov | |
101 --mincov $options.mincov | |
102 #end if | |
103 #if $options.minscore | |
104 --minscore $options.minscore | |
105 #end if | |
106 #end if | |
107 ${outname}.fasta > ${outname}.mlst.tsv; | |
108 ## Step 4: run the fastq-scan helper script on the input reads; it writes fq_out.tab | |
109 python $__tool_directory__/run_fastq_scan.py --fastq ${bwalist} --out fq_out.tab --type ${fqscan}; | |
110 ## Step 5: sum_mlst.py receives the assembly, MLST, insert-median and fastq-scan files and writes sum_qc.txt | |
111 python $__tool_directory__/sum_mlst.py --fasta ${outname}.fasta --mlst ${outname}.mlst.tsv --med insert.median --fqscan fq_out.tab --out sum_qc.txt | |
112 | |
113 ]]></command> | |
114 <inputs> | |
115 <conditional name="jobtype"> <!-- Reads are supplied either as two separate datasets or as one paired collection --> | |
116 <param name="select" type="select" label="Select Input"> | |
117 <option value="fastq_fr">Forward and Reverse FASTQ</option> | |
118 <option value="fastq_pair">Paired FASTQ Collection</option> | |
119 </param> | |
120 <when value="fastq_fr"> | |
121 <param name="fastq1" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Forward FASTQ" /> | |
122 <param name="fastq2" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Reverse FASTQ" /> | |
123 </when> | |
124 <when value="fastq_pair"> | |
125 <param name="coll" label="Paired FASTQ" type="data_collection" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" collection_type="paired" /> | |
126 </when> | |
127 </conditional> | |
128 | |
129 <conditional name="options"> <!-- Basic exposes no settings; Advanced exposes the skesa and mlst parameters listed below --> | |
130 <param name="select" type="select" label="Options Type"> | |
131 <option value="basic">Basic</option> | |
132 <option value="advanced">Advanced</option> | |
133 </param> | |
134 <when value="advanced"> | |
135 <param name="cores" optional="true" type="integer" label="Number of cores to use (Default=16)" value=""/> | |
136 <param name="memory" optional="true" type="integer" label="Memory available (Default=32GB)" value=""/> | |
137 <param name="hash_count" optional="true" type="boolean" label="hash counter"/> | |
138 <param name="estimated_kmers" optional="true" type="integer" label="Estimated number of unique kmers for bloom filter (Default=100)" value=""/> | |
139 <param name="skip" optional="true" type="boolean" label="skip bloom filter, use estimate kmers as the hash"/> | |
140 <param name="kmer" optional="true" type="integer" label="Minimal kmer length for assembly (Default=21)" value=""/> | |
141 <param name="min_count" optional="true" type="integer" label="Minimal count for kmers retained for comparing alternate choices" value=""/> | |
142 <param name="max_kmer_count" optional="true" type="integer" label="Minimum acceptable average count for estimating the maximal kmer length in reads" value=""/> | |
143 <param name="vector_percent" optional="true" type="float" label="Count for vectors as a fraction of the read number (0-1,1=disabled)" value=""> | |
144 <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/> | |
145 </param> | |
146 <param name="insert_size" optional="true" type="integer" label="Expected insert size for paired reads" value=""/> | |
147 <param name="steps" optional="true" type="integer" label="Number of assembly iterations from minimal to maximal kmer length in reads (Default=11)" value=""/> | |
148 <param name="fraction" optional="true" type="float" label="Maximum noise to signal ratio acceptable for extension (Default=0.1)" value=""> | |
149 <validator type="in_range" message="Must be float(0,1)." min="0" max="1"/> | |
150 </param> | |
151 <param name="max_snp_len" optional="true" type="integer" label="Maximal snp length (Default=150)" value=""/> | |
152 <param name="min_contig" optional="true" type="integer" label="Minimal contig length reported in output (Default=200)" value=""/> | |
153 <param name="allow_snps" optional="true" type="boolean" label="Turn on SNP discovery (Default=false)"/> | |
154 <param name="mincov" type="integer" label="Minimum DNA %coverage" value="10" min="0" max="100" help="Minimum DNA %coverage to report partial allele at all (default 10, must be between 0-100)" optional="true" /> | |
155 <param name="minid" type="integer" label="Minimum DNA %identity" value="95" min="0" max="100" help="Minimum DNA %identity of full allele to consider 'similar' (default 95, must be between 0-100)" optional="true" /> | |
156 <param name="minscore" type="integer" label="Minimum score to match scheme" value="50" min="0" max="100" help="Minimum score out of 100 to match a scheme" optional="true" /> | |
157 </when> | |
158 <when value="basic"/> | |
159 </conditional> | |
160 </inputs> | |
161 <outputs> <!-- Each output is picked up from the job working directory via its from_work_dir pattern --> | |
162 <data format="fasta" name="results.skesa.fasta" label="${tool.name} on ${on_string}: Contigs" from_work_dir="*.fasta"/> | |
163 <data format="tabular" name="results.mlst.tsv" label="${tool.name} on ${on_string}: MLST" from_work_dir="*.mlst.tsv"/> | |
164 <data format="tabular" name="qc_results.tsv" label="${tool.name} on ${on_string}: QC Summary" from_work_dir="*.txt"/> | |
165 </outputs> | |
166 | |
167 <help><![CDATA[ | |
168 Assembles paired FASTQ reads with SKESA, maps the reads back to the contigs with BWA-MEM to record a median insert size, runs mlst on the assembly, runs fastq-scan on the reads, and writes a combined QC summary table. | |
169 ]]></help> | |
170 <citations> <!-- BibTeX entries for the wrapped SKESA assembler and the mlst typing tool --> | |
171 <citation type="bibtex"> | |
172 @misc{pope_dashnow_zobel_holt_raven_schultz_inouye_tomita_2014, | |
173 title={skesa: SKESA is a de-novo sequence read assembler for cultured single isolate genomes | |
174 based on DeBruijn graphs. It uses conservative heuristics and is designed to | |
175 create breaks at repeat regions in the genome. This leads to excellent sequence | |
176 quality but not necessarily a large N50 statistic. It is a multi-threaded | |
177 application that scales well with the number of processors. For different runs | |
178 with the same inputs, including the order of reads, the order and orientation | |
179 of contigs in the output is deterministic. }, | |
180 url={https://github.com/ncbi/ngs-tools/tree/master/tools/skesa/}, | |
181 author={National Center for Biotechnology Information }, | |
182 }</citation> | |
183 | |
184 <citation type="bibtex"> | |
185 @UNPUBLISHED{Seemann2016, | |
186 author = "Seemann T", | |
187 title = "MLST: Scan contig files against PubMLST typing schemes", | |
188 year = "2016", | |
189 url = {https://github.com/tseemann/mlst} | |
190 }</citation> | |
191 </citations> | |
192 </tool> |