comparison lotus2.xml @ 2:cf56a6553385 draft

"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/lotus2 commit 4f154640ea9b8d9472307287f0ee6483649c9466"
author earlhaminst
date Wed, 19 May 2021 19:15:08 +0000
parents 85da3173a488
children 77cb867e9608
comparison
Legend: equal | deleted | inserted | replaced
1:85da3173a488 2:cf56a6553385
1 <tool id="lotus2" name="LotuS2" version="@VERSION@" profile="20.01"> 1 <tool id="lotus2" name="LotuS2" version="@VERSION@" profile="20.01">
2 <description>fast OTU processing pipeline</description> 2 <description>fast OTU processing pipeline</description>
3 <macros> 3 <macros>
4 <token name="@VERSION@">2.05.1</token> 4 <token name="@VERSION@">2.06</token>
5 <xml name="refDB_macro"> 5 <xml name="refDB_macro">
6 <param argument="-refDB" type="select" label="Reference Database"> 6 <param argument="-refDB" type="select" label="Reference Database">
7 <option value="SLV" selected="true">Silva LSU (23/28S) or SSU (16/18S) (SLV)</option> 7 <option value="SLV" selected="true">Silva LSU (23/28S) or SSU (16/18S) (SLV)</option>
8 <option value="GG">Greengenes (GG)</option> 8 <option value="GG">Greengenes (GG)</option>
9 <option value="UNITE">ITS focused on fungi (UNITE)</option> 9 <option value="UNITE">ITS focused on fungi (UNITE)</option>
10 <option value="PR2">SSU focused on Protists (PR2)</option> 10 <option value="PR2">SSU focused on Protists (PR2)</option>
11 <option value="beetax">Bee gut specific database and tax names (beetax)</option> 11 <option value="beetax">Bee gut specific database and tax names (beetax)</option>
12 <option value="HITdb">Human gut microbiota (HITdb)</option> 12 <option value="HITdb">Human gut microbiota (HITdb)</option>
13 </param> 13 </param>
14 <param argument="-useBestBlastHitOnly" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use the best Blast hit only" help="Do not use LCA (lowest common ancestor) to determine the most likely taxonomic level (not recommended)" />
15 </xml>
16 <xml name="id_macro">
17 <param argument="-id" type="float" min="0" max="1" value="0.97" label="Clustering threshold for OTUs" />
14 </xml> 18 </xml>
15 </macros> 19 </macros>
16 <requirements> 20 <requirements>
17 <requirement type="package" version="@VERSION@">lotus2</requirement> 21 <requirement type="package" version="@VERSION@">lotus2</requirement>
18 </requirements> 22 </requirements>
19 <version_command>lotus2 --version</version_command> 23 <version_command>lotus2 --version</version_command>
20 <command detect_errors="exit_code"><![CDATA[ 24 <command detect_errors="exit_code"><![CDATA[
25 #import os.path
26 #import re
27 #def symlink_basename($f):
28 #set fn = re.sub('[^\w\-_.]', '_', $f.element_identifier)
29 #if fn.endswith('.gz'):
30 #set fn = fn[:-3]
31 #end if
32 #for ext in ('.fq', '.fastq', '.fastqsanger'):
33 #if fn.endswith($ext):
34 #set fn = fn[:-len($ext)]
35 #break
36 #end if
37 #end for
38 $fn#slurp
39 #end def
40
21 mkdir input 41 mkdir input
22 && 42 &&
23 #if $inputs.paired_or_single == 'single': 43 #if $inputs.paired_or_single == 'single':
24 #for i, f in enumerate($inputs.input): 44 #for f in $inputs.input:
25 #set ext = $f.ext.replace('sanger', '') 45 #set ext = $f.ext.replace('sanger', '')
26 ln -s '$f' 'input/input${i}.${ext}' && 46 ln -s '$f' 'input/${symlink_basename(f)}.${ext}' &&
27 #end for 47 #end for
28 #elif $inputs.paired_or_single == 'paired': 48 #elif $inputs.paired_or_single == 'paired':
29 #for i, f in enumerate($inputs.left_input): 49 #for i, f in enumerate($inputs.left_input):
30 #set ext = $f.ext.replace('sanger', '') 50 #set ext = $f.ext.replace('sanger', '')
31 ln -s '$f' 'input/input${i}.1.${ext}' && 51 ln -s '$f' 'input/input${i}.1.${ext}' &&
33 #for i, f in enumerate($inputs.right_input): 53 #for i, f in enumerate($inputs.right_input):
34 #set ext = $f.ext.replace('sanger', '') 54 #set ext = $f.ext.replace('sanger', '')
35 ln -s '$f' 'input/input${i}.2.${ext}' && 55 ln -s '$f' 'input/input${i}.2.${ext}' &&
36 #end for 56 #end for
37 #else: 57 #else:
38 #for i, f in enumerate($inputs.pair_input): 58 #for f in $inputs.pair_input:
39 #set ext = $f.forward.ext.replace('sanger', '') 59 #set ext = $f.forward.ext.replace('sanger', '')
40 ln -s '$f.forward' 'input/input${i}.1.${ext}' && 60 ln -s '$f.forward' 'input/${symlink_basename(f)}.1.${ext}' &&
41 #set ext = $f.reverse.ext.replace('sanger', '') 61 #set ext = $f.reverse.ext.replace('sanger', '')
42 ln -s '$f.reverse' 'input/input${i}.2.${ext}' && 62 ln -s '$f.reverse' 'input/${symlink_basename(f)}.2.${ext}' &&
43 #end for 63 #end for
44 #end if 64 #end if
45 65
46 lotus2 -create_map mapping.txt -i input/ && 66 lotus2 -create_map mapping.txt -i input/ &&
47 cat mapping.txt && 67 cat mapping.txt &&
60 -forwardPrimer '$forwardPrimer' 80 -forwardPrimer '$forwardPrimer'
61 #end if 81 #end if
62 #if $reversePrimer: 82 #if $reversePrimer:
63 -reversePrimer '$reversePrimer' 83 -reversePrimer '$reversePrimer'
64 #end if 84 #end if
65 85 #if $offtarget_cond.offtargetDB != 'no':
66 -clustering $clu_args.clustering 86 -offtargetDB '$offtarget_cond.ref_file'
67 -id $clu_args.id 87 #end if
88
89 -clustering $clu_args.clu_cond.clustering
90 #if $clu_args.clu_cond.clustering in ('1', '3'):
91 -id $clu_args.clu_cond.id
92 #elif $clu_args.clu_cond.clustering == '2':
93 -swarm_distance $clu_args.clu_cond.swarm_distance
94 #end if
68 #if $clu_args.derepMin: 95 #if $clu_args.derepMin:
69 -derepMin '$clu_args.derepMin' 96 -derepMin '$clu_args.derepMin'
70 #end if 97 #end if
71 -deactivateChimeraCheck $clu_args.deactivateChimeraCheck 98 -deactivateChimeraCheck $clu_args.deactivateChimeraCheck
72 -chim_skew $clu_args.chim_skew 99 -chim_skew $clu_args.chim_skew
77 -rdp_thr $tax_args.aligner_cond.rdp_thr 104 -rdp_thr $tax_args.aligner_cond.rdp_thr
78 #elif $tax_args.aligner_cond.taxAligner == '3': 105 #elif $tax_args.aligner_cond.taxAligner == '3':
79 -utax_thr $tax_args.aligner_cond.utax_thr 106 -utax_thr $tax_args.aligner_cond.utax_thr
80 #else: 107 #else:
81 -refDB $tax_args.aligner_cond.refDB 108 -refDB $tax_args.aligner_cond.refDB
109 -useBestBlastHitOnly $tax_args.aligner_cond.useBestBlastHitOnly
82 #end if 110 #end if
83 -amplicon_type $tax_args.amplicon_type 111 -amplicon_type $tax_args.amplicon_type
84 -tax_group $tax_args.tax_group 112 -tax_group $tax_args.tax_group
85 -keepUnclassified $tax_args.keepUnclassified 113 -keepUnclassified $tax_args.keepUnclassified
86 -useBestBlastHitOnly $tax_args.useBestBlastHitOnly 114 -useBestBlastHitOnly $tax_args.useBestBlastHitOnly
87 -LCA_cover $tax_args.LCA_cover 115 -LCA_cover $tax_args.LCA_cover
88 -LCA_frac $tax_args.LCA_frac 116 -LCA_frac $tax_args.LCA_frac
89 -greengenesSpecies $tax_args.greengenesSpecies 117 -greengenesSpecies $tax_args.greengenesSpecies
118 -lulu $tax_args.lulu
119 -buildPhylo $tax_args.buildPhylo
90 120
91 ; EXIT_VALUE=\$? ; 121 ; EXIT_VALUE=\$? ;
92 122
93 tar -cvzf output.tar.gz output/ 123 tar -cvzf output.tar.gz output/
94 && 124 &&
97 127
98 <inputs> 128 <inputs>
99 <conditional name="inputs"> 129 <conditional name="inputs">
100 <param name="paired_or_single" type="select" label="Paired or Single-end data?"> 130 <param name="paired_or_single" type="select" label="Paired or Single-end data?">
101 <option value="single" selected="true">Single-end</option> 131 <option value="single" selected="true">Single-end</option>
102 <option value="paired">Paired-end</option>
103 <option value="paired_collection">Paired-end collection</option> 132 <option value="paired_collection">Paired-end collection</option>
104 </param> 133 </param>
105 <when value="single"> 134 <when value="single">
106 <param name="input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Single-end reads" /> 135 <param name="input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Single-end reads" />
107 </when> 136 </when>
137 <!--
108 <when value="paired"> 138 <when value="paired">
109 <param name="left_input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Left/Forward strand reads" /> 139 <param name="left_input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Left/Forward strand reads" />
110 <param name="right_input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Right/Reverse strand reads" /> 140 <param name="right_input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Right/Reverse strand reads" />
111 </when> 141 </when>
142 -->
112 <when value="paired_collection"> 143 <when value="paired_collection">
113 <param name="pair_input" type="data_collection" collection_type="list:paired" format="fastqsanger,fastqsanger.gz" label="List of paired reads" /> 144 <param name="pair_input" type="data_collection" collection_type="list:paired" format="fastqsanger,fastqsanger.gz" label="List of paired reads" />
114 </when> 145 </when>
115 </conditional> 146 </conditional>
116 <param argument="-platform" type="select" label="Sequencing platform"> 147 <param argument="-platform" type="select" label="Sequencing platform">
120 <option value="PacBio">PacBio</option> 151 <option value="PacBio">PacBio</option>
121 </param> 152 </param>
122 <param argument="-barcode" type="data" format="fastqsanger" optional="true" label="Barcode (MID) sequences (optional)" help="FASTQ file with barcodes (in the processed mi/hiSeq format), if provided by the sequencer" /> 153 <param argument="-barcode" type="data" format="fastqsanger" optional="true" label="Barcode (MID) sequences (optional)" help="FASTQ file with barcodes (in the processed mi/hiSeq format), if provided by the sequencer" />
123 <param argument="-forwardPrimer" type="text" value="" label="Forward primer used to amplify DNA region" help="E.g. 16S primer fwd" /> 154 <param argument="-forwardPrimer" type="text" value="" label="Forward primer used to amplify DNA region" help="E.g. 16S primer fwd" />
124 <param argument="-reversePrimer" type="text" value="" label="Reverse primer used to amplify DNA region" help="E.g. 16S primer rev" /> 155 <param argument="-reversePrimer" type="text" value="" label="Reverse primer used to amplify DNA region" help="E.g. 16S primer rev" />
156 <conditional name="offtarget_cond">
157 <param argument="-offtargetDB" type="select" label="Remove likely contaminant OTUs/ASVs based on alignment to host genome" help="Useful for low-bacterial biomass samples to remove possible host genome contaminations">
158 <option value="no" selected="true">Disabled</option>
159 <option value="cached">Use a built-in genome</option>
160 <option value="history">Use a genome from history</option>
161 </param>
162 <when value="no" />
163 <when value="cached">
164 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
165 <options from_data_table="all_fasta">
166 <filter type="sort_by" column="2" />
167 <validator type="no_options" message="No reference genomes are available" />
168 </options>
169 </param>
170 </when>
171 <when value="history">
172 <param name="ref_file" type="data" format="fasta" label="FASTA reference genome" />
173 </when>
174 </conditional>
125 <section name="clu_args" title="Clustering Options"> 175 <section name="clu_args" title="Clustering Options">
126 <param argument="-clustering" type="select" label="Clustering algorithm"> 176 <conditional name="clu_cond">
127 <option value="1">UPARSE</option> 177 <param argument="-clustering" type="select" label="Clustering algorithm">
128 <option value="2">swarm</option> 178 <option value="1">UPARSE</option>
129 <option value="3">cd-hit</option> 179 <option value="2">swarm</option>
130 <option value="6">unoise3</option> 180 <option value="3">cd-hit</option>
131 <option value="7" selected="true">dada2</option> 181 <option value="6">unoise3</option>
132 </param> 182 <option value="7" selected="true">dada2</option>
133 <param argument="-id" type="float" min="0" max="1" value="0.97" label="Clustering threshold for OTUs" /> 183 </param>
134 <param argument="-derepMin" type="text" value="" label="Minimum size of dereplicated raw reads" help="E.g. 4:1,4:2,3:3 . See http://lotus2.earlham.ac.uk/images/Derep_options.pdf for how to specify this parameter" /> 184 <when value="1">
185 <expand macro="id_macro" />
186 </when>
187 <when value="2">
188 <param argument="-swarm_distance" type="integer" min="1" value="1" label="Clustering threshold for OTUs when using swarm clustering" />
189 </when>
190 <when value="3">
191 <expand macro="id_macro" />
192 </when>
193 <when value="6">
194 </when>
195 <when value="7">
196 </when>
197 </conditional>
198 <param argument="-derepMin" type="text" value="" label="Minimum size of dereplicated raw reads (optional)" help="E.g. 4:1,4:2,3:3 . See http://lotus2.earlham.ac.uk/images/Derep_options.pdf for how to specify this parameter. If not specified, LotuS2 will select an appropriate default for the chosen clustering algorithm." />
135 <param argument="-deactivateChimeraCheck" type="select" label="Chimera check"> 199 <param argument="-deactivateChimeraCheck" type="select" label="Chimera check">
136 <option value="0" selected="true">OTU chimera checks</option> 200 <option value="0" selected="true">OTU chimera checks</option>
137 <option value="1">No chimera check at all</option> 201 <option value="1">No chimera check at all</option>
138 <option value="2">Deactivate deNovo chimera check</option> 202 <option value="2">Disable deNovo chimera check</option>
139 <option value="3">Deactivate ref based chimera check</option> 203 <option value="3">Disable ref based chimera check</option>
140 </param> 204 </param>
141 <param argument="-chim_skew" type="integer" min="0" value="2" label="Skew in chimeric fragment abundance" /> 205 <param argument="-chim_skew" type="integer" min="0" value="2" label="Skew in chimeric fragment abundance" />
142 <param argument="-readOverlap" type="integer" min="0" value="300" label="Maximum number of basepairs that two reads are overlapping" /> 206 <param argument="-readOverlap" type="integer" min="0" value="300" label="Maximum number of basepairs that two reads are overlapping" />
143 </section> 207 </section>
144 <section name="tax_args" title="Taxonomy Options"> 208 <section name="tax_args" title="Taxonomy Options">
145 <conditional name="aligner_cond"> 209 <conditional name="aligner_cond">
146 <param argument="-taxAligner" type="select" label="Taxonomy aligner"> 210 <param argument="-taxAligner" type="select" label="Taxonomy aligner for taxonomic profiling of OTUs">
147 <option value="0" selected="true">Deactivated (just use RDP)</option> 211 <option value="0" selected="true">RDPclassifier (max likelihood)</option>
148 <option value="1">Blast</option> 212 <option value="1">Blast LCA against custom reference database</option>
149 <option value="2">Use LAMBDA to search against a 16S reference database for taxonomic profiling of OTUs</option> 213 <option value="2">LAMBDA LCA against custom reference database</option>
150 <option value="3">Use UTAX with custom databases</option> 214 <option value="3">UTAX likelihood corrected</option>
151 <option value="4">Use VSEARCH to align OTUs to custom databases</option> 215 <option value="4">VSEARCH LCA against custom reference database</option>
152 </param> 216 </param>
153 <when value="0"> 217 <when value="0">
154 <param argument="-rdp_thr" type="float" min="0" max="1" value="0.8" label="Confidence threshold for RDP"/> 218 <param argument="-rdp_thr" type="float" min="0" max="1" value="0.8" label="Confidence threshold for RDP"/>
155 </when> 219 </when>
156 <when value="1"> 220 <when value="1">
180 <param argument="-keepUnclassified" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Keep unclassified OTUs" help="Includes unclassified OTUs (i.e. no match in RDP/Blast database) in OTU and taxa abundance matrix calculations" /> 244 <param argument="-keepUnclassified" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Keep unclassified OTUs" help="Includes unclassified OTUs (i.e. no match in RDP/Blast database) in OTU and taxa abundance matrix calculations" />
181 <param argument="-useBestBlastHitOnly" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use best blast hit only" help="If selected, do not use LCA (lowest common ancestor) to determine most likely taxonomic level (not recommended)" /> 245 <param argument="-useBestBlastHitOnly" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use best blast hit only" help="If selected, do not use LCA (lowest common ancestor) to determine most likely taxonomic level (not recommended)" />
182 <param argument="-LCA_cover" type="float" min="0" max="1" value="0.9" label="Minimum horizontal coverage of an OTU sequence against ref DB"/> 246 <param argument="-LCA_cover" type="float" min="0" max="1" value="0.9" label="Minimum horizontal coverage of an OTU sequence against ref DB"/>
183 <param argument="-LCA_frac" type="float" min="0" max="1" value="0.9" label="Minimum fraction of reads with identical taxonomy"/> 247 <param argument="-LCA_frac" type="float" min="0" max="1" value="0.9" label="Minimum fraction of reads with identical taxonomy"/>
184 <param argument="-greengenesSpecies" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Create greengenes output labels instead of OTU" /> 248 <param argument="-greengenesSpecies" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Create greengenes output labels instead of OTU" />
249 <param argument="-lulu" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use LULU to merge OTUs based on their occurrence" />
250 <param argument="-buildPhylo" type="select" label="Build OTU phylogeny">
251 <option value="0">Disable</option>
252 <option value="1" selected="true">Use fasttree2</option>
253 <option value="2">Use iqtree2</option>
254 </param>
185 </section> 255 </section>
186 </inputs> 256 </inputs>
187 257
188 <outputs> 258 <outputs>
189 <data name="otu" format="tabular" label="${tool.name} on ${on_string}: OTU abundance matrix" from_work_dir="output/OTU.txt" /> 259 <data name="otu" format="tabular" label="${tool.name} on ${on_string}: OTU abundance matrix" from_work_dir="output/OTU.txt" />
190 <data name="otu_biom" format="biom" label="${tool.name} on ${on_string}: biom-formatted OTU abundance matrix" from_work_dir="output/OTU.biom" /> 260 <data name="otu_biom" format="biom" label="${tool.name} on ${on_string}: biom-formatted OTU abundance matrix" from_work_dir="output/OTU.biom" />
191 <data name="otu_fna" format="fasta" label="${tool.name} on ${on_string}: FASTA-formatted extended OTU seed sequences" from_work_dir="output/OTU.fna" /> 261 <data name="otu_fna" format="fasta" label="${tool.name} on ${on_string}: FASTA-formatted extended OTU seed sequences" from_work_dir="output/OTU.fna" />
192 <data name="OTUphylo_nwk" format="newick" label="${tool.name} on ${on_string}: Newick-formatted phylogenetic tree between sequences" from_work_dir="output/OTUphylo.nwk" /> 262 <data name="OTUphylo_nwk" format="newick" label="${tool.name} on ${on_string}: Newick-formatted phylogenetic tree between sequences" from_work_dir="output/OTUphylo.nwk" />
193 <data name="hiera_blast" format="tabular" label="${tool.name} on ${on_string}: OTU taxonomy assignments based on Blastn" from_work_dir="output/hiera_BLAST.txt" /> 263 <data name="mapping" format="tabular" label="${tool.name} on ${on_string}: mapping file" from_work_dir="output/primary/in.map" />
194 <data name="hiera_rdp" format="tabular" label="${tool.name} on ${on_string}: OTU taxonomy assignments based on RDP classifier" from_work_dir="output/hiera_RDP.txt" /> 264 <data name="outputs" format="tar" label="${tool.name} on ${on_string}: All output files" from_work_dir="output.tar.gz" />
195 <data name="primary" format="tar" label="${tool.name} on ${on_string}: All output files" from_work_dir="output.tar.gz" />
196 </outputs> 265 </outputs>
197 266
198 <tests> 267 <tests>
199 <test> 268 <test>
200 <param name="paired_or_single" value="single"/> 269 <param name="paired_or_single" value="single"/>
201 <param name="input" value="Anh_sample1.fastq.gz,Anh_sample2.fastq.gz" ftype="fastqsanger.gz"/> 270 <param name="input" value="Anh_sample1.fastq.gz,Anh_sample2.fastq.gz" ftype="fastqsanger.gz"/>
202 <param name="platform" value="454" /> 271 <param name="platform" value="454" />
203 <param name="clustering" value="3" /> 272 <param name="clustering" value="3" />
204 <output name="otu" file="OTU.txt" compare="sim_size" /> 273 <output name="otu" file="OTU.txt" compare="sim_size" />
205 <output name="otu_fna" file="OTU.fna" compare="sim_size" /> 274 <output name="otu_fna" file="OTU.fna" compare="sim_size" />
206 <output name="hiera_rdp" file="hiera_RDP.txt" compare="sim_size" /> 275 <output name="mapping" file="mapping.txt" />
207 </test> 276 </test>
208 </tests> 277 </tests>
209 278
210 <help><![CDATA[ 279 <help><![CDATA[
211 If you have separate FASTA and quality files, these can be combined in a FASTQ file using the "Combine FASTA and QUAL into FASTQ" tool. 280 If you have separate FASTA and quality files, these can be combined in a FASTQ file using the "Combine FASTA and QUAL into FASTQ" tool.