Mercurial > repos > earlhaminst > lotus2
view lotus2.xml @ 3:77cb867e9608 draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/lotus2 commit ba755f943aaa8b37434744d32ea0c524503bf50f"
author | earlhaminst |
---|---|
date | Wed, 26 May 2021 11:54:37 +0000 |
parents | cf56a6553385 |
children | 59b8864c9fa0 |
line wrap: on
line source
<tool id="lotus2" name="LotuS2" version="@VERSION@+galaxy1" profile="20.01">
    <!--
        Galaxy wrapper for the LotuS2 amplicon pipeline.

        The command stages the selected FASTQ datasets as symlinks in ./input,
        lets lotus2 generate a mapping file from that directory, runs the
        pipeline into ./output and finally archives ./output as output.tar.gz
        while preserving the exit code of the main lotus2 run.
    -->
    <description>fast OTU processing pipeline</description>
    <macros>
        <token name="@VERSION@">2.06</token>
        <!-- Reference database options, expanded for taxAligner values 1, 2 and 4. -->
        <xml name="refDB_macro">
            <param argument="-refDB" type="select" label="Reference Database">
                <option value="SLV" selected="true">Silva LSU (23/28S) or SSU (16/18S) (SLV)</option>
                <option value="GG">Greengenes (GG)</option>
                <option value="UNITE">ITS focused on fungi (UNITE)</option>
                <option value="PR2">SSU focused on Protists (PR2)</option>
                <option value="beetax">Bee gut specific database and tax names (beetax)</option>
                <option value="HITdb">Human gut microbiota (HITdb)</option>
            </param>
            <param argument="-useBestBlastHitOnly" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use the best Blast hit only" help="Do not use LCA (lowest common ancestor) to determine the most likely taxonomic level (not recommended)" />
        </xml>
        <!-- Identity threshold, expanded for clustering values 1 and 3. -->
        <xml name="id_macro">
            <param argument="-id" type="float" min="0" max="1" value="0.97" label="Clustering threshold for OTUs" />
        </xml>
    </macros>
    <requirements>
        <requirement type="package" version="@VERSION@">lotus2</requirement>
    </requirements>
    <version_command>lotus2 --version</version_command>
    <!--
        NOTE(review): for taxAligner 1, 2 and 4 the option -useBestBlastHitOnly is
        written twice on the command line: once from the aligner conditional and
        once from the section-level parameter of the same name. Which of the two
        values lotus2 honours depends on its option parser; confirm upstream and
        consolidate the two parameters.
        NOTE(review): os.path is imported by the template but not referenced in
        the visible command; it is kept to leave the template namespace unchanged.
    -->
    <command detect_errors="exit_code"><![CDATA[
#import os.path
#import re
## Helper: derive a file-system-safe base name from a dataset (or pair) identifier.
## Every character outside [A-Za-z0-9_.-] becomes an underscore, then a trailing
## .gz and one trailing FASTQ extension are removed. The result line below must
## stay at column 0: any indentation would end up inside the quoted link path.
#def symlink_basename($f):
    #set fn = re.sub('[^\w\-_.]', '_', $f.element_identifier)
    #if fn.endswith('.gz'):
        #set fn = fn[:-3]
    #end if
    #for ext in ('.fq', '.fastq', '.fastqsanger'):
        #if fn.endswith($ext):
            #set fn = fn[:-len($ext)]
            #break
        #end if
    #end for
$fn#slurp
#end def

mkdir input &&
## Stage the reads. The Galaxy extension has its sanger suffix removed so the
## links end in .fastq or .fastq.gz.
#if $inputs.paired_or_single == 'single':
    #for f in $inputs.input:
        #set ext = $f.ext.replace('sanger', '')
        ln -s '$f' 'input/${symlink_basename(f)}.${ext}' &&
    #end for
#elif $inputs.paired_or_single == 'paired':
    ## Unreachable while the matching when block in the inputs is commented out.
    #for i, f in enumerate($inputs.left_input):
        #set ext = $f.ext.replace('sanger', '')
        ln -s '$f' 'input/input${i}.1.${ext}' &&
    #end for
    #for i, f in enumerate($inputs.right_input):
        #set ext = $f.ext.replace('sanger', '')
        ln -s '$f' 'input/input${i}.2.${ext}' &&
    #end for
#else:
    ## paired_collection: one forward and one reverse link per pair.
    #for f in $inputs.pair_input:
        #set ext = $f.forward.ext.replace('sanger', '')
        ln -s '$f.forward' 'input/${symlink_basename(f)}.1.${ext}' &&
        #set ext = $f.reverse.ext.replace('sanger', '')
        ln -s '$f.reverse' 'input/${symlink_basename(f)}.2.${ext}' &&
    #end for
#end if

## Generate the mapping file from the staged directory and echo it to the job log.
lotus2 -create_map mapping.txt -i input/ &&
cat mapping.txt &&

lotus2
    -i input/
    -o output
    -tmpDir tmp_folder
    -threads "\${GALAXY_SLOTS:-1}"
    -map mapping.txt
    -platform $platform
    #if $barcode:
        -barcode '$barcode'
    #end if
    #if $forwardPrimer:
        -forwardPrimer '$forwardPrimer'
    #end if
    #if $reversePrimer:
        -reversePrimer '$reversePrimer'
    #end if
    ## A select filled from a data table renders as the value column of the
    ## table, not as a file path, so the cached genome must be resolved via
    ## fields.path. A history dataset renders as its path directly.
    #if $offtarget_cond.offtargetDB == 'cached':
        -offtargetDB '$offtarget_cond.ref_file.fields.path'
    #elif $offtarget_cond.offtargetDB == 'history':
        -offtargetDB '$offtarget_cond.ref_file'
    #end if
    -clustering $clu_args.clu_cond.clustering
    #if $clu_args.clu_cond.clustering in ('1', '3'):
        -id $clu_args.clu_cond.id
    #elif $clu_args.clu_cond.clustering == '2':
        -swarm_distance $clu_args.clu_cond.swarm_distance
    #end if
    #if $clu_args.derepMin:
        -derepMin '$clu_args.derepMin'
    #end if
    -deactivateChimeraCheck $clu_args.deactivateChimeraCheck
    -chim_skew $clu_args.chim_skew
    -readOverlap $clu_args.readOverlap
    -taxAligner $tax_args.aligner_cond.taxAligner
    #if $tax_args.aligner_cond.taxAligner == '0':
        -rdp_thr $tax_args.aligner_cond.rdp_thr
    #elif $tax_args.aligner_cond.taxAligner == '3':
        -utax_thr $tax_args.aligner_cond.utax_thr
    #else:
        -refDB $tax_args.aligner_cond.refDB
        -useBestBlastHitOnly $tax_args.aligner_cond.useBestBlastHitOnly
    #end if
    -amplicon_type $tax_args.amplicon_type
    -tax_group $tax_args.tax_group
    -keepUnclassified $tax_args.keepUnclassified
    -useBestBlastHitOnly $tax_args.useBestBlastHitOnly
    -LCA_cover $tax_args.LCA_cover
    -LCA_frac $tax_args.LCA_frac
    -greengenesSpecies $tax_args.greengenesSpecies
    -lulu $tax_args.lulu
    -buildPhylo $tax_args.buildPhylo
## Remember the status of everything up to here, archive whatever was written,
## then report the remembered status.
; EXIT_VALUE=\$?
; tar -cvzf output.tar.gz output/ && exit \$EXIT_VALUE
    ]]></command>
    <inputs>
        <conditional name="inputs">
            <param name="paired_or_single" type="select" label="Paired or Single-end data?">
                <option value="single" selected="true">Single-end</option>
                <option value="paired_collection">Paired-end collection</option>
            </param>
            <when value="single">
                <param name="input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Single-end reads" />
            </when>
            <!--
            <when value="paired">
                <param name="left_input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Left/Forward strand reads" />
                <param name="right_input" type="data" format="fastqsanger,fastqsanger.gz" multiple="true" label="Right/Reverse strand reads" />
            </when>
            -->
            <when value="paired_collection">
                <param name="pair_input" type="data_collection" collection_type="list:paired" format="fastqsanger,fastqsanger.gz" label="List of paired reads" />
            </when>
        </conditional>
        <param argument="-platform" type="select" label="Sequencing platform">
            <option value="miSeq" selected="true">miSeq</option>
            <option value="hiSeq">hiSeq</option>
            <option value="454">454</option>
            <option value="PacBio">PacBio</option>
        </param>
        <param argument="-barcode" type="data" format="fastqsanger" optional="true" label="Barcode (MID) sequences (optional)" help="FASTQ file with barcodes (in the processed mi/hiSeq format), if provided by the sequencer" />
        <param argument="-forwardPrimer" type="text" value="" label="Forward primer used to amplify DNA region" help="E.g. 16S primer fwd" />
        <param argument="-reversePrimer" type="text" value="" label="Reverse primer used to amplify DNA region" help="E.g. 16S primer rev" />
        <!-- Both non-empty branches name their parameter ref_file; the command picks the right accessor per branch. -->
        <conditional name="offtarget_cond">
            <param argument="-offtargetDB" type="select" label="Remove likely contaminant OTUs/ASVs based on alignment to host genome" help="Useful for low-bacterial biomass samples to remove possible host genome contaminations">
                <option value="no" selected="true">Disabled</option>
                <option value="cached">Use a built-in genome</option>
                <option value="history">Use a genome from history</option>
            </param>
            <when value="no" />
            <when value="cached">
                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No reference genomes are available" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta" label="FASTA reference genome" />
            </when>
        </conditional>
        <section name="clu_args" title="Clustering Options">
            <conditional name="clu_cond">
                <param argument="-clustering" type="select" label="Clustering algorithm">
                    <option value="1">UPARSE</option>
                    <option value="2">swarm</option>
                    <option value="3">cd-hit</option>
                    <option value="6">unoise3</option>
                    <option value="7" selected="true">dada2</option>
                </param>
                <when value="1">
                    <expand macro="id_macro" />
                </when>
                <when value="2">
                    <param argument="-swarm_distance" type="integer" min="1" value="1" label="Clustering threshold for OTUs when using swarm clustering" />
                </when>
                <when value="3">
                    <expand macro="id_macro" />
                </when>
                <when value="6">
                </when>
                <when value="7">
                </when>
            </conditional>
            <param argument="-derepMin" type="text" value="" label="Minimum size of dereplicated raw reads (optional)" help="E.g. 4:1,4:2,3:3 . See http://lotus2.earlham.ac.uk/images/Derep_options.pdf for how to specify this parameter. If not specified, LotuS2 will select an appropriate default for the chosen clustering algorithm." />
            <param argument="-deactivateChimeraCheck" type="select" label="Chimera check">
                <option value="0" selected="true">OTU chimera checks</option>
                <option value="1">No chimera check at all</option>
                <option value="2">Disable deNovo chimera check</option>
                <option value="3">Disable ref based chimera check</option>
            </param>
            <param argument="-chim_skew" type="integer" min="0" value="2" label="Skew in chimeric fragment abundance" />
            <param argument="-readOverlap" type="integer" min="0" value="300" label="Maximum number of basepairs that two reads are overlapping" />
        </section>
        <section name="tax_args" title="Taxonomy Options">
            <conditional name="aligner_cond">
                <param argument="-taxAligner" type="select" label="Taxonomy aligner for taxonomic profiling of OTUs">
                    <option value="0" selected="true">RDPclassifier (max likelihood)</option>
                    <option value="1">Blast LCA against custom reference database</option>
                    <option value="2">LAMBDA LCA against custom reference database</option>
                    <option value="3">UTAX likelihood corrected</option>
                    <option value="4">VSEARCH LCA against custom reference database</option>
                </param>
                <when value="0">
                    <param argument="-rdp_thr" type="float" min="0" max="1" value="0.8" label="Confidence threshold for RDP"/>
                </when>
                <when value="1">
                    <expand macro="refDB_macro" />
                </when>
                <when value="2">
                    <expand macro="refDB_macro" />
                </when>
                <when value="3">
                    <param argument="-utax_thr" type="float" min="0" max="1" value="0.8" label="Confidence threshold for UTAX"/>
                </when>
                <when value="4">
                    <expand macro="refDB_macro" />
                </when>
            </conditional>
            <param argument="-amplicon_type" type="select" label="Amplicon type">
                <option value="LSU">LSU Large subunit (23S/28S)</option>
                <option value="SSU" selected="true">SSU small subunit (16S/18S)</option>
                <option value="ITS">ITS internal transcribed spacer</option>
                <option value="ITS1">ITS1</option>
                <option value="ITS2">ITS2</option>
            </param>
            <param argument="-tax_group" type="select" label="Tax group">
                <option value="bacteria" selected="true">bacterial 16S rDNA annotation</option>
                <option value="fungi">fungal 18S/23S/ITS annotation</option>
            </param>
            <param argument="-keepUnclassified" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Keep unclassified OTUs" help="Includes unclassified OTUs (i.e. no match in RDP/Blast database) in OTU and taxa abundance matrix calculations" />
            <param argument="-useBestBlastHitOnly" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use best blast hit only" help="If selected, do not use LCA (lowest common ancestor) to determine most likely taxonomic level (not recommended)" />
            <param argument="-LCA_cover" type="float" min="0" max="1" value="0.9" label="Minimum horizontal coverage of an OTU sequence against ref DB"/>
            <param argument="-LCA_frac" type="float" min="0" max="1" value="0.9" label="Minimum fraction of reads with identical taxonomy"/>
            <param argument="-greengenesSpecies" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Create greengenes output labels instead of OTU" />
            <param argument="-lulu" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Use LULU to merge OTUs based on their occurrence" />
            <param argument="-buildPhylo" type="select" label="Build OTU phylogeny">
                <option value="0">Disable</option>
                <option value="1" selected="true">Use fasttree2</option>
                <option value="2">Use iqtree2</option>
            </param>
        </section>
    </inputs>
    <!-- Individual result files are collected from ./output; the last dataset is the archive of the whole directory. -->
    <outputs>
        <data name="otu" format="tabular" label="${tool.name} on ${on_string}: OTU abundance matrix" from_work_dir="output/OTU.txt" />
        <data name="otu_biom" format="biom1" label="${tool.name} on ${on_string}: biom-formatted OTU abundance matrix" from_work_dir="output/OTU.biom" />
        <data name="otu_fna" format="fasta" label="${tool.name} on ${on_string}: FASTA-formatted extended OTU seed sequences" from_work_dir="output/OTU.fna" />
        <data name="OTUphylo_nwk" format="newick" label="${tool.name} on ${on_string}: Newick-formatted phylogenetic tree between sequences" from_work_dir="output/OTUphylo.nwk" />
        <data name="mapping" format="tabular" label="${tool.name} on ${on_string}: mapping file" from_work_dir="output/primary/in.map" />
        <data name="outputs" format="tar" label="${tool.name} on ${on_string}: All output files" from_work_dir="output.tar.gz" />
    </outputs>
    <tests>
        <!-- Single-end run on two gzipped samples with 454 settings and cd-hit clustering. -->
        <test>
            <param name="paired_or_single" value="single"/>
            <param name="input" value="Anh_sample1.fastq.gz,Anh_sample2.fastq.gz" ftype="fastqsanger.gz"/>
            <param name="platform" value="454" />
            <param name="clustering" value="3" />
            <output name="otu" file="OTU.txt" compare="sim_size" />
            <output name="otu_fna" file="OTU.fna" compare="sim_size" />
            <output name="mapping" file="mapping.txt" />
        </test>
    </tests>
    <help><![CDATA[
If you have separate FASTA and quality files, these can be combined in a FASTQ file using the "Combine FASTA and QUAL into FASTQ" tool.

Documentation can be found at `<http://lotus2.earlham.ac.uk/>`_.
    ]]></help>
    <citations>
        <citation type="doi">10.1186/s40168-021-01012-1</citation>
    </citations>
</tool>