Mercurial > repos > iuc > mothur_chimera_slayer
diff chimera.slayer.xml @ 0:72b33fe8c05a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mothur commit a9d1e0debcd357d8080a1c6c5f1d206dd45a7a4d
author | iuc |
---|---|
date | Fri, 19 May 2017 05:42:41 -0400 |
parents | |
children | 6f7f890e6a54 |
line wrap: on
line diff
<tool profile="16.07" id="mothur_chimera_slayer" name="Chimera.slayer" version="@WRAPPER_VERSION@.0">
    <!--
        Galaxy wrapper for the mothur chimera.slayer command.
        Requirements, stdio handling, the version command, the log file
        output/test and the citations are expanded from macros.xml.
    -->
    <description>Find putative chimeras using slayer</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command><![CDATA[
        @SHELL_OPTIONS@

        ## enable mothur to find blast: link everything located next to the
        ## mothur executable into the working directory and run ./mothur below
        loc=`which mothur` && ln -s "\${loc%/mothur}"/* . &&

        ## create symlinks to input datasets; the mothur call below refers to
        ## the inputs only through these link names. Optional datasets are
        ## linked only when they were actually supplied.
        ln -s "$fasta" fasta.dat &&
        #if $alignment.source == "self":
            #if $alignment.name:
                ln -s "$alignment.name" alignment.name.dat &&
            #end if
            #if $alignment.group:
                ln -s "$alignment.group" alignment.group.dat &&
            #end if
            #if $alignment.count:
                ln -s "$alignment.count" alignment.count.dat &&
            #end if
        #else:
            ln -s "$alignment.template" alignment.template.dat &&
        #end if

        ## build the mothur command, strip the blanks and pipe it into mothur;
        ## everything mothur prints is also captured in mothur.out.log
        echo 'chimera.slayer(
            fasta=fasta.dat,
            #if $alignment.source == "self":
                reference=self,
                #if $alignment.name:
                    name=alignment.name.dat,
                #end if
                #if $alignment.group:
                    group=alignment.group.dat,
                #end if
                #if $alignment.count:
                    count=alignment.count.dat,
                #end if
            #else:
                reference=alignment.template.dat,
            #end if
            #if $options.setby == "user":
                search=$options.search,
                window=$options.window,
                increment=$options.increment,
                match=$options.match,
                mismatch=$options.mismatch,
                numwanted=$options.numwanted,
                parents=$options.parents,
                minsim=$options.minsim,
                mincov=$options.mincov,
                iters=$options.iters,
                minbs=$options.minbs,
                minsnp=$options.minsnp,
                divergence=$options.divergence,
                trim=$options.trim,
                split=$options.split,
            #end if
            dereplicate=$dereplicate,
            processors='\${GALAXY_SLOTS:-8}'
        )'
        | sed 's/ //g'  ## mothur trips over whitespace
        | ./mothur
        | tee mothur.out.log
    ]]></command>
    <inputs>
        <param name="fasta" type="data" format="fasta" label="fasta - Candidate Sequences"/>
        <!-- Where the reference comes from: a history dataset, a cached alignment database, or the input itself -->
        <conditional name="alignment">
            <param name="source" type="select" label="Select Reference Template from" help="">
                <option value="hist">History</option>
                <option value="ref">Cached Reference</option>
                <option value="self">Self - Use abundant sequences from the input Candidate Sequences fasta</option>
            </param>
            <when value="ref">
                <param name="template" type="select" label="reference - Select an alignment database" help="">
                    <options from_data_table="mothur_aligndb">
                    </options>
                </param>
            </when>
            <when value="hist">
                <param name="template" type="data" format="fasta" label="reference - Reference to align with" help=""/>
            </when>
            <when value="self">
                <param name="count" type="data" format="mothur.count_table" optional="true" label="count - Count file"/>
                <param name="name" type="data" format="mothur.names" optional="true" label="names - Sequences Names"/>
                <param name="group" type="data" format="mothur.groups" optional="true" label="group - Sequences Name reference"
                       help="use the more abundant sequences from the same sample to check the query sequence"/>
            </when>
        </conditional>
        <!-- With "default" none of the tuning parameters below are passed to mothur -->
        <conditional name="options">
            <param name="setby" type="select" label="Slayer Options" help="">
                <option value="default">Use default settings</option>
                <option value="user">Manually set options</option>
            </param>
            <when value="default"/>
            <when value="user">
                <param name="search" type="select" label="search - Search method for finding the closest parent" help="">
                    <option value="blast" selected="true">blast</option>
                    <option value="kmer">kmer</option>
                </param>
                <param name="window" type="integer" value="50" label="window - Window size for searching for chimeras (default 50)"/>
                <param name="increment" type="integer" value="5" label="increment - Increment for window slide on each iteration (default 5)"/>
                <param name="match" type="integer" value="5" label="match - Reward matched bases (default 5)"/>
                <param name="mismatch" type="integer" value="-4" label="mismatch - Penalty for mismatched bases (default -4)"/>
                <param name="numwanted" type="integer" value="15" label="numwanted - Number of potential parents to compare with query sequence (default 15)"/>
                <param name="parents" type="integer" value="3" label="parents - Number of potential parents to investigate from the numwanted best matches"/>
                <param name="minsim" type="integer" value="90" label="minsim - Minimum similarity % between the query and parent (default 90)"/>
                <param name="mincov" type="integer" value="70" label="mincov - Minimum coverage % of closest matches in reference and the query (default 70)"/>
                <!-- label kept in sync with the value attribute, which is what the form pre-fills -->
                <param name="iters" type="integer" value="1000" label="iters - Number of bootstrap iterations to try (default 1000)"/>
                <param name="minbs" type="integer" value="90" label="minbs - Minimum bootstrap support % for calling a sequence chimeric (default 90)"/>
                <!-- label kept in sync with the value attribute, which is what the form pre-fills -->
                <param name="minsnp" type="integer" value="10" label="minsnp - Percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default 10)"/>
                <param name="divergence" type="float" value="1.007" label="divergence - Divergence cutoff for chimera determination (default 1.007)"/>
                <param name="trim" type="boolean" truevalue="true" falsevalue="false" checked="false" label="trim - include chimeric sequences trimmed to their longest piece"/>
                <param name="split" type="boolean" truevalue="true" falsevalue="false" checked="false" label="split - detect tri- and quadmeras"
                       help="if a sequence comes back as non-chimeric, mothur will test the two sides to see if they are chimeric."/>
            </when>
        </conditional>
        <param name="dereplicate" type="boolean" falsevalue="false" truevalue="true" checked="false" label="dereplicate - remove chimeric sequences from all groups, default=f"
               help="If parameter is false, then if one group finds the sequence to be chimeric, then all groups find it to be chimeric"/>
    </inputs>
    <outputs>
        <expand macro="logfile-output"/>
        <!-- Results are collected from the working directory under the fasta.dat link's base name -->
        <data name="out_file" from_work_dir="fasta.slayer.chimeras" format="txt" label="${tool.name} on ${on_string}: slayer.chimeras"/>
        <data name="out_accnos" from_work_dir="fasta.slayer.accnos" format="mothur.accnos" label="${tool.name} on ${on_string}: slayer.accnos"/>
    </outputs>
    <tests>
        <test><!-- test with external reference -->
            <param name="fasta" value="Mock_S280_L001_R1_001_small.trim.contigs.good.align_head"/>
            <param name="source" value="hist"/>
            <param name="template" value="HMP_MOCK.v35.align"/>
            <output name="out_file" file="Mock_S280_L001_R1_001_small.trim.contigs.good.slayer.chimeras" ftype="txt"/>
            <expand macro="logfile-test"/>
        </test>
        <test><!-- test with self as reference -->
            <param name="fasta" value="HMP_MOCK.v35.align"/>
            <param name="source" value="self"/>
            <param name="setby" value="user"/>
            <param name="search" value="kmer"/>
            <param name="name" value="HMP_MOCK.v35.align.names"/>
            <output name="out_file" file="HMP_MOCK.v35.slayer.chimeras" ftype="txt"/>
            <expand macro="logfile-test"/>
        </test>
    </tests>

    <help>
<![CDATA[

@MOTHUR_OVERVIEW@

**Command Documentation**

The chimera.slayer_ command identifies putative chimeras using the slayer approach.

ChimeraSlayer_ is a chimeric sequence detection utility, compatible with near-full length Sanger sequences and shorter 454-FLX sequences (~500 bp).

Chimera Slayer involves the following series of steps that operate to flag chimeric 16S rRNA sequences:

 (A) the ends of a query sequence are searched against an included database of reference chimera-free 16S sequences to identify potential parents of a chimera;
 (B) candidate parents of a chimera are selected as those that form a branched best scoring alignment to the NAST-formatted query sequence;
 (C) the NAST alignment of the query sequence is improved in a 'chimera-aware' profile-based NAST realignment to the selected reference parent sequences; and
 (D) an evolutionary framework is used to flag query sequences found to exhibit greater sequence homology to an in silico chimera formed between any two of the selected reference parent sequences.

Note:
It is not recommended to blindly discard all sequences flagged as chimeras. Some may represent naturally formed chimeras that do not represent PCR artifacts. Sequences flagged may warrant further investigation.


.. _ChimeraSlayer: http://microbiomeutil.sourceforge.net/
.. _chimera.slayer: https://www.mothur.org/wiki/Chimera.slayer

]]>
    </help>
    <expand macro="citations"/>
</tool>