diff pick_rep_set.xml @ 0:6ebade612aa0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit c9bf747b23b4a9d6adc20c7740b9247c22654862
author iuc
date Thu, 18 May 2017 09:32:11 -0400
parents
children 8218f848c2b9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pick_rep_set.xml	Thu May 18 09:32:11 2017 -0400
@@ -0,0 +1,156 @@
+<tool id="qiime_pick_rep_set" name="Pick representative set of sequences" version="@WRAPPER_VERSION@.0">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command>pick_rep_set.py --version</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+        pick_rep_set.py
+            --input_file '$input_file'
+            #if $picking.type == "de_novo"
+                --fasta_file '$picking.fasta_file'
+            #else if $picking.type == "preferred"
+                --reference_seqs_fp '$picking.reference_seqs_fp'
+                #if $picking.fasta_file
+                    --fasta_file '$picking.fasta_file'
+                #end if
+            #end if
+            --rep_set_picking_method '$rep_set_picking_method'
+            --sort_by '$sort_by'
+            --result_fp '$result_fp'
+            --log_fp '$log_fp'
+    ]]></command>
+    <inputs>
+        <param argument="--input_file" type="data" format="txt" label="Input OTU mapping file"/>
+        <conditional name="picking">
+            <param name="type" type="select" label="Type of OTU picking">
+                <option value="de_novo">Picking a representative set for de novo-picked OTUs</option>
+                <option value="preferred">Picking OTUs with "preferred representative" sequence</option>
+            </param>
+            <when value="de_novo">
+                <param argument="--fasta_file" type="data" format="fasta" label="Input fasta file with all of the sequences whose identifiers are listed in the OTU map"/>
+            </when>
+            <when value="preferred">
+                <param argument="--reference_seqs_fp" type="data" format="fasta" label="Collection of preferred representative sequences "/>
+                <param argument="--fasta_file" type="data" format="fasta" optional="True" label="Input fasta file with all of the sequences whose identifiers are listed in the OTU map" help="If new clusters were allowed"/>
+            </when>
+        </conditional>
+        <param argument="--rep_set_picking_method" type="select" label="Method for picking representative sets">
+            <option value="random">random</option>
+            <option value="longest">longest</option>
+            <option value="most_abundant">most_abundant</option>
+            <option value="first" selected="true">first (chooses cluster seed when picking otus with uclust)</option>
+        </param>
+        <param argument="--sort_by" type="select" label="Sorting by">
+            <option value="otu" selected="true">OTU</option>
+            <option value="seq_id">Sequence id</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="fasta" name="result_fp" label="${tool.name} on ${on_string}: Representative sequences"/>
+        <data format="txt" name="log_fp" label="${tool.name} on ${on_string}: Log"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" value="pick_rep_set/seqs_otus.txt"/>
+            <param name="type" value="de_novo"/>
+            <param name="fasta_file" value="pick_rep_set/seqs.fna"/>
+            <param name="rep_set_picking_method" value="first"/>
+            <param name="sort_by" value="otu"/>
+            <output name="result_fp" file="pick_rep_set/first_otu_fasta.fasta"/>
+            <output name="log_fp">
+                <assert_contents>
+                    <has_text text="GenericRepSetPicker"/>
+                    <has_text text="Algorithm:first"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input_file" value="pick_rep_set/seqs_otus.txt"/>
+            <param name="type" value="preferred"/>
+            <param name="fasta_file" value="pick_rep_set/seqs.fna"/>
+            <param name="reference_seqs_fp" value="pick_rep_set/refseqs.fasta"/>
+            <param name="rep_set_picking_method" value="first"/>
+            <param name="sort_by" value="otu"/>
+            <output name="result_fp" file="pick_rep_set/first_otu_fasta_ref.fasta"/>
+            <output name="log_fp">
+                <assert_contents>
+                    <has_text text="ReferenceRepSetPicker"/>
+                    <has_text text="Algorithm:first"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input_file" value="pick_rep_set/seqs_otus.txt"/>
+            <param name="type" value="de_novo"/>
+            <param name="fasta_file" value="pick_rep_set/seqs.fna"/>
+            <param name="rep_set_picking_method" value="longest"/>
+            <param name="sort_by" value="otu"/>
+            <output name="result_fp" file="pick_rep_set/longest_otu_fasta.fasta"/>
+            <output name="log_fp">
+                <assert_contents>
+                    <has_text text="GenericRepSetPicker"/>
+                    <has_text text="Algorithm:longest"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input_file" value="pick_rep_set/seqs_otus.txt"/>
+            <param name="type" value="de_novo"/>
+            <param name="fasta_file" value="pick_rep_set/seqs.fna"/>
+            <param name="rep_set_picking_method" value="most_abundant"/>
+            <param name="sort_by" value="otu"/>
+            <output name="result_fp" file="pick_rep_set/most_abundant_otu_fasta.fasta"/>
+            <output name="log_fp">
+                <assert_contents>
+                    <has_text text="GenericRepSetPicker"/>
+                    <has_text text="Algorithm:most_abundant"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input_file" value="pick_rep_set/seqs_otus.txt"/>
+            <param name="type" value="de_novo"/>
+            <param name="fasta_file" value="pick_rep_set/seqs.fna"/>
+            <param name="rep_set_picking_method" value="random"/>
+            <param name="sort_by" value="otu"/>
+            <output name="result_fp">
+                <assert_contents>
+                    <has_text text="1 PC.636_263"/>
+                    <has_text text="101 PC.634_99"/>
+                </assert_contents>
+            </output>
+            <output name="log_fp">
+                <assert_contents>
+                    <has_text text="GenericRepSetPicker"/>
+                    <has_text text="Algorithm:random"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input_file" value="pick_rep_set/seqs_otus.txt"/>
+            <param name="type" value="de_novo"/>
+            <param name="fasta_file" value="pick_rep_set/seqs.fna"/>
+            <param name="rep_set_picking_method" value="first"/>
+            <param name="sort_by" value="seq_id"/>
+            <output name="result_fp" file="pick_rep_set/first_seq_id_fasta.fasta"/>
+            <output name="log_fp">
+                <assert_contents>
+                    <has_text text="GenericRepSetPicker"/>
+                    <has_text text="Algorithm:first"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+After picking OTUs, you can then pick a representative set of sequences. For each OTU, you will end up with one sequence that can be used in subsequent analyses.
+
+The output from pick_rep_set.py is a single FASTA file containing one sequence per OTU. The FASTA header lines will be the OTU identifier (from here on used as the unique sequence identifier) followed by a space, followed by the sequence identifier originally associated with the representative sequence. The name of the output FASTA file will be <input_sequences_filepath>_rep_set.fasta by default, or can be specified via the "-o" parameter.
+    ]]></help>
+    <citations>
+        <expand macro="citations"/>
+    </citations>
+</tool>