diff data_manager/mash_sketch_builder.xml @ 0:2af9137ba067 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit c6efcbece52dec310253537b35419839746fff7f"
author iuc
date Wed, 26 Feb 2020 17:06:21 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/mash_sketch_builder.xml	Wed Feb 26 17:06:21 2020 -0500
@@ -0,0 +1,74 @@
+<?xml version="1.0"?>
+<tool id="mash_sketch_builder" name="Mash Sketch" tool_type="manage_data" version="@TOOL_VERSION@+galaxy0" profile="18.09">
+    <description>builder</description>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">mash</requirement>
+        <requirement type="package" version="3.7">python</requirement>
+    </requirements>
+    <macros>
+        <token name="@TOOL_VERSION@">2.1</token>
+    </macros>
+    <version_command>mash --version</version_command>
+    <command detect_errors="exit_code">
+    <![CDATA[
+        python '$__tool_directory__/mash_sketch_builder.py'
+          '${out_file}'
+          --threads \${GALAXY_SLOTS:-1}
+          #if str( $input_sequence_source.input_sequence_source_selector ) == "tool_data_table":
+            --fasta '${input_sequence_source.input_sequence.fields.path}'
+          #elif str( $input_sequence_source.input_sequence_source_selector ) == 'history':
+            --fasta '${input_sequence_source.input_sequence}'
+          #end if
+          --sketch-name '${sketch_name}'
+          --sketch-size '${sketch_size}'
+          --kmer-size '${kmer_size}'
+          --probability-threshold '${probability_threshold}'
+          ${individual_sequences}
+    ]]>
+    </command>
+    <inputs>
+        <conditional name="input_sequence_source">
+            <param name="input_sequence_source_selector" type="select"
+                   label="Select a sequence from your history or use one from a tool data table?">
+                <option value="tool_data_table">Sequence from tool data table</option>
+                <option selected="True" value="history">Sequence from history</option>
+            </param>
+            <when value="tool_data_table">
+                <param name="input_sequence" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="input_sequence" type="data" format="fasta" label="Input sequence" help=""/>
+            </when>
+        </conditional>
+        <param type="text" name="sketch_name" label="Sketch name" help="Human-readable description of the sketch"/>
+        <param type="integer" name="sketch_size" value="1000" min="10" max="1000000" label="Sketch size" help="Each sketch will have at most this many non-redundant min-hashes."/>
+        <param type="integer" name="kmer_size" value="21" min="1" max="32" label="K-mer size" help="Hashes will be based on strings of this many nucleotides."/>
+        <param type="float" name="probability_threshold" value="0.01" min="0." max="1." label="Probability threshold for warning about low k-mer size." />
+        <param type="boolean" name="individual_sequences" truevalue="--individual-sequences" falsevalue="" label="Sketch individual sequences" help="e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons."/>
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_sequence_source_selector" value="history"/>
+            <param name="input_sequence" value="test_assembly.fasta"/>
+            <param name="sketch_name" value="Test Sketch" />
+            <output name="out_file" value="mash_sketch_data_manager.json" compare="sim_size" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**What it does**
+
+  Create a sketch file, which is a reduced representation of a sequence or set
+  of sequences (based on min-hashes) that can be used for fast distance
+  estimations. For output, one sketch file will be generated, but it can have
+  multiple sketches within it, divided by sequences or files.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1186/s13059-016-0997-x</citation>
+    </citations>
+</tool>