diff split_wide_dataset.xml @ 2:2c218a253d56 draft default tip

"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
author malex
date Thu, 29 Jul 2021 20:48:10 +0000
parents ec9ee8edb84d
children
line wrap: on
line diff
--- a/split_wide_dataset.xml	Fri Jun 18 20:23:19 2021 +0000
+++ b/split_wide_dataset.xml	Thu Jul 29 20:48:10 2021 +0000
@@ -1,12 +1,12 @@
 <tool id="secimtools_split_input_wide_dataset" name="Create: Design, Wide, and Annotation datasets" version="@WRAPPER_VERSION@">
-  <description>from an Input wide dataset</description>
+  <description>from an input wide dataset</description>
   <macros>
     <import>macros.xml</import>
   </macros>
   <expand macro="requirements" />
-  <stdio> 
-    <exit_code range="1" level="fatal" description="Repeated Unique IDs"/> 
-  </stdio> 
+  <stdio>
+    <exit_code range="1" level="fatal" description="Repeated Unique IDs"/>
+  </stdio>
   <command detect_errors="exit_code"><![CDATA[
   split_wide_dataset.py
     -i=$input
@@ -40,6 +40,7 @@
           <when value="y">
             <param name="prefix2" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="This prefix will be prepended to your NUMERIC unique FeatureID, with an underbar in between."/>
           </when>
+          <when value="n" />
         </conditional>
       </when>
       <when value="n">
@@ -55,40 +56,42 @@
   </outputs>
   <tests>
     <test>
-      <param name="input" value="gene_input_dataset_01fhl.tsv"/>
+      <param name="input" value="gene_input_dataset.tsv"/>
       <param name="prefix" value="Gene"/>
+      <param name="hasUniqID" value="n"/>
       <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
-      <output name="wide" file="gene_wide_dataset_01fhl.tsv"/>
-      <output name="design" file="gene_design_file_01fhl.tsv"/>
-      <output name="annot" file="gene_annot_file_01fhl.tsv"/>
+      <output name="wide" file="gene_wide_dataset.tsv"/>
+      <output name="design" file="gene_design.tsv"/>
+      <output name="annot" file="gene_annotation.tsv"/>
     </test>
     <test>
-      <param name="input" value="metabolite_input_dataset_01fhl.tsv"/>
+      <param name="input" value="metabolite_input_dataset.tsv"/>
       <param name="prefix" value="Met"/>
+      <param name="hasUniqID" value="n"/>
       <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
-      <output name="wide" file="metabolite_wide_dataset_01fhl.tsv"/>
-      <output name="design" file="met_design_file_01fhl.tsv"/>
-      <output name="annot" file="met_annot_file_01fhl.tsv"/>
+      <output name="wide" file="metabolite_wide_dataset.tsv"/>
+      <output name="design" file="metabolite_design.tsv"/>
+      <output name="annot" file="metabolite_annotation.tsv"/>
     </test>
   </tests>
   <help><![CDATA[
 
 **Tool Description**
 
-  This tool can be used to perform two tasks 1) convert a single file that contains 
-  data and annotation in wide format to two files in wide format, one with data and 
-  one with annotation 2) create a design file template that will be compatible with 
-  the wide data and annotation files. The tool will automatically check for a column 
-  containing unique feature identifiers (FeatureIDs). If no unique FeatureID is located 
-  the tool will generate one. The user can specify a prefix for the unique FeatureID 
-  (e.g. 'met' for metabolite data). The Design Dataset is a template with an exact 
-  match to the columns with a single column called 'SampleID' that contains the names 
-  of the samples in the input Wide Dataset. This Design Dataset can be modified by 
-  the user to include metadata columns. The tool also creates a separate Annotation 
-  Dataset containing the unique FeatureIDs (user-specified or generated by the tool) 
-  and any non-sample descriptor columns that were present in the input wide dataset 
-  (such as m/z ratio, retention time, compound name, etc.). Finally, the tool creates 
-  a 'clean' Wide Dataset containing only samples in columns and features in rows.
+  This tool takes a single file containing both feature data (e.g. gene or metabolite expression values) and annotation
+  information (e.g. m/z ratio, compound name) and generates the following three files:
+
+  (1) a wide dataset containing a unique row identifier and the expression values,
+  (2) a wide annotation file with the unique row identifier and any non-data descriptor columns, and
+  (3) a design file with a single column called 'sampleID' containing the names of the columns that hold the expression data.
+
+  If the input dataset does not already contain a column with a unique identifier, the tool will create one.
+  The user can specify a prefix for the unique identifier column (e.g. 'met' for metabolite data).  In cases where the input
+  dataset contains a numeric identifier, the tool will prepend a user-specified prefix or, if no prefix is specified, an underbar.
+  Since the user specifies which columns contain expression values, the resulting wide dataset contains only these data columns
+  and the unique row identifier column.  Columns not specified as containing expression values are output into the annotation dataset.
+  The resulting design file template contains a single column called 'sampleID' that contains the names of the user-specified samples
+  in the input data file.  The design file can be modified by the user to include additional metadata columns.
 
 --------------------------------------------------------------------------------
 
@@ -110,7 +113,7 @@
   | ...     | ...       | ...     | ...     | ... |
   +---------+-----------+---------+---------+-----+
 
-    **NOTE:** The input dataset has features in rows and samples in columns. Any descriptor columns that are present will be used to populate the Annotation File. 
+    **NOTE:** The input dataset has features in rows and samples in columns. Any descriptor columns that are present will be used to populate the Annotation File.
 
 **Unique FeatureID**