diff split_wide_dataset.xml @ 1:ec9ee8edb84d draft

Initial upload of 21.6.10 release.
author malex
date Fri, 18 Jun 2021 20:23:19 +0000
parents
children 2c218a253d56
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/split_wide_dataset.xml	Fri Jun 18 20:23:19 2021 +0000
@@ -0,0 +1,205 @@
+<tool id="secimtools_split_input_wide_dataset" name="Create: Design, Wide, and Annotation datasets" version="@WRAPPER_VERSION@">
+  <description>from an Input wide dataset</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <!-- An exit status of 1 from the command is treated as a fatal job error, reported as "Repeated Unique IDs". -->
+  <stdio>
+    <exit_code level="fatal" range="1" description="Repeated Unique IDs"/>
+  </stdio>
+  <!--
+    Builds the split_wide_dataset.py command line from the form values.
+    Every substituted value is wrapped in single quotes so that user-entered
+    text containing spaces (for example a FeatureID column named "m/z ratio")
+    reaches the script as one argument instead of being split by the shell.
+  -->
+  <command detect_errors="exit_code"><![CDATA[
+  split_wide_dataset.py
+    -i='$input'
+    ## Either an existing unique FeatureID column is named by the user ...
+    #if $cond_UniqID.hasUniqID == "y":
+      -id='$cond_UniqID.uniqID'
+      ## ... optionally with a prefix when those IDs are purely numeric ...
+      #if $cond_UniqID.only_numbers.hasOnlyNumbers == "y":
+        -p2='$cond_UniqID.only_numbers.prefix2'
+      #end if
+    #else:
+      ## ... or only a prefix for generated IDs is passed.
+      -p='$cond_UniqID.prefix'
+    #end if
+    -s='$samples'
+    -w='$wide'
+    -d='$design'
+    -a='$annot'
+  ]]></command>
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Input Wide Dataset" help="Input tab separated Wide Dataset. If input is not tab separated, see TIP below."/>
+    <!-- Outer switch: use an existing unique FeatureID column ("y") or only supply a prefix for generated IDs ("n"). -->
+    <conditional name="cond_UniqID">
+      <param name="hasUniqID" type="select" display="radio" label="Does your Wide Dataset have a unique FeatureID column?">
+        <option value="y">Yes</option>
+        <option value="n">No</option>
+      </param>
+      <when value="y">
+        <param name="uniqID" type="text" size="30" value="" label="Unique FeatureID" help="Name of the column in your Wide Dataset that contains the unique FeatureIDs."/>
+        <!-- Inner switch: purely numeric FeatureIDs can be given a prefix. -->
+        <conditional name="only_numbers">
+          <param name="hasOnlyNumbers" type="select" display="radio" label="Are your unique FeatureIDs ONLY Numbers?">
+            <option value="y">Yes</option>
+            <option value="n">No</option>
+          </param>
+          <when value="y">
+            <param name="prefix2" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="This prefix will be prepended to your NUMERIC unique FeatureID, with an underbar in between."/>
+          </when>
+          <!-- Explicit empty branch so that every select option has a matching when block. -->
+          <when value="n"/>
+        </conditional>
+      </when>
+      <when value="n">
+        <param name="prefix" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="Unique IDs are required. You can input a prefix for the tool to use when creating a unique identifier (Optional). If you choose not to use a prefix, the tool-created uniqueID will be an underbar followed by a number."/>
+      </when>
+    </conditional>
+    <param name="samples" label="Sample Columns" type="text" help="Enter the numbers (1-based) of the columns in your Wide Dataset that contain sample data. E.g. if your sample data is in columns 2-4 then enter '2,3,4' (no spaces). Columns that are not selected are treated as descriptor annotation columns. NOTE: annotation columns are expected to ALL be left of the data columns. "/>
+  </inputs>
+  <!-- Three tabular results; their names are the variables the command passes to -w, -d and -a. -->
+  <outputs>
+    <data name="wide" format="tabular" label="${tool.name} on ${on_string}: Wide Dataset"/>
+    <data name="design" format="tabular" label="${tool.name} on ${on_string}: Design Dataset"/>
+    <data name="annot" format="tabular" label="${tool.name} on ${on_string}: Annotation Dataset"/>
+  </outputs>
+  <tests>
+    <!-- Both tests supply a prefix, which only exists in the hasUniqID = "n" branch.
+         The selector is set explicitly because the conditional otherwise defaults
+         to its first option ("y"), where the prefix parameter is not an active input. -->
+    <test>
+      <param name="input" value="gene_input_dataset_01fhl.tsv"/>
+      <param name="hasUniqID" value="n"/>
+      <param name="prefix" value="Gene"/>
+      <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
+      <output name="wide" file="gene_wide_dataset_01fhl.tsv"/>
+      <output name="design" file="gene_design_file_01fhl.tsv"/>
+      <output name="annot" file="gene_annot_file_01fhl.tsv"/>
+    </test>
+    <test>
+      <param name="input" value="metabolite_input_dataset_01fhl.tsv"/>
+      <param name="hasUniqID" value="n"/>
+      <param name="prefix" value="Met"/>
+      <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
+      <output name="wide" file="metabolite_wide_dataset_01fhl.tsv"/>
+      <output name="design" file="met_design_file_01fhl.tsv"/>
+      <output name="annot" file="met_annot_file_01fhl.tsv"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+
+**Tool Description**
+
+  This tool can be used to perform two tasks: 1) convert a single file that contains 
+  data and annotation in wide format to two files in wide format, one with data and 
+  one with annotation; 2) create a design file template that will be compatible with 
+  the wide data and annotation files. The tool will automatically check for a column 
+  containing unique feature identifiers (FeatureIDs). If no unique FeatureID is located 
+  the tool will generate one. The user can specify a prefix for the unique FeatureID 
+  (e.g. 'met' for metabolite data). The Design Dataset is a template with a single 
+  column called 'SampleID' that contains the sample names, exactly matching the sample 
+  column headers in the input Wide Dataset. This Design Dataset can be modified by 
+  the user to include metadata columns. The tool also creates a separate Annotation 
+  Dataset containing the unique FeatureIDs (user-specified or generated by the tool) 
+  and any non-sample descriptor columns that were present in the input wide dataset 
+  (such as m/z ratio, retention time, compound name, etc.). Finally, the tool creates 
+  a 'clean' Wide Dataset containing only samples in columns and features in rows.
+
+--------------------------------------------------------------------------------
+
+**INPUT**
+
+**Example -  Wide Format Input Dataset**
+
+  +---------+-----------+---------+---------+-----+
+  | rowID   | m/z ratio | sample1 | sample2 | ... |
+  +=========+===========+=========+=========+=====+
+  | 1       | 8.845     | 20      | 10      | ... |
+  +---------+-----------+---------+---------+-----+
+  | 2       | 0.258     | 22      | 30      | ... |
+  +---------+-----------+---------+---------+-----+
+  | 3       | 10.54     | 27      | 2       | ... |
+  +---------+-----------+---------+---------+-----+
+  | 4       | 8.594     | 17      | 8       | ... |
+  +---------+-----------+---------+---------+-----+
+  | ...     | ...       | ...     | ...     | ... |
+  +---------+-----------+---------+---------+-----+
+
+    **NOTE:** The input dataset has features in rows and samples in columns. Any descriptor columns that are present will be used to populate the Annotation File. 
+
+**Unique FeatureID**
+
+  If the Input Dataset has a column with unique FeatureIDs, the user can specify the name of this column. If the Input Dataset does not have a column with unique FeatureIDs, the tool will create a numeric one.
+
+**Prefix**
+
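+  The user can add a prefix to the tool-generated unique FeatureID, if desired. Example: If met is input then the unique FeatureID column will consist of met\_ followed by a number. If the Input Dataset already has a unique FeatureID column whose values are ONLY numbers, a prefix can likewise be prepended to those numeric FeatureIDs, with an underbar in between.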
+
+**Sample Columns**
+
+  Numbers (1-based) of the columns in the Input Dataset that contain sample data, separated by commas with no spaces (e.g. 2,3,4). All columns not specified as samples will be used to populate the Annotation File.
+
+--------------------------------------------------------------------------------
+
+**OUTPUT**
+
+
+**A Wide Dataset containing the FeatureID column and all columns selected as samples**
+
+  +------------+---------+---------+---------+-----+
+  | FeatureID  | sample1 | sample2 | sample3 | ... |
+  +============+=========+=========+=========+=====+
+  | met_1      | 10      | 20      | 10      | ... |
+  +------------+---------+---------+---------+-----+
+  | met_2      | 5       | 22      | 30      | ... |
+  +------------+---------+---------+---------+-----+
+  | met_3      | 30      | 27      | 2       | ... |
+  +------------+---------+---------+---------+-----+
+  | met_4      | 32      | 17      | 8       | ... |
+  +------------+---------+---------+---------+-----+
+  | ...        | ...     | ...     | ...     | ... |
+  +------------+---------+---------+---------+-----+
+
+  In the above example, *met* was input for Prefix
+
+
+**A Design Dataset template containing a column called SampleID with the column headers from the input dataset that were chosen as samples**
+
+  +----------+---------+
+  | SampleID |         |
+  +==========+=========+
+  | sample1  |         |
+  +----------+---------+
+  | sample2  |         |
+  +----------+---------+
+  | sample3  |         |
+  +----------+---------+
+  | sample4  |         |
+  +----------+---------+
+  | ...      |         |
+  +----------+---------+
+
+
+**An Annotation Dataset containing the unique FeatureID column and any non-sample descriptor columns**
+
+  +-------------+------------+-----+
+  | FeatureID   | m/z ratio  | ... |
+  +=============+============+=====+
+  | FeatureID_1 | 8.845      | ... |
+  +-------------+------------+-----+
+  | FeatureID_2 | 0.258      | ... |
+  +-------------+------------+-----+
+  | FeatureID_3 | 10.54      | ... |
+  +-------------+------------+-----+
+  | FeatureID_4 | 8.594      | ... |
+  +-------------+------------+-----+
+  | ...         | ...        | ... |
+  +-------------+------------+-----+
+
+
+  ]]>
+  </help>
+  <citations>
+    <!-- BibTeX separates individual authors with " and "; a comma-separated list is not split into separate names. -->
+    <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
+    author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
+    title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
+    journal = {BMC Bioinformatics},
+    year = {in press}
+    }</citation>
+    <citation type="bibtex">@article{garcia2010paintomics,
+    title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
+    author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
+    journal={Bioinformatics},
+    volume={27},
+    number={1},
+    pages={137--139},
+    year={2010},
+    publisher={Oxford University Press}
+    }</citation>
+  </citations>
+</tool>