view split_wide_dataset.xml @ 1:ec9ee8edb84d draft

Initial upload of 21.6.10 release.
author malex
date Fri, 18 Jun 2021 20:23:19 +0000
parents
children 2c218a253d56
line wrap: on
line source

<tool id="secimtools_split_input_wide_dataset" name="Create: Design, Wide, and Annotation datasets" version="@WRAPPER_VERSION@">
  <description>from an Input wide dataset</description>
  <macros>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <!-- Declares exit code 1 as a fatal error, reported to the user as "Repeated Unique IDs".
       NOTE(review): the <command> element below also sets detect_errors="exit_code";
       confirm which of the two error-handling declarations actually takes effect. -->
  <stdio> 
    <exit_code range="1" level="fatal" description="Repeated Unique IDs"/> 
  </stdio> 
  <!-- Cheetah template that builds the split_wide_dataset.py command line.
       Every substituted value is wrapped in single quotes so that dataset paths and
       free-text parameters containing spaces or shell metacharacters reach the script
       as exactly one argument.
       Branching: when the user declares an existing unique FeatureID column, -id carries
       its name (plus -p2 with a prefix when those IDs are purely numeric); otherwise -p
       carries the prefix used for tool-generated IDs.
       -s receives the sample column list; -w, -d and -a receive the three output paths. -->
  <command detect_errors="exit_code"><![CDATA[
  split_wide_dataset.py
    -i='$input'
    #if $cond_UniqID.hasUniqID == "y":
      -id='$cond_UniqID.uniqID'
      #if $cond_UniqID.only_numbers.hasOnlyNumbers == "y":
        -p2='$cond_UniqID.only_numbers.prefix2'
      #end if
    #else:
      -p='$cond_UniqID.prefix'
    #end if
    -s='$samples'
    -w='$wide'
    -d='$design'
    -a='$annot'
  ]]></command>
  <!-- Tool form definition.
       input        : the tabular wide dataset to split.
       cond_UniqID  : whether the dataset already has a unique FeatureID column.
                      y: ask for the column name, then (nested "only_numbers") whether the
                         IDs are purely numeric, in which case a prefix is requested.
                      n: ask for an optional prefix for tool-generated IDs.
       samples      : comma-separated, 1-based numbers of the sample columns. -->
  <inputs>
    <param name="input" type="data" format="tabular" label="Input Wide Dataset" help="Input tab separated Wide Dataset. If input is not tab separated, see TIP below."/>
    <conditional name="cond_UniqID">
      <param name="hasUniqID" type="select" display="radio" label="Does your Wide Dataset have a unique FeatureID column?">
        <option value="y">Yes</option>
        <option value="n">No</option>
      </param>
      <when value="y">
        <param name="uniqID" type="text" size="30" value="" label="Unique FeatureID" help="Name of the column in your Wide Dataset that contains the unique FeatureIDs."/>
        <conditional name="only_numbers">
          <param name="hasOnlyNumbers" type="select" display="radio" label="Are your unique FeatureIDs ONLY Numbers?">
            <option value="y">Yes</option>
            <option value="n">No</option>
          </param>
          <when value="y">
            <param name="prefix2" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="This prefix will be prepended to your NUMERIC unique FeatureID, with an underbar in between."/>
          </when>
          <!-- Explicit empty case: non-numeric IDs need no extra parameters. Every select
               option should have a matching <when> so the conditional is fully specified. -->
          <when value="n"/>
        </conditional>
      </when>
      <when value="n">
        <param name="prefix" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="Unique IDs are required. You can input a prefix for the tool to use when creating a unique identifier (Optional). If you chose not to use a prefix, the tool-created uniqueID will be an underbar followed by a number."/>
      </when>
    </conditional>
    <param name="samples" label="Sample Columns" type="text" help="Enter the numbers (1-based) of the columns in your Wide Dataset that contain sample data. E.g. if your sample data is in columns 2-4 then enter '2,3,4' (no spaces). Columns that are not selected are treated as descriptor annotation columns. NOTE: annotation columns are expected to ALL be left of the data columns. "/>
  </inputs>
  <!-- Three tabular result datasets; their paths are handed to the script through the
       -w (wide), -d (design) and -a (annot) options of the command template. -->
  <outputs>
    <data name="wide"
          format="tabular"
          label="${tool.name} on ${on_string}: Wide Dataset"/>
    <data name="design"
          format="tabular"
          label="${tool.name} on ${on_string}: Design Dataset"/>
    <data name="annot"
          format="tabular"
          label="${tool.name} on ${on_string}: Annotation Dataset"/>
  </outputs>
  <!-- Functional tests. Both exercise the "no existing unique FeatureID column" branch, so
       the cond_UniqID selector is set to "n" explicitly; without it the selector falls back
       to its first option ("y") and the "prefix" value below would never be used. -->
  <tests>
    <!-- Gene dataset: IDs are generated with the "Gene" prefix. -->
    <test>
      <param name="input" value="gene_input_dataset_01fhl.tsv"/>
      <conditional name="cond_UniqID">
        <param name="hasUniqID" value="n"/>
        <param name="prefix" value="Gene"/>
      </conditional>
      <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
      <output name="wide" file="gene_wide_dataset_01fhl.tsv"/>
      <output name="design" file="gene_design_file_01fhl.tsv"/>
      <output name="annot" file="gene_annot_file_01fhl.tsv"/>
    </test>
    <!-- Metabolite dataset: IDs are generated with the "Met" prefix. -->
    <test>
      <param name="input" value="metabolite_input_dataset_01fhl.tsv"/>
      <conditional name="cond_UniqID">
        <param name="hasUniqID" value="n"/>
        <param name="prefix" value="Met"/>
      </conditional>
      <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
      <output name="wide" file="metabolite_wide_dataset_01fhl.tsv"/>
      <output name="design" file="met_design_file_01fhl.tsv"/>
      <output name="annot" file="met_annot_file_01fhl.tsv"/>
    </test>
  </tests>
  <help><![CDATA[

**Tool Description**

  This tool can be used to perform two tasks: 1) convert a single file that contains
  data and annotation in wide format to two files in wide format, one with data and
  one with annotation; and 2) create a design file template that will be compatible with
  the wide data and annotation files. The tool will automatically check for a column 
  containing unique feature identifiers (FeatureIDs). If no unique FeatureID is located 
  the tool will generate one. The user can specify a prefix for the unique FeatureID 
  (e.g. 'met' for metabolite data). The Design Dataset is a template with a single
  column called 'SampleID' that contains the names of the samples, exactly matching
  the sample column headers in the input Wide Dataset. This Design Dataset can be modified by
  the user to include metadata columns. The tool also creates a separate Annotation 
  Dataset containing the unique FeatureIDs (user-specified or generated by the tool) 
  and any non-sample descriptor columns that were present in the input wide dataset 
  (such as m/z ratio, retention time, compound name, etc.). Finally, the tool creates 
  a 'clean' Wide Dataset containing only samples in columns and features in rows.

--------------------------------------------------------------------------------

**INPUT**

**Example -  Wide Format Input Dataset**

  +---------+-----------+---------+---------+-----+
  | rowID   | m/z ratio | sample1 | sample2 | ... |
  +=========+===========+=========+=========+=====+
  | 1       | 8.845     | 20      | 10      | ... |
  +---------+-----------+---------+---------+-----+
  | 2       | 0.258     | 22      | 30      | ... |
  +---------+-----------+---------+---------+-----+
  | 3       | 10.54     | 27      | 2       | ... |
  +---------+-----------+---------+---------+-----+
  | 4       | 8.594     | 17      | 8       | ... |
  +---------+-----------+---------+---------+-----+
  | ...     | ...       | ...     | ...     | ... |
  +---------+-----------+---------+---------+-----+

    **NOTE:** The input dataset has features in rows and samples in columns. Any descriptor columns that are present will be used to populate the Annotation File. 

**Unique FeatureID**

  If the Input Dataset has a column with unique FeatureIDs, the user can specify the name of this column. If the Input Dataset does not have a column with unique FeatureIDs, the tool will create a numeric one.

**Prefix**

  The user can add a prefix to the tool-generated unique FeatureID, if desired. Example: if *met* is entered as the prefix, the unique FeatureID column will consist of met\_ followed by a number.

**Sample Columns**

  Numbers (1-based, comma-separated with no spaces, e.g. 2,3,4) of the columns in the Input Dataset that contain sample data. All columns not specified as samples will be used to populate the Annotation File.

--------------------------------------------------------------------------------

**OUTPUT**


**A Wide Dataset containing the FeatureID column and all columns selected as samples**

  +------------+---------+---------+---------+-----+
  | FeatureID  | sample1 | sample2 | sample3 | ... |
  +============+=========+=========+=========+=====+
  | met_1      | 10      | 20      | 10      | ... |
  +------------+---------+---------+---------+-----+
  | met_2      | 5       | 22      | 30      | ... |
  +------------+---------+---------+---------+-----+
  | met_3      | 30      | 27      | 2       | ... |
  +------------+---------+---------+---------+-----+
  | met_4      | 32      | 17      | 8       | ... |
  +------------+---------+---------+---------+-----+
  | ...        | ...     | ...     | ...     | ... |
  +------------+---------+---------+---------+-----+

  In the above example, *met* was input for Prefix


**A Design Dataset template containing a column called SampleID with the column headers from the input dataset that were chosen as samples**

  +----------+---------+
  | SampleID |         |
  +==========+=========+
  | sample1  |         |
  +----------+---------+
  | sample2  |         |
  +----------+---------+
  | sample3  |         |
  +----------+---------+
  | sample4  |         |
  +----------+---------+
  | ...      |         |
  +----------+---------+


**An Annotation Dataset containing the unique FeatureID column and any non-sample descriptor columns**

  +-------------+------------+-----+
  | FeatureID   | m/z ratio  | ... |
  +=============+============+=====+
  | FeatureID_1 | 8.845      | ... |
  +-------------+------------+-----+
  | FeatureID_2 | 0.258      | ... |
  +-------------+------------+-----+
  | FeatureID_3 | 10.54      | ... |
  +-------------+------------+-----+
  | FeatureID_4 | 8.594      | ... |
  +-------------+------------+-----+
  | ...         | ...        | ... |
  +-------------+------------+-----+


  ]]>
  </help>
  <!-- BibTeX references shown with the tool. BibTeX requires individual author names to be
       joined with " and "; a comma-separated list is parsed as a single malformed name.
       NOTE(review): the SECIMTools entry still says "in press"; replace with the published
       year, volume and pages once confirmed against the journal record. -->
  <citations>
    <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
    author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
    title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
    journal = {BMC Bioinformatics},
    year = {in press}
    }</citation>
    <citation type="bibtex">@article{garcia2010paintomics,
    title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
    author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
    journal={Bioinformatics},
    volume={27},
    number={1},
    pages={137--139},
    year={2010},
    publisher={Oxford University Press}
    }</citation>
  </citations>
</tool>