view split_wide_dataset.xml @ 2:2c218a253d56 draft default tip

"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
author malex
date Thu, 29 Jul 2021 20:48:10 +0000
parents ec9ee8edb84d
children
line wrap: on
line source

<tool id="secimtools_split_input_wide_dataset" name="Create: Design, Wide, and Annotation datasets" version="@WRAPPER_VERSION@">
  <description>from an input wide dataset</description>
  <!-- Shared definitions live in macros.xml, which is not visible in this file.
       It is presumably where the "requirements" macro expanded below and the
       @WRAPPER_VERSION@ token used in the tool version attribute are defined. -->
  <macros>
    <import>macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <!-- Exit code 1 is declared fatal and reported as "Repeated Unique IDs".
       NOTE(review): the command element also sets detect_errors="exit_code";
       confirm how Galaxy combines that attribute with this stdio block. -->
  <stdio>
    <exit_code range="1" level="fatal" description="Repeated Unique IDs"/>
  </stdio>
  <command detect_errors="exit_code"><![CDATA[
  ## Split one wide input table into wide, design and annotation datasets.
  ## Every substituted value is single-quoted so that dataset paths and
  ## user-entered text (column names, prefixes, column lists) containing
  ## spaces reach the script as a single argument.
  split_wide_dataset.py
    -i='$input'
    #if $cond_UniqID.hasUniqID == "y":
      ## The input already has a unique FeatureID column: pass its name.
      -id='$cond_UniqID.uniqID'
      #if $cond_UniqID.only_numbers.hasOnlyNumbers == "y":
        ## Purely numeric IDs: pass the prefix to put in front of them.
        -p2='$cond_UniqID.only_numbers.prefix2'
      #end if
    #else:
      ## No unique FeatureID column: pass the prefix for generated IDs.
      -p='$cond_UniqID.prefix'
    #end if
    -s='$samples'
    -w='$wide'
    -d='$design'
    -a='$annot'
  ]]></command>
  <inputs>
    <!-- Tab-separated table to split; passed to the script as -i. -->
    <param name="input" type="data" format="tabular" label="Input Wide Dataset" help="Input tab separated Wide Dataset. If input is not tab separated, see TIP below."/>
    <!-- Chooses between an existing unique FeatureID column ("y") and a tool-generated one ("n"). -->
    <conditional name="cond_UniqID">
      <param name="hasUniqID" type="select" display="radio" label="Does your Wide Dataset have a unique FeatureID column?">
        <option value="y">Yes</option>
        <option value="n">No</option>
      </param>
      <when value="y">
        <param name="uniqID" type="text" size="30" value="" label="Unique FeatureID" help="Name of the column in your Wide Dataset that contains the unique FeatureIDs.">
          <!-- On this branch the column name is always passed as -id, so an empty value is rejected up front. -->
          <validator type="empty_field" message="Enter the name of the column that contains the unique FeatureIDs."/>
        </param>
        <!-- Numeric-only IDs may additionally receive a prefix (passed as -p2). -->
        <conditional name="only_numbers">
          <param name="hasOnlyNumbers" type="select" display="radio" label="Are your unique FeatureIDs ONLY Numbers?">
            <option value="y">Yes</option>
            <option value="n">No</option>
          </param>
          <when value="y">
            <param name="prefix2" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="This prefix will be prepended to your NUMERIC unique FeatureID, with an underbar in between."/>
          </when>
          <when value="n" />
        </conditional>
      </when>
      <when value="n">
        <!-- The prefix is optional (see help text), so no validator is attached here. -->
        <param name="prefix" type="text" size="30" value="" label="Prefix to use during generation of unique IDs" help="Unique IDs are required. You can input a prefix for the tool to use when creating a unique identifier (Optional). If you chose not to use a prefix, the tool-created uniqueID will be an underbar followed by a number."/>
      </when>
    </conditional>
    <param name="samples" label="Sample Columns" type="text" help="Enter the numbers (1-based) of the columns in your Wide Dataset that contain sample data. E.g. if your sample data is in columns 2-4 then enter '2,3,4' (no spaces). Columns that are not selected are treated as descriptor annotation columns. NOTE: annotation columns are expected to ALL be left of the data columns. ">
      <!-- The sample column list is always passed as -s, so an empty value is rejected up front. -->
      <validator type="empty_field" message="Enter the 1-based numbers of the sample columns, e.g. 2,3,4."/>
    </param>
  </inputs>
  <outputs>
    <!-- Three tabular datasets; their paths are handed to the script as -w, -d and -a. -->
    <data name="wide"
          format="tabular"
          label="${tool.name} on ${on_string}: Wide Dataset"/>
    <data name="design"
          format="tabular"
          label="${tool.name} on ${on_string}: Design Dataset"/>
    <data name="annot"
          format="tabular"
          label="${tool.name} on ${on_string}: Annotation Dataset"/>
  </outputs>
  <tests>
    <!-- Gene input: "no unique FeatureID column" branch, generated IDs use the prefix "Gene". -->
    <test>
      <param name="input" value="gene_input_dataset.tsv"/>
      <conditional name="cond_UniqID">
        <param name="hasUniqID" value="n"/>
        <param name="prefix" value="Gene"/>
      </conditional>
      <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
      <output name="wide" file="gene_wide_dataset.tsv"/>
      <output name="design" file="gene_design.tsv"/>
      <output name="annot" file="gene_annotation.tsv"/>
    </test>
    <!-- Metabolite input: same branch, generated IDs use the prefix "Met". -->
    <test>
      <param name="input" value="metabolite_input_dataset.tsv"/>
      <conditional name="cond_UniqID">
        <param name="hasUniqID" value="n"/>
        <param name="prefix" value="Met"/>
      </conditional>
      <param name="samples" value="2,3,4,5,6,7,8,9,10,11"/>
      <output name="wide" file="metabolite_wide_dataset.tsv"/>
      <output name="design" file="metabolite_design.tsv"/>
      <output name="annot" file="metabolite_annotation.tsv"/>
    </test>
  </tests>
  <help><![CDATA[

**Tool Description**

  This tool takes a single file containing both feature data (e.g. gene or metabolite expression values) and annotation
  information (e.g. m/z ratio, compound name) and generates the following three files:

  (1) a wide dataset containing a unique row identifier and the expression values,
  (2) a wide annotation file with the unique row identifier and any non-data descriptor columns, and
  (3) a design file with a single column called ‘sampleID’ with the name of the columns containing the expression data.

  If the input dataset does not already contain a column with a unique identifier, the tool will create one.
  The user can specify a prefix for the unique identifier column (e.g. 'met' for metabolite data).  In cases where the input
  dataset contains a numeric identifier, the tool will prepend a user-specified prefix or, if no prefix is specified, an underbar.
  Since the user specifies which columns contain expression values, the resulting wide dataset contains only these data columns
  and the unique row identifier column.  Columns not specified as containing expression values are output into the annotation dataset.
  The resulting design file template contains a single column called ‘sampleID’ that contains the names of the user-specified samples
  in the input data file.  The design file can be modified by the user to include additional metadata columns.

--------------------------------------------------------------------------------

**INPUT**

**Example -  Wide Format Input Dataset**

  +---------+-----------+---------+---------+-----+
  | rowID   | m/z ratio | sample1 | sample2 | ... |
  +=========+===========+=========+=========+=====+
  | 1       | 8.845     | 20      | 10      | ... |
  +---------+-----------+---------+---------+-----+
  | 2       | 0.258     | 22      | 30      | ... |
  +---------+-----------+---------+---------+-----+
  | 3       | 10.54     | 27      | 2       | ... |
  +---------+-----------+---------+---------+-----+
  | 4       | 8.594     | 17      | 8       | ... |
  +---------+-----------+---------+---------+-----+
  | ...     | ...       | ...     | ...     | ... |
  +---------+-----------+---------+---------+-----+

    **NOTE:** The input dataset has features in rows and samples in columns. Any descriptor columns that are present will be used to populate the Annotation File.

**Unique FeatureID**

  If the Input Dataset has a column with unique FeatureIDs, the user can specify the name of this column. If the Input Dataset does not have a column with unique FeatureIDs, the tool will create a numeric one.

**Prefix**

  The user can add a prefix to the tool-generated unique FeatureID, if desired. Example: If met is input then the unique FeatureID column will consist of met\_ followed by a number.

**Sample Columns**

  Numbers (1-based) of the columns in the Input Dataset that contain sample data, separated by commas with no spaces (e.g. 2,3,4). All columns not specified as samples will be used to populate the Annotation File.

--------------------------------------------------------------------------------

**OUTPUT**


**A Wide Dataset containing the FeatureID column and all columns selected as samples**

  +------------+---------+---------+---------+-----+
  | FeatureID  | sample1 | sample2 | sample3 | ... |
  +============+=========+=========+=========+=====+
  | met_1      | 10      | 20      | 10      | ... |
  +------------+---------+---------+---------+-----+
  | met_2      | 5       | 22      | 30      | ... |
  +------------+---------+---------+---------+-----+
  | met_3      | 30      | 27      | 2       | ... |
  +------------+---------+---------+---------+-----+
  | met_4      | 32      | 17      | 8       | ... |
  +------------+---------+---------+---------+-----+
  | ...        | ...     | ...     | ...     | ... |
  +------------+---------+---------+---------+-----+

  In the above example, *met* was input for Prefix


**A Design Dataset template containing a column called sampleID with the column headers from the input dataset that were chosen as samples**

  +----------+---------+
  | sampleID |         |
  +==========+=========+
  | sample1  |         |
  +----------+---------+
  | sample2  |         |
  +----------+---------+
  | sample3  |         |
  +----------+---------+
  | sample4  |         |
  +----------+---------+
  | ...      |         |
  +----------+---------+


**An Annotation Dataset containing the unique FeatureID column and any non-sample descriptor columns**

  +-------------+------------+-----+
  | FeatureID   | m/z ratio  | ... |
  +=============+============+=====+
  | met_1       | 8.845      | ... |
  +-------------+------------+-----+
  | met_2       | 0.258      | ... |
  +-------------+------------+-----+
  | met_3       | 10.54      | ... |
  +-------------+------------+-----+
  | met_4       | 8.594      | ... |
  +-------------+------------+-----+
  | ...         | ...        | ... |
  +-------------+------------+-----+


  ]]>
  </help>
  <citations>
    <!-- SECIMTools suite paper. BibTeX requires authors to be separated by "and";
         a comma-separated list is parsed as one malformed name. -->
    <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
    author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
    title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
    journal = {BMC Bioinformatics},
    volume = {19},
    pages = {151},
    year = {2018},
    doi = {10.1186/s12859-018-2134-1}
    }</citation>
    <!-- PaintOmics paper. -->
    <citation type="bibtex">@article{garcia2010paintomics,
    title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
    author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
    journal={Bioinformatics},
    volume={27},
    number={1},
    pages={137--139},
    year={2010},
    publisher={Oxford University Press}
    }</citation>
  </citations>
</tool>