view Marea/marea_cluster.xml @ 2:3b3d0e5d0802 draft

Uploaded
author bimib
date Wed, 07 Nov 2018 07:16:37 -0500
parents 9e63d5f02d62
children 02577e65dcca
line wrap: on
line source

<tool id="MaREA_cluester" name="MaREA cluster analysis">
    <description>of Reaction Activity Scores</description>
    <!-- Shared macro definitions. This file expands the "options" and "citations" macros;
         presumably the @DATASET_EXEMPLE@, @CUSTOM_RULES_EXEMPLE@ and @REFERENCE@ tokens used in
         the help text are defined there as well (verify against marea_macros.xml). -->
    <macros>
        <import>marea_macros.xml</import>
    </macros>
    <!-- Packages the tool declares as runtime dependencies.
         NOTE(review): no versions are pinned, so the resolved environment may change over time.
         NOTE(review): COBRApy appears to be distributed under the package name "cobra";
         confirm that "cobrapy" is resolvable by the configured dependency resolver. -->
    <requirements>
        <requirement type="package">pandas</requirement>
        <requirement type="package">scipy</requirement>
        <requirement type="package">cobrapy</requirement>
        <requirement type="package">python-libsbml</requirement>
        <requirement type="package">scikit-learn</requirement>
        <requirement type="package">matplotlib</requirement>
    </requirements>
    <!-- Builds the marea_cluster.py invocation from the tool parameters.
         Path, select and free-text values are wrapped in single quotes so that a
         dataset path or a user-supplied name prefix containing whitespace reaches
         the script as a single argument. Integer and boolean values are passed bare.
         The custom-rules, linkage and dendrogram options are only emitted when the
         matching conditional branch is selected. -->
    <command>
        <![CDATA[
        python '$__tool_directory__/marea_cluster.py'
        --rules_selector '$cond_rule.rules_selector'
        #if $cond_rule.rules_selector == 'Custom':
            --custom '${cond_rule.Custom_rules}'
        #end if
        --cond_hier '$cond_hier.hier'
        #if $cond_hier.hier == 'yes':
            --linkage '${cond_hier.linkage}'
            --dendro '$dendrogram'
        #end if
        --k_max $k_max
        --k_min $k_min
        --data '$input'
        --name '$name'
        --none $None
        --tool_dir '$__tool_directory__'
        --out_log '$log'
        --elbow '$elbow'
        ]]>
    </command>
    <!-- User-facing parameters of the tool form. -->
    <inputs>
        <!-- Rule-set selection; the selector itself is supplied by the "options" macro. -->
        <conditional name="cond_rule">
            <expand macro="options"/>
            <when value="Custom">
                <!-- Only requested when the selector value is "Custom". -->
                <param name="Custom_rules"
                       type="data"
                       format="tabular, csv, tsv, xml"
                       label="Custom rules" />
            </when>
        </conditional>

        <!-- Expression dataset and the prefix used in output labels. -->
        <param name="input"
               argument="--data"
               type="data"
               format="tabular, csv, tsv"
               label="RNAseq of all samples" />
        <param name="name"
               argument="--name"
               type="text"
               label="Output name prefix"
               value="dataset" />

        <!-- Range of k values to test; both bounds default to 3 and are limited to 2..30. -->
        <param name="k_min"
               argument="--k_min"
               type="integer"
               size="20"
               value="3"
               min="2"
               max="30"
               label="Min number of clusters (k) to be tested (k-means)"/>
        <param name="k_max"
               argument="--k_max"
               type="integer"
               size="20"
               value="3"
               min="2"
               max="30"
               label="Max number of clusters (k) to be tested (k-means)"/>

        <!-- Emits the literal string "true" or "false". -->
        <param name="None"
               argument="--none"
               type="boolean"
               truevalue="true"
               falsevalue="false"
               checked="true"
               label="(A and NaN) solved as (A)?"
               help="If NO is selected, (A and NaN) is solved as (NaN)" />

        <!-- Optional hierarchical clustering; the linkage choice appears only for "yes". -->
        <conditional name="cond_hier">
            <param name="hier"
                   argument="--cond_hier"
                   type="select"
                   label="Produce dendrogram (hierarchical clustering):">
                <option value="no" selected="true">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="yes">
                <param name="linkage"
                       argument="--linkage"
                       type="select"
                       label="Linkage type:">
                    <option value="single" selected="true">Single: minimum distance between all observations of two sets</option>
                    <option value="complete">Complete: maximum distance between all observations of two sets</option>
                    <option value="average">Average: average distance between all observations of two sets</option>
                </param>
            </when>
        </conditional>
    </inputs>

    <!-- Datasets declared as results of a run. -->
    <outputs>
        <!-- Plain-text log dataset. -->
        <data name="log" format="txt" label="Log" />
        <!-- Dendrogram PDF; declared only when the "hier" selector is "yes". -->
        <data name="dendrogram" format="pdf" label="$name dendrogram">
            <filter>cond_hier['hier'] == 'yes'</filter>
        </data>
        <!-- Elbow-method PDF. -->
        <data name="elbow" format="pdf" label="$name elbow evaluation method" />
        <!-- List collection whose elements are discovered from files in the
             cluster_out directory using the __name_and_ext__ pattern. -->
        <collection name="cluster_out" type="list" label="Clusters $k_min - $k_max">
            <discover_datasets directory="cluster_out" pattern="__name_and_ext__" />
        </collection>
    </outputs>

    <help>
<![CDATA[

What it does
-------------

This tool performs cluster analysis of RNA-seq dataset(s) based on Graudenzi et al. "`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.

Accepted files are:
    1) For "Recon 2.2 rules" or "HMRcore rules" options: RNA-seq dataset. The user can specify an output name prefix (e.g. "K=3 *dataset*" and "K=4 *MyDataset*");
    2) For "Custom rules" option: custom rules dataset, custom map (.svg) and RNA-seq dataset. The user can specify an output name prefix (e.g. "K=3 *dataset*" and "K=4 *MyDataset*").

Optional files:
    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:

        * (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
        * .csv file specifying for each reaction ID (column 1) the corresponding GPR rule (column 2).

    - custom svg map. Graphical elements must have the same IDs as the reactions. See HmrCore svg map for an example.

The tool generates:
    1) Clusters n1 - n2 (n1 and n2 refer to min and max number of clusters): class-files (as many files as the chosen different number of clusters k to be tested) specifying the class/condition each sample belongs to;
    2) Log: a log file (.txt);
    3) *dataset* elbow evaluation method: diagram (.pdf) of elbow evaluation method;
    4) *dataset* dendrogram (optional): dendrogram (.pdf) if the user chooses to produce a dendrogram (hierarchical clustering).

RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.


Example input
-------------

**RNA-seq dataset**:

@DATASET_EXEMPLE@

**Custom Rules Dataset**:

@CUSTOM_RULES_EXEMPLE@

**Custom Map**:

*see the generated HMRcore .svg map for example*



.. class:: infomark

**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.

.. class:: warningmark

If the dendrogram is too densely populated, individual paths and labels may not be clearly readable.

@REFERENCE@

.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724
.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj


]]>
    </help>
    <expand macro="citations" />
</tool>