diff Marea/marea_cluster.xml @ 1:9e63d5f02d62 draft

Uploaded
author bimib
date Wed, 07 Nov 2018 07:07:46 -0500
parents 23ac9cf12788
children 3b3d0e5d0802
line wrap: on
line diff
--- a/Marea/marea_cluster.xml	Tue Nov 06 03:16:21 2018 -0500
+++ b/Marea/marea_cluster.xml	Wed Nov 07 07:07:46 2018 -0500
@@ -1,12 +1,12 @@
 <tool id="MaREA_cluester" name="MaREA cluster analysis">
     <description>of Reaction Activity Scores</description>
+    <macros>
+        <import>marea_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
     <requirements>
-        <requirement type="package">pandas</requirement>
         <requirement type="package">scikit-learn</requirement>
-        <requirement type="package">scipy</requirement>
         <requirement type="package">matplotlib</requirement>
-        <requirement type="package">cobrapy</requirement>
-        <requirement type="package">python-libsbml</requirement>
     </requirements>
     <command>
         <![CDATA[
@@ -32,27 +32,23 @@
     </command>
     <inputs>
         <conditional name="cond_rule">
-            <param name="rules_selector" type="select" label="Gene-Protein-Reaction rules:">
-                <option value="HMRcore" selected="true">HMRcore rules</option>
-                <option value="Recon">Recon 2.2 rules</option>
-                <option value="Custom">Custom rules</option>
-            </param>
+            <expand macro="options" />
             <when value="Custom">
-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules"/>
+                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />
             </when>
         </conditional>
-        <param name="input" type="data" format="tabular, csv, tsv" label="RNAseq of all samples"/>
-        <param name="name" type="text" label="Output name prefix" value = "dataset"/>
-        <param name="k_min" type="integer" size="20" value="3" min="2" max="30" label="min number of clusters (k) to be tested (k-means)"/>
-        <param name="k_max" type="integer" size="20" value="3" min="2" max="30" label="max number of clusters (k) to be tested (k-means)"/>
-        <param name="None" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="if NO is selected (A and NaN) is solved as (NaN)"/>
+        <param name="input" argument="--data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
+        <param name="name" argument="--name" type="text" label="Output name prefix" value="dataset" />
+        <param name="k_min" argument="--k_min" type="integer" size="20" value="3" min="2" max="30" label="Min number of clusters (k) to be tested (k-means)"/>
+        <param name="k_max" argument="--k_max" type="integer" size="20" value="3" min="2" max="30" label="Max number of clusters (k) to be tested (k-means)"/>
+        <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="If NO is selected, (A and NaN) is solved as (NaN)" />
 	<conditional name="cond_hier">
-            <param name="hier" type="select" label="Produce dendrogram (hierarchical clustering):">
+            <param name="hier" argument="--cond_hier" type="select" label="Produce dendrogram (hierarchical clustering):">
                 <option value="no" selected="true">no</option>
                 <option value="yes">yes</option>
             </param>
             <when value="yes">
-                <param name="linkage" type="select" label="Linkage type:">
+                <param name="linkage" argument="--linkage" type="select" label="Linkage type:">
                     <option value="single" selected="true">Single: minimum distance between all observations of two sets</option>
                     <option value="complete">Complete: maximum distance between all observations of two sets</option>
                     <option value="average">Average: average distance between all observations of two sets</option>
@@ -60,38 +56,80 @@
             </when>
         </conditional>
     </inputs>
+
     <outputs>
-        <data format="txt" name="log" label="Log"/>
+        <data format="txt" name="log" label="Log" />
         <data format="pdf" name="dendrogram" label="$name dendrogram">
             <filter>cond_hier['hier'] == 'yes'</filter>
         </data>
-        <data format="pdf" name="elbow" label="$name elbow evaluation method"/>
+        <data format="pdf" name="elbow" label="$name elbow evaluation method" />
         <collection name="cluster_out" type="list" label="Clusters $k_min - $k_max">
-            <discover_datasets pattern="__name_and_ext__" directory="cluster_out"/>
+            <discover_datasets pattern="__name_and_ext__" directory="cluster_out" />
         </collection>
     </outputs>
+
     <help>
+<![CDATA[
+
+What it does
+-------------
+
+This tool performs cluster analysis of RNA-seq dataset(s) based on Graudenzi et al. "`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.
+
+Accepted files are:
+    1) For "Recon 2.2 rules" or "HMRcore rules" options: RNA-seq dataset. The user can specify an output name prefix (e.g. "K=3 *dataset*" and "K=4 *MyDataset*");
+    2) For "Custom rules" option: custom rules dataset, custom map (.svg) and RNA-seq dataset. The user can specify an output name prefix (e.g. "K=3 *dataset*" and "K=4 *MyDataset*").
+
+Optional files:
+    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:
+
+        * (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
+        * .csv file specifying for each reaction ID (column 1) the corresponding GPR rule (column 2).
+    - custom svg map. Graphical elements must have the same IDs as the reactions. See HmrCore svg map for an example.
 
-.. class:: warningmark
+The tool generates:
+    1) Clusters n1 - n2 (n1 and n2 refer to min and max number of clusters): class-files (as many files as the chosen different number of clusters k to be tested) specifying the class/condition each sample belongs to;
+    2) Log: a log file (.txt);
+    3) *dataset* elbow evaluation method: diagram (.pdf) of elbow evaluation method;
+    4) *dataset* dendrogram (optional): dendrogram (.pdf) if the user chooses to produce a dendrogram (hierarchical clustering).
+
+RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.
+
+
+Example input
+-------------
 
-This tool expects input datasets consisting of tab-delimited columns.
+**RNA-seq dataset**:						
+
+@DATASET_EXEMPLE@
+
+**Custom Rules Dataset**:
+
+@CUSTOM_RULES_EXEMPLE@
+
+**Custom Map**:
+
+*see the generated HMRcore .svg map for example*
+
+
 
 .. class:: infomark
 
-**TIP:** If your data is not TAB delimited, use *Text Manipulation > Convert delimiters to TAB*
+**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
+
+.. class:: warningmark
+
+If the dendrogram is too densely populated, individual paths and labels may not be clearly readable.
+
+@REFERENCE@
 
+.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724
+.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj
+
+
+]]>
     </help>
+    <expand macro="citations" />
 </tool>
 	
 	
-	
-	
-	
-	
-	
-	
-	
-	
-	
-	
-