diff Marea/marea_cluster.xml @ 16:c71ac0bb12de draft

Uploaded
author bimib
date Tue, 01 Oct 2019 06:05:13 -0400
parents 68a5f2db55b9
children 640f303d0cec
line wrap: on
line diff
--- a/Marea/marea_cluster.xml	Tue Oct 01 06:03:12 2019 -0400
+++ b/Marea/marea_cluster.xml	Tue Oct 01 06:05:13 2019 -0400
@@ -1,148 +1,92 @@
-<tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.0">
-    <description>of Reaction Activity Scores</description>
-    <macros>
-        <import>marea_macros.xml</import>
-    </macros>
-    <requirements>
-        <requirement type="package" version="0.23.0">pandas</requirement>
-        <requirement type="package" version="1.1.0">scipy</requirement>
-        <requirement type="package" version="0.10.1">cobra</requirement>
-        <requirement type="package" version="0.19.1">scikit-learn</requirement>
-        <requirement type="package" version="2.2.2">matplotlib</requirement>
-    </requirements>
-    <command detect_errors="exit_code">
-        <![CDATA[
-      	python $__tool_directory__/marea_cluster.py
-        --rules_selector $cond_rule.rules_selector
-        #if $cond_rule.rules_selector == 'Custom':
-            --custom ${cond_rule.Custom_rules}
-        #end if
-        --cond_hier $cond_hier.hier
-        #if $cond_hier.hier == 'yes':
-            --linkage ${cond_hier.linkage}
-            --dendro $dendrogram
-        #end if
-        --k_max $k_max
-        --k_min $k_min
-        --data $input
-        --name $name
-      	--none $None
-      	--tool_dir $__tool_directory__
-        --out_log $log
-        --elbow $elbow
-        ]]>
-    </command>
-    <inputs>
-        <conditional name="cond_rule">
-            <expand macro="options"/>
-            <when value="Custom">
-                <param name="Custom_rules" type="data" format="tabular, csv, tsv, xml" label="Custom rules" />
-            </when>
-            <when value="HMRcore">
-            </when>
-            <when value="Recon">
-            </when>
-        </conditional>
-        <param name="input" argument="--data" type="data" format="tabular, csv, tsv" label="RNAseq of all samples" />
-        <param name="name" argument="--name" type="text" label="Output name prefix" value="dataset" />
-        <param name="k_min" argument="--k_min" type="integer" size="20" value="3" min="2" max="30" label="Min number of clusters (k) to be tested (k-means)"/>
-        <param name="k_max" argument="--k_max" type="integer" size="20" value="3" min="2" max="30" label="Max number of clusters (k) to be tested (k-means)"/>
-        <param name="None" argument="--none" type="boolean" truevalue="true" falsevalue="false" checked="true" label="(A and NaN) solved as (A)?" help="If NO is selected, (A and NaN) is solved as (NaN)" />
-	<conditional name="cond_hier">
-            <param name="hier" argument="--cond_hier" type="select" label="Produce dendrogram (hierarchical clustering):">
-                <option value="no" selected="true">no</option>
-                <option value="yes">yes</option>
-            </param>
-            <when value="yes">
-                <param name="linkage" argument="--linkage" type="select" label="Linkage type:">
-                    <option value="single" selected="true">Single: minimum distance between all observations of two sets</option>
-                    <option value="complete">Complete: maximum distance between all observations of two sets</option>
-                    <option value="average">Average: average distance between all observations of two sets</option>
-                </param>
-            </when>
-            <when value="no">
-            </when>
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="txt" name="log" label="Log" />
-        <data format="pdf" name="dendrogram" label="$name dendrogram">
-            <filter>cond_hier['hier'] == 'yes'</filter>
-        </data>
-        <data format="pdf" name="elbow" label="$name elbow evaluation method" />
-        <collection name="cluster_out" type="list" label="Clusters $k_min - $k_max">
-            <discover_datasets pattern="__name_and_ext__" directory="cluster_out" />
-        </collection>
-    </outputs>
-    <tests>
-        <test>
-            <param name="k_min" value="4"/>
-            <output name="log" file="log.txt"/>
-        </test>
-    </tests>
-    <help>
-<![CDATA[
-
-What it does
--------------
-
-This tool performs cluster analysis of RNA-seq dataset(s) based of Graudenzi et al."`MaREA`_: Metabolic feature extraction, enrichment and visualization of RNAseq data" bioRxiv (2018): 248724.
-
-Accepted files are:
-    1) For "Recon 2.2 rules" or "HMRcore rules" options: RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*");
-    2) For "Custom rules" option: custom rules dataset, custom map (.svg) and RNA-seq dataset. The user can specify a label of output prefix (as e.g. "K=3 *dataset*" and "K=4 *MyDataset*").
-
-Optional files:
-    - custom GPR (Gene-Protein-Reaction) rules. Two accepted formats:
-
-        * (Cobra Toolbox and CobraPy compliant) xml of metabolic model;
-        * .csv file specifyig for each reaction ID (column 1) the corresponding GPR rule (column 2).
-    - custom svg map. Graphical elements must have the same IDs of reactions. See HmrCore svg map for an example.
-
-The tool generates:
-    1) Clusters n1 - n2 (n1 and n2 refer to min and max number of clusters): class-files (as many files as the chosen different number of clusters k to be tested) specifying the class/condition each sample belongs to;
-    2) Log: a log file (.txt);
-    3) *dataset* elbow evaluation method: diagram (.pdf) of elbow evaluation method;
-    4) *dataset* dendrogram (optional): dendrogram (.pdf) if the user chooses to produce a dendrogram (hierachical clustering).
-
-RNA-seq datasets format: tab-separated text files, reporting the expression level (e.g., TPM, RPKM, ...) of each gene (row) for a given sample (column). Header: sample ID.
-
-
-Example input
--------------
-
-**RNA-seq dataset**:						
-
-@DATASET_EXEMPLE1@
-
-**Custom Rules Dataset**:
-
-@CUSTOM_RULES_EXEMPLE@
-
-**Custom Map**:
-
-*see the generated HMRcore .svg map for example*
-
-
-
-.. class:: infomark
-
-**TIP**: If your data is not TAB delimited, use `Convert delimiters to TAB`_.
-
-.. class:: warningmark
-
-If dendrogram it's too populated, each path and label can be not clear.
-
-@REFERENCE@
-
-.. _MaREA: https://www.biorxiv.org/content/early/2018/01/16/248724
-.. _Convert delimiters to TAB: https://usegalaxy.org/?tool_id=Convert+characters1&version=1.0.0&__identifer=6t22teyofhj
-
-
-]]>
-    </help>
-    <expand macro="citations" />
-</tool>
-	
-	
+<tool id="MaREA_cluester" name="MaREA cluster analysis" version="1.0.1">
+    <description>of Reaction Activity Scores - 1.0.1</description>
+    <macros>
+        <import>marea_macros.xml</import>
+    </macros>
+    <requirements><!-- Packages, pinned by version, that this tool declares as its dependencies. -->
+        <requirement type="package" version="0.23.0">pandas</requirement>
+        <requirement type="package" version="1.1.0">scipy</requirement>
+        <requirement type="package" version="0.10.1">cobra</requirement>
+        <requirement type="package" version="0.21.3">scikit-learn</requirement>
+        <requirement type="package" version="2.2.2">matplotlib</requirement>
+        <requirement type="package" version="1.17">numpy</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+        <![CDATA[
+        ## Forward the chosen algorithm first; each branch below adds only the options declared for that algorithm.
+        python '$__tool_directory__/marea_cluster.py'
+        --input '$input'
+        --tool_dir '$__tool_directory__'
+        --out_log '$log'
+        --cluster_type ${data.clust_type}
+        #if $data.clust_type == 'kmeans':
+            --k_min ${data.k_min}
+            --k_max ${data.k_max}
+            --elbow ${data.elbow}
+            --silhouette ${data.silhouette}
+        #elif $data.clust_type == 'dbscan':
+            #if str($data.dbscan_advanced.advanced) == 'true':
+                --eps ${data.dbscan_advanced.eps}
+                --min_samples ${data.dbscan_advanced.min_samples}
+            #end if
+        #elif $data.clust_type == 'hierarchy':
+            --k_min ${data.k_min}
+            --k_max ${data.k_max}
+        #end if
+        ]]>
+    </command>
+    <inputs>
+        <param name="input" argument="--input" type="data" format="tabular,csv,tsv" label="RNAseq of all samples" />
+        <!-- Clustering algorithm selector: each <when> branch exposes only the parameters of that algorithm. -->
+        <conditional name="data">
+            <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
+                <option value="kmeans" selected="true">KMeans</option>
+                <option value="dbscan">DBSCAN</option>
+                <option value="hierarchy">Agglomerative Hierarchical</option>
+            </param>
+            <when value="kmeans">
+                <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
+                <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
+                <param name="elbow" argument="--elbow" type="boolean" checked="true" label="Draw the elbow plot from k-min to k-max"/>
+                <param name="silhouette" argument="--silhouette" type="boolean" checked="true" label="Draw the Silhouette plot from k-min to k-max"/>
+            </when>
+            <when value="dbscan">
+                <!-- Yes/No test parameter is a select (options apply to selects only); "No" is the default and hides both overrides. -->
+                <conditional name="dbscan_advanced">
+                    <param name="advanced" type="select" label="Want to use custom params for DBSCAN? (if not optimal values will be used)">
+                        <option value="true">Yes</option>
+                        <option value="false" selected="true">No</option>
+                    </param>
+                    <when value="false"></when>
+                    <when value="true">
+                        <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
+                        <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
+                    </when>
+                </conditional>
+            </when>
+            <when value="hierarchy">
+                <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
+                <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs><!-- A plain-text log plus a list collection discovered from the "clustering" directory. -->
+        <data name="log" format="txt" label="${tool.name} - Log" />
+        <collection name="results" type="list" label="${tool.name} - Results">
+            <discover_datasets directory="clustering" pattern="__name_and_ext__" />
+        </collection>
+    </outputs>
+    <help>
+<![CDATA[
+
+What it does
+-------------
+
+This tool clusters the samples of the input dataset with KMeans, DBSCAN or agglomerative hierarchical clustering; it produces a log file and a collection of result files.
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
+	
+