diff high_dim_visu.xml @ 4:8e17c31c536a draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_high_dimension_visualization commit 1282ac9de7c926ab251f88afb2453f52c8b14200
author artbio
date Thu, 11 Jul 2019 12:31:28 -0400
parents 8e44c9e18a56
children 569334568afa
line wrap: on
line diff
--- a/high_dim_visu.xml	Thu Jun 27 06:17:08 2019 -0400
+++ b/high_dim_visu.xml	Thu Jul 11 12:31:28 2019 -0400
@@ -1,12 +1,12 @@
-<tool id="high_dimensions_visualisation" name="Generate PCA, tSNE and HCPC" version="0.9.3">
+<tool id="high_dimensions_visualisation" name="Generate PCA, tSNE and HCPC" version="0.9.4">
     <description>from highly dimensional expression data</description>
     <requirements>
-        <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
-        <requirement type="package" version="1.39=r3.3.2_0">r-factominer</requirement>
-        <requirement type="package" version="1.0.5=r3.3.2_0">r-factoextra</requirement>
-        <requirement type="package" version="0.13=r3.3.2_0">r-rtsne</requirement>
-        <requirement type="package" version="2.2.1=r3.3.2_0">r-ggplot2</requirement>
-        <requirement type="package" version="0.4.1=r3.3.2_0">r-ggfortify</requirement>
+        <requirement type="package" version="1.6.2=r35h6115d3f_0">r-optparse</requirement>
+        <requirement type="package" version="1.42=r35h6115d3f_0">r-factominer</requirement>
+        <requirement type="package" version="1.0.5">r-factoextra</requirement>
+        <requirement type="package" version="0.15=r351he1b5a44_0">r-rtsne</requirement>
+        <requirement type="package" version="0.4.7=r351h6115d3f_0">r-ggfortify</requirement>
+        <requirement type="package" version="1.1.9=r351h0357c0b_0">r-clusterr</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" level="fatal" description="Tool exception" />
@@ -53,13 +53,21 @@
                 --HCPC_max '$visualisation.HCPC_max'
                 --HCPC_clusterCA '$visualisation.HCPC_clusterCA'
                 --HCPC_kk '$visualisation.HCPC_kk'
+                #if $visualisation.res_clustering == "yes":
+                    --HCPC_clust '$HCPC_clust'
+                #end if 
             #end if
             
             #if $visualisation.visu_choice == "PCA":
                 --PCA_npc '$visualisation.PCA_npc'
+                --PCA_x_axis '$visualisation.PCA_x_axis'
+                --PCA_y_axis '$visualisation.PCA_y_axis'
             #end if
             
-           
+            #if $visualisation.visu_choice == "HCPC" and $factor_condition.factor_choice == "Yes":
+                --mutual_info '$mutual_info'
+            #end if            
+
             --pdf_out '$pdf_out'
             
 ]]></command>
@@ -111,10 +119,10 @@
 					<option value="TRUE">Yes</option>
 					<option value="FALSE" selected="true">False</option>
 				</param>
-				<param name="Rtsne_normalize"  type="select" label="Normalisation of data"
-				             help="Should variables (gene expressions) be normalized internally prior to distance calculations? " > 
-					<option value="TRUE" selected="true">Yes</option>
-					<option value="FALSE">False</option>
+				<param name="Rtsne_normalize"  type="select" label="Normalisation of data"	
+				             help="Should variables (gene expressions) be normalized internally prior to distance calculations? " > 	
+					<option value="TRUE" selected="true">Yes</option>	
+					<option value="FALSE">False</option>	
 				</param>
                 <param name="Rtsne_perplexity" value="10.0" type="float" label="perplexity (t-SNE)" help="should be less than ((nbr observations)-1)/3" /> 
                 <param name="Rtsne_theta" value="1.0" type="float" label="theta (t-SNE)"/>
@@ -154,9 +162,15 @@
 				</param>
 				 <param name="HCPC_kk" value="-1" type="text" label="kk, Number of clusters used in a Kmeans preprocessing "
                        help="No k-means consolidation is done if a kk value is provided (default=-1)" />
+                <param label="Return HCPC clustering table" name="res_clustering" type="select">
+                    <option value="no" selected="True">No</option>
+                    <option value="yes">Yes</option>
+                </param>
             </when>
             <when value="PCA">
             	  <param name="PCA_npc" value="5" type="integer" label="Number of principal components to keep" help="The number of dimensions which are kept for PCA analysis (default=5)" />
+                  <param name="PCA_x_axis" value="1" type="integer" label="First principal component to plot" help="X axis for PCA plot (default=1)" />
+                  <param name="PCA_y_axis" value="2" type="integer" label="Second principal component to plot" help="Y axis for PCA plot (default=2)" />
             </when>
         </conditional>
             <param label="Return scatter plot table coordinates" name="coord" type="select">
@@ -170,6 +184,12 @@
         <data name="table_coordinates" format="tabular" label="Scatter plot coordinates from ${visualisation.visu_choice} of ${on_string}" >
             <filter>coord == 'yes'</filter>
         </data>
+        <data name="mutual_info" format="txt" label="External validation of clustering from ${visualisation.visu_choice} of ${on_string}" >
+            <filter>visualisation['visu_choice'] == 'HCPC' and factor_condition['factor_choice'] == 'Yes'</filter>
+        </data>
+        <data name="HCPC_clust" format="tabular" label="Clustering table from ${visualisation.visu_choice} of ${on_string}" >
+            <filter>visualisation['visu_choice'] == 'HCPC' and visualisation['res_clustering'] == 'yes'</filter>
+        </data>
     </outputs>
     <tests>
         <!-- test PCA -->
@@ -187,6 +207,17 @@
             <param name="factor_choice" value="No" />
             <output name="pdf_out" file="pca.nolabels.pdf" ftype="pdf"/>
         </test>
+        <!-- test PCA PC2 vs PC3 -->
+        <test>
+            <param name="input" value="cpm_input.tsv" ftype="txt"/>
+            <param name="labels" value="no" />
+            <param name="visu_choice" value="PCA" />
+            <param name="factor_choice" value="No" />
+            <param name="PCA_x_axis" value="2" />
+            <param name="PCA_y_axis" value="3" />
+            <output name="pdf_out" file="pca.2vs3.pdf" ftype="pdf"/>
+        </test>
+
         <!-- test Coordinates tables on PCA -->
         <test>
             <param name="input" value="cpm_input.tsv" ftype="txt"/>
@@ -213,7 +244,16 @@
             <param name="visu_choice" value="PCA" />
             <param name="factor_choice" value="Yes" />
             <param name="factor" value="2-lev_factor.tsv" ftype="txt"/>
-            <output name="pdf_out" file="pca.nolabels.2-lev-factor.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="pca.nolabels.2-lev-factor.pdf" compare="sim_size" ftype="pdf"/>
+        </test>
+        <!-- test numerical factor contrasting on PCA -->
+        <test>
+            <param name="input" value="cpm_input.tsv" ftype="txt"/>
+            <param name="labels" value="no" />
+            <param name="visu_choice" value="PCA" />
+            <param name="factor_choice" value="Yes" />
+            <param name="factor" value="numeric_factor.tsv" ftype="txt"/>
+            <output name="pdf_out" file="pca.nolabels.numerical-factor.pdf" compare="sim_size" ftype="pdf"/>
         </test>
         <test>
             <param name="input" value="cpm_input.tsv" ftype="txt"/>
@@ -221,7 +261,7 @@
             <param name="visu_choice" value="PCA" />
             <param name="factor_choice" value="Yes" />
             <param name="factor" value="shuffled_factor.tsv" ftype="txt"/>
-            <output name="pdf_out" file="pca.nolabels.factors.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="pca.nolabels.factors.pdf" compare="sim_size" ftype="pdf"/>
         </test>
         <!-- test HCPC -->
         <test>
@@ -230,7 +270,7 @@
             <param name="visu_choice" value="HCPC" />
             <param name="HCPC_npc" value="5"/>
             <param name="HCPC_ncluster" value="-1"/>
-            <output name="pdf_out" file="hcpc.labels.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="hcpc.labels.pdf" compare="sim_size" ftype="pdf"/>
         </test>
         <!-- test factor contrasting on HCPC -->
         <test>
@@ -239,9 +279,12 @@
             <param name="visu_choice" value="HCPC" />
             <param name="HCPC_npc" value="5"/>
             <param name="HCPC_ncluster" value="-1"/>
+            <param name="res_clustering" value="yes"/>
             <param name="factor_choice" value="Yes" />
             <param name="factor" value="factor.tsv" ftype="txt"/>
-            <output name="pdf_out" file="hcpc.nolabels.factor.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="hcpc.nolabels.factor.pdf" compare="sim_size" ftype="pdf"/>
+            <output name="mutual_info" file="hcpc.factor.extval.txt" ftype="txt"/>
+            <output name="HCPC_clust" file="hcpc.clusters.tab" ftype="tabular"/>
         </test>
         <test>
             <param name="input" value="cpm_input.tsv" ftype="txt"/>
@@ -249,7 +292,7 @@
             <param name="HCPC_npc" value="5"/>
             <param name="HCPC_ncluster" value="-1"/>
             <param name="visu_choice" value="HCPC" />
-            <output name="pdf_out" file="hcpc.nolabels.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="hcpc.nolabels.pdf" compare="sim_size" ftype="pdf"/>
         </test>
         <test>
             <param name="input" value="cpm_input.tsv" ftype="txt"/>
@@ -271,7 +314,7 @@
             <param name="HCPC_metric" value="euclidian"/>
             <param name="HCPC_npc" value="4" />
             <param name="HCPC_clusterCA" value="cols" />
-            <output name="pdf_out" file="hcpc-3.labels.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="hcpc-3.labels.pdf" compare="sim_size" ftype="pdf"/>
             <output name="table_coordinates" file="hcpc-3.coord.tab" ftype="tabular"/>
         </test>
         <!-- test t-SNE -->
@@ -282,7 +325,7 @@
             <param name="Rtsne_seed" value="49"/>
             <param name="Rtsne_perplexity" value="10"/>
             <param name="Rtsne_theta" value="1" />
-            <output name="pdf_out" file="tsne.labels.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="tsne.labels.pdf" ftype="pdf" compare="sim_size" delta="500"/>
         </test>
         <test>
             <param name="input" value="cpm_input.tsv" ftype="txt"/>
@@ -291,22 +334,9 @@
             <param name="Rtsne_seed" value="49"/>
             <param name="Rtsne_perplexity" value="10"/>
             <param name="Rtsne_theta" value="1" />
-            <output name="pdf_out" file="tsne.nolabels.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="tsne.nolabels.pdf" ftype="pdf" compare="sim_size" delta="500"/>
         </test>
-       	<test>
-            <param name="input" value="cpm_input.tsv" ftype="txt"/>
-            <param name="labels" value="no" />
-            <param name="visu_choice" value="tSNE" />
-            <param name="coord" value="yes" />
-            <param name="Rtsne_seed" value="42"/>
-            <param name="Rtsne_perplexity" value="5.0"/>
-            <param name="Rtsne_theta" value="1.0" />
-            <param name="Rtsne_dims" value="3" />
-            <param name="Rtsne_exaggeration_factor" value="15.0" />
-            <output name="pdf_out" file="tsne-2.nolabels.pdf" ftype="pdf"/>
-            <output name="table_coordinates" file="tsne-2.coord.tab" ftype="tabular"/>
-        </test>
-        <!-- test factor contrasting on t-SNE -->
+                <!-- test factor contrasting on t-SNE -->
         <test>
             <param name="input" value="cpm_input.tsv" ftype="txt"/>
             <param name="labels" value="yes" />
@@ -316,7 +346,20 @@
             <param name="Rtsne_seed" value="49"/>
             <param name="Rtsne_perplexity" value="10"/>
             <param name="Rtsne_theta" value="1" />
-            <output name="pdf_out" file="tsne.labels.factor.pdf" ftype="pdf"/>
+            <output name="pdf_out" file="tsne.labels.factor.pdf" ftype="pdf" compare="sim_size" delta="500"/>
+        </test>
+       	<test>
+            <param name="input" value="cpm_input.tsv" ftype="txt"/>
+            <param name="labels" value="no" />
+            <param name="visu_choice" value="tSNE" />
+            <param name="Rtsne_seed" value="49" />
+            <param name="coord" value="yes" />
+            <param name="Rtsne_dims" value="3" />
+            <param name="Rtsne_perplexity" value="10"/>
+            <param name="Rtsne_theta" value="1" />
+            <param name="Rtsne_normalize" value="FALSE" />
+            <output name="pdf_out" file="tsne-2.nolabels.pdf" ftype="pdf" compare="sim_size" delta="500"/>
+            <output name="table_coordinates" file="tsne-2.coord.tab" ftype="tabular" compare="sim_size" delta="500"/>
         </test>
     </tests>
     <help>
@@ -339,19 +382,31 @@
 
 * Principal Components Analysis (PCA)
 * Hierarchical Clustering of Principal Components (HCPC)
-* t-distributed Stochastic Neighbor Embedding
+* t-distributed Stochastic Neighbor Embedding (t-SNE)
 
 The tool returns in addition the table of the coordinates of the observations (eg RNAseq libraries)
 in the low dim space, which can be used for post-treatment or to further adjust the provided visualisation.
 
+If HCPC is used, this tool can also return the clustering table. It contains two columns, with one row per observation:
+
+* Observation labels
+* Cluster labels
+
 ** Contrast data with a factor **
 The tool offers the possibility to colour data points according to the levels of a factor.
 To use the option "Factor to contrast data", provide a tabulated-separated, two-column table
 with first column containing the cell/data library identifiers (same identifiers as those
 provided as column headers in the input data table) and second column containing the corresponding
-factor levels value. This table does not need to be sorted in the same order as in the data
+factor levels value (if these values are numerical, a quantitative colour palette is used).
+This table does not need to be sorted in the same order as in the data
 table. It may also contain more identifiers than those provided in the data table.
 
+If HCPC visualisation and contrasting data are chosen, an additional text file is returned. It contains
+several metrics of external validation of clustering, which assess the capacity of HCPC clustering
+to recreate the classes contained in the factor data file. If the contrasting factor is quantitative,
+the file will be empty.
+
+
     </help>
     <citations>
         <citation type="bibtex">@Article{,
@@ -381,5 +436,14 @@
             url = {https://github.com/jkrijthe/Rtsne},
             }
         </citation>
+        <citation type="bibtex">@Manual{,
+            title = {{ClusterR}: Gaussian Mixture Models, K-Means, Mini-Batch-Kmeans,
+            K-Medoids and Affinity Propagation Clustering},
+            author = {Lampros Mouselimis},
+            year = {2019},
+            note = {R package version 1.1.9},
+            url = {https://github.com/mlampros/ClusterR},
+            }
+        </citation>
   </citations>
 </tool>