diff all_by_all_correlation.xml @ 2:2c218a253d56 draft default tip

"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
author malex
date Thu, 29 Jul 2021 20:48:10 +0000
parents ec9ee8edb84d
children
line wrap: on
line diff
--- a/all_by_all_correlation.xml	Fri Jun 18 20:23:19 2021 +0000
+++ b/all_by_all_correlation.xml	Thu Jul 29 20:48:10 2021 +0000
@@ -28,7 +28,7 @@
     <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset" help="Select the Gene Expression Wide Dataset from your history"/>
     <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the Column in your Gene Expression Wide Dataset that contains unique identifiers."/>
     <conditional name="geneAnnotation">
-      <param name="useGeneAnnot" type="select" label="Use Annotation File?" help="You can chose to input a file containing gene annotation information (e.g. gene names, identifiers, etc.) for labeling output files.">
+      <param name="useGeneAnnot" type="select" label="Use Annotation File?" help="You can choose to input a file containing gene annotation information (e.g. gene names, identifiers, etc.) for labeling output files.">
         <option value="No">No</option>
         <option value="Yes">Yes</option>
       </param>
@@ -36,11 +36,12 @@
         <param name="geneAnnot" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history"/>
         <param name="geneAnnotName" type="text" value="" label="Gene Labels" help="Name of the column in the Gene Expression Annotation File to use for labeling output files."/>
       </when>
+      <when value="No" />
     </conditional>
     <param name="metDataset" type="data" format="tabular" label="Metabolite Wide Dataset" help="Select the Metabolite Wide Dataset from your history"/>
     <param name="metId" type="text" size="30" value="" label="Unique Metabolite FeatureID" help="Name of the column in your Metabolite Wide Dataset that contains unique identifiers."/>
     <conditional name="metAnnotation">
-      <param name="useMetAnnot" type="select" label="Use Annotation File?" help="You can chose to input a file containing metabolite annotation information (e.g. metabolite names, identifiers, etc.) for labeling output files.">
+      <param name="useMetAnnot" type="select" label="Use Annotation File?" help="You can choose to input a file containing metabolite annotation information (e.g. metabolite names, identifiers, etc.) for labeling output files.">
         <option value="No">No</option>
         <option value="Yes">Yes</option>
       </param>
@@ -48,6 +49,7 @@
         <param name="metAnnot" type="data" format="tabular" label="Metabolite Annotation File" help="Select the Metabolite Annotation File from your history"/>
         <param name="metAnnotName" type="text" value="" label="Metabolite Labels" help="Name of the column in the Metabolite Annotation File to use for labeling output files"/>
       </when>
+      <when value="No" />
     </conditional>
     <param name="method" type="select" label="Correlation Method" help="Select a correlation method.">
       <option value="pearson">Pearson</option>
@@ -63,28 +65,35 @@
   </outputs>
   <tests>
     <test>
-      <param name="geneDataset" value="gene_wide_dataset_01fhl.tsv"/>
+      <param name="geneDataset" value="gene_wide_dataset.tsv"/>
       <param name="geneId" value="UniqueID"/>
-      <param name="geneAnnot" value="gene_annotation_file_01fhl.tsv"/>
+      <param name="geneAnnot" value="gene_annotation.tsv"/>
       <param name="geneName" value="GeneName"/>
-      <param name="metDataset" value="metabolite_wide_dataset_01fhl.tsv"/>
+      <param name="metDataset" value="metabolite_wide_dataset.tsv"/>
       <param name="metId" value="UniqueID"/>
-      <param name="metAnnot" value="metabolite_annotation_file_01fhl.tsv"/>
+      <param name="metAnnot" value="metabolite_annotation.tsv"/>
       <param name="metName" value="MetName"/>
-      <output name="output" file="correlation_file_01fhl.tsv"/>
-      <output name="corMat" file="correlation_matrix_01fhl.tsv"/>
-      <output name="figure" file="correlation_figure_01fhl.pdf"/>
+      <output name="output" file="correlation.tsv"  compare="diff" lines_diff="10000"/>
+      <output name="corMat" file="correlation_matrix.tsv" compare="diff" lines_diff="10000"/>
+      <output name="figure" file="correlation_figure.pdf" compare="sim_size"  delta="1000000"/>
     </test>
   </tests>
   <help><![CDATA[
 
 **Tool Description**
 
-  The tool performs a correlation analysis between genes (in a gene expression wide dataset) and metabolites (in a metabolite wide dataset) 
-  to generate a table of correlation coefficients. P-values for the correlation coefficients are calculated by simulating individual gene and 
-  metabolite datasets 1000 times using a normal distribution with means and standard deviations generated from the data. Sample size reflects 
-  the input datasets. Correlations are calculated on the simulated data. Correlations must be higher/lower than 95% of the randomly simulated 
-  values to be considered significant. 
+The tool performs a correlation analysis between genes (Gene Expression Wide Dataset) and
+metabolites (Metabolite Wide Dataset) to generate
+a table of correlation coefficients. P-values for the correlation coefficients are calculated by
+simulating gene and metabolite datasets 1000 times using the mean and standard deviation of both
+datasets.
+
+The tool outputs 2 TSV files and a PDF figure. The 'correlation file' contains gene-metabolite
+correlation coefficients with P-values
+less than the user-specified threshold. The tool also outputs the results in matrix format, the
+'correlation matrix file'. The 'correlation
+figure' is a network representation of the top 500 gene-metabolite correlations based on the
+absolute value of the correlation coefficients.
 
 --------------------------------------------------------------------------------
 
@@ -128,19 +137,19 @@
 
   (5) **Annotation Files**  The user can provide (optional) Annotation Files for the Gene Expression and/or Metabolite Datasets to label the results for easier readability. The user must provide the name of the column with the desired feature name (e.g. Gene Symbol).
 
-    +-----------+------------+-------------+-----+
-    | UniqueID  | ENSEMBL_ID | Gene_Symbol | ... |
-    +===========+============+=============+=====+
-    | gene_1    | ENS...     | one         | ... |
-    +-----------+------------+-------------+-----+
-    | gene_2    | ENS...     | two         | ... |
-    +-----------+------------+-------------+-----+
-    | gene_3    | ENS...     | three       | ... |
-    +-----------+------------+-------------+-----+
-    | gene_4    | ENS...     | four        | ... |
-    +-----------+------------+-------------+-----+
-    | ...       | ...        | ...         | ... |
-    +-----------+------------+-------------+-----+
+    +-----------+--------------+-------------+-----+
+    | UniqueID  | ENSEMBL_ID   | Gene_Symbol | ... |
+    +===========+==============+=============+=====+
+    | gene_1    | ENS...       | one         | ... |
+    +-----------+--------------+-------------+-----+
+    | gene_2    | ENS...       | two         | ... |
+    +-----------+--------------+-------------+-----+
+    | gene_3    | ENS...       | three       | ... |
+    +-----------+--------------+-------------+-----+
+    | gene_4    | ENS...       | four        | ... |
+    +-----------+--------------+-------------+-----+
+    | ...       | ...          | ...         | ... |
+    +-----------+--------------+-------------+-----+
 
   (6) **Correlation method**  Select the correlation coefficient to be computed from the list. Pearson, kendall, or spearman are available.
 
@@ -152,7 +161,7 @@
 
   The user will obtain three outputs from the Gene - Metabolite Correlation Tool:
 
-  (1) **Correlation File.** A file sorted by the absolute values of the correlation coeficient and including the P-value. The file contains only the correlation coefficients where the associated P-values is less than a user-specified value (default = 0.05).
+  (1) **Correlation File.** A file sorted by the absolute values of the correlation coefficient and including the P-value. The file contains only the correlation coefficients where the associated P-value is less than a user-specified value (default = 0.05).
 
     +--------+------------+-------------+-----------+
     | Gene   | Metabolite | Correlation | (p-value) |
@@ -184,33 +193,22 @@
     | ...    | ...   | ...   | ...   | ...   | ... |
     +--------+-------+-------+-------+-------+-----+
 
-  (3) **Correlation Figure.** Network representation of the top 500 gene-metabolite correlations based on the absolute value of the correlation coeficients. Maximum number of correlations in the netork is 500.
+  (3) **Correlation Figure.** Network representation of the top 500 gene-metabolite correlations based on the absolute value of the correlation coefficients. The maximum number of correlations in the network is 500.
 
   ]]>
   </help>
   <citations>
-    <citation type="bibtex">@article{dejean2013mixomics,
-    title={mixOmics: Omics data integration project},
-    author={Dejean, Sebastien and Gonzalez, Ignacio and L{\^e} Cao, Kim-Anh and Monget, Pierre and Coquery, J and Yao, F and Liquet, B and Rohart, F},
-    journal={R package},
-    year={2013}
-    }</citation>
     <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
     author = {Alexander S. Kirpich, Miguel Ibarra, Oleksandr Moskalenko, Justin M. Fear, Joseph Gerken, Xinlei Mi, Ali Ashrafi, Alison M. Morse, Lauren M. McIntyre},
     title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
     journal = {BMC Bioinformatics},
-    year = {in press}
+    year = {2018}
     }</citation>
-    <citation type="bibtex">
-    @article{garcia2010paintomics,
-    title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
-    author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
-    journal={Bioinformatics},
-    volume={27},
-    number={1},
-    pages={137--139},
-    year={2010},
-    publisher={Oxford University Press}
+    <citation type="bibtex">@article{Mor2021GaitGM,
+    title={GAIT-GM integrative cross-omics analyses reveal cholinergic defects in a C. elegans model of Parkinson's disease},
+    author={Mor, DE and Huertas, F and Morse, AM and Kaletsky, R and Murphy, CT and Kalia, V and Miller, GW and Moskalenko, O and Conesa, A and McIntyre, LM},
+    journal={BMC Genomics},
+    year={submitted},
     }</citation>
   </citations>
 </tool>