Mercurial > repos > malex > gait_gm
diff all_by_all_correlation.xml @ 1:ec9ee8edb84d draft
Initial upload of 21.6.10 release.
author | malex |
---|---|
date | Fri, 18 Jun 2021 20:23:19 +0000 |
parents | |
children | 2c218a253d56 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/all_by_all_correlation.xml Fri Jun 18 20:23:19 2021 +0000 @@ -0,0 +1,216 @@ +<tool id="allByAllCorr" name="Metabolite - Gene Correlation" version="@WRAPPER_VERSION@"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <command detect_errors="exit_code"><![CDATA[ + all_by_all_correlation.py + -g=$geneDataset + -gid=$geneId + #if $geneAnnotation.useGeneAnnot == "Yes": + -ga=$geneAnnotation.geneAnnot + -gn=$geneAnnotation.geneAnnotName + #end if + -m=$metDataset + -mid=$metId + #if $metAnnotation.useMetAnnot == "Yes": + -ma=$metAnnotation.metAnnot + -mn=$metAnnotation.metAnnotName + #end if + -me=$method + -t=$threshold + -o=$output + -c=$corMat + -f=$figure + ]]></command> + <inputs> + <param name="geneDataset" type="data" format="tabular" label="Gene Expression Wide Dataset" help="Select the Gene Expression Wide Dataset from your history"/> + <param name="geneId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the Column in your Gene Expression Wide Dataset that contains unique identifiers."/> + <conditional name="geneAnnotation"> + <param name="useGeneAnnot" type="select" label="Use Annotation File?" help="You can chose to input a file containing gene annotation information (e.g. gene names, identifiers, etc.) for labeling output files."> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <param name="geneAnnot" type="data" format="tabular" label="Gene Expression Annotation File" help="Select the Gene Expression Annotation File from your history"/> + <param name="geneAnnotName" type="text" value="" label="Gene Labels" help="Name of the column in the Gene Expression Annotation File to use for labeling output files."/> + </when> + </conditional> + <param name="metDataset" type="data" format="tabular" label="Metabolite Wide Dataset" help="Select the Metabolite Wide Dataset from your history"/> + <param name="metId" type="text" size="30" value="" label="Unique Metabolite FeatureID" help="Name of the column in your Metabolite Wide Dataset that contains unique identifiers."/> + <conditional name="metAnnotation"> + <param name="useMetAnnot" type="select" label="Use Annotation File?" help="You can chose to input a file containing metabolite annotation information (e.g. metabolite names, identifiers, etc.) for labeling output files."> + <option value="No">No</option> + <option value="Yes">Yes</option> + </param> + <when value="Yes"> + <param name="metAnnot" type="data" format="tabular" label="Metabolite Annotation File" help="Select the Metabolite Annotation File from your history"/> + <param name="metAnnotName" type="text" value="" label="Metabolite Labels" help="Name of the column in the Metabolite Annotation File to use for labeling output files"/> + </when> + </conditional> + <param name="method" type="select" label="Correlation Method" help="Select a correlation method."> + <option value="pearson">Pearson</option> + <option value="spearman">Spearman</option> + <option value="kendall">Kendall</option> + </param> + <param name="threshold" type="text" value="0.05" label="P-Value threshold." help="Default: 0.05"/> + </inputs> + <outputs> + <data format="tabular" name="output" label="${tool.name} on ${on_string}: Correlation File"/> + <data format="tabular" name="corMat" label="${tool.name} on ${on_string}: Correlation Matrix"/> + <data format="pdf" name="figure" label="${tool.name} on ${on_string}: Correlation Figure"/> + </outputs> + <tests> + <test> + <param name="geneDataset" value="gene_wide_dataset_01fhl.tsv"/> + <param name="geneId" value="UniqueID"/> + <param name="geneAnnot" value="gene_annotation_file_01fhl.tsv"/> + <param name="geneName" value="GeneName"/> + <param name="metDataset" value="metabolite_wide_dataset_01fhl.tsv"/> + <param name="metId" value="UniqueID"/> + <param name="metAnnot" value="metabolite_annotation_file_01fhl.tsv"/> + <param name="metName" value="MetName"/> + <output name="output" file="correlation_file_01fhl.tsv"/> + <output name="corMat" file="correlation_matrix_01fhl.tsv"/> + <output name="figure" file="correlation_figure_01fhl.pdf"/> + </test> + </tests> + <help><![CDATA[ + +**Tool Description** + + The tool performs a correlation analysis between genes (in a gene expression wide dataset) and metabolites (in a metabolite wide dataset) + to generate a table of correlation coefficients. P-values for the correlation coefficients are calculated by simulating individual gene and + metabolite datasets 1000 times using a normal distribution with means and standard deviations generated from the data. Sample size reflects + the input datasets. Correlations are calculated on the simulated data. Correlations must be higher/lower than 95% of the randomly simulated + values to be considered significant. + +-------------------------------------------------------------------------------- + +**Input** + + (1) **Gene Expression Dataset** A wide formatted dataset containing measurements for each sample (where samples are in columns): + + +-----------+---------+---------+---------+-----+ + | UniqueID | sample1 | sample2 | sample3 | ... | + +===========+=========+=========+=========+=====+ + | gene_1 | 1.2 | 3.5 | 2.9 | ... | + +-----------+---------+---------+---------+-----+ + | gene_2 | 1.6 | 3.2 | 3.2 | ... | + +-----------+---------+---------+---------+-----+ + | gene_3 | 1.4 | 3.0 | 3.1 | ... | + +-----------+---------+---------+---------+-----+ + | gene_4 | 1.6 | 2.9 | 3.1 | ... | + +-----------+---------+---------+---------+-----+ + | ... | ... | ... | ... | ... | + +-----------+---------+---------+---------+-----+ + + (2) **Unique Gene FeatureID** Name of the column in your Gene Expression Wide Dataset that contains unique gene identifiers. **NOTE:** This identifier must be the gene symbol. + + (3) **Metabolomic Wide Dataset** A wide formatted metabolomic dataset that contains measurements for each sample (where samples are in columns): + + +------------+---------+---------+---------+-----+ + | UniqueID | sample1 | sample2 | sample3 | ... | + +============+=========+=========+=========+=====+ + | met_1 | 10 | 20 | 10 | ... | + +------------+---------+---------+---------+-----+ + | met_2 | 5 | 22 | 30 | ... | + +------------+---------+---------+---------+-----+ + | met_3 | 30 | 27 | 2 | ... | + +------------+---------+---------+---------+-----+ + | met_4 | 32 | 17 | 8 | ... | + +------------+---------+---------+---------+-----+ + | ... | ... | ... | ... | ... | + +------------+---------+---------+---------+-----+ + + (4) **Unique Metabolite FeatureID** Name of the column in your Metabolite Wide Dataset that contains unique metabolite identifiers. + + (5) **Annotation Files** The user can provide (optional) Annotation Files for the Gene Expression and/or Metabolite Datasets to label the results for easier readability. The user must provide the name of the column with the desired feature name (e.g. Gene Symbol). + + +-----------+------------+-------------+-----+ + | UniqueID | ENSEMBL_ID | Gene_Symbol | ... | + +===========+============+=============+=====+ + | gene_1 | ENS... | one | ... | + +-----------+------------+-------------+-----+ + | gene_2 | ENS... | two | ... | + +-----------+------------+-------------+-----+ + | gene_3 | ENS... | three | ... | + +-----------+------------+-------------+-----+ + | gene_4 | ENS... | four | ... | + +-----------+------------+-------------+-----+ + | ... | ... | ... | ... | + +-----------+------------+-------------+-----+ + + (6) **Correlation method** Select the correlation coefficient to be computed from the list. Pearson, kendall, or spearman are available. + + (7) **P-Value threshold** User specified value that limits the data in the resulting 'Correlation File' to only those correlations with P-values less than this value. + +-------------------------------------------------------------------------------- + +**Output** + + The user will obtain three outputs from the Gene - Metabolite Correlation Tool: + + (1) **Correlation File.** A file sorted by the absolute values of the correlation coeficient and including the P-value. The file contains only the correlation coefficients where the associated P-values is less than a user-specified value (default = 0.05). + + +--------+------------+-------------+-----------+ + | Gene | Metabolite | Correlation | (p-value) | + +========+============+=============+===========+ + | gene_1 | met_1 | 0.99 | 0.000 | + +--------+------------+-------------+-----------+ + | gene_2 | met_4 | -0.98 | 0.000 | + +--------+------------+-------------+-----------+ + | gene_3 | met_5 | 0.96 | 0.001 | + +--------+------------+-------------+-----------+ + | gene_4 | met_1 | 0.95 | 0.002 | + +--------+------------+-------------+-----------+ + | ... | ... | ... | ... | + +--------+------------+-------------+-----------+ + + (2) **Correlation Matrix.** Output correlation matrix. + + +--------+-------+-------+-------+-------+-----+ + | Gene | met_1 | met_2 | met_3 | met_4 | ... | + +========+=======+=======+=======+=======+=====+ + | gene_1 | 0.99 | 0.56 | 0.25 | 0.33 | ... | + +--------+-------+-------+-------+-------+-----+ + | gene_2 | -0.57 | 0.63 | -0.14 | 0.01 | ... | + +--------+-------+-------+-------+-------+-----+ + | gene_3 | 0.62 | 0.96 | 0.20 | 0.32 | ... | + +--------+-------+-------+-------+-------+-----+ + | gene_4 | 0.95 | 0.25 | 0.16 | 0.44 | ... | + +--------+-------+-------+-------+-------+-----+ + | ... | ... | ... | ... | ... | ... | + +--------+-------+-------+-------+-------+-----+ + + (3) **Correlation Figure.** Network representation of the top 500 gene-metabolite correlations based on the absolute value of the correlation coeficients. Maximum number of correlations in the netork is 500. + + ]]> + </help> + <citations> + <citation type="bibtex">@article{dejean2013mixomics, + title={mixOmics: Omics data integration project}, + author={Dejean, Sebastien and Gonzalez, Ignacio and L{\^e} Cao, Kim-Anh and Monget, Pierre and Coquery, J and Yao, F and Liquet, B and Rohart, F}, + journal={R package}, + year={2013} + }</citation> + <citation type="bibtex">@ARTICLE{Kirpich17secimtools, + author = {Alexander S. Kirpich, Miguel Ibarra, Oleksandr Moskalenko, Justin M. Fear, Joseph Gerken, Xinlei Mi, Ali Ashrafi, Alison M. Morse, Lauren M. McIntyre}, + title = {SECIMTools: A suite of Metabolomics Data Analysis Tools}, + journal = {BMC Bioinformatics}, + year = {in press} + }</citation> + <citation type="bibtex"> + @article{garcia2010paintomics, + title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data}, + author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana}, + journal={Bioinformatics}, + volume={27}, + number={1}, + pages={137--139}, + year={2010}, + publisher={Oxford University Press} + }</citation> + </citations> +</tool>