Mercurial > repos > iuc > dimet_bivariate_analysis
diff dimet_bivariate_analysis.xml @ 0:89e637c99bed draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit 30fe10acdf65c6917856a0eae21dc91abd2f609f
author | iuc |
---|---|
date | Thu, 15 Feb 2024 12:53:48 +0000 |
parents | |
children | f070b08ff139 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dimet_bivariate_analysis.xml Thu Feb 15 12:53:48 2024 +0000 @@ -0,0 +1,275 @@ +<tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05"> + <description> + Computation of the correlation of MDV profiles, or the metabolite time course profiles (by DIMet) + </description> + <!-- Tokens defined here feed the tool id and name above (@EXECUTABLE@, @TOOL_LABEL@); @TOOL_VERSION@ and @VERSION_SUFFIX@ are not defined in this file and presumably come from the imported macros.xml (confirm). --> <macros> + <token name="@TOOL_LABEL@">bivariate analysis</token> + <token name="@EXECUTABLE@">bivariate_analysis</token> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <!-- Command: runs "python -m dimet" with a config path under $__new_file_path__ and a series of "++key=value" overrides; at most one "++analysis.dataset.*" measures key is added, chosen by data_input_selector and only when the matching file parameter is set. The @INIT_*@ and @REMOVE_CONFIG@ tokens are not defined in this file (presumably macros.xml; confirm what they stage and clean up). --> <command detect_errors="exit_code"><![CDATA[ + @INIT_CONFIG@ + @INIT_BI_ANALYSIS@ + @INIT_BIVAR_COMPARISONS@ + HYDRA_FULL_ERROR=1 python -m dimet + -cp '$__new_file_path__/config' + '++hydra.run.dir=bivariate_analysis' + '++figure_path=figures' + '++table_path=tables' + '++analysis={ + dataset:{ + _target_: dimet.data.DatasetConfig, + name: "I am a synthetic data example" + }, + method:{ + _target_: dimet.method.BivariateAnalysisConfig, + label: "bivariate analysis", + name: "Computation of the correlation of MDV profiles, or the metabolite time course profiles" + }, + label: bivariate-analysis-example2 + }' + '++analysis.conditions=${conditions}' + '++analysis.dataset.label=' + '++analysis.method.correction_method=${correction_method}' + '++analysis.method.impute_values=${impute_values}' + '++analysis.method.conditions_metabolite_time_profiles=${conditions_metabolite_time_profiles}' + '++analysis.method.timepoints_MDV_comparison=${timepoints_MDV_comparison}' + '++analysis.method.conditions_MDV_comparison=${conditions_MDV_comparison}' + '++analysis.method.output_include_gmean_arr_columns=true' + '++analysis.dataset.subfolder=' + '++analysis.dataset.conditions=${conditions}' + #if $metadata_path: + '++analysis.dataset.metadata=metadata' + #end if + #if str( $data_input.data_input_selector ) == "abundance": + #if $data_input.abundance_file: + '++analysis.dataset.abundances=abundance' + #end if 
+ #elif str( $data_input.data_input_selector ) == "mean_enrichment": + #if $data_input.me_or_frac_contrib_file: + '++analysis.dataset.mean_enrichment=me_or_frac_contrib' + #end if + #elif str( $data_input.data_input_selector ) == "isotop_prop": + #if $data_input.isotop_prop_file: + '++analysis.dataset.isotopologue_proportions=isotop_prop' + #end if + #else + #if $data_input.isotop_abs_file: + '++analysis.dataset.isotopologues=isotop_abs' + #end if + #end if + @REMOVE_CONFIG@ + ]]></command> + <!-- Inputs are supplied entirely by macros expanded here; the parameter definitions are not visible in this file. --> <inputs> + <expand macro="input_parameters_bivar_analysis"/> + <expand macro="plot_factor_list"/> + <expand macro="correction_method"/> + </inputs> + + <!-- Output: a list collection named "report" populated from the files discovered in the "tables" directory, each registered as tabular and named via the __designation__ pattern. --> <outputs> + <collection name="report" type="list"> + <discover_datasets pattern="__designation__" directory="tables" format="tabular"/> + </collection> + </outputs> + <!-- Single test: isotopologue proportions input with conditions Control and L-Cycloserine and fdr_bh correction; expects 8 Pearson result tables in the "report" collection (4 named for the cell compartment and 4 for med). --> <tests> + <test> + <param name="data_input_selector" value="isotop_prop" /> + <param name="isotop_prop_file" ftype="tabular" value="CorrectedIsotopologues_5.csv"/> + <param name="metadata_path" ftype="tabular" value="example5_metadata.csv"/> + <param name="correction_method" value="fdr_bh"/> + <repeat name="plot_factor_list"> + <param name="condition" value="Control"/> + </repeat> + <repeat name="plot_factor_list"> + <param name="condition" value="L-Cycloserine"/> + </repeat> + <output_collection name="report" type="list" count="8"> + <element file="isotop_prop--cell--MDV-Control-L-Cycloserine--T0-pearson.tsv" name="isotop_prop--cell--MDV-Control-L-Cycloserine--T0-pearson.tsv" ftype="tabular"/> + <element file="isotop_prop--cell--MDV-Control-L-Cycloserine--T2h-pearson.tsv" name="isotop_prop--cell--MDV-Control-L-Cycloserine--T2h-pearson.tsv" ftype="tabular"/> + <element file="isotop_prop--cell--MDV-T2h-T0--Control-pearson.tsv" name="isotop_prop--cell--MDV-T2h-T0--Control-pearson.tsv" ftype="tabular"/> + <element file="isotop_prop--cell--MDV-T2h-T0--L-Cycloserine-pearson.tsv" name="isotop_prop--cell--MDV-T2h-T0--L-Cycloserine-pearson.tsv" ftype="tabular"/> + 
<element file="isotop_prop--med--MDV-Control-L-Cycloserine--T0-pearson.tsv" name="isotop_prop--med--MDV-Control-L-Cycloserine--T0-pearson.tsv" ftype="tabular"/> + <element file="isotop_prop--med--MDV-Control-L-Cycloserine--T2h-pearson.tsv" name="isotop_prop--med--MDV-Control-L-Cycloserine--T2h-pearson.tsv" ftype="tabular"/> + <element file="isotop_prop--med--MDV-T2h-T0--Control-pearson.tsv" name="isotop_prop--med--MDV-T2h-T0--Control-pearson.tsv" ftype="tabular"/> + <element file="isotop_prop--med--MDV-T2h-T0--L-Cycloserine-pearson.tsv" name="isotop_prop--med--MDV-T2h-T0--L-Cycloserine-pearson.tsv" ftype="tabular"/> + </output_collection> + </test> + </tests> + <help><![CDATA[ + +This module is part of DIMet: Computation of the correlation of entire MDV profiles, or the metabolite time course profiles (https://pypi.org/project/DIMet/). + +DIMet bi-variate analysis performs the comparison of entire MDV profiles, with the user provided isotopologue proportions data. Moreover, when total abundances and/or mean enrichment are provided, the comparison of the metabolite time-course profiles is also computed. Specifically, three types of bi-variate comparisons are performed automatically: + + +- MDV profile comparison between two conditions + +- MDV profile comparison between two consecutive time-points + +- Metabolite (total abundances and/or mean enrichment) time course profiles comparison between two conditions + + +For all these three types of bi-variate comparison, the statistical test that is applied is the Pearson's correlation test. + + +To note, MDV (Mass Distribution Vector) are obtained automatically by the tool, using the isotopologue proportions. + + + + **Input data files** + +This tool requires (at max.) 4 tab-delimited .csv files as inputs. There are two types of files: + +- The measures' (or quantifications') files, that can be of 3 types. + +- The metadata, a unique file with the description of the samples in your measures' files. This is compulsory. 
+ +For running DIMet @EXECUTABLE@ you need **at least one file** of measures: + +- The **isotopologue proportions** file + +- The total **abundances** (of the metabolites) file + +- The mean **enrichment** or labelled fractional contributions + + +and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**. + + +**Measures files** + +The measures files must be organized as matrices: + +- The first column must contain Metabolite IDs that are unique (not repeated) within the file. + +- The rest of the columns correspond to the samples + +- The rows correspond to the metabolites + +- The values must be tab separated, with the first row containing the sample/column labels. + +See the following examples of measures files: + + +Example - Metabolites **abundances**: + + =============== ================== ================== ================== ================== ================== ================== + ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** + =============== ================== ================== ================== ================== ================== ================== + 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956 + 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051 + Glc6P 2310 2142 2683 1683 012532068 1252172 + Gly3P 399298 991656565 525195 6365231 89451625 4952651963 + IsoCit 0 0 0 84915613 856236 954651610 + =============== ================== ================== ================== ================== ================== ================== + +Example - mean **enrichment** or labeled fractional contributions: + + =============== ================== ================== ================== ================== ================== ================== + ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** + =============== ================== ================== ================== 
================== ================== ================== + 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9 + 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68 + Glc6P 0.06 0.66 2683 0.06 2068 2172 + Gly3P 0.06 0.06 0.06 1 5 3 + IsoCit 0.06 1 0.49 0.36 6 10 + =============== ================== ================== ================== ================== ================== ================== + + + +Example - **Isotopologue proportions**: + + =============== ================== ================== ================== ================== ================== ================== + ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** + =============== ================== ================== ================== ================== ================== ================== + 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12 + 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12 + 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743 + 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017 + 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063 + 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263 + =============== ================== ================== ================== ================== ================== ================== + + + +**Metadata File Information** + +Provide a tab-separated file that has the names of the samples in the first column and one header row. 
+Column names must be exactly in this order: + + name_to_plot + condition + timepoint + timenum + compartment + original_name + + +Example **Metadata File**: + + + ==================== =============== ============= ============ ================ ================= + **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name** + ==================== =============== ============= ============ ================ ================= + Control_cell_T0-1 Control T0 0 cell MCF001089_TD01 + Control_cell_T0-2 Control T0 0 cell MCF001089_TD02 + Control_cell_T0-3 Control T0 0 cell MCF001089_TD03 + Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04 + Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05 + Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06 + Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07 + Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08 + Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01 + Control_med_T24-1 Control T24 24 med MCF001090_TD02 + Control_med_T24-2 Control T24 24 med MCF001090_TD03 + Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04 + Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05 + Control_med_T0-1 Control T0 0 med MCF001090_TD06 + Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07 + Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08 + ==================== =============== ============= ============ ================ ================= + + +The column **original_name** must contain the names of the samples as given in your data. + +The column **name_to_plot** must contain the names as you want them to be displayed (or set them identical to original_name if you prefer). Meaningful names +are the better choice, since they are used to display the results. + +The column **timenum** must contain only the numeric part of the timepoint, for example 2,0, 10, 100 (that is, without letters such as "T", "t", "s", "h", +or any other symbol). Make sure these time numbers are in the same units (but do not write the units here!). 
+ +The column **compartment** is an abbreviation, coined by you, for the compartments. It is used in the result file names: the longer the +compartment names are, the longer the output file names! Please pick short and clear abbreviations to fill this column. + + +**Running the analysis** + +You can specify how the analysis is executed with the following parameters: + +- **datatypes** : the measures type(s) that you want to run. + +- **conditions**: the two conditions to be compared in the bi-variate analysis. If 3 or more conditions are set, the tool automatically performs all the 1-to-1 condition comparisons. If you only have one condition in your data, select the condition, and see the Note at the end of this section. + +- **correction_method** : one of the methods for multiple testing correction available in the statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html). + + +**Note**: the tool automatically performs the comparison of *MDV profile between two consecutive time-points*. All the time-points are processed. + + +Usage hints are displayed next to the parameters to guide you. + +For more information about the implemented statistical tests, please visit: https://github.com/cbib/DIMet/wiki/2-Statistical-tests + +The output files are explained in https://github.com/cbib/DIMet/wiki/3-Output + +**Available data for testing** + +You can test our tool with the data from our manuscript https://zenodo.org/records/10579862 (the relevant +files are located in the subfolders inside the data folder). +You can also use the minimal data examples from https://zenodo.org/records/10579891 + + ]]> + </help> + <expand macro="citations" /> +</tool> \ No newline at end of file