view dimet_timecourse_analysis.xml @ 7:b1a9eddf73d9 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit 6da96d865a3a557cfa3ad09e1cfa830519e73748
author iuc
date Tue, 06 Aug 2024 17:37:52 +0000
parents e8b6448af300
children
line wrap: on
line source

<tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
    <!-- One-line summary of what the tool does. -->
    <description>
        Differential analysis of tracer metabolomics data comparing consecutive time-points (by DIMet)
    </description>
    <!--
        The TOOL_LABEL and EXECUTABLE tokens defined here fill the matching
        placeholders used in the tool id and name and in the help text.
        Every other placeholder and macro used in this file (version tokens,
        INIT_* snippets, requirements, shared inputs, citations) is not defined
        here and is presumably provided by macros.xml - TODO confirm.
    -->
    <macros>
        <token name="@TOOL_LABEL@">timecourse analysis</token>
        <token name="@EXECUTABLE@">timecourse_analysis</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Command template (Cheetah).
        The INIT_* and REMOVE_CONFIG placeholders are not defined in this file;
        they are expected to come from macros.xml and to prepare the variables
        used below (statistical_test, groups, conditions, impute_values) and
        clean up afterwards - TODO confirm against macros.xml.
        Every Hydra override uses the '++' prefix (add the key, or override it
        when it already exists) so that all arguments behave the same way.
        The inline '++analysis={...}' value is passed verbatim to Hydra; its
        internal whitespace is part of the argument and is left untouched.
    -->
    <command detect_errors="exit_code"><![CDATA[
    @INIT_CONFIG@
    @INIT_DIFF_ANALYSIS@
    @INIT_STAT_TEST@
    @INIT_GROUPS@
    @INIT_CONDITIONS_TIMECOURSE@
    HYDRA_FULL_ERROR=1 python -m dimet
        '++hydra.run.dir=.'
        '++figure_path=figures'
        '++table_path=tables'
        '++analysis={
            dataset:{
                _target_: dimet.data.DatasetConfig,
                name: "I am a synthetic data example"
             },
             method:{
                _target_: dimet.method.TimeCourseAnalysisConfig,
                label: "time_course_analysis",
                name: "Time wise computation of statistical differences"
              },
              label: time_course_analysis-example
         }'
         '++analysis.method.qualityDistanceOverSpan=${qualityDistanceOverSpan}'
         '++analysis.statistical_test=${statistical_test}'
         '++analysis.method.statistical_test=${statistical_test}'
         '++analysis.method.grouping=${groups}'
         '++analysis.method.correction_method=${correction_method}'
         '++analysis.method.disfit_tail_option="auto"'
         '++analysis.method.impute_values=${impute_values}'
         '++analysis.dataset.subfolder='
         '++analysis.dataset.label='
         '++analysis.dataset.conditions=${conditions}'
         ## The metadata table is only declared when a file was supplied.
         #if $metadata_path:
             '++analysis.dataset.metadata=metadata'
         #end if
         ## At most one measures table is declared, chosen by the selector.
         #if str( $data_input.data_input_selector ) == "abundance":
             #if $data_input.abundance_file:
                 '++analysis.dataset.abundances=abundance'
             #end if
         #elif str( $data_input.data_input_selector ) == "mean_enrichment":
             #if $data_input.me_or_frac_contrib_file:
                 '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
             #end if
         #elif str( $data_input.data_input_selector ) == "isotop_prop":
             #if $data_input.isotop_prop_file:
                 '++analysis.dataset.isotopologue_proportions=isotop_prop'
             #end if
         #else
             ## Any other selector value falls back to the absolute isotopologues table.
             #if $data_input.isotop_abs_file:
                 '++analysis.dataset.isotopologues=isotop_abs'
             #end if
         #end if
    @REMOVE_CONFIG@
    ]]></command>
    <inputs>
        <!-- Shared inputs; these macros are not defined in this file (presumably macros.xml). -->
        <expand macro="input_parameters_diff_analysis"/>
        <expand macro="factor_list"/>
        <expand macro="correction_method"/>
        <!-- Tool-specific cutoff, passed to Hydra as analysis.method.qualityDistanceOverSpan. -->
        <param name="qualityDistanceOverSpan"
               type="float"
               value="-0.3"
               min="-1.0"
               max="-0.1"
               label="quality Distance Over Span"
               help="Default value is -0.3."/>
    </inputs>
    <outputs>
        <!--
            The command writes its result tables to the 'tables' directory
            (see the table_path override); each file found there is collected
            as one tabular element of the 'report' list.
        -->
        <collection type="list" name="report">
            <discover_datasets directory="tables" pattern="__designation__" format="tabular"/>
        </collection>
    </outputs>
    <tests>
        <!--
            Abundance table, two conditions (Control, L-Cycloserine), t-test with
            Bonferroni correction. Four result tables are expected: one per
            condition and compartment (cell, med) for the T2h vs T0 comparison.
        -->
        <test>
            <param name="data_input_selector" value="abundance"/>
            <param name="abundance_file" ftype="tabular" value="rawAbundances.csv"/>
            <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/>
            <param name="correction_method" value="bonferroni"/>
            <param name="qualityDistanceOverSpan" value="-0.3"/>
            <param name="statistical_test_type" value="parametric"/>
            <param name="stat_test" value="Tt"/>
            <repeat name="plot_factor_list">
                <param name="condition" value="Control"/>
            </repeat>
            <repeat name="plot_factor_list">
                <param name="condition" value="L-Cycloserine"/>
            </repeat>
            <output_collection name="report" type="list" count="4">
                <element name="abundance--cell-Control-T2h-Control-T0-Tt.tsv" file="abundance--cell-Control-T2h-Control-T0-Tt.tsv" ftype="tabular"/>
                <element name="abundance--cell-L-Cycloserine-T2h-L-Cycloserine-T0-Tt.tsv" file="abundance--cell-L-Cycloserine-T2h-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
                <element name="abundance--med-Control-T2h-Control-T0-Tt.tsv" file="abundance--med-Control-T2h-Control-T0-Tt.tsv" ftype="tabular"/>
                <element name="abundance--med-L-Cycloserine-T2h-L-Cycloserine-T0-Tt.tsv" file="abundance--med-L-Cycloserine-T2h-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).

**Input data files**

This tool performs a time course differential analysis on your time series data.
For illustration see the section **Metadata File Information** which contains several time points.

This time course differential analysis is sequential: within each individual condition, every timepoint is compared with the one preceding it, t_(x+1) vs t_x
(e.g. [Control, 90min] vs [Control, 60min]), for all the timepoints present in the data.
Our tool automatically detects the conditions and timepoints, and automatically organizes the comparisons
(you do not need to set this part yourself, DIMet does it for you).

Note that if you only need to compare specific [condition, timepoint] pairs that are not covered by
our automatic time course analysis, you can use the differential analysis in the pairwise mode instead.


This tool requires at most 5 tab-delimited .csv files as inputs. There are two types of files:

- The measures' (or quantifications') files, that can be of 4 types.

- The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.

For running DIMet @EXECUTABLE@ you need **at least one** file of measures:

- The total **abundances** (of the metabolites) file

- The mean **enrichment** or labelled fractional contributions

- The **isotopologues** absolute values files (optional)

- The **isotopologue proportions** file (optional)

and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.

The measures' files must be organized as matrices:

- The first column must contain Metabolite IDs that are unique (not repeated) within the file.

- The rest of the columns correspond to the samples

- The rows correspond to the metabolites

- The values must be tab separated, with the first row containing the sample/column labels.

See the following examples of measures' files:


Example - Metabolites **abundances**:

    =============== ================== ================== ================== ================== ================== ==================
    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
    =============== ================== ================== ================== ================== ================== ==================
    2_3-PG          8698823.9926       10718737.7217      10724373.9         8536484.5          22060650           28898956
    2-OHGLu         36924336           424336             92060650           45165              84951950           965165051
    Glc6P           2310               2142               2683               1683               012532068          1252172
    Gly3P           399298             991656565          525195             6365231            89451625           4952651963
    IsoCit          0                  0                  0                  84915613           856236             954651610
    =============== ================== ================== ================== ================== ================== ==================

Example - mean **enrichment** or labeled fractional contributions:

    =============== ================== ================== ================== ================== ================== ==================
    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
    =============== ================== ================== ================== ================== ================== ==================
    2_3-PG          0.9711             0.968              0.9909             0.991              0.40               0.9
    2-OHGLu         0.01719            0.0246             0.554              0.555              0.73               0.68
    Glc6P           0.06               0.66               2683               0.06               2068               2172
    Gly3P           0.06               0.06               0.06               1                  5                  3
    IsoCit          0.06               1                  0.49               0.36               6                  10
    =============== ================== ================== ================== ================== ================== ==================

Example - **Isotopologues**

    =============== ================== ================== ================== ================== ================== ==================
    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
    =============== ================== ================== ================== ================== ================== ==================
    2_3-PG_m+0      206171.4626        285834.0353        36413.27637        27367.17784        6171.4626          119999
    2_3-PG_m+1      123                432                101                127                206171.4626        119999
    2_3-PG_m+2      133780.182         161461.2364        182631.3947        132170.3807        358749.348         848754.36
    2_3-PG_m+3      8358749.348        10271010.45        10505228.3         8376820.028        62163.30727        1088.8963
    2-OHGLu_m+0     5550339.322        6072872.833        3855047.791        3216178.72         8358749.348        10271010.45
    2-OHGLu_m+1     0.0                0.0                0.0                0.0                206171.4626        285834.0353
    =============== ================== ================== ================== ================== ================== ==================


Example - **Isotopologue proportions**:

    =============== ================== ================== ================== ================== ================== ==================
    ID              **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
    =============== ================== ================== ================== ================== ================== ==================
    2_3-PG_m+0      0.023701408        0.026667837        0.003395407        0.05955            0.034383527        0.12
    2_3-PG_m+1      0.0                0.0                0.0                0.0                0.4                0.12
    2_3-PG_m+2      0.015379329        0.01506            0.017029723        0.35483229         0.54131313         0.743
    2_3-PG_m+3      0.960919263        0.958268099        0.97957487         0.581310816        0.017029723        0.017
    2-OHGLu_m+0     0.972778716        0.960016157        0.238843937        0.234383527        0.9998888          0.015064063
    2-OHGLu_m+1     0.0                0.0                0.0                0.0                0.0001112          0.960919263
    =============== ================== ================== ================== ================== ================== ==================



**Metadata File Information**

Provide a tab-separated file that has the names of the samples in the first column and one header row.
Column names must be exactly in this order:

   name_to_plot
   condition
   timepoint
   timenum
   compartment
   original_name

Example **Metadata File**:


    ==================== =============== ============= ============ ================ =================
    **name_to_plot**     **condition**   **timepoint** **timenum**  **compartment**   **original_name**
    ==================== =============== ============= ============ ================ =================
    Spleen1_cell_0-1     Spleen1         0min            0           cell             MCF001089_TD01
    Spleen1_cell_0-2     Spleen1         0min            0           cell             MCF001089_TD02
    Spleen1_cell_10-1    Spleen1         10min           10          cell             MCF001089_TD03
    Spleen1_cell_10-2    Spleen1         10min           10          cell             MCF001089_TD04
    Spleen1_cell_30-1    Spleen1         30min           30          cell             MCF001089_TD05
    Spleen1_cell_30-2    Spleen1         30min           30          cell             MCF001089_TD06
    Spleen1_cell_60-1    Spleen1         60min           60          cell             MCF001089_TD07
    Spleen1_cell_60-2    Spleen1         60min           60          cell             MCF001089_TD08
    Spleen1_cell_90-1    Spleen1         90min           90          cell             MCF001089_TD09
    Spleen1_cell_90-2    Spleen1         90min           90          cell             MCF001089_TD011
    Spleen1_med_30-3     Spleen1         30min           30          med              MCF001089_TD025
    Spleen1_med_30-2     Spleen1         30min           30          med              MCF001089_TD023
    ==================== =============== ============= ============ ================ =================


The column **original_name** must have the names of the samples as given in your data.

The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). Setting meaningful names is the better choice, as they are used to display the results.

The column **timenum** must contain only the numeric part of the timepoint, for example 0, 2, 10, 100 (that is, without letters such as "T", "t", "s", "h", and without any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).

The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.



**Running the analysis**



You can specify how you want your analysis to be executed, with the parameters:

- **datatypes** : the measures type(s) that you want to run

- **statistical_test** : choose, by type of measure, the specific statistical test to be applied.

  Kruskal-Wallis, Mann-Whitney, Wilcoxon’s signed rank test, Wilcoxon’s rank sum test,
  t-test, and permutation test are currently offered (we use the trusted functions from scipy library https://docs.scipy.org/doc/scipy/reference/stats.html).

For the permutation test, we have established as test statistic, the absolute difference of geometric means of the two compared groups.

- **qualityDistanceOverSpan**: a normalized distance between the intervals of values of the compared groups, that is the cutoff for considering a minimal acceptable "separation". A 'distance/span' == 1 is a perfect separation, whereas if 'distance/span' < 0 there is no separation. To use with caution in case of important dispersion of your intra-group values. Default is -0.3 (not stringent)

- **correction_method** : one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).

There exist hints on use that will guide you, next to the parameters.

The output consists of tables with the computed metrics, one for each pair of timepoints compared.
The number of output tables = number-of-conditions x (number-of-timepoints - 1) x number-of-compartments.

For more information about the implemented statistical tests, please visit:  https://github.com/cbib/DIMet/wiki/2-Statistical-tests

The output files are explained in https://github.com/cbib/DIMet/wiki/3-Output

**Available data for testing**

You can test our tool with the data from our manuscript https://zenodo.org/record/10579862 (the pertinent
files for you are located in the subfolders inside the data folder).
You can also use the minimal data examples from https://zenodo.org/record/10579891

 ]]>
    </help>
    <!-- Citation entries come from the 'citations' macro, which is not defined in this file (presumably macros.xml). -->
    <expand macro="citations" />
</tool>