Mercurial > repos > iuc > tracegroomer

<tool id="tracegroomer" name="tracegroomer" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
    <!-- One-sentence summary of what the tool does. -->
    <description>
        TraceGroomer is a solution to format and normalize files for DIMet tool
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Step 1: stage the inputs in the working directory under fixed names
        ## (sample_metadata.csv, variableMetadata.tsv, config/config.yaml).
        mkdir -p config &&
        #if $sample_metadata_path:
            ln -s '$sample_metadata_path' ./sample_metadata.csv &&
        #end if
        #if $type_of_file.type_of_file_selector == "rule_tsv":
            #if $variable_metadata_path:
                ln -s '$variable_metadata_path' ./variableMetadata.tsv &&
            #end if
        #end if
        ln -s '$param_file' config/config.yaml &&

        ## Step 2: run tracegroomer. Every text-like value is single-quoted so that
        ## values containing spaces stay a single shell argument.
        python3 -m tracegroomer
            -cf 'config/config.yaml'
            -lm '${labeled_metabo_file}'
            -tf '${type_of_file.type_of_file_selector}'
            --isosprop_min_admitted ${isosprop_min_admitted}
            #if $amount_material:
                --amountMaterial_path '${amount_material}'
            #end if
            #if $remove_these_metabolites:
                --remove_these_metabolites '${remove_these_metabolites}'
            #end if
            #if $advanced_options.use_internal_standard:
                --use_internal_standard '${advanced_options.use_internal_standard}'
            #end if
            ## The following values are interpolated as-is; presumably boolean
            ## parameters that render to a flag or to nothing (defined in the macros).
            $alternative_div_amount_material
            $div_isotopologues_by_amount_material
            $isotopologues_preview
            $fractions_stomp_values
            $subtract_blankavg
            $under_detection_limit_set_nan
            -ox '${output_files_extension}'
        &&

        ## Step 3: copy each "<name>.<extension>" result file onto the matching
        ## Galaxy output dataset. Exactly one branch runs per input type.
        #if $type_of_file.type_of_file_selector == "IsoCor_out_tsv":
            cp -rf '${AbundanceCorrected}.${output_files_extension}' '${AbundanceCorrected}' &&
            cp -rf '${MeanEnrichment13C}.${output_files_extension}' '${MeanEnrichment13C}' &&
            cp -rf '${IsotopologuesProp}.${output_files_extension}' '${IsotopologuesProp}' &&
            cp -rf '${IsotopologuesAbs}.${output_files_extension}' '${IsotopologuesAbs}'
        #else if $type_of_file.type_of_file_selector == "rule_tsv":
            cp -rf '${Absolute_isotopologue_abundances}.${output_files_extension}' '${Absolute_isotopologue_abundances}'
        #else if $type_of_file.type_of_file_selector == "generic_xlsx":
            cp -rf '${sheetname}.${output_files_extension}' '${isotopologuesCorrValues}'
        #else:
            ## Remaining input type (VIBMEC_xlsx): one result file per sheet name.
            cp -rf '${raw_abundance_sheet_name}.${output_files_extension}' '${rawAbundances}' &&
            cp -rf '${frac_contribution_sheet_name}.${output_files_extension}' '${FracContribution_C}' &&
            cp -rf '${corrected_isotopologues_sheet_name}.${output_files_extension}' '${CorrectedIsotopologues}'
        #end if

    ]]></command>

    <configfiles>
        <!-- YAML configuration rendered per job; the command section links it to config/config.yaml. -->
        <configfile name="param_file"><![CDATA[
            #set $input_kind = str($type_of_file.type_of_file_selector)
            groom_out_path: '.'
            metadata: sample_metadata
            #if $input_kind == "IsoCor_out_tsv":
            ## All four tables are named after the Galaxy output dataset paths.
            abundances: '${AbundanceCorrected}'
            mean_enrichment: '${MeanEnrichment13C}'
            isotopologue_proportions: '${IsotopologuesProp}'
            isotopologues: '${IsotopologuesAbs}'
            #else if $input_kind == "VIBMEC_xlsx":
            ## Tables are named after the sheet names given by the user.
            abundances: '${raw_abundance_sheet_name}'
            mean_enrichment: '${frac_contribution_sheet_name}'
            isotopologue_proportions: '${corrected_isotopologues_sheet_name}'
            isotopologues: null
            #else if $input_kind == "generic_xlsx":
            ## Only the isotopologues sheet is configured.
            abundances: null
            mean_enrichment: null
            isotopologue_proportions: null
            isotopologues: '${sheetname}'
            #else if $input_kind == "rule_tsv":
            ## Only isotopologues are configured, plus the variableMetadata column mapping.
            abundances: null
            mean_enrichment: null
            isotopologue_proportions: null
            isotopologues: '${Absolute_isotopologue_abundances}'
            variable_description : variableMetadata
            columns_variable_description:
                identifier: '${columns_variable_metadata_description.identifier}'
                compound: '${columns_variable_metadata_description.compound}'
                isotopologue_number : '${columns_variable_metadata_description.isotopologue_number}'
            #end if
        ]]></configfile>
    </configfiles>
    <!-- All input parameters come from the "input_datasets" and "advanced_options"
         macros; their definitions are not in this file (presumably in the imported
         macros.xml). -->
    <inputs>
        <expand macro="input_datasets"/>
        <expand macro="advanced_options"/>
    </inputs>
    <outputs>
        <!-- Each output is kept only for the input type named in its filter. -->

        <!-- IsoCor_out_tsv: four tables -->
        <data name="AbundanceCorrected" format="tabular" label="Total metabolite abundances">
            <filter>type_of_file['type_of_file_selector'] == 'IsoCor_out_tsv'</filter>
        </data>
        <data name="MeanEnrichment13C" format="tabular" label="Mean enrichment data">
            <filter>type_of_file['type_of_file_selector'] == 'IsoCor_out_tsv'</filter>
        </data>
        <data name="IsotopologuesProp" format="tabular" label="Isotopologue proportional abundances">
            <filter>type_of_file['type_of_file_selector'] == 'IsoCor_out_tsv'</filter>
        </data>
        <data name="IsotopologuesAbs" format="tabular" label="Absolute isotopologue abundances">
            <filter>type_of_file['type_of_file_selector'] == 'IsoCor_out_tsv'</filter>
        </data>

        <!-- rule_tsv: one table -->
        <data name="Absolute_isotopologue_abundances" format="tabular" label="Absolute isotopologue abundances for rule tsv">
            <filter>type_of_file['type_of_file_selector'] == 'rule_tsv'</filter>
        </data>

        <!-- generic_xlsx: one table -->
        <data name="isotopologuesCorrValues" format="tabular" label="Absolute isotopologue abundances for generic xlsx">
            <filter>type_of_file['type_of_file_selector'] == 'generic_xlsx'</filter>
        </data>

        <!-- VIBMEC_xlsx: three tables -->
        <data name="rawAbundances" format="tabular" label="raw abundances for VIBMEC">
            <filter>type_of_file['type_of_file_selector'] == 'VIBMEC_xlsx'</filter>
        </data>
        <data name="FracContribution_C" format="tabular" label="Mean enrichment data for VIBMEC">
            <filter>type_of_file['type_of_file_selector'] == 'VIBMEC_xlsx'</filter>
        </data>
        <data name="CorrectedIsotopologues" format="tabular" label="Isotopologue corrected abundances for VIBMEC">
            <filter>type_of_file['type_of_file_selector'] == 'VIBMEC_xlsx'</filter>
        </data>
    </outputs>
    <tests>
        <!-- One test per supported input type. The ftype attribute is set only on
             dataset parameters; numeric and boolean parameters carry just a value. -->

        <!-- IsoCor_out_tsv input: four output tables -->
        <test expect_num_outputs="4">
            <param name="labeled_metabo_file" ftype="tabular" value="TRACER_IsoCor_out_example.tsv"/>
            <param name="sample_metadata_path" ftype="tabular" value="sampleMetadata_1.csv"/>
            <param name="type_of_file_selector" value="IsoCor_out_tsv"/>
            <section name="advanced_options">
                <param name="isosprop_min_admitted" value="-0.5"/>
                <param name="alternative_div_amount_material" value="true"/>
                <param name="div_isotopologues_by_amount_material" value="true"/>
                <param name="isotopologues_preview" value="false"/>
                <param name="fractions_stomp_values" value="true"/>
                <param name="subtract_blankavg" value="true"/>
                <param name="under_detection_limit_set_nan" value="true"/>
            </section>
            <output file="AbundanceCorrected.tsv" name="AbundanceCorrected" ftype="tabular"/>
            <output file="IsotopologuesAbs.tsv" name="IsotopologuesAbs" ftype="tabular"/>
            <output file="IsotopologuesProp.tsv" name="IsotopologuesProp" ftype="tabular"/>
            <output file="MeanEnrichment13C.tsv" name="MeanEnrichment13C" ftype="tabular"/>
        </test>

        <!-- rule_tsv input (dataMatrix + variableMetadata): one output table -->
        <test expect_num_outputs="1">
            <param name="labeled_metabo_file" ftype="tabular" value="TRACER_dataMatrix.tsv"/>
            <param name="sample_metadata_path" ftype="tabular" value="sampleMetadata_2.tsv"/>
            <param name="variable_metadata_path" ftype="tabular" value="variableMetadata.tsv"/>
            <param name="type_of_file_selector" value="rule_tsv"/>
            <section name="advanced_options">
                <param name="isosprop_min_admitted" value="-0.5"/>
                <param name="alternative_div_amount_material" value="true"/>
                <param name="div_isotopologues_by_amount_material" value="true"/>
                <param name="isotopologues_preview" value="false"/>
                <param name="fractions_stomp_values" value="true"/>
                <param name="subtract_blankavg" value="true"/>
                <param name="under_detection_limit_set_nan" value="true"/>
            </section>
            <output file="Absolute_isotopologue_abundances.tsv" name="Absolute_isotopologue_abundances" ftype="tabular"/>
        </test>

        <!-- generic_xlsx input: one output table -->
        <test expect_num_outputs="1">
            <param name="labeled_metabo_file" ftype="xlsx" value="TRACER_generic_sheet.xlsx"/>
            <param name="sample_metadata_path" ftype="tabular" value="sampleMetadata_3.csv"/>
            <param name="type_of_file_selector" value="generic_xlsx"/>
            <section name="advanced_options">
                <param name="isosprop_min_admitted" value="-0.5"/>
                <param name="alternative_div_amount_material" value="true"/>
                <param name="div_isotopologues_by_amount_material" value="true"/>
                <param name="isotopologues_preview" value="false"/>
                <param name="fractions_stomp_values" value="true"/>
                <param name="subtract_blankavg" value="true"/>
                <param name="under_detection_limit_set_nan" value="true"/>
            </section>
            <output file="isotopologuesCorrValues.tsv" name="isotopologuesCorrValues" ftype="tabular"/>
        </test>

        <!-- VIBMEC_xlsx input: three output tables -->
        <test expect_num_outputs="3">
            <param name="labeled_metabo_file" ftype="xlsx" value="TRACER_metabo_4.xlsx"/>
            <param name="sample_metadata_path" ftype="tabular" value="sampleMetadata_4.csv"/>
            <param name="type_of_file_selector" value="VIBMEC_xlsx"/>
            <section name="advanced_options">
                <param name="isosprop_min_admitted" value="-0.5"/>
                <param name="alternative_div_amount_material" value="true"/>
                <param name="div_isotopologues_by_amount_material" value="true"/>
                <param name="isotopologues_preview" value="false"/>
                <param name="fractions_stomp_values" value="true"/>
                <param name="subtract_blankavg" value="true"/>
                <param name="under_detection_limit_set_nan" value="true"/>
            </section>
            <output file="rawAbundances.tsv" name="rawAbundances" ftype="tabular"/>
            <output file="CorrectedIsotopologues.tsv" name="CorrectedIsotopologues" ftype="tabular"/>
            <output file="FracContribution_C.tsv" name="FracContribution_C" ftype="tabular"/>
        </test>
    </tests>
    <help><![CDATA[
TraceGroomer formats and normalises the given tracer metabolomics file(s) and produces tabular files that are ready for the DIMet tool.

Currently, the following input formats of Tracer (or Isotope-labeled) metabolomics measurements files are accepted:

   - IsoCor results (.tsv measurements file).
   - The dataMatrix with variableMetadata ("rule" format for TraceGroomer).
   - A 'generic' .xlsx measurements file, manually set by the user.
   - Results provided by the VIB Metabolomics Expertise Center.

For any of these supported input types, TraceGroomer generates an independent file for: i) total metabolite abundances, ii) isotopologues, iii) isotopologue proportions and iv) mean enrichment (a.k.a. fractional contributions). When only the isotopologues' absolute values are provided, TraceGroomer generates all the other quantifications automatically!

Fast, automatic formatting is performed, as well as the normalisation chosen by the user: whether by the amount of material and/or by an internal standard. Useful advanced options are offered.

Note : this script does not correct for naturally occurring isotopologues. Your data must be already processed by another software that performs such correction (e.g. IsoCor, or another).


**IsoCor results as input**

A typical IsoCor results file is described at https://isocor.readthedocs.io/en/latest/tutorials.html#output-files. It is a .tsv file whose columns hold the sample, metabolite, isotopologue and all quantifications, and whose rows are in stacked ("long") form: the samples are repeated vertically.

Our script transforms specific columns of that file into tables. As the total metabolite abundance column is not present in the input data, the total abundance per metabolite is computed automatically as the sum, per metabolite, of the isotopologues' absolute values (see AbundanceCorrected below). The correspondences are:

================================ ========================================
column in the IsoCor file        TraceGroomer output filename
================================ ========================================
corrected_area                   IsotopologuesAbsolute
isotopologue_fraction            IsotopologuesProportions
mean_enrichment                  MeanEnrichment
-                                AbundanceCorrected
================================ ========================================

We provide the downloadable example from https://zenodo.org/records/10826303


**The "rule" input format**

This corresponds to the set of "dataMatrix", "variableMetadata", and sample metadata files. Note that the last one is required for any input format, so its explanation is given in the section "Metadata File Information".

The "dataMatrix" contains  isotopologues absolute values organised in a tabular format, having a first column with the isotopologue unique identifiers and the "ID" header, whereas the sample names constitute the header of the rest of the columns. This means that isotopologues represent the rows, and samples represent the columns. The isotopologue identifiers must be unique. Example of a "dataMatrix" file:

====== ========= ========= ========= =========
ID     sampleA1  sampleA2  sampleB1  sampleB2
====== ========= ========= ========= =========
Peak1  0.54406   0.5299    0.6076    0.9352
Peak2  0.2715    0.3129    0.3139    0.0507
Peak3  0.1844    0.1571    0.0784    0.0142
====== ========= ========= ========= =========

The "variableMetadata" provides at least three compulsory columns: ID (isotopologue unique identifiers), metabolite_name (metabolite annotation or chemical formula, being non-unique entries), and the number of labeled atoms as integer numbers. Example:

======== ================= ================
ID       metabolite_name   isotope_numeric
======== ================= ================
Peak1    Acetyl_CoA        0
Peak2    Acetyl_CoA        1
Peak3    Acetyl_CoA        2
======== ================= ================


All these files must be in tab delimited .tsv format. We provide the downloadable example from Zenodo (https://zenodo.org/records/10826303).


Note that when using this input, TraceGroomer automatically generates all the derived metrics (mean enrichment, isotopologue proportions, total abundances).


**Other input formats**


Please visit https://github.com/cbib/TraceGroomer/wiki/3-Supported-input-formats to get information about:

   - A 'generic' .xlsx measurements file, manually set by the user.
   - Results provided by the VIB Metabolomics Expertise Center (El-Maven results are shaped by VIB MEC team into a multi-sheet .xlsx file).


For any type of input formats mentioned above, a samples' metadata file must be provided (section "Metadata File Information").


**Metadata File Information**

Provide a tab-separated file that has the names of the samples in the first column and one header row.
Column names must be exactly in this order:

   - name_to_plot
   - condition
   - timepoint
   - timenum
   - compartment
   - original_name

Example **Metadata File**:


    ==================== =============== ============= ============ ================ =================
    **name_to_plot**     **condition**   **timepoint** **timenum**  **compartment**   **original_name**
    -------------------- --------------- ------------- ------------ ---------------- -----------------
    Control_cell_T0-1    Control         T0            0            cell             MCF001089_TD01
    Control_cell_T0-2    Control         T0            0            cell             MCF001089_TD02
    Tumoral_cell_T0-1    Tumoral         T0            0            cell             MCF001089_TD04
    Tumoral_cell_T0-2    Tumoral         T0            0            cell             MCF001089_TD05
    Tumoral_cell_T24-1   Tumoral         T24           24           cell             MCF001089_TD07
    Tumoral_cell_T24-2   Tumoral         T24           24           cell             MCF001089_TD08
    Control_med_T24-1    Control         T24           24           med              MCF001090_TD02
    Control_med_T24-2    Control         T24           24           med              MCF001090_TD03
    Tumoral_med_T24-1    Tumoral         T24           24           med              MCF001090_TD04
    Tumoral_med_T24-2    Tumoral         T24           24           med              MCF001090_TD05
    Control_med_T0-1     Control         T0            0            med              MCF001090_TD06
    Tumoral_med_T0-1     Tumoral         T0            0            med              MCF001090_TD07
    ==================== =============== ============= ============ ================ =================


The column **original_name** must have the names of the samples as given in your data.

The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). Meaningful names
are the better choice, as they are used to display the results.

The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters such as "T", "t", "s", "h",
and without any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).

The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the
compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.


**Advanced options**

These options allow the user to:

    - Normalize by the amount of material (number of cells, tissue weight): the user must provide a tab delimited file, with a header, having the first column with the same names as in metadata 'original_name', and the second column the quantitative information, without zeroes nor negative numbers.
    - Normalize by an internal standard (present in your data) at choice.
    - Print a preview of isotopologues values.

Please visit our Wiki page (https://github.com/cbib/TraceGroomer/wiki) for more information and to find out how to get help. Details of the advanced options are available at https://github.com/cbib/TraceGroomer/wiki/4-Advanced-options.

 ]]>
    </help>
    <expand macro="citations" />
</tool>
author	iuc
date	Tue, 03 Sep 2024 19:23:14 +0000
parents	3cb0128a59a6
children