Mercurial > repos > iuc > tracegroomer
view tracegroomer.xml @ 2:5147f08d67c9 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/Tracegroomer commit 26f0c7344995bfa302db821e77ae717636a4f4b2
| author | iuc |
|---|---|
| date | Tue, 03 Sep 2024 19:23:14 +0000 |
| parents | 3cb0128a59a6 |
| children |
line wrap: on
line source
<tool id="tracegroomer" name="tracegroomer" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
    <description>TraceGroomer is a solution to format and normalize files for DIMet tool</description>
    <!--
        The tokens used in the version attribute and the macros expanded below
        (requirements, input_datasets, advanced_options, citations) are not
        defined in this file; they are expected to come from macros.xml.
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Command outline:
          1. link the metadata inputs and the generated YAML config under fixed names;
          2. run tracegroomer on the labeled metabolomics file;
          3. copy each "<name>.<extension>" result onto the matching Galaxy output
             dataset (which set of outputs depends on the selected input type).
        Cheetah directives must stay on their own lines.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ## Stage inputs under the fixed names referenced by the YAML config.
        mkdir -p config &&
        #if $sample_metadata_path:
            ln -s '$sample_metadata_path' ./sample_metadata.csv &&
        #end if
        #if $type_of_file.type_of_file_selector == "rule_tsv":
            #if $variable_metadata_path:
                ln -s '$variable_metadata_path' ./variableMetadata.tsv &&
            #end if
        #end if
        ln -s '$param_file' config/config.yaml &&

        ## Run the tool; optional arguments are only added when they are set.
        python3 -m tracegroomer
            -cf 'config/config.yaml'
            -lm '${labeled_metabo_file}'
            -tf '${type_of_file_selector}'
            --isosprop_min_admitted ${isosprop_min_admitted}
            #if $amount_material:
                --amountMaterial_path '${amount_material}'
            #end if
            #if $remove_these_metabolites:
                --remove_these_metabolites '${remove_these_metabolites}'
            #end if
            #if $advanced_options.use_internal_standard:
                --use_internal_standard ${use_internal_standard}
            #end if
            $alternative_div_amount_material
            $div_isotopologues_by_amount_material
            $isotopologues_preview
            $fractions_stomp_values
            $subtract_blankavg
            $under_detection_limit_set_nan
            -ox $output_files_extension &&

        ## Copy the produced files onto the Galaxy output datasets.
        #if $type_of_file.type_of_file_selector == "IsoCor_out_tsv":
            cp -rf '${AbundanceCorrected}.${output_files_extension}' '${AbundanceCorrected}' &&
            cp -rf '${MeanEnrichment13C}.${output_files_extension}' '${MeanEnrichment13C}' &&
            cp -rf '${IsotopologuesProp}.${output_files_extension}' '${IsotopologuesProp}' &&
            cp -rf '${IsotopologuesAbs}.${output_files_extension}' '${IsotopologuesAbs}'
        #else if $type_of_file.type_of_file_selector == "rule_tsv":
            cp -rf '${Absolute_isotopologue_abundances}.${output_files_extension}' '${Absolute_isotopologue_abundances}'
        #else if $type_of_file.type_of_file_selector == "generic_xlsx":
            cp -rf '${sheetname}.${output_files_extension}' '${isotopologuesCorrValues}'
        #else:
            cp -rf '${raw_abundance_sheet_name}.${output_files_extension}' '${rawAbundances}' &&
            cp -rf '${frac_contribution_sheet_name}.${output_files_extension}' '${FracContribution_C}' &&
            cp -rf '${corrected_isotopologues_sheet_name}.${output_files_extension}' '${CorrectedIsotopologues}'
        #end if
    ]]></command>
    <!--
        YAML configuration passed to tracegroomer via -cf.
        The body is kept at column 0 on purpose: inside the CDATA section the
        indentation is part of the YAML document, and only the keys nested under
        columns_variable_description may be indented.
    -->
    <configfiles>
        <configfile name="param_file"><![CDATA[
groom_out_path: '.'
metadata: sample_metadata
#if $type_of_file.type_of_file_selector == "IsoCor_out_tsv":
abundances: '${AbundanceCorrected}'
mean_enrichment: '${MeanEnrichment13C}'
isotopologue_proportions: '${IsotopologuesProp}'
isotopologues: '${IsotopologuesAbs}'
#end if
#if $type_of_file.type_of_file_selector == "VIBMEC_xlsx":
abundances: '${raw_abundance_sheet_name}'
mean_enrichment: '${frac_contribution_sheet_name}'
isotopologue_proportions: '${corrected_isotopologues_sheet_name}'
isotopologues: null
#end if
#if $type_of_file.type_of_file_selector == "generic_xlsx":
abundances: null
mean_enrichment: null
isotopologue_proportions: null
isotopologues: '${sheetname}'
#end if
#if $type_of_file.type_of_file_selector == "rule_tsv":
abundances: null
mean_enrichment: null
isotopologue_proportions: null
isotopologues: '${Absolute_isotopologue_abundances}'
variable_description : variableMetadata
columns_variable_description:
  identifier: '${columns_variable_metadata_description.identifier}'
  compound: '${columns_variable_metadata_description.compound}'
  isotopologue_number : '${columns_variable_metadata_description.isotopologue_number}'
#end if
        ]]></configfile>
    </configfiles>
    <inputs>
        <expand macro="input_datasets"/>
        <expand macro="advanced_options"/>
    </inputs>
    <!-- Each output is only created for the input type named in its filter. -->
    <outputs>
        <!-- IsoCor_out_tsv: four outputs -->
        <data label="Total metabolite abundances" name="AbundanceCorrected" format="tabular">
            <filter>type_of_file['type_of_file_selector'] == 'IsoCor_out_tsv'</filter>
        </data>
        <data label="Mean enrichment data" name="MeanEnrichment13C" format="tabular">
            <filter>type_of_file['type_of_file_selector'] == 'IsoCor_out_tsv'</filter>
        </data>
        <data label="Isotopologue proportional abundances" name="IsotopologuesProp" format="tabular">
            <filter>type_of_file['type_of_file_selector'] == 'IsoCor_out_tsv'</filter>
        </data>
        <data label="Absolute isotopologue abundances" name="IsotopologuesAbs" format="tabular">
            <filter>type_of_file['type_of_file_selector'] == 'IsoCor_out_tsv'</filter>
        </data>
        <!-- rule_tsv: one output -->
        <data label="Absolute isotopologue abundances for rule tsv" name="Absolute_isotopologue_abundances" format="tabular">
            <filter>type_of_file['type_of_file_selector'] == 'rule_tsv'</filter>
        </data>
        <!-- generic_xlsx: one output -->
        <data label="Absolute isotopologue abundances for generic xlsx" name="isotopologuesCorrValues" format="tabular">
            <filter>type_of_file['type_of_file_selector'] == 'generic_xlsx'</filter>
        </data>
        <!-- VIBMEC_xlsx: three outputs -->
        <data label="raw abundances for VIBMEC" name="rawAbundances" format="tabular">
            <filter>type_of_file['type_of_file_selector'] == 'VIBMEC_xlsx'</filter>
        </data>
        <data label="Mean enrichment data for VIBMEC" name="FracContribution_C" format="tabular">
            <filter>type_of_file['type_of_file_selector'] == 'VIBMEC_xlsx'</filter>
        </data>
        <data label="Isotopologue corrected abundances for VIBMEC" name="CorrectedIsotopologues" format="tabular">
            <filter>type_of_file['type_of_file_selector'] == 'VIBMEC_xlsx'</filter>
        </data>
    </outputs>
    <tests>
        <!-- IsoCor results as input -->
        <test expect_num_outputs="4">
            <param name="labeled_metabo_file" ftype="tabular" value="TRACER_IsoCor_out_example.tsv"/>
            <param name="sample_metadata_path" ftype="tabular" value="sampleMetadata_1.csv"/>
            <param name="type_of_file_selector" value="IsoCor_out_tsv"/>
            <section name="advanced_options">
                <param name="isosprop_min_admitted" ftype="float" value="-0.5"/>
                <param name="alternative_div_amount_material" ftype="boolean" value="true"/>
                <param name="div_isotopologues_by_amount_material" ftype="boolean" value="true"/>
                <param name="isotopologues_preview" ftype="boolean" value="false"/>
                <param name="fractions_stomp_values" ftype="boolean" value="true"/>
                <param name="subtract_blankavg" ftype="boolean" value="true"/>
                <param name="under_detection_limit_set_nan" ftype="boolean" value="true"/>
            </section>
            <output file="AbundanceCorrected.tsv" name="AbundanceCorrected" ftype="tabular"/>
            <output file="IsotopologuesAbs.tsv" name="IsotopologuesAbs" ftype="tabular"/>
            <output file="IsotopologuesProp.tsv" name="IsotopologuesProp" ftype="tabular"/>
            <output file="MeanEnrichment13C.tsv" name="MeanEnrichment13C" ftype="tabular"/>
        </test>
        <!-- "rule" format: dataMatrix + variableMetadata -->
        <test expect_num_outputs="1">
            <param name="labeled_metabo_file" ftype="tabular" value="TRACER_dataMatrix.tsv"/>
            <param name="sample_metadata_path" ftype="tabular" value="sampleMetadata_2.tsv"/>
            <param name="variable_metadata_path" ftype="tabular" value="variableMetadata.tsv"/>
            <param name="type_of_file_selector" value="rule_tsv"/>
            <section name="advanced_options">
                <param name="isosprop_min_admitted" ftype="float" value="-0.5"/>
                <param name="alternative_div_amount_material" ftype="boolean" value="true"/>
                <param name="div_isotopologues_by_amount_material" ftype="boolean" value="true"/>
                <param name="isotopologues_preview" ftype="boolean" value="false"/>
                <param name="fractions_stomp_values" ftype="boolean" value="true"/>
                <param name="subtract_blankavg" ftype="boolean" value="true"/>
                <param name="under_detection_limit_set_nan" ftype="boolean" value="true"/>
            </section>
            <output file="Absolute_isotopologue_abundances.tsv" name="Absolute_isotopologue_abundances" ftype="tabular"/>
        </test>
        <!-- generic .xlsx input -->
        <test expect_num_outputs="1">
            <param name="labeled_metabo_file" ftype="xlsx" value="TRACER_generic_sheet.xlsx"/>
            <param name="sample_metadata_path" ftype="tabular" value="sampleMetadata_3.csv"/>
            <param name="type_of_file_selector" value="generic_xlsx"/>
            <section name="advanced_options">
                <param name="isosprop_min_admitted" ftype="float" value="-0.5"/>
                <param name="alternative_div_amount_material" ftype="boolean" value="true"/>
                <param name="div_isotopologues_by_amount_material" ftype="boolean" value="true"/>
                <param name="isotopologues_preview" ftype="boolean" value="false"/>
                <param name="fractions_stomp_values" ftype="boolean" value="true"/>
                <param name="subtract_blankavg" ftype="boolean" value="true"/>
                <param name="under_detection_limit_set_nan" ftype="boolean" value="true"/>
            </section>
            <output file="isotopologuesCorrValues.tsv" name="isotopologuesCorrValues" ftype="tabular"/>
        </test>
        <!-- VIB MEC multi-sheet .xlsx input -->
        <test expect_num_outputs="3">
            <param name="labeled_metabo_file" ftype="xlsx" value="TRACER_metabo_4.xlsx"/>
            <param name="sample_metadata_path" ftype="tabular" value="sampleMetadata_4.csv"/>
            <param name="type_of_file_selector" value="VIBMEC_xlsx"/>
            <section name="advanced_options">
                <param name="isosprop_min_admitted" ftype="float" value="-0.5"/>
                <param name="alternative_div_amount_material" ftype="boolean" value="true"/>
                <param name="div_isotopologues_by_amount_material" ftype="boolean" value="true"/>
                <param name="isotopologues_preview" ftype="boolean" value="false"/>
                <param name="fractions_stomp_values" ftype="boolean" value="true"/>
                <param name="subtract_blankavg" ftype="boolean" value="true"/>
                <param name="under_detection_limit_set_nan" ftype="boolean" value="true"/>
            </section>
            <output file="rawAbundances.tsv" name="rawAbundances" ftype="tabular"/>
            <output file="CorrectedIsotopologues.tsv" name="CorrectedIsotopologues" ftype="tabular"/>
            <output file="FracContribution_C.tsv" name="FracContribution_C" ftype="tabular"/>
        </test>
    </tests>
    <!-- reStructuredText; kept at column 0 because indentation and blank lines are significant. -->
    <help><![CDATA[
TraceGroomer is a solution for formatting and normalising Tracer metabolomics given file(s),
to produce the tabular files which are ready for DIMet tool.

Currently, the following input formats of Tracer (or Isotope-labeled) metabolomics measurements files are accepted:

- IsoCor results (.tsv measurements file).
- The dataMatrix with variableMetadata ("rule" format for TraceGroomer).
- A 'generic' .xlsx measurements file, manually set by the user.
- Results provided by the VIB Metabolomics Expertise Center.

For any type of these supported inputs, TraceGroomer generates an independent file for:
i) total metabolite abundances ii) Isotopologues iii) Isotopologues' proportions and
iv) mean enrichment (a.k.a. fractional contributions).
When only Isotopologues' absolute values are provided, TraceGroomer generates all the other quantifications automatically!

Fast, automatic formatting is performed, as well as the normalisation chosen by the user:
whether by the amount of material and/or by an internal standard.

Useful advanced options are offered.

Note : this script does not correct for naturally occurring isotopologues.
Your data must be already processed by another software that performs such correction (e.g. IsoCor, or another).


**IsoCor results as input**

A typical IsoCor results file is described in: https://isocor.readthedocs.io/en/latest/tutorials.html#output-files

It consists of a .tsv file which has in columns the sample, metabolite, isotopologue and all quantifications,
and the rows are in piled version (the samples are repeated vertically).

Our script transforms specific columns of that file into tables.
As the total metabolite abundance column is not present in the input data, the total abundance per metabolite
is the automatic result of the sum, per metabolite, of Isotopologues' absolute values (see AbundanceCorrected below).
So here the correspondences:

================================ ========================================
column in the IsoCor file        TraceGroomer output filename
================================ ========================================
corrected_area                   IsotopologuesAbsolute
isotopologue_fraction            IsotopologuesProportions
mean_enrichment                  MeanEnrichment
-                                AbundanceCorrected
================================ ========================================

We provide the downloadable example from https://zenodo.org/records/10826303


**The "rule" input format**

This corresponds to the set of "dataMatrix", "variableMetadata", and sample metadata files.
Note that the last one is required for any input format, so its explanation is given in the section "Metadata File Information".

The "dataMatrix" contains isotopologues absolute values organised in a tabular format,
having a first column with the isotopologue unique identifiers and the "ID" header,
whereas the sample names constitute the header of the rest of the columns.
This means that isotopologues represent the rows, and samples represent the columns.
The isotopologue identifiers must be unique.

Example of a "dataMatrix" file:

====== ========= ========= ========= =========
ID     sampleA1  sampleA2  sampleB1  sampleB2
====== ========= ========= ========= =========
Peak1  0.54406   0.5299    0.6076    0.9352
Peak2  0.2715    0.3129    0.3139    0.0507
Peak3  0.1844    0.1571    0.0784    0.0142
====== ========= ========= ========= =========

The "variableMetadata" provides at least three compulsory columns: ID (isotopologue unique identifiers),
metabolite_name (metabolite annotation or chemical formula, being non-unique entries),
and the number of labeled atoms as integer numbers. Example:

======== ================= ================
ID       metabolite_name   isotope_numeric
======== ================= ================
Peak1    Acetyl_CoA        0
Peak2    Acetyl_CoA        1
Peak3    Acetyl_CoA        2
======== ================= ================

All these files must be in tab delimited .tsv format.

We provide the downloadable example from Zenodo (https://zenodo.org/records/10826303).

Note that when using this input, TraceGroomer automatically generates all the derived metrics
(mean enrichment, isotopologue proportions, total abundances).


**Other input formats**

Please visit https://github.com/cbib/TraceGroomer/wiki/3-Supported-input-formats to get information about:

- A 'generic' .xlsx measurements file, manually set by the user.
- Results provided by the VIB Metabolomics Expertise Center (El-Maven results are shaped by VIB MEC team into a multi-sheet .xlsx file).

For any type of input formats mentioned above, a samples' metadata file must be provided (section "Metadata File Information").


**Metadata File Information**

Provide a tab-separated file that has the names of the samples in the first column and one header row.
Column names must be exactly in this order:

   name_to_plot
   condition
   timepoint
   timenum
   compartment
   original_name

Example **Metadata File**:

==================== =============== ============= ============ ================ =================
**name_to_plot**     **condition**   **timepoint** **timenum**  **compartment**  **original_name**
-------------------- --------------- ------------- ------------ ---------------- -----------------
Control_cell_T0-1    Control         T0            0            cell             MCF001089_TD01
Control_cell_T0-2    Control         T0            0            cell             MCF001089_TD02
Tumoral_cell_T0-1    Tumoral         T0            0            cell             MCF001089_TD04
Tumoral_cell_T0-2    Tumoral         T0            0            cell             MCF001089_TD05
Tumoral_cell_T24-1   Tumoral         T24           24           cell             MCF001089_TD07
Tumoral_cell_T24-2   Tumoral         T24           24           cell             MCF001089_TD08
Control_med_T24-1    Control         T24           24           med              MCF001090_TD02
Control_med_T24-2    Control         T24           24           med              MCF001090_TD03
Tumoral_med_T24-1    Tumoral         T24           24           med              MCF001090_TD04
Tumoral_med_T24-2    Tumoral         T24           24           med              MCF001090_TD05
Control_med_T0-1     Control         T0            0            med              MCF001090_TD06
Tumoral_med_T0-1     Tumoral         T0            0            med              MCF001090_TD07
==================== =============== ============= ============ ================ =================

The column **original_name** must have the names of the samples as given in your data.

The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer).
To set names that are meaningful is a better choice, as we will take them to display the results.

The column **timenum** must contain only the numeric part of the timepoint, for example 2,0, 10, 100
(this means, without letters ("T", "t", "s", "h" etc) nor any other symbol).
Make sure these time numbers are in the same units (but do not write the units here!).

The column **compartment** is an abbreviation, coined by you, for the compartments.
This will be used for the results' files names: the longer the compartments names are, the longer the output files' names!
Please pick short and clear abbreviations to fill this column.


**Advanced options**

These options allow the user to:

- Normalize by the amount of material (number of cells, tissue weight): the user must provide a tab delimited file, with a header, having the first column with the same names as in metadata 'original_name', and the second column the quantitative information, without zeroes nor negative numbers.
- Normalize by an internal standard (present in your data) at choice.
- Print a preview of isotopologues values.

Please visit our Wiki page (https://github.com/cbib/TraceGroomer/wiki) for more information, and how to get help.

Details of advanced options available in https://github.com/cbib/TraceGroomer/wiki/4-Advanced-options.
    ]]></help>
    <expand macro="citations"/>
</tool>
