view COBRAxy/src/flux_simulation.xml @ 549:4c5fdcefce8e draft default tip

Uploaded
author francesco_lapi
date Wed, 29 Oct 2025 11:09:38 +0000
parents 5a73d813b1db
children
line wrap: on
line source

<tool id="fluxSimulation" name="Flux Simulation" version="2.0.0">
    
    <macros>
        <import>marea_macros.xml</import>
    </macros>

    <!-- Pinned package versions that must be available when flux_simulation.py runs. -->
    <requirements>
        <requirement type="package" version="1.24.4">numpy</requirement>
        <requirement type="package" version="2.0.3">pandas</requirement>
        <requirement type="package" version="0.29.0">cobra</requirement>
        <requirement type="package" version="5.2.2">lxml</requirement>
        <requirement type="package" version="1.4.2">joblib</requirement>
        <requirement type="package" version="1.11">scipy</requirement>
    </requirements>

    <!--
        Builds the flux_simulation.py command line in this order: model inputs,
        sampling settings, optimization analyses, output paths.
        Every file path is single-quoted so the shell always passes it as one argument.
        The thinning flag is emitted exactly once per invocation.
    -->
    <command detect_errors="exit_code">
        <![CDATA[
        python '$__tool_directory__/flux_simulation.py'
        --tool_dir '$__tool_directory__'

        --model_and_bounds $model_and_bounds.model_and_bounds

        ## Dataset paths and display names reach the script through the config files.
        #if $model_and_bounds.model_and_bounds == 'True':
            --model_upload '$model_and_bounds.model_upload'
            --input_file '$input_paths_file'
            --name_file '$input_names_file'
        #else:
            --input_file '$model_paths_file'
            --name_file '$model_names_file'
        #end if

        --sampling_enabled $sampling_params.sampling_enabled

        #if $sampling_params.sampling_enabled == 'true':
            ## Only the OPTGP branch exposes a thinning parameter; any other algorithm gets 0.
            #if $sampling_params.algorithm_param.algorithm == 'OPTGP':
                --thinning $sampling_params.algorithm_param.thinning
            #else:
                --thinning 0
            #end if
            --algorithm $sampling_params.algorithm_param.algorithm
            --n_batches $sampling_params.n_batches
            --n_samples $sampling_params.n_samples
            --seed $sampling_params.seed
            --output_type "${",".join(map(str, $sampling_params.output_types))}"
        #else:
            ## Sampling disabled: fixed placeholder values are passed for the sampling flags.
            --thinning 0
            --algorithm 'CBS'
            --n_batches 1
            --n_samples 1
            --seed 0
            --output_type 'mean'
        #end if

        #if $output_types_analysis:
            --output_type_analysis "${",".join(map(str, $output_types_analysis))}"
        #end if

        #if 'FVA' in str($output_types_analysis):
            --perc_opt $fva_params.optimality_fraction
        #end if

        --out_log '$log'

        ## Each output path is passed only when the matching output type was requested,
        ## mirroring the filters declared in the outputs section.
        #if $sampling_params.sampling_enabled == 'true':
            #if 'mean' in str($sampling_params.output_types):
                --out_mean '$mean_results'
            #end if
            #if 'median' in str($sampling_params.output_types):
                --out_median '$median_results'
            #end if
            #if 'quantiles' in str($sampling_params.output_types):
                --out_quantiles '$quantile_results'
            #end if
        #end if

        #if 'FVA' in str($output_types_analysis):
            --out_fva '$fva_results'
        #end if
        #if 'pFBA' in str($output_types_analysis):
            --out_pfba '$pFBA_results'
        #end if
        #if 'sensitivity' in str($output_types_analysis):
            --out_sensitivity '$sensitivity_results'
        #end if
        ]]>
    </command>

    <!--
        Four line-oriented config files, one entry per selected dataset.
        The "True" pair (input_paths_file / input_names_file) is filled from the bound files,
        the "False" pair (model_paths_file / model_names_file) from the complete model files.
        The pair belonging to the unselected input format renders empty.
        The CDATA bodies are whitespace-sensitive: every newline inside them ends up in the file.
    -->
    <configfiles>
        <!-- One line per bound dataset, rendered from the dataset itself. -->
        <configfile name="input_paths_file"><![CDATA[#if $model_and_bounds.model_and_bounds == 'True'
#for $input_temp in $model_and_bounds.inputs
${input_temp}
#end for
#end if]]></configfile>
        
        <!-- One line per bound dataset holding its element identifier, in the same order as input_paths_file. -->
        <configfile name="input_names_file"><![CDATA[#if $model_and_bounds.model_and_bounds == 'True'
#for $input_temp in $model_and_bounds.inputs
${input_temp.element_identifier}
#end for
#end if]]></configfile>
        
        <!-- One line per complete model dataset, rendered from the dataset itself. -->
        <configfile name="model_paths_file"><![CDATA[#if $model_and_bounds.model_and_bounds == 'False'
#for $input_temp in $model_and_bounds.model_files
${input_temp}
#end for
#end if]]></configfile>
        
        <!-- One line per complete model dataset holding its element identifier, in the same order as model_paths_file. -->
        <configfile name="model_names_file"><![CDATA[#if $model_and_bounds.model_and_bounds == 'False'
#for $input_temp in $model_and_bounds.model_files
${input_temp.element_identifier}
#end for
#end if]]></configfile>
    </configfiles>

    <!--
        Tool form. Parameter names are unchanged; the "argument" attributes name the
        flag that the command template actually passes for each parameter.
    -->
    <inputs>
        <!-- Input format: one base model plus bound files, or several complete models. -->
        <conditional name="model_and_bounds">
            <param name="model_and_bounds" argument="--model_and_bounds" type="select" label="Input format:" help="Choose whether to upload the model and bounds in separate files or to upload multiple complete model files.">
                <option value="True" selected="true">Model + bounds (separate files)</option>
                <option value="False">Multiple complete models</option>
            </param>

            <when value="True">
                <param name="model_upload" argument="--model_upload" type="data" format="csv,tsv,tabular"
                    label="Model tabular:"
                    help="Upload a CSV/TSV file that contains the model reaction rules generated by the Import Metabolic Model tool. Recommended columns: ReactionID, Reaction (formula), Rule (GPR). Optional columns: name, lower_bound, upper_bound, InMedium. If bounds are present here they may be overridden by separate bound files." />

                <!-- The selected datasets are listed in a config file that is passed with the input_file flag. -->
                <param name="inputs" argument="--input_file" multiple="true" type="data" format="tabular,csv,tsv"
                    label="Bound file(s):"
                    help="Upload one or more CSV/TSV files containing reaction bounds generated by the Ras2Bounds tool. Each file must include at least: ReactionID, lower_bound, upper_bound. Files are applied in the order provided; later files override earlier ones for the same ReactionID." />
            </when>

            <when value="False">
                <!-- The selected datasets are listed in a config file that is passed with the input_file flag. -->
                <param name="model_files" argument="--input_file" multiple="true" type="data" format="csv,tsv,tabular"
                    label="Complete Model tabular:"
                    help="Upload one or more CSV/TSV files, each containing both model rules and reaction bounds for different contexts/cells. Required columns: ReactionID, Reaction, Rule, lower_bound, upper_bound." />
            </when>
        </conditional>

        <!-- Flux sampling settings; shown only when sampling is enabled. -->
        <conditional name="sampling_params">
            <param name="sampling_enabled" argument="--sampling_enabled" type="boolean" display="checkboxes" checked="false" label="Enable sampling" help="Enable flux sampling"/>
            
            <when value="true">
                <conditional name="algorithm_param">
                    <param name="algorithm" argument="--algorithm" type="select" label="Choose sampling algorithm:">
                        <option value="CBS" selected="true">CBS</option>
                        <option value="OPTGP">OPTGP</option>
                    </param>
                    <when value="OPTGP">
                        <!-- A thinning interval below 1 is not meaningful, so it is rejected in the form. -->
                        <param name="thinning" argument="--thinning" type="integer" label="Thinning:" value="100" min="1" help="Number of iterations to wait before taking a sample."/>
                    </when>
                </conditional>

                <param name="n_samples" argument="--n_samples" type="integer" label="Samples:" value="1000" min="1" max="1000"/>
                <!-- At least one batch is needed to produce any sample. -->
                <param name="n_batches" argument="--n_batches" type="integer" label="Batches:" value="1" min="1" help="This is useful for computational performances."/>
                <param name="seed" argument="--seed" type="integer" label="Seed:" value="0" help="Random seed."/>

                <!-- Required: the command template iterates this selection unconditionally when sampling is enabled. -->
                <param type="select" argument="--output_type" multiple="true" optional="false" name="output_types" label="Choose outputs from sampling">
                    <option value="mean" selected="true">Mean</option>
                    <option value="median" selected="true">Median</option>
                    <option value="quantiles" selected="true">Quantiles</option>
                    <option value="fluxes" selected="false">All fluxes</option>
                </param>
            </when>
            
            <when value="false">
                <!-- Hidden placeholders used when sampling is disabled; the command template passes its own fixed values in this case. -->
                <param name="algorithm" type="hidden" value="CBS"/>
                <param name="n_samples" type="hidden" value="1000"/>
                <param name="n_batches" type="hidden" value="1"/>
                <param name="seed" type="hidden" value="0"/>
                <param name="output_types" type="hidden" value="mean"/>
            </when>
        </conditional>

        <!-- Optimization analyses; each selected value enables the matching output dataset. -->
        <param type="select" argument="--output_type_analysis" multiple="true" name="output_types_analysis" label="Choose outputs from optimization">
            <option value="FVA" selected="true">FVA</option>
            <option value="pFBA" selected="false">pFBA</option>
            <option value="sensitivity" selected="false">Sensitivity reaction knock-out (Biomass)</option>
        </param>

        <!-- FVA optimality fraction, passed with the perc_opt flag whenever FVA is selected. -->
        <conditional name="fva_params">
            <param name="show_fva_options" type="boolean" display="checkboxes" checked="false" label="Configure FVA parameters" help="Show additional FVA configuration options"/>
            <when value="true">
                <param name="optimality_fraction" argument="--perc_opt" type="float" label="FVA Optimality (fraction):" value="0.90" min="0.0" max="1.0" 
                    help="Fraction of optimality for FVA analysis. 1.0 means the flux must be optimal, lower values allow suboptimal solutions."/>
            </when>
            <when value="false">
                <!-- Value used when the FVA options are left collapsed. -->
                <param name="optimality_fraction" argument="--perc_opt" type="hidden" value="1.0"/>
            </when>
        </conditional>

    </inputs>

    <!--
        Output datasets. The log is always produced; every other output is created only
        when its analysis or sampling statistic was requested.
        Each filter checks that the selection is non-empty before testing membership,
        so an empty selection yields a false filter instead of a failing expression.
    -->
    <outputs>
        <data format="txt" name="log" label="Flux Simulation - Log" />
        
        <!-- Flux tables, collected from the flux_simulation directory of the job. -->
        <collection name="fluxes_results" type="list" label="Fluxes">
            <filter>sampling_params['sampling_enabled'] and sampling_params['output_types'] and 'fluxes' in sampling_params['output_types']</filter>
            <discover_datasets name="collection" pattern="__name_and_ext__" directory="flux_simulation"/>
        </collection>
        
        <data format="tabular" name="mean_results" label="Mean">
            <filter>sampling_params['sampling_enabled'] and sampling_params['output_types'] and 'mean' in sampling_params['output_types']</filter>
        </data>
        
        <data format="tabular" name="median_results" label="Median">
            <filter>sampling_params['sampling_enabled'] and sampling_params['output_types'] and 'median' in sampling_params['output_types']</filter>
        </data>
        
        <data format="tabular" name="quantile_results" label="Quantiles">
            <filter>sampling_params['sampling_enabled'] and sampling_params['output_types'] and 'quantiles' in sampling_params['output_types']</filter>
        </data>
        
        <data format="tabular" name="fva_results" label="FVA">
            <filter>output_types_analysis and 'FVA' in output_types_analysis</filter>
        </data>
        
        <data format="tabular" name="pFBA_results" label="pFBA">
            <filter>output_types_analysis and 'pFBA' in output_types_analysis</filter>
        </data>
        
        <data format="tabular" name="sensitivity_results" label="Sensitivity">
            <filter>output_types_analysis and 'sensitivity' in output_types_analysis</filter>
        </data>
    </outputs>

    <help>
    <![CDATA[
Overview
-------------

This tool generates flux samples from a set of metabolic models. It supports two input modes:
    - Model + bounds: upload a single base model and multiple bound files (one per context/cell type)
    - Multiple complete models: upload multiple model files, each already containing its own bounds.

In the first mode:
    - the base model must be in tabular format as produced by the Import Metabolic Model tool.
    - the bound files should come from the RAStoBounds tool.

In the second mode, provide a separate tabular file for each complete model.

Available analyses
------------------

Two types of analysis are available:
    - flux optimization
    - flux sampling

For flux optimization, one of the following methods can be performed:
    - parsimonious-FBA
    - Flux Variability Analysis
    - Biomass sensitivity analysis (single reaction knock-out)

The objective function — a linear combination of fluxes weighted by specific coefficients — depends on the provided metabolic network.

For flux sampling, one of the following algorithms can be used:
    - CBS (Corner-based sampling)
    - OPTGP (Improved Artificial Centering Hit-and-Run sampler)


Output:
-------------

The tool produces:
   - Samples: reporting the sampled fluxes for each reaction (reaction names on the rows and sample names on the columns). Format: tab-separated.
   - a log file (.txt).

The sampled fluxes can be reported as full datasets or summarized using descriptive statistics such as mean, median, and quantiles (0.25, 0.50, 0.75), computed for each model.

**TIP**: Bounds generated by RAStoBounds are grouped in a collection. You can select a collection by clicking the "Dataset Collection" option in the "Bound file(s):" input parameter.

**TIP**: The Batches parameter helps maintain memory efficiency. For example, for 10,000 samples, use n_samples=1,000 and n_batches=10.

**TIP**: Adjust the Thinning parameter to improve convergence toward the stationary distribution.
Common thinning values are 100 or 1000. Note that if you set thinning = 100 and request 100 samples, the total number of generated flux samples will be 100 * 100 = 10,000.

**TIP**: The FVA optimality fraction lets you explore suboptimal flux ranges. A value of 1.0 (100%) restricts the analysis to optimal solutions, while lower values (e.g., 0.90) allow broader flux ranges.
]]>
    </help>
    <expand macro="citations_fluxes" />
</tool>