view XSeekerPreparator.xml @ 20:ce94e7a141bb draft default tip

" master branch Updating"
author lain
date Tue, 06 Dec 2022 10:18:10 +0000
parents 2937e72e5891
children
line wrap: on
line source

<tool id="xseeker_preparator" name="XSeeker Preparator" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01">
    <description>prepares RData file from XCMS+CAMERA for XSeeker</description>
    <!-- Tokens substituted into the tool's version attribute ("@VERSION@+galaxy@VERSION_SUFFIX@"). -->
    <macros>
        <token name="@VERSION@">1.3.1</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <!-- EDAM ontology operation identifiers attached to this tool. -->
    <edam_operations>
        <edam_operation>operation_1812</edam_operation>
        <edam_operation>operation_0335</edam_operation>
    </edam_operations>
    <!--
        Pinned runtime dependencies: R itself, the xcms and CAMERA Bioconductor
        packages, and the R libraries listed below (DBModelR is the library called
        by the base_config model file).
        NOTE(review): git is presumably needed for the "Get versionned model file"
        option; confirm against XSeekerPreparator.R.
    -->
    <requirements>
        <requirement type="package" version="4.1.3">r-base</requirement>
        <requirement type="package" version="3.14.0">bioconductor-xcms</requirement>
        <requirement type="package" version="1.48.0">bioconductor-camera</requirement>
        <requirement type="package" version="2.29.2">git</requirement>
        <requirement type="package" version="1.2.3">r-blob</requirement>
        <requirement type="package" version="1.1.3">r-dbi</requirement>
        <requirement type="package" version="0.9.8">r-fst</requirement>
        <requirement type="package" version="1.6.6">r-optparse</requirement>
        <requirement type="package" version="1.4.0">r-stringr</requirement>
        <requirement type="package" version="0.3.5">r-purrr</requirement>
        <requirement type="package" version="2.2.18">r-rsqlite</requirement>
        <requirement type="package" version="0.2.0">r-dbmodelr</requirement>
    </requirements>
    <!--
        Exit statuses 1 and 2 are reported as warnings rather than failures.
        NOTE(review): Rscript also exits with status 1 on an uncaught R error, which
        the first rule would report as a warning too; confirm that the script uses a
        different status for real failures.
    -->
    <stdio>
        <!-- Status 1: none of the selected samples carried data. -->
        <exit_code range="1" level="warning" description="Selected samples have no data associated to them." />
        <!-- Status 2: only part of the samples carried data. -->
        <exit_code range="2" level="warning" description="Some samples have no data associated to them." />
    </stdio>
    <!-- Runs the wrapped script with -v. NOTE(review): presumably prints the script version; confirm. -->
    <version_command>
        Rscript '$__tool_directory__/XSeekerPreparator.R' -v
    </version_command>
    <!--
        Builds the Rscript invocation of XSeekerPreparator.R.
        Always passed: -P, the input rdata and the output path.
        Passed conditionally:
          * samples: comma-joined list, only when datasets were selected;
          * archetype: comma-joined list of the chosen molecule families;
          * compounds-csv or database: according to the "base" selector
            (nothing is passed for "none");
          * models: the generated base_config file when the model selector is
            "default", otherwise the user-supplied url (same handling for the
            "url" and "git" choices);
          * class: only when a column name was typed in.
        NOTE(review): str.join expects string items; confirm that the elements of
        the multiple data parameter "samples.selected" stringify as intended here
        (file names versus dataset paths).
    -->
    <command>
        <![CDATA[
            Rscript '$__tool_directory__/XSeekerPreparator.R'
                -P
                --input '$input'
                --output '$output'
                #if $samples.selected
                    --samples '${",".join($samples.selected)}'
                #end if
                #if $database.archetypes
                    --archetype '${",".join($database.archetypes)}'
                #end if
                #if $database.base.kind == "tabular"
                    --compounds-csv '${database.base.tabular}'
                #else if $database.base.kind == "sql"
                    --database '${database.base.sql}'
                #end if
                #if $database.models.kind == "default"
                    --models '${base_config}'
                #else
                    --models '${database.models.url}'
                #end if
                #if $class
                    --class '${class}'
                #end if
        ]]>
    </command>
    <!--
        base_config: default database model, handed to the script through the models
        option when the model selector is "default".
        The file is R code. It first checks that DBModelR::ModelDefinition can be
        called (stopping with an installation hint otherwise), then evaluates to a
        named list holding one ModelDefinition per table, with "one" / "many"
        entries naming the related tables.
        Table and field names, including the spellings "alignmenmt_parameters" and
        "curent_group", are schema identifiers. NOTE(review): rename them only
        together with every consumer of the resulting database.
        NOTE(review): the "one" list of "sample" names "pairing_parameters" and
        "camera_parameters", which have no ModelDefinition in this list; confirm
        this is intended.
    -->
    <configfiles>
        <configfile name="base_config">
<![CDATA[
tryCatch({
    DBModelR::ModelDefinition(table="yui", fields=list(yui="INTEGER"))
}, error=function(e) {
    stop("Please, install DBModelR before you source this file.")
})
list(
    adduct = DBModelR::ModelDefinition(
        table = "adduct",
        fields = list(
            name = "TEXT",
            mass = "FLOAT",
            charge = "INTEGER",
            multi = "INTEGER",
            formula_add = "TEXT",
            formula_ded = "TEXT",
            sign = "TEXT",
            oidscore = "INTEGER",
            quasi = "INTEGER",
            ips = "FLOAT"
        )
    ),
    cluster = DBModelR::ModelDefinition(
        table = "cluster",
        fields = list(
            clusterID = "INTEGER",
            formula = "TEXT",
            annotation = "TEXT",
            coeff = "FLOAT",
            r_squared = "FLOAT",
            charge = "INTEGER",
            mean_rt = "FLOAT",
            score = "FLOAT",
            deviation = "FLOAT",
            status = "TEXT",
            # adduct = "TEXT",
            curent_group = "INTEGER",
            pc_group = "INTEGER",
            align_group = "INTEGER",
            xcms_group = "INTEGER"
        ),
        one = list("compound", "adduct"),
        many = list("sample")
    ),
    compound = DBModelR::ModelDefinition(
        table = "compound",
        fields = list(
            name = "TEXT",
            common_name = "TEXT",
            formula = "TEXT",
            charge = "INTEGER",
            date = "TEXT",
            mz = "FLOAT"
        )
    ),
    feature = DBModelR::ModelDefinition(
        table = "feature",
        fields = list(
            featureID = "INTEGER",
            mz = "FLOAT",
            mz_min = "FLOAT",
            mz_max = "FLOAT",
            rt = "FLOAT",
            rt_min = "FLOAT",
            rt_max = "FLOAT",
            int_o = "FLOAT",
            int_b = "FLOAT",
            max_o = "FLOAT",
            iso = "TEXT",
            abundance = "FLOAT"
        ),
        one = list("cluster"),
        many = list("sample")
    ),
    instrument = DBModelR::ModelDefinition(
        table = "instrument",
        fields = list(
            model = "TEXT",
            manufacturer = "TEXT",
            analyzer = "TEXT",
            detector_type = "TEXT",
            ion_source = "TEXT"
        )
    ),
    instrument_config = DBModelR::ModelDefinition(
        table = "instrument_config",
        fields = list(
            resolution = "TEXT",
            agc_target = "TEXT",
            maximum_IT = "TEXT",
            number_of_scan_range = "TEXT",
            scan_range = "TEXT",
            version = "TEXT"
        )
    ),
    project = DBModelR::ModelDefinition(
        table = "project",
        fields = list(
            name = "TEXT",
            comment = "TEXT"
        ),
        one = list("sample")
    ),
    sample = DBModelR::ModelDefinition(
        table = "sample",
        fields = list(
            name = "TEXT",
            path = "TEXT",
            polarity = "TEXT",
            kind = "TEXT", ## rdata or mxml or enriched_rdata
            raw = "BLOB"
        ),
        one = list(
            "peak_picking_parameters",
            "pairing_parameters",
            "alignmenmt_parameters",
            "camera_parameters",
            "instrument",
            "instrument_config",
            "software",
            "smol_xcms_set"
        )
    ),
    smol_xcms_set = DBModelR::ModelDefinition(
        table = "smol_xcms_set",
        fields = list(
            raw = "BLOB"
        )
    ),
    software = DBModelR::ModelDefinition(
        table = "software",
        fields = list(
            name = "TEXT",
            version = "TEXT"
        )
    ),
    peak_picking_parameters = DBModelR::ModelDefinition(
        table = "peak_picking_parameters",
        fields = list(
            ppm = "FLOAT",
            peakwidth = "TEXT",
            snthresh = "TEXT",
            prefilterStep = "TEXT",
            prefilterLevel = "TEXT",
            mzdiff = "TEXT",
            fitgauss = "TEXT",
            noise = "TEXT",
            mzCenterFun = "TEXT",
            integrate = "INTEGER",
            firstBaselineCheck = "TEXT",
            snthreshIsoROIs = "TEXT",
            maxCharge = "INTEGER",
            maxIso = "INTEGER",
            mzIntervalExtension = "TEXT"
        )
    ),
    alignmenmt_parameters = DBModelR::ModelDefinition(
        table = "alignmenmt_parameters",
        fields = list(
            binSize = "TEXT",
            centerSample = "TEXT",
            response = "TEXT",
            distFun = "TEXT",
            gapInit = "TEXT",
            gapExtend = "TEXT",
            factorDiag = "TEXT",
            factorGap = "TEXT",
            localAlignment = "INTEGER",
            initPenalty = "TEXT",
            bw = "TEXT",
            minFraction = "TEXT",
            minSamples = "TEXT",
            maxFeatures = "TEXT"
        )
    )
)
            ]]>
        </configfile>
    </configfiles>
    <inputs>
        <!-- Mandatory single rdata dataset; its path is handed to the script as the input option. -->
        <param
            argument="input"
            type="data"
            format="rdata"
            multiple="false"
            optional="false"
            label="Rdata to prepare"
            help="
                This rdata must be produced by a xcms+camera processing
                and original files must still be in the history.
            "
        />
        <!-- Optional free-text column name; the class option is only emitted when this is non-empty. -->
        <param
            argument="class"
            type="text"
            optional="true"
            value=""
            label="Column class name"
            help="
                The name of the column containing the classes - 
                leave empty to let XSeeker Preparator guess
            "
        />
        <section name="samples" title="Samples Options" expanded="false">
            <!-- Optional subset of mzml datasets; forwarded to the script only when at least one is chosen. -->
            <param name="selected" type="data" format="mzml" multiple="true" optional="true" label="Samples to visualize" />
        </section>
        <section name="database" title="Database Options" expanded="false">
            <!-- Molecule families joined with commas into the archetype option; "G" is preselected. -->
            <param name="archetypes" type="select" multiple="true" label="Molecule family (for database's compounds enrichment)">
                <option value="G" selected="true">General</option>
                <option value="H">Halogenates</option>
            </param>
            <!--
                Source of the compound list given to XSeeker.
                Every selector value maps to exactly one <when> branch, so that
                choosing "sql" always exposes the "sql" dataset parameter which the
                command section reads as database.base.sql. A duplicated, empty
                "sql" branch would shadow that parameter.
                  * none:    nothing is passed to the script;
                  * tabular: tabular dataset(s), passed as the compounds-csv option;
                  * sql:     sql dataset(s), passed as the database option.
            -->
            <conditional name="base">
                <param name="kind" type="select" label="File containing compound's type">
                    <option value="none" selected="true">None (default)</option>
                    <option value="tabular">tabular</option>
                    <option value="sql">sql</option>
                </param>
                <when value="none" />
                <when value="tabular">
                    <param
                        name="tabular"
                        type="data"
                        multiple="true"
                        label="Tabular file containing compound to use in XSeeker"
                        optional="true"
                        format="tabular"
                    >
                    </param>
                </when>
                <when value="sql">
                    <param
                        name="sql"
                        type="data"
                        multiple="true"
                        label="SQL file containing compound to use in XSeeker"
                        optional="true"
                        format="sql"
                    >
                    </param>
                </when>
            </conditional>
            <!--
                Origin of the database model definition.
                "default" makes the command section pass the generated base_config
                file; both other branches expose a text parameter named "url" whose
                value is passed to the models option instead.
                NOTE(review): "format" is normally an attribute of data parameters;
                confirm it has any effect on these text parameters.
            -->
            <conditional name="models">
                <param name="kind" type="select" label="How is the database's model defined">
                    <option value="default" selected="true">Default (regular XSeeker Database)</option>
                    <option value="url">Download model file</option>
                    <option value="git">Get versionned model file</option>
                </param>
                <when value="default" />
                <when value="url">
                    <param name="url" type="text" format="url" label="File URL"/>
                </when>
                <when value="git">
                    <param name="url" type="text" format="url" label="Repo URL"/>
                </when>
            </conditional>
        </section>
    </inputs>
    <!-- Single sqlite dataset; its path is what the command section passes as the output option. -->
    <outputs>
        <data format="sqlite" name="output" />
        <!-- <data format="xseeker.sqlite" name="output" /> -->
    </outputs>
    <help>

.. class:: infomark

**Authors** Lain Pavot (lain.pavot@inrae.fr)

--------------------------------------

==================
XSeeker Preparator
==================

-----------
Description
-----------

A preparation tool that precomputes and reorganizes data from XCMS+CAMERA so that
the XSeeker display tool can show them faster (from 30 minutes or a few hours down to a few seconds).


-----------------
Workflow position
-----------------

**Upstream tools**

================ ======================== =========== ===============
  Name             output file             format       parameter   
================ ======================== =========== ===============
  CAMERA           rdata.camera.quick      rdata                    
================ ======================== =========== ===============


**Downstream tools**

================ ======================== =========== ===============
  Name             output file              format      parameter   
================ ======================== =========== ===============
  XSeeker          sqlite                   sqlite3         NA      
================ ======================== =========== ===============



-----------
Input files
-----------

Takes an rdata file output by CAMERA (after XCMS) as input. The rdata must have
been produced from mzml or mxml files, and these files must still be present
in the history when running XSeeker Preparator:
the rdata contains only part of the data, plus the paths to the original mzml files.
These mzml files contain a lot of useful information needed to prepare
the data for XSeeker. They are not given as input, but are still
needed.

----------
Parameters
----------
Column class name:
 - the name of the column that defines classes in your sample metadata.
Sample options:
 - provide mzml file names to process. Other files defined in the rdata will
   not be processed, and will not be available in xseeker.
Database Options:
 - provide the molecular family you want to annotate, a compound file.
 - The database's model can be redefined - intended for developers only.

------------
Output files
------------
An SQLite3 file is a database file that organizes data in such a way it is
easily stored, filtered, modified, retrieved.
    </help>
    <citations></citations>
</tool>