view metfrag.xml @ 1:9ee2e2ceb2c9 draft

"planemo upload for repository https://github.com/computational-metabolomics/metfrag-galaxy commit 4378d616c184a24ef8cf8671bd7d68d9ce3876ac"
author computational-metabolomics
date Thu, 19 Mar 2020 06:04:18 -0400
parents fd5c0b39569a
children f151ee133612
line wrap: on
line source

<tool id="metfrag" name="MetFrag" version="2.4.5+galaxy2" profile="18.01">
    <!-- Short summary of what the tool does. -->
    <description>
        in silico fragmentor for compound annotation of mass spectrometry fragmentation spectra
    </description>
    <!--
        Pulls in macros.xml. The macros expanded further down in this file (metfrag_scoring and the
        text-alphanumeric validators) are presumably defined there; confirm against macros.xml.
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- The metfrag package is pinned to version 2.4.5, matching the leading part of the tool version. -->
    <requirements>
        <requirement type="package" version="2.4.5">metfrag</requirement>
    </requirements>
    <!--
        Flags the job as an out-of-memory failure when stderr contains "Cannot allocate memory".
        NOTE(review): the command element of this tool also sets detect_errors="exit_code". Depending on
        the Galaxy release, that preset may replace rather than extend these stdio rules; confirm that
        this regex is actually applied on the targeted Galaxy version.
    -->
    <stdio>
        <regex match="Cannot allocate memory"
           source="stderr"
           level="fatal_oom"
           description="Out of memory error occurred" />
    </stdio>
    <!--
        Command template (Cheetah). Runs the metfrag.py script from the tool directory on the selected
        MSP dataset and writes the result table to the "results" output. Options are emitted in the
        order: I/O paths, parallelism, compound database, mass tolerances, scoring and suspect list,
        MSP parsing options, pre-processing filters, post-processing thresholds.
        A non-zero exit code marks the job as failed (detect_errors="exit_code").
    -->
    <command detect_errors="exit_code">
    <![CDATA[
        python '$__tool_directory__/metfrag.py'
            --input_pth '$input'
            --result_pth '$results'
            --temp_dir './temp/'

            ## The slot count is resolved by the shell at run time (the backslash stops Cheetah
            ## from interpreting it); 4 is used when Galaxy does not set it.
            --cores_top_level \${GALAXY_SLOTS:-4}

            --MetFragDatabaseType '$db_select.MetFragDatabaseType'

            ## Only the LocalCSV and MetChem choices carry an extra parameter.
            #if $db_select.MetFragDatabaseType == 'LocalCSV':
                --LocalDatabasePath '$db_select.LocalDatabasePath'
            #elif  $db_select.MetFragDatabaseType == 'MetChem':
                --LocalMetChemDatabaseServerIp '$db_select.LocalMetChemDatabaseServerIp'
            #end if

            --DatabaseSearchRelativeMassDeviation $DatabaseSearchRelativeMassDeviation
            --FragmentPeakMatchRelativeMassDeviation $FragmentPeakMatchRelativeMassDeviation
            --FragmentPeakMatchAbsoluteMassDeviation $FragmentPeakMatchAbsoluteMassDeviation
            --polarity '$polarity'

            --MetFragScoreTypes '$suspectlist.MetFragScoreTypes'
            --MetFragScoreWeights '$suspectlist.MetFragScoreWeights'

            #if $suspectlist.suspectselector == 'includesuspects':
                ## Test the checkbox itself, not the conditional that contains it, so that
                ## unticking "use default list" really selects the user supplied file.
                #if $suspectlist.includesuspects_default_cond.includesuspects_default_bool:
                    --ScoreSuspectLists '$__tool_directory__/UNPD_DB.inchikeys.txt'
                #else
                    ## The custom file lives in the same conditional as the checkbox.
                    --ScoreSuspectLists '$suspectlist.includesuspects_default_cond.ScoreSuspectLists'
                #end if
            #end if

            --meta_select_col $meta_select_col
            --minMSMSpeaks $minMSMSpeaks
            --schema $schema

            ## Boolean inputs render as their flag when ticked and as an empty string otherwise.
            $PreProcessFilter.UnconnectedCompoundFilter
            $PreProcessFilter.IsotopeFilter

            --FilterMinimumElements '$PreProcessFilter.FilterMinimumElements'
            --FilterMaximumElements '$PreProcessFilter.FilterMaximumElements'
            --FilterSmartsInclusionList '$PreProcessFilter.FilterSmartsInclusionList'
            --FilterSmartsExclusionList '$PreProcessFilter.FilterSmartsExclusionList'
            --FilterIncludedElements '$PreProcessFilter.FilterIncludedElements'
            --FilterExcludedElements '$PreProcessFilter.FilterExcludedElements'
            --FilterIncludedExclusiveElements '$PreProcessFilter.FilterIncludedExclusiveElements'

            $skip_invalid_adducts
            --score_thrshld $PostProcessFilter.score_thrshld
            --pctexplpeak_thrshld $PostProcessFilter.pctexplpeak_thrshld

    ]]></command>
    <!--
        User-facing parameters. Every parameter declared here is read by the Cheetah template in the
        command element; conditional and section names form the dotted prefixes used there
        (db_select, suspectlist, PreProcessFilter, PostProcessFilter).
    -->
    <inputs>
        <param name="input" type="data" format="msp" label="MSP file (Output from Create MSP tool)"/>
        <!-- Compound database choice; only LocalCSV and MetChem need one additional input. -->
        <conditional name="db_select">
            <param argument="--MetFragDatabaseType" type="select" label="Choose Compound Database">
                <option value="PubChem" selected="true">PubChem</option>
                <option value="KEGG">KEGG</option>
                <option value="LocalCSV">Local database (csv)</option>
                <option value="MetChem">MetChem</option>
            </param>
            <when value="MetChem">
                <param argument="--LocalMetChemDatabaseServerIp" type="text" label="MetChem URL"/>
            </when>
            <when value="LocalCSV">
                <param argument="--LocalDatabasePath" type="data" format="csv"
                       label="Local database of compounds (CSV format)" />
            </when>
            <when value="KEGG"/>
            <when value="PubChem"/>
        </conditional>
        <!-- Mass tolerances for the candidate search and for fragment peak matching. -->
        <param argument="--DatabaseSearchRelativeMassDeviation" type="float" min="0" value="10"
               label="Relative Mass Deviation for database search (ppm)"
               help="A value in ppm that defines the deviation of theoretical masses in the database
               vs. the measured masses"/>
        <param argument="--FragmentPeakMatchRelativeMassDeviation" type="float" min="0" value="5"
               label="Fragment Peak Match Relative Mass Deviation (ppm)"
               help="Relative mass deviation in ppm of theoretical fragment peaks vs. measured fragment peaks" />
        <param argument="--FragmentPeakMatchAbsoluteMassDeviation" type="float" min="0" value="0.001"
               label="Fragment Peak Match Absolute Mass Deviation (Da)"
               help="Absolute mass deviation in Dalton of theoretical fragment peaks vs. measured fragment peaks" />
        <param argument="--polarity" type="select" label="Polarity"
               help="The polarity used for the mode of acquisition">
            <option value="pos" selected="true">Positive</option>
            <option value="neg">Negative</option>
        </param>
        <!-- How the MSP input is parsed and which metadata ends up as extra output columns. -->
        <param argument="--schema" type="select" label="Schema"
               help="The schema used for the MSP file (auto will try to automatically determine the schema)">
            <option value="auto" selected="true">Auto</option>
            <option value="msp">Generic MSP</option>
            <option value="massbank">MassBank</option>
        </param>
        <param argument="--meta_select_col" type="select"
               label="Choose how additional metadata columns are extracted"
               help="The MetFrag output can have additional meta data columns added, these can be either extracted
               from all MSP parameters or from the 'Name' and 'RECORD_TITLE' MSP parameter. Additionally, columns
               can be added from the 'Name' or 'RECORD_TITLE' parameter by splitting on | and :
               e.g. 'MZ:100.2 | RT:20 | xcms_grp_id:1' would create MZ,RT and xcms_grp_id columns">
            <option value="name" selected="true">Extra metadata columns from the Name or RECORD_TITLE</option>
            <option value="name_split">Extra metadata columns from the Name or RECORD_TITLE (each column is split on "|" and ":" ) </option>
            <option value="all">Extra metadata columns from all MSP parameters</option>
        </param>
        <!--
            Suspect list handling. Both branches expand the metfrag_scoring macro, which is expected to
            supply the MetFragScoreTypes and MetFragScoreWeights parameters read by the command template
            (the macro body is not part of this file; confirm in macros.xml).
        -->
        <conditional name="suspectlist">
            <param name="suspectselector" type="select" label="Suspect list"
                   help="Choose whether to include a suspect list">
                <option value="includesuspects">Include suspect list</option>
                <option value="excludesuspects" selected="true">Do not include suspect list</option>
            </param>
            <when value="includesuspects">
                <!-- Ticked: use the default list; unticked: the user supplies a suspect file. -->
                <conditional name="includesuspects_default_cond">
                    <param name="includesuspects_default_bool" type="boolean"
                           label="Use default list of suspect compounds?"
                           help="Either provide a file containing a list of suspect compounds or a default file
                                 of an aggregated list of in silico predicted MS/MS spectra of natural products
                                 from the Universal Natural Products Database (http://pkuxxj.pku.edu.cn/UNPD/index.php).
                                  The list is an aggregated version of the github repository https://github.com/oolonek/ISDB/tree/master/Data/dbs."/>
                    <when value="true"/>
                    <when value="false">
                        <param argument="--ScoreSuspectLists" type="data" format="txt" optional="true"
                               label="Suspect list file" help="File containing a list of suspects inchikeys" />
                    </when>
                </conditional>
                <expand macro="metfrag_scoring"/>
            </when>
            <when value="excludesuspects">
                <expand macro="metfrag_scoring" suspectlistscore="False" weights="1.0,1.0"/>
            </when>
        </conditional>
        <param argument="--minMSMSpeaks" type="integer" label="Minimum number of MS/MS peaks" value="0"/>
        <param argument="--skip_invalid_adducts" type="boolean" label="Skip invalid or undefined adduct types?"
               truevalue="--skip_invalid_adducts" falsevalue="" checked="true"
               help="If no adduct type is provided within the MSP file or if the adduct type is not usable
                     with MetFrag, set to 'yes' if these spectra should be skipped or 'no' if the default
                     of [M+H]+ for pos data or [M-H]- for neg data should be used"/>
        <!-- Candidate filters applied before scoring; the two booleans render as bare flags. -->
        <section name="PreProcessFilter" title="PreProcessing filters" expanded="false">
            <param argument="--UnconnectedCompoundFilter" type="boolean" checked="false"
                   truevalue="--UnconnectedCompoundFilter" falsevalue=""
                   label="filter non-connected compounds (e.g. salts)" help=""/>
            <param argument="--IsotopeFilter" type="boolean" checked="false" truevalue="--IsotopeFilter"
                   falsevalue="" label="filter compounds containing non-standard isotopes" help=""/>
            <param argument="--FilterMinimumElements" type="text"
                   optional="true" label="Minimum Elements Filter"
                   help="Filter by minimum of contained elements. Ex: N2O3 include compounds with at least
                         2 nitrogens and 3 oxygens">
                <expand macro="text-alphanumeric-regex-validator"/>
            </param>
            <param argument="--FilterMaximumElements" type="text"
                   optional="true" label="Maximum Elements Filter"
                   help="Filter by maximum of contained elements. Ex: N5O7 filter out compounds with at
                         maximum 5 nitrogens and 7 oxygens">
                <expand macro="text-alphanumeric-regex-validator"/>
            </param>
            <param argument="--FilterSmartsInclusionList" type="text"
                   optional="true" label="Include substructures"
                   help="Filter by presence of defined sub-structures. Ex: c1ccccc1 include compounds
                         containing benzene"/>
            <param argument="--FilterSmartsExclusionList" type="text"
                   optional="true" label="Exclude substructures"
                   help="Filter by absence of defined sub-structures. Ex: [OX2H] filter out compounds
                         containing hydroxyl groups"/>
            <param argument="--FilterIncludedElements" type="text"
                   optional="true" label="Include elements"
                   help="Filter by presence of defined elements (other elements are allowed).
                         Ex: 'N,O' include compounds containing nitrogen and oxygen" >
                <expand macro="text-alphanumeric-comma-regex-validator"/>
            </param>
            <param argument="--FilterIncludedExclusiveElements" type="text"
                   optional="true" label="Include elements (exclusive)"
                   help="Filter by presence of defined elements (no other elements are allowed).
                         Ex: 'N,O' include compounds only composed of nitrogen and oxygen" >
                <expand macro="text-alphanumeric-comma-regex-validator"/>
            </param>
            <param argument="--FilterExcludedElements" type="text"
                   optional="true" label="Exclude elements"
                   help="Filter by absence of defined elements. Ex: 'Cl,Br' filter out
                         compounds including bromine or chlorine">
                <expand macro="text-alphanumeric-comma-regex-validator"/>
            </param>
        </section>
        <!-- Thresholds applied to the result table after the MetFrag search. -->
        <section name="PostProcessFilter" title="PostProcessing filters" expanded="false">
            <param argument="--score_thrshld" type="float" label="Threshold for score after MetFrag search"
                   max="1" min="0" value="0"/>
            <param argument="--pctexplpeak_thrshld" type="float" label="Minimum percentage of explained peaks"
                   max="100" min="0" value="0"/>
        </section>
    </inputs>
    <!-- Single tabular dataset; the command template hands its path to metfrag.py as the result path. -->
    <outputs>
        <data name="results" format="tabular"/>
    </outputs>
    <!--
        Functional tests. Tests that select LocalCSV search demo_db.csv; tests that do not set
        MetFragDatabaseType fall back to the default option of that select (PubChem).
        Each test sets MetFragDatabaseType at most once so the intended database is unambiguous.
    -->
    <tests>
        <test>
            <!-- Test "massbank" style data format  -->
            <param name="input" value="massbank_format.txt"/>
            <param name="schema" value="massbank"/>
            <param name="skip_invalid_adducts" value="false"/>
            <param name="MetFragDatabaseType" value="LocalCSV"/>
            <param name="LocalDatabasePath" value="demo_db.csv"/>
            <output name="results" file="metfrag_massbank.tabular"/>
        </test>
        <test>
            <!-- Test "generic" style data format  -->
            <param name="input" value="generic_format.msp"/>
            <param name="schema" value="msp"/>
            <param name="skip_invalid_adducts" value="false"/>
            <param name="MetFragDatabaseType" value="LocalCSV"/>
            <param name="LocalDatabasePath" value="demo_db.csv"/>
            <output name="results" file="metfrag_msp.tabular"/>
        </test>
        <test>
            <!-- Test PubChem API with "winter" dataset -->
            <param name="input" value="winter_pos.msp"/>
            <section name="PostProcessFilter">
                <param name="score_thrshld" value="0.9"/>
            </section>
            <param name="MetFragDatabaseType" value="PubChem"/>
            <output name="results" file="winter_pos.tabular"/>
        </test>
        <test>
            <!-- Test actual MassBank data for Glucose -->
            <param name="input" value="RP022611.txt"/>
            <param name="MetFragDatabaseType" value="LocalCSV"/>
            <param name="LocalDatabasePath" value="demo_db.csv"/>
            <output name="results" file="RP022611.tabular"/>
        </test>
        <test>
            <!-- Test actual MassBank data for Glucose (all metadata columns in output) -->
            <param name="input" value="RP022611.txt"/>
            <param name="schema" value="massbank"/>
            <param name="MetFragDatabaseType" value="LocalCSV"/>
            <param name="LocalDatabasePath" value="demo_db.csv"/>
            <param name="meta_select_col" value="all"/>
            <output name="results" file="RP022611_all_col.tabular"/>
        </test>
        <test>
            <!-- Test actual MassBank data for Glucose (include suspect list - default) -->
            <param name="input" value="RP022611.txt"/>
            <param name="schema" value="massbank"/>
            <conditional name="suspectlist">
                <param name="suspectselector" value="includesuspects"/>
                <conditional name="includesuspects_default_cond">
                    <param name="includesuspects_default_bool" value="true"/>
                </conditional>
            </conditional>
            <output name="results" file="RP022611_suspect_default.txt"/>
        </test>
        <test>
            <!-- Test invalid adduct -->
            <param name="input" value="invalid_adduct.msp"/>
            <param name="skip_invalid_adducts" value="true"/>
            <output name="results" file="invalid_adduct_result.txt" ftype="tabular"/>
        </test>
    </tests>
    <help>
-------
MetFrag
-------

Description
-----------

MetFrag is a freely available software for the annotation of high precision tandem mass spectra of metabolites which is
a first and critical step for the identification of a molecule's structure. Candidate molecules of different databases
are fragmented "in silico" and matched against mass to charge values. A score calculated using the fragment peak
matches gives hints to the quality of the candidate spectrum assignment.

Website: http://ipb-halle.github.io/MetFrag/

Parameters
----------

**\1. MSP file**

MSP file created using *Create MSP* tool

**\2a. MetFragDatabaseType (public databases)**

* PubChem

* KEGG


**\2b. MetFragDatabaseType (local CSV file database)**


Custom database file in CSV format with the following structure:

+-------------+------------------+----------+---------------------------------------------+----------------------+---+
| Identifier  | MonoisotopicMass | SMILES   | InChI                                       | Name                 |...|
+-------------+------------------+----------+---------------------------------------------+----------------------+---+
| HMDB0000123 | 75.03202841      | NCC(O)=O | InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5)  | Glycine              |...|
+-------------+------------------+----------+---------------------------------------------+----------------------+---+
| HMDB0002151 | 78.0139355       | CS(C)=O  | InChI=1S/C2H6OS/c1-4(2)3/h1-2H3             | Dimethyl sulfoxide   |...|
+-------------+------------------+----------+---------------------------------------------+----------------------+---+
| ...         | ...              | ...      | ...                                         | ...                  |...|
+-------------+------------------+----------+---------------------------------------------+----------------------+---+


Table continued:

+---+------------------+-----------------------------+------------------+------------+-------------+
|...| MolecularFormula | InChIKey                    | InChIKey1        | InChIKey2  | InChIKey3   |
+---+------------------+-----------------------------+------------------+------------+-------------+
|...| C2H5NO2          | DHMQDGOQFOQNFH-UHFFFAOYSA-N | DHMQDGOQFOQNFH   | UHFFFAOYSA | N           |
+---+------------------+-----------------------------+------------------+------------+-------------+
|...| C2H6OS           | IAZDPXIOMUYVGZ-UHFFFAOYSA-N | IAZDPXIOMUYVGZ   | UHFFFAOYSA | N           |
+---+------------------+-----------------------------+------------------+------------+-------------+
|...| ...              | ...                         | ...              | ...        | ...         |
+---+------------------+-----------------------------+------------------+------------+-------------+



**\2c. MetFragDatabaseType (MetChem)**

MetChem is a modified PubChem database that can be used in place of
performing API calls to the public PubChem instance.

**\3. Database Search Relative Mass Deviation - ppm**

A value in ppm that defines the deviation of theoretical masses in the database vs. the measured masses.

**\4. Fragment Peak Match Relative Mass Deviation - ppm**

Relative mass deviation in ppm of theoretical fragment peaks vs. measured fragment peaks.

**\5. Fragment Peak Match Absolute Mass Deviation (Da)**

Absolute mass deviation in Dalton of theoretical fragment peaks vs. measured fragment peaks.

**\6. Polarity**

The polarity used for the mode of acquisition.

**\7. Schema**

The Schema used by the MSP file (e.g. generic MSP format or MassBank format)

**\8. Suspect list**

Choose whether to include a file containing a list of suspects.

**\9. MetFrag Score Types**

The type of scores MetFrag is using for the calculations. Please do not change the values unless you know what you are doing!

**\10. MetFrag Score Weights**

The weights of the different score types, separated with a comma and without whitespaces. 1.0 means 100%.

**\11. MetFrag Database Type**

Database to choose from.

**\12. minMSMSpeaks**

Minimum number of MS/MS peaks an MS/MS spectrum must contain to be used for the MetFrag calculation

**\13. PreProcessFilter**

Various filters can be performed on the potential compounds prior to predicting the in silico spectra

**\14. PostProcessFilter**

To make the output more manageable, results that fall below certain criteria (a score threshold and a minimum
percentage of explained peaks) can be removed after the MetFrag search

Output
-------

These columns are derived from any metadata in the MSP input file (additional columns can be included if they are recorded in the MSP file)

+-------------+--------------------------------------------+---+
| adduct      | name                                       |...|
+-------------+--------------------------------------------+---+
| [M-H]-      | D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=;   |...|
+-------------+--------------------------------------------+---+
| [M-H]-      | D-Glucose; LC-ESI-QTOF; MS2; CE: 10; R=;   |...|
+-------------+--------------------------------------------+---+
| ...         | ...                                        |...|
+-------------+--------------------------------------------+---+

Table continued (these columns are derived from the MetFrag result):

+---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+
|...| sample_name      | ExplPeaks                                                | FormulasOfExplPeaks                                                                 | ... |
+---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+
|...| 1_metfrag_result | 59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0  | 59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H- | ... |
+---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+
|...| 1_metfrag_result | 59.0138_715.8;71.014_679.7;89.0251_999.0;101.0234_103.0  | 59.0138:[C2H4O2]-H-;71.014:[C3H5O2-H]-H-;89.0251:[C3H6O3]-H-;101.0234:[C4H7O3-H]-H- | ... |
+---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+
|...| ...              | ...                                                      | ...                                                                                 | ... |
+---+------------------+----------------------------------------------------------+-------------------------------------------------------------------------------------+-----+


Table continued (columns are derived from the MetFrag result):

+---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+
|...| FragmenterScore  | FragmenterScore_Values     | IUPACName                                            | Identifier | InChI                                                                           |...|
+---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+
|...| 105.844569063138 | 696.0;1156.0;696.0;1156.0  | 6-(hydroxymethyl)oxane-2,3,4,5-tetrol                |  206       | InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H                        |...|
+---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+
|...| 105.844569063138 | 696.0;1156.0;696.0;1156.0  | (3R,4S,5S,6R)-6-(hydroxymethyl)oxane-2,3,4,5-tetrol  |  5793      | InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1 |...|
+---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+
|...| ...              | ...                        | ...                                                  | ...        | ...                                                                             |...|
+---+------------------+----------------------------+------------------------------------------------------+------------+---------------------------------------------------------------------------------+---+

Table continued (columns are derived from the MetFrag result):


+---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+
|...| NoExplPeaks | NumberPeaksUsed | OfflineMetFusionScore | SMILES                                       | Score            | SuspectListScore | XlogP3 |
+---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+
|...| 4           | 5               | 2.84566828424078      | C(C1C(C(C(C(O1)O)O)O)O)O                     | 1.82678219603441 | 1                | -2.6   |
+---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+
|...| 4           | 5               | 2.84566828424078      | C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O   | 1.82678219603441 | 1                | -2.6   |
+---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+
|...| ...         | ...             | ...                   | ...                                          | ...              | ...              | ...    |
+---+-------------+-----------------+-----------------------+----------------------------------------------+------------------+------------------+--------+


Additional notes
--------------------

The following adducts (and format) are currently supported in the MSP file. The neutral mass is automatically
calculated from the precursor m/z by subtracting the adduct mass:

- '[M+H]+': 1.007276,
- '[M+NH4]+': 18.034374,
- '[M+Na]+': 22.989218,
- '[M+K]+': 38.963158,
- '[M+CH3OH+H]+': 33.033489,
- '[M+ACN+H]+': 42.033823,
- '[M+ACN+Na]+': 64.015765,
- '[M+2ACN+H]+': 83.06037,
- '[M-H]-': -1.007276,
- '[M+Cl]-': 34.969402,
- '[M+HCOO]-': 44.99819,
- '[M-H+HCOOH]-': 44.99819,
- '[M+CH3COO]-': 59.01385,
- '[M-H+CH3COOH]-': 59.01385

Developers and contributors
---------------------------
- **Jordi Capellades (j.capellades.to@gmail.com) - Universitat Rovira i Virgili (SP)**
- **Julien Saint-Vanne (julien.saint-vanne@sb-roscoff.fr) - ABiMS (France)**
- **Tom Lawson (t.n.lawson@bham.ac.uk) - University of Birmingham (UK)**
- **Ralf Weber (r.j.weber@bham.ac.uk) - University of Birmingham (UK)**
- **Kristian Peters (kpeters@ipb-halle.de) - IPB Halle (Germany)**
- **Payam Emami (payam.emami@medsci.uu.se) - Uppsala Universitet (Sweden)**
- **Christoph Ruttkies (christoph.ruttkies@ipb-halle.de) - IPB Halle (Germany)**
    </help>
    <!-- Publication to cite for this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1186/s13321-016-0115-9</citation>
    </citations>
</tool>