<!--
view mzrt_match.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children caba07f41453
line wrap: on
line source
-->

<tool id="secimtools_mzrt_match" name="Mass to Charge Ratio - Retention Time (m/z - RT) Matching" version="@WRAPPER_VERSION@">
    <!-- Description text for the tool (Galaxy displays it alongside the tool name). -->
    <description>across 2 files.</description>
    <!-- Shared definitions are imported from macros.xml. The @...@ tokens and the
         macros expanded in this file are not defined here and presumably come
         from that import (confirm against macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the "requirements" macro, which is defined outside this file. -->
    <expand macro="requirements" />
    <!-- Builds the mzrt_match.py command line from the tool inputs and outputs.
         Every substituted value is wrapped in single quotes so that dataset paths
         or user-typed values (column names, short dataset names) containing spaces
         or shell metacharacters reach the script as exactly one argument.
         A non-zero exit code is treated as a job failure (detect_errors). -->
    <command detect_errors="exit_code"><![CDATA[
mzrt_match.py
--anno1 '$anno1'
--anno2 '$anno2'
--uniqID1 '$uniqID1'
--uniqID2 '$uniqID2'
--mzID1 '$mzID1'
--mzID2 '$mzID2'
--rtID1 '$rtID1'
--rtID2 '$rtID2'
--all '$all'
--matched '$matched'
--unmatched1 '$unmatched1'
--unmatched2 '$unmatched2'
--summary '$summary'
--figure '$figure'
--mzcut '$mz'
--rtcut '$rt'
--name1 '$name1'
--name2 '$name2'
    ]]></command>
    <inputs>
        <!-- The two tab-separated, wide-format tables that are matched against each other. -->
        <param name="anno1" type="data" format="tabular" label="File 1" help="Input dataset 1 in tab-separated wide format. If not tab separated see TIP below."/>
        <param name="anno2" type="data" format="tabular" label="File 2" help="Input dataset 2 in tab-separated wide format. If not tab separated see TIP below."/>

        <!-- Required column names. The empty_field validator rejects a blank value
             before the job is submitted, so no flag is ever passed without a value. -->
        <param name="uniqID1" type="text" value="" optional="false" label="Unique IDs for File 1" help="Name of the column in dataset 1 containing unique IDs.">
            <validator type="empty_field" message="Please provide the unique ID column name for File 1."/>
        </param>
        <param name="uniqID2" type="text" value="" optional="false" label="Unique IDs for File 2" help="Name of the column in dataset 2 containing unique IDs.">
            <validator type="empty_field" message="Please provide the unique ID column name for File 2."/>
        </param>
        <param name="mzID1" type="text" value="" optional="false" label="Mass/Charge column for File 1" help="Name of the column in dataset 1 containing m/z ratios.">
            <validator type="empty_field" message="Please provide the m/z column name for File 1."/>
        </param>
        <param name="mzID2" type="text" value="" optional="false" label="Mass/Charge column for File 2" help="Name of the column in dataset 2 containing m/z ratios.">
            <validator type="empty_field" message="Please provide the m/z column name for File 2."/>
        </param>
        <param name="rtID1" type="text" value="" optional="false" label="Retention Time column for File 1" help="Name of the column in dataset 1 containing RTs.">
            <validator type="empty_field" message="Please provide the RT column name for File 1."/>
        </param>
        <param name="rtID2" type="text" value="" optional="false" label="Retention Time column for File 2" help="Name of the column in dataset 2 containing RTs.">
            <validator type="empty_field" message="Please provide the RT column name for File 2."/>
        </param>

        <!-- Matching windows are numeric, so they are declared as floats: Galaxy then
             rejects non-numeric input in the form instead of letting the script fail.
             Zero is allowed (the help text describes it as an exact match); negative
             widths are not meaningful. The defaults are unchanged. -->
        <param name="mz" type="float" value="0.005" min="0" label="Mass/Charge window" help="Window width for the m/z ratio (Default = 0.005)."/>
        <param name="rt" type="float" value="0.15" min="0" label="Retention Time window" help="Window width for RT (Default = 0.15)."/>

        <!-- Optional short names for the two datasets; defaults are F1 and F2. -->
        <param name="name1" type="text" value="F1" optional="true" label="Dataset 1 name" help="Short name for dataset 1 (By default F1)."/>
        <param name="name2" type="text" value="F2" optional="true" label="Dataset 2 name" help="Short name for dataset 2 (By default F2)."/>
    </inputs>
    <outputs>
        <!-- Five tabular result tables. -->
        <data name="all" format="tabular" label="${tool.name} on ${on_string}: All"/>
        <data name="matched" format="tabular" label="${tool.name} on ${on_string}: Matches"/>
        <data name="unmatched1" format="tabular" label="${tool.name} on ${on_string}: Unmatched 1"/>
        <data name="unmatched2" format="tabular" label="${tool.name} on ${on_string}: Unmatched 2"/>
        <data name="summary" format="tabular" label="${tool.name} on ${on_string}: Summary"/>
        <!-- One PDF figure. -->
        <data name="figure" format="pdf" label="${tool.name} on ${on_string}: Venn"/>
    </outputs>
    <tests>
        <!-- Single regression test. The window and dataset-name parameters are not
             set here, so their declared default values are used. -->
        <test>
            <!-- Input tables and their column names. -->
            <param name="anno1" value="TEST0000_mzrt_first.tsv"/>
            <param name="anno2" value="TEST0000_mzrt_second.tsv"/>
            <param name="uniqID1" value="rowID_first"/>
            <param name="uniqID2" value="rowID_second"/>
            <param name="mzID1" value="MZ_first"/>
            <param name="mzID2" value="MZ_second"/>
            <param name="rtID1" value="RT_first"/>
            <param name="rtID2" value="RT_second"/>
            <!-- Expected tabular outputs. -->
            <output name="all" file="TEST0000_mzrt_match_all.tsv"/>
            <output name="matched" file="TEST0000_mzrt_match_matched.tsv"/>
            <output name="unmatched1" file="TEST0000_mzrt_match_unmatched_first.tsv"/>
            <output name="unmatched2" file="TEST0000_mzrt_match_unmatched_second.tsv"/>
            <output name="summary" file="TEST0000_mzrt_match_summary.tsv"/>
            <!-- The PDF is compared by file size only, within a delta of 10000. -->
            <output name="figure" file="TEST0000_mzrt_match_figure.pdf" compare="sim_size" delta="10000"/>
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

**NOTE:** This tool is primarily intended for matching mass spectrometry data processed using different parameter settings.

Each metabolite (feature) is characterized by a mass to charge (m/z) ratio and retention time (RT).
After raw metabolomics data are processed (such as in mzMine), features are given internal identifiers that are often different for every run or set of parameters, making it difficult or impossible to directly compare results across different parameter settings using the internal identifiers.
However, it is possible to link internal identifiers using the m/z ratio and RT for each feature, since changing parameter settings is expected to result in only minor variations in m/z ratio and RT.
This tool matches two mass spectrometry (MS) datasets generated using different parameter settings in mzMine.

Each file should contain at least three columns:

(1) the m/z ratio,
(2) the RT and
(3) the internal identifier (feature ID).

A feature matches across datasets if the m/z ratio and RT values in both MS datasets fall within a user-defined window surrounding the m/z ratio (m/z window) and RT (RT window).
The size of the windows can be specified by the user - the final window width is 2 times the specified value.

**NOTE:** Since this is a 'many to many' merge where matching occurs within windows around the m/z ratio and the RT, a single internal identifier in one dataset may match many identifiers in the other dataset.

**NOTE:** While initially designed for MS data, this tool could also be used for other types of data where there is a need to match unique identifiers across datasets using values in 2 columns.
A detection window set to zero (0) would provide an exact match.


--------------------------------------------------------------------------------

**Input**

    - Two input datasets are required.

@MZRTFILE@

**Unique Feature ID for File 1**

    - Name of the column in annotation file 1 containing unique IDs.

**Unique Feature ID for File 2**

    - Name of the column in annotation file 2 containing unique IDs.

**Mass/Charge for File 1**

    - Name of the column in annotation file 1 containing m/z ratios.

**Mass/Charge for File 2**

    - Name of the column in annotation file 2 containing m/z ratios.

**Retention Time for File 1**

    - Name of the column in annotation file 1 containing RTs.

**Retention Time for File 2**

    - Name of the column in annotation file 2 containing RTs.

**Mass/Charge window value**

    - Window value for the m/z ratio (Default = 0.005).

**Retention Time window value**

    - Window value for the RT (Default = 0.15).

**File Name 1**

    - A short name to identify your dataset 1.

**File Name 2**

    - A short name to identify your dataset 2.


--------------------------------------------------------------------------------

**Output**

This tool outputs six files:

(1) a TSV All peak combinations file that contains all combinations of possible features between File 1 and File 2.
(2) a TSV Matched peak combinations file that contains only the features that match between File 1 and File 2.
(3) a TSV Unmatched peak combinations file for File 1 that contains the features in File 1 that do not have a match in File 2.
(4) a TSV Unmatched peak combinations file for File 2 that contains the features in File 2 that do not have a match in File 1.
(5) a TSV Summary file that summarizes the matching between File 1 and File 2.
(6) a PDF file containing a set of 3 Venn diagrams to visualize matching between File 1 and File 2.


    ]]></help>
    <!-- Expands the "citations" macro, which is defined outside this file
         (presumably in the imported macros.xml; confirm there). -->
    <expand macro="citations"/>
</tool>