comparison secimtools/mzrt_match.xml @ 0:b54326490b4d draft

Upload 21.3.4.2 release
author malex
date Mon, 08 Mar 2021 20:55:03 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b54326490b4d
1 <tool id="secimtools_mzrt_match" name="Mass to Charge Ratio - Retention Time (m/z - RT) Matching" version="@WRAPPER_VERSION@">
2 <description>across 2 files.</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <!-- Every substituted value is single-quoted so that paths, column names and dataset names containing spaces reach mzrt_match.py as one argument each. --> <command detect_errors="exit_code"><![CDATA[
8 mzrt_match.py
9 --anno1 '$anno1'
10 --anno2 '$anno2'
11 --uniqID1 '$uniqID1'
12 --uniqID2 '$uniqID2'
13 --mzID1 '$mzID1'
14 --mzID2 '$mzID2'
15 --rtID1 '$rtID1'
16 --rtID2 '$rtID2'
17 --all '$all'
18 --matched '$matched'
19 --unmatched1 '$unmatched1'
20 --unmatched2 '$unmatched2'
21 --summary '$summary'
22 --figure '$figure'
23 --mzcut '$mz'
24 --rtcut '$rt'
25 --name1 '$name1'
26 --name2 '$name2'
27 ]]></command>
28 <inputs> <!-- Two tabular datasets, the column names that locate ID / m/z / RT in each, the two matching windows, and short display names. Column names are required (empty_field validators); windows are validated as non-negative floats. -->
29 <param name="anno1" type="data" format="tabular" label="File 1" help="Input dataset 1 in tab-separated wide format. If not tab separated see TIP below."/>
30 <param name="anno2" type="data" format="tabular" label="File 2" help="Input dataset 2 in tab-separated wide format. If not tab separated see TIP below."/>
31 <param name="uniqID1" type="text" size="30" value="" optional="false" label="Unique IDs for File 1" help="Name of the column in dataset 1 containing unique IDs."><validator type="empty_field" message="A column name is required."/></param>
32 <param name="uniqID2" type="text" size="30" value="" optional="false" label="Unique IDs for File 2" help="Name of the column in dataset 2 containing unique IDs."><validator type="empty_field" message="A column name is required."/></param>
33 <param name="mzID1" type="text" size="30" value="" optional="false" label="Mass/Charge column for File 1" help="Name of the column in dataset 1 containing m/z ratios."><validator type="empty_field" message="A column name is required."/></param>
34 <param name="mzID2" type="text" size="30" value="" optional="false" label="Mass/Charge column for File 2" help="Name of the column in dataset 2 containing m/z ratios."><validator type="empty_field" message="A column name is required."/></param>
35 <param name="rtID1" type="text" size="30" value="" optional="false" label="Retention Time column for File 1" help="Name of the column in dataset 1 containing RTs."><validator type="empty_field" message="A column name is required."/></param>
36 <param name="rtID2" type="text" size="30" value="" optional="false" label="Retention Time column for File 2" help="Name of the column in dataset 2 containing RTs."><validator type="empty_field" message="A column name is required."/></param>
37 <param name="mz" type="float" value="0.005" min="0" optional="true" label="Mass/Charge window" help="Window width for the m/z ratio (Default = 0.005)."/>
38 <param name="rt" type="float" value="0.15" min="0" optional="true" label="Retention Time window" help="Window width for RT (Default = 0.15)."/>
39 <param name="name1" type="text" size="30" value="F1" optional="true" label="Dataset 1 name" help="Short name for dataset 1 (By default F1)."/>
40 <param name="name2" type="text" size="30" value="F2" optional="true" label="Dataset 2 name" help="Short name for dataset 2 (By default F2)."/>
41 </inputs>
42 <outputs> <!-- Five tabular result tables plus one PDF figure; each name matches the identically named option on the command line. -->
43 <data name="all" format="tabular" label="${tool.name} on ${on_string}: All" />
44 <data name="matched" format="tabular" label="${tool.name} on ${on_string}: Matches" />
45 <data name="unmatched1" format="tabular" label="${tool.name} on ${on_string}: Unmatched 1" />
46 <data name="unmatched2" format="tabular" label="${tool.name} on ${on_string}: Unmatched 2" />
47 <data name="summary" format="tabular" label="${tool.name} on ${on_string}: Summary" />
48 <data name="figure" format="pdf" label="${tool.name} on ${on_string}: Venn" />
49 </outputs>
50 <tests>
51 <test> <!-- Leaves mz, rt, name1 and name2 unset so their declared defaults are used; the PDF figure is compared with sim_size (delta 10000) rather than by content. -->
52 <param name="anno1" value="TEST0000_mzrt_first.tsv"/>
53 <param name="anno2" value="TEST0000_mzrt_second.tsv"/>
54 <param name="uniqID1" value="rowID_first"/>
55 <param name="uniqID2" value="rowID_second"/>
56 <param name="mzID1" value="MZ_first"/>
57 <param name="mzID2" value="MZ_second"/>
58 <param name="rtID1" value="RT_first"/>
59 <param name="rtID2" value="RT_second"/>
60 <output name="all" file="TEST0000_mzrt_match_all.tsv"/>
61 <output name="matched" file="TEST0000_mzrt_match_matched.tsv"/>
62 <output name="unmatched1" file="TEST0000_mzrt_match_unmatched_first.tsv"/>
63 <output name="unmatched2" file="TEST0000_mzrt_match_unmatched_second.tsv"/>
64 <output name="summary" file="TEST0000_mzrt_match_summary.tsv"/>
65 <output name="figure" file="TEST0000_mzrt_match_figure.pdf" compare="sim_size" delta="10000"/>
66 </test>
67 </tests>
68 <help><![CDATA[
69
70 @TIP_AND_WARNING@
71
72 **Tool Description**
73
74 **NOTE:** This tool is primarily intended for matching mass spectrometry data processed using different parameter settings.
75
76 Each metabolite (feature) is characterized by a mass to charge (m/z) ratio and retention time (RT).
77 After raw metabolomics data are processed (such as in mzMine), features are given internal identifiers that are often different for every run or set of parameters, making it very difficult to impossible to directly compare results across different parameter settings using the internal identifiers.
78 However, it is possible to link internal identifiers using the m/z ratio and RT for each feature since changing parameter settings are predicted to result in only minor variations in m/z ratio and RT.
79 This tool matches two mass spectrometry (MS) datasets generated using different parameter settings in mzMine.
80
81 Each file should contain at least three columns:
82
83 (1) the m/z ratio,
84 (2) the RT and
85 (3) the internal identifier (feature ID).
86
87 A feature matches across datasets if the m/z ratio and RT values in both MS datasets fall within a user defined window surrounding the m/z ratio (m/z window) and RT (RT window).
88 The size of the windows can be specified by the user - the final window width is 2 times the specified value.
89
90 **NOTE:** Since this is a 'many to many' merge where matching occurs within windows around the m/z ratio and the RT, a single internal identifier in one dataset may match many identifiers in the other dataset.
91
92 **NOTE:** While initially designed for MS data, this tool could also be used for other types of data where there is a need to match unique identifiers across datasets using values in 2 columns.
93 A detection window set to zero (0) would provide an exact match.
94
95
96 --------------------------------------------------------------------------------
97
98 **Input**
99
100 - Two input datasets are required.
101
102 @MZRTFILE@
103
104 **Unique Feature ID for File 1**
105
106 - Name of the column in annotation file 1 containing unique IDs.
107
108 **Unique Feature ID for File 2**
109
110 - Name of the column in annotation file 2 containing unique IDs.
111
112 **Mass/Charge for File 1**
113
114 - Name of the column in annotation file 1 containing m/z ratios.
115
116 **Mass/Charge for File 2**
117
118 - Name of the column in annotation file 2 containing m/z ratios.
119
120 **Retention Time for File 1**
121
122 - Name of the column in annotation file 1 containing RTs.
123
124 **Retention Time for File 2**
125
126 - Name of the column in annotation file 2 containing RTs.
127
128 **Mass/Charge window value**
129
130 - Window value for the m/z ratio (Default = 0.005).
131
132 **Retention Time window value**
133
134 - Window value for the RT (Default = 0.15).
135
136 **File Name 1**
137
138 - A short name to identify your dataset 1.
139
140 **File Name 2**
141
142 - A short name to identify your dataset 2.
143
144
145 --------------------------------------------------------------------------------
146
147 **Output**
148
149 This tool outputs six files:
150
151 (1) a TSV All peak combinations file that contains all combinations of possible features between File 1 and File 2.
152 (2) a TSV Matched peak combinations file that contains only the features that match between File 1 and File 2.
153 (3) a TSV Unmatched peak combinations in file1 that contains the features in File 1 that do not have a match in File 2.
154 (4) a TSV Unmatched peak combinations in file2 that contains the features in File 2 that do not have a match in File 1.
155 (5) a TSV Summary file that summarizes the matching between File 1 and File 2.
156 (6) a PDF file containing a set of 3 Venn diagrams to visualize matching between File 1 and File 2.
157
158 ]]></help>
159 <expand macro="citations"/>
160 </tool>