comparison ACF/analytic_correlation_filtration.xml @ 0:d03fcbeb0a77 draft

Uploaded
author melpetera
date Fri, 18 Oct 2019 04:59:51 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d03fcbeb0a77
1 <tool id="Analytic_correlation_filtration" name="Analytic correlation filtration" version="2019-06-20">
2 <description>
3 : Detect analytic correlation among data and remove them.
4 </description>
5
6
7 <command><![CDATA[
8 ## One call to the Perl script is assembled here: the interpreter and script path are written once,
9 ## then exactly one branch below adds the input files, the -o mode and the thresholds.
10 perl "$__tool_directory__/Analytic_correlation_filtration.pl"
11 ## -o 1: mass differences not used; -o 2: mass list + RT threshold; -o 3: mass list only. The list is the uploaded file or data/default_list.csv.
12 ## With -o 1 and RT filtering switched off, -rt receives the fixed value 9999999999 (presumably "no limit" - confirm in the Perl script).
13 #if str($mass_file.mass_choice)=="false":
14 #if str($rt_cond.rt_choice)=="false":
15 -f "$file_in" -o 1 -d "$dataMatrix_in" -v "$variableMetadata_in" -rt 9999999999
16 #else:
17 -f "$file_in" -o 1 -d "$dataMatrix_in" -v "$variableMetadata_in" -rt "$rt_cond.rt_threshold"
18 #end if
19 #else:
20 #if str($mass_file.liste.mass_list)=="true":
21 #if str($rt_cond.rt_choice)=="true":
22 -f "$file_in" -m "$mass_file.liste.mass_file_in" -o 2 -d "$dataMatrix_in" -v "$variableMetadata_in" -rt "$rt_cond.rt_threshold" -mass "$mass_file.mass_threshold"
23 #end if
24 #if str($rt_cond.rt_choice)=="false":
25 -f "$file_in" -m "$mass_file.liste.mass_file_in" -o 3 -d "$dataMatrix_in" -v "$variableMetadata_in" -mass "$mass_file.mass_threshold"
26 #end if
27 #else:
28 #if str($rt_cond.rt_choice)=="true":
29 -f "$file_in" -m "$__tool_directory__/data/default_list.csv" -o 2 -d "$dataMatrix_in" -v "$variableMetadata_in" -rt "$rt_cond.rt_threshold" -mass "$mass_file.mass_threshold"
30 #end if
31 #if str($rt_cond.rt_choice)=="false":
32 -f "$file_in" -m "$__tool_directory__/data/default_list.csv" -o 3 -d "$dataMatrix_in" -v "$variableMetadata_in" -mass "$mass_file.mass_threshold"
33 #end if
34 #end if
35 #end if
36 ## Options shared by every mode: representative-ion rule, correlation threshold and the two output paths.
37 -r "$repres_opt.repres_opt_selector"
38 ## -IT / -IP are only passed for the max_intensity_max_mass rule.
39 #if str($repres_opt.repres_opt_selector)=="max_intensity_max_mass":
40 -IT $repres_opt.int_threshold
41 -IP $repres_opt.int_percentage
42 #end if
43 -correl "$correl_threshold"
44 -output_sif "$sif_out"
45 -output_tabular "$variableMetadata_out"
46
47 ]]></command>
48
49 <inputs>
50 <param name="file_in" type="data" format="txt" label="Correlation table file" help="The .txt similarity table (you can obtain it by using the Between-table Correlation tool or for exemple the cor() function in R) "/>
51 <param name="dataMatrix_in" type="data" format="tabular" label="dataMatrix file" help=""/>
52 <param name="variableMetadata_in" type="data" format="tabular" label="variableMetadata file" help=""/>
53 <!-- Similarity threshold handed to the script through -correl. -->
54 <param name="correl_threshold" type="float" value="0.90" label="Correlation threshold" help="Define the minimum similarity threshold accepted to determine analytic correlation"/>
55 <!-- Mass-difference filtering: optional, with either an uploaded list or the bundled default list. -->
56 <conditional name="mass_file">
57 <param name="mass_choice" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Do you want to take into account mass differences between 2 ions?" help="'YES' if you want to take it into account; 'NO' if you don't want to take into account mass information"/>
58 <when value="true">
59 <conditional name="liste">
60 <param name="mass_list" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Do you have your own list of mass differences or do you want to use a default list ?" help="'YES' if you have your own list to upload; 'NO' if you want to use a default list"/>
61 <when value="false">
62 <!-- Nothing to upload: the command uses data/default_list.csv from the tool directory. -->
63 </when>
64 <when value="true">
65 <param name="mass_file_in" type="data" format="tabular,csv" label="Mass differences table (format: tabular or csv) " help="The file containing all your report and known mass differences (cf help for file example) "/>
66 </when>
67 </conditional>
68 <param name="mass_threshold" type="float" value="0.005" label="Mass difference range" help="2 ions need to have a difference mass included in the list at +/- mass difference range to be considered as analytically correlated | Value recommendation : 0.005"/>
69 </when>
70 <when value="false">
71 <!-- Mass information not used: no further parameter. -->
72 </when>
73 </conditional>
74 <!-- Retention-time filtering: optional, with a threshold when enabled. -->
75 <conditional name="rt_cond">
76 <param name="rt_choice" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Do you want to take into account retention time differences between 2 ions? " help="'YES' if want to take into account retention time information; 'NO' if you don't want to take into account retention time information"/>
77 <when value="true">
78 <param name="rt_threshold" type="float" value="0.1" label="Retention time difference threshold" help="Choose a retention time difference threshold between 2 ions considered as analytically correlated | Value recommendation : 0.1"/>
79 </when>
80 <when value="false">
81 <!-- Retention time not used: no further parameter. -->
82 </when>
83 </conditional>
84 <!-- Rule used to pick the representative ion of each group (passed through -r). -->
85 <conditional name="repres_opt">
86 <param name="repres_opt_selector" type="select" display="radio" label="Which representative ion do you want to select for each group" help="">
87 <option value="intensity">Highest intensity</option>
88 <option value="mass">Highest mass</option>
89 <option value="mixt">Highest (mass2 x intensity) </option>
90 <option value="max_intensity_max_mass">Highest mass between the 3 highest intensity (following intensity threshold and rules ==> see help) </option>
91 </param>
92 <when value="max_intensity_max_mass">
93 <param name="int_threshold" type="float" value="1000" label="Minimum intensity threshold for the representative ion" help=""/>
94 <param name="int_percentage" type="float" value="0.5" label="Percentage of highest intensity of the group accept for the new representative ion. This option allow to avoid isotope selection. " help="Example: ion A have the highest intensity of a group but not the highest mass, B is an ion that have the second highest intensity in the group and a highest mass than A, to choose B as a representative ion for the group his intensity need to be at list 50% of the A intensity."/>
95 </when>
96 <when value="intensity">
97 </when>
98 <when value="mass">
99 </when>
100 <when value="mixt">
101 </when>
102 </conditional>
103
104 </inputs>
105
106 <outputs>
107 <data name="sif_out" format="sif" label="${file_in.name}_sif"/> <!-- path passed to the script as -output_sif -->
108 <data name="variableMetadata_out" format="tabular" label="${variableMetadata_in.name}_representative_ion"/> <!-- path passed to the script as -output_tabular -->
109 </outputs>
110
111 <help><![CDATA[
112
113 .. class:: infomark
114
115 **Contact** : **Stephanie Monnerie**, **Estelle Pujos-Guillot**
116
117 ---------------------------------------------------
118
119 .. class:: infomark
120
121 **References** :
122
123 ---------------------------------------------------
124
125 -----------
126 Input files
127 -----------
128
129 +-----------------------------------------+---------------+
130 | File | Format |
131 +=========================================+===============+
132 | 1) Similarity matrix | txt |
133 +-----------------------------------------+---------------+
134 | 2) Data matrix | tabular |
135 +-----------------------------------------+---------------+
136 | 3) Variable metadata | tabular |
137 +-----------------------------------------+---------------+
138 | **Optional file** | **Format** |
139 +-----------------------------------------+---------------+
140 | 4) Optional : Mass differences list | csv/tabular |
141 +-----------------------------------------+---------------+
142
143 ---------------------------------------------------
144
145 -------------
146 Files content
147 -------------
148
149 Similarity matrix
150 * File organisation: one line per similarity pair, with the first ion ID, the similarity value and the second ion ID, tab-separated ==> First_Ion_ID \\t Similarity_Value \\t Second_Ion_ID
151 * Example:
152
153 .. image:: similarity_matrix.JPG
154 :width: 800
155
156 Data matrix file
157 * "variable x sample" **dataMatrix** : tabular separated file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the variable metadata (see below)
158
159 Variable metadata file
160 * "variable x metadata" **variableMetadata** tabular separated file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
161
162 .. class:: warningmark
163
164 For more information about input files, refer to the corresponding "W4M HowTo" page:
165 http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4m_TableFormatForGalaxy_150908.pdf
166
167
168 Mass differences list
169 * A file containing list of known adducts, fragments or isotopes with the mass differences linked to them
170 * Example:
171
172 .. image:: Adduct_fragment_list.JPG
173 :width: 350
174
175 ---------------------------------------------------
176
177 ----------
178 Parameters
179 ----------
180
181 Take into account mass differences between 2 ions :
182 * You can enter a list of mass differences that are known. The file must be organized with a first column for the mass difference type (isotope, fragment, etc...), a second column with the mass difference chemical formula (H+, -2H+K, etc...) and a third column for the mass difference value
183 * If you are choosing to use a mass differences table, you have to choose a mass difference range that will be a threshold to accept or not a difference value as true (recognize a mass difference value in the file +/- this threshold).
184
185 Take into account retention time :
186 * You can use retention time as a criterion to group ions. You have to choose a value that will be used as an interval: 2 ions are grouped when their retention times are equal +/- the threshold.
187
188 Choose the representative ion for each group, there are 4 possibilities to determine the representative ion :
189 * The ion with the highest intensity (recommended for LC/MS)
190 * The ion with the highest mass
191 * The ion with the highest "mass2 * intensity" value
192 * The ion with the highest mass among the 3 most intense ions of the group, unless its intensity is below a chosen percentage (for example 50%) of the highest intensity in the group (recommended for GC/MS)
193
194
195 ---------------------------------------------------
196
197 --------------
198 Example of use
199 --------------
200
201 For UPLC/HRMS data, default parameters can be the following:
202 * If a Pearson correlation is used, the default threshold can be set at 0.90
203 * A delta RT of 0.1 min or adjusted depending on chromatographic systems
204 * The use of the list of known adduct/isotope mass differences with a mass delta of 0.005 Da or adjusted depending on MS resolution
205 * The choice of the ion with the highest intensity as the representative ion.
206 For GC/HRMS datasets, we recommend using the same parameters but ignoring the list of mass differences, and choosing the ion with the highest mass among the most intense ions as the representative.
207
208
209
210 ]]></help>
211 </tool>