0
|
1
|
|
2
|
|
3 TOOL_NAME <- "XSeekerPreparator"
|
13
|
4 VERSION <- "1.2.1"
|
0
|
5
|
|
6 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy"
|
|
7
|
|
8 ENRICHED_RDATA_VERSION <- paste("1.1.2", OUTPUT_SPECIFIC_TOOL, sep="-")
|
|
9 ENRICHED_RDATA_DOC <- sprintf("
|
|
10 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
|
|
11 This doc was generated by the tool: %s - Version %s
|
|
12 To show the different variables contained in this rdata, type:
|
|
13 - `load('this_rdata.rdata', rdata_env <- new.env())`
|
|
14 - `names(rdata_env)`
|
|
15
|
|
16 Sections
|
|
17 ######
|
|
18
|
|
19
|
|
20 This tools helpers
|
|
21 ------
|
|
22 The version number is somewhat special because the evolution of the
|
|
23 rdata's format is non-linear.
|
|
24 There may be different branches, each evolving separatly.
|
|
25 To reflect these branches's diversions, there may be a prepended
|
|
26 branch name following this format:
|
|
27 major.minor.patch-branch_name
|
|
28 Like this, we can process rdata with the same tool, and output
|
|
29 rdata formated differently, for each tool.
|
|
30
|
|
31
|
|
32 - enriched_rdata:
|
|
33 - Description: flag created by that tool to tell it was enriched.
|
|
34 - Retrieval method: enriched_rdata <- TRUE
|
|
35
|
|
36 - enriched_rdata_version:
|
|
37 - Description: A flag created by that tool to tell which version of
|
|
38 this tool has enriched the rdata.
|
|
39 - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION)
|
|
40
|
|
41 - enriched_rdata_doc:
|
|
42 - Description: Contains the documentation string.
|
|
43
|
|
44 Data from original mzxml file
|
|
45 ------
|
|
46 - tic:
|
|
47 - Description: Those are the tic values from the original mzxml
|
|
48 file, extracted using xcms 2.
|
|
49 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@tic
|
|
50 - xcms version: 2.0
|
|
51
|
|
52 - mz:
|
|
53 - Description: Those are the m/z values from the original mzxml
|
|
54 file, extracted using xcms 2.
|
|
55 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$mz
|
|
56 - xcms version: 2.0
|
|
57
|
|
58 - scanindex:
|
|
59 - Description: Those are the scanindex values from the original mzxml
|
|
60 file, extracted using xcms 2.
|
|
61 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scanindex
|
|
62 - xcms version: 2.0
|
|
63
|
|
64 - scantime:
|
|
65 - Description: Those are the scantime values from the original mzxml
|
|
66 file, extracted using xcms 2.
|
|
67 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@scantime
|
|
68 - xcms version: 2.0
|
|
69
|
|
70 - intensity:
|
|
71 - Description: Those are the intensity values from the original mzxml
|
|
72 file, extracted using xcms 2.
|
|
73 - Retrieval method: xcms::xcmsRaw('original_file.mzxml')@env$intensity
|
|
74 - xcms version: 2.0
|
|
75
|
|
76 - polarity:
|
|
77 - Description: Those are the polarity values from the original mzxml
|
|
78 file, extracted using xcms 2.
|
|
79 - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]])
|
|
80 - xcms version: 2.0
|
|
81
|
|
82 Data taken from incoming rdata
|
|
83 ------
|
|
84 - variableMetadata:
|
|
85 - Description: Unmodified copy of variableMetadata from incoming rdata.
|
|
86 - Retrieval method: rdata_file$variableMetadata
|
|
87
|
|
88 - process_params:
|
|
89 - Description: Those are the processing parameters values from the
|
|
90 curent rdata. They have been simplified to allow easy access like:
|
|
91 for (params in process_params) {
|
|
92 if (params[[\"xfunction\"]] == \"annotatediff\") {
|
|
93 process_peak_picking_params(params)
|
|
94 }
|
|
95 }
|
|
96 - Retrieval method:
|
|
97 ## just he same list, but simplified
|
|
98 process_params <- list()
|
|
99 for (list_name in names(rdata_file$listOFlistArguments)) {
|
|
100 param_list <- list()
|
|
101 for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) {
|
|
102 param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]]
|
|
103 }
|
|
104 process_params[[length(process_params)+1]] <- param_list
|
|
105 }
|
|
106 ", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION)
|
|
107
|
|
108
|
|
109
|
|
110 get_models <- function(path) {
|
|
111 if (is.null(path)) {
|
|
112 stop("No models to define the database schema")
|
|
113 } else {
|
|
114 message(sprintf("Loading models from %s", path))
|
|
115 }
|
|
116 ## galaxy mangles the "@" to a "__at__"
|
|
117 if (substr(path, 1, 9) == "git__at__") {
|
|
118 path <- sub("^git__at__", "git@", path, perl=TRUE)
|
|
119 }
|
|
120 if (
|
|
121 substr(path, 1, 4) == "git@"
|
|
122 || substr(path, length(path)-4, 4) == ".git"
|
|
123 ) {
|
|
124 return (get_models_from_git(path))
|
|
125 }
|
|
126 if (substr(path, 1, 4) == "http") {
|
|
127 return (get_models_from_url(path))
|
|
128 }
|
|
129 return (source(path)$value)
|
|
130 }
|
|
131
|
|
132 get_models_from_git <- function (url, target_file="models.R", rm=TRUE) {
|
|
133 tmp <- tempdir()
|
|
134 message(sprintf("Cloning %s", url))
|
|
135 system2("git", c("clone", url, tmp))
|
|
136 result <- search_tree(file.path(tmp, dir), target_file)
|
|
137 if (!is.null(result)) {
|
|
138 models <- source(result)$value
|
|
139 if (rm) {
|
|
140 unlink(tmp, recursive=TRUE)
|
|
141 }
|
|
142 return (models)
|
|
143 }
|
|
144 if (rm) {
|
|
145 unlink(tmp, recursive=TRUE)
|
|
146 }
|
|
147 stop(sprintf(
|
|
148 "Could not find any file named \"%s\" in this repo",
|
|
149 target_file
|
|
150 ))
|
|
151 }
|
|
152
|
|
153 get_models_from_url <- function (url, target_file="models.R", rm=TRUE) {
|
|
154 tmp <- tempdir()
|
|
155 message(sprintf("Downloading %s", url))
|
|
156 result <- file.path(tmp, target_file)
|
|
157 if (download.file(url, destfile=result) == 0) {
|
|
158 models <- source(result)$value
|
|
159 if (rm) {
|
|
160 unlink(tmp, recursive=TRUE)
|
|
161 }
|
|
162 return (models)
|
|
163 }
|
|
164 if (rm) {
|
|
165 unlink(tmp, recursive=TRUE)
|
|
166 }
|
|
167 stop("Could not download any file at this adress.")
|
|
168 }
|
|
169
|
|
170 search_tree <- function(path, target) {
|
|
171 target <- tolower(target)
|
|
172 for (file in list.files(path)) {
|
|
173 if (is.dir(file)) {
|
|
174 result <- search_tree(file.path(path, file), target)
|
|
175 if (!is.null(result)) {
|
|
176 return (result)
|
|
177 }
|
|
178 } else if (tolower(file) == target) {
|
|
179 return (file.path(path, file))
|
|
180 }
|
|
181 }
|
|
182 return (NULL)
|
|
183 }
|
|
184
|
|
185 create_database <- function(orm) {
|
|
186 orm$recreate_database(no_exists=FALSE)
|
|
187 set_database_version(orm, "created")
|
|
188 }
|
|
189
|
|
190 insert_adducts <- function(orm) {
|
|
191 message("Creating adducts...")
|
|
192 adducts <- list(
|
|
193 list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"),
|
|
194 list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"),
|
|
195 list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"),
|
|
196 list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"),
|
|
197 list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
|
|
198 list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"),
|
|
199 list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"),
|
|
200 list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
|
|
201 list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"),
|
|
202 list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"),
|
|
203 list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
|
|
204 list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"),
|
|
205 list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"),
|
|
206 list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"),
|
|
207 list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"),
|
|
208 list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"),
|
|
209 list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"),
|
|
210 list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"),
|
|
211 list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"),
|
|
212 list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"),
|
|
213 list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"),
|
|
214 list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"),
|
|
215 list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"),
|
|
216 list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
|
|
217 list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"),
|
|
218 list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"),
|
|
219 list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"),
|
|
220 list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"),
|
|
221 list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"),
|
|
222 list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
|
|
223 list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"),
|
|
224 list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"),
|
|
225 list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"),
|
|
226 list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
|
|
227 list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"),
|
|
228 list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"),
|
|
229 list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"),
|
|
230 list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"),
|
|
231 list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
|
|
232 list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"),
|
|
233 list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"),
|
|
234 list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"),
|
|
235 list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"),
|
|
236 list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"),
|
|
237 list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
|
|
238 list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"),
|
|
239 list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"),
|
|
240 list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
|
|
241 list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"),
|
|
242 list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"),
|
|
243 list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
|
|
244 list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"),
|
|
245 list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"),
|
|
246 list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"),
|
|
247 list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"),
|
|
248 list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
|
|
249 list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"),
|
|
250 list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"),
|
|
251 list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
|
|
252 list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"),
|
|
253 list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"),
|
|
254 list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
|
|
255 list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"),
|
|
256 list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"),
|
|
257 list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"),
|
|
258 list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
|
|
259 list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"),
|
|
260 list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"),
|
|
261 list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"),
|
|
262 list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"),
|
|
263 list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
|
|
264 list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"),
|
|
265 list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"),
|
|
266 list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
|
|
267 list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"),
|
|
268 list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"),
|
|
269 list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"),
|
|
270 list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
|
|
271 list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"),
|
|
272 list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"),
|
|
273 list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"),
|
|
274 list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"),
|
|
275 list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"),
|
|
276 list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"),
|
|
277 list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
|
|
278 list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"),
|
|
279 list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"),
|
|
280 list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"),
|
|
281 list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"),
|
|
282 list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"),
|
|
283 list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"),
|
|
284 list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"),
|
|
285 list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"),
|
|
286 list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"),
|
|
287 list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"),
|
|
288 list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"),
|
|
289 list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"),
|
|
290 list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"),
|
|
291 list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"),
|
|
292 list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"),
|
|
293 list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"),
|
|
294 list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"),
|
|
295 list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"),
|
|
296 list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"),
|
|
297 list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"),
|
|
298 list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0")
|
|
299 )
|
|
300 dummy_adduct <- orm$adduct()
|
|
301 for (adduct in adducts) {
|
|
302 i <- 0
|
|
303 dummy_adduct$set_name(adduct[[i <- i+1]])
|
|
304 dummy_adduct$set_multi(adduct[[i <- i+1]])
|
|
305 dummy_adduct$set_charge(adduct[[i <- i+1]])
|
|
306 dummy_adduct$set_mass(adduct[[i <- i+1]])
|
|
307 dummy_adduct$set_oidscore(adduct[[i <- i+1]])
|
|
308 dummy_adduct$set_quasi(adduct[[i <- i+1]])
|
|
309 dummy_adduct$set_ips(adduct[[i <- i+1]])
|
|
310 dummy_adduct$set_formula_add(adduct[[i <- i+1]])
|
|
311 dummy_adduct$set_formula_ded(adduct[[i <- i+1]])
|
11
|
312 invisible(dummy_adduct$save())
|
0
|
313 dummy_adduct$clear(unset_id=TRUE)
|
|
314 }
|
|
315 message("Adducts created")
|
|
316 }
|
|
317
|
|
318 insert_base_data <- function(orm, path, archetype=FALSE) {
|
|
319 if (archetype) {
|
|
320 ## not implemented yet
|
|
321 return ()
|
|
322 }
|
|
323 base_data <- readLines(path)
|
|
324 for (sql in strsplit(paste(base_data, collapse=" "), ";")[[1]]) {
|
|
325 orm$execute(sql)
|
|
326 }
|
|
327 set_database_version(orm, "enriched")
|
|
328 }
|
|
329
|
|
330 insert_compounds <- function(orm, compounds_path) {
|
|
331 compounds <- read.csv(file=compounds_path, sep="\t")
|
|
332 if (is.null(compounds <- translate_compounds(compounds))) {
|
|
333 stop("Could not find asked compound's attributes in csv file.")
|
|
334 }
|
|
335 dummy_compound <- orm$compound()
|
|
336 compound_list <- list()
|
|
337 for (i in seq_len(nrow(compounds))) {
|
|
338 dummy_compound$set_mz(compounds[i, "mz"])
|
|
339 dummy_compound$set_name(compounds[i, "name"])
|
|
340 dummy_compound$set_common_name(compounds[i, "common_name"])
|
|
341 dummy_compound$set_formula(compounds[i, "formula"])
|
|
342 compound_list[[length(compound_list)+1]] <- as.list(
|
|
343 dummy_compound,
|
|
344 c("mz", "name", "common_name", "formula")
|
|
345 )
|
|
346 dummy_compound$clear(unset_id=TRUE)
|
|
347 }
|
11
|
348 invisible(dummy_compound$save(bulk=compound_list))
|
0
|
349 }
|
|
350
|
|
351 translate_compounds <- function(compounds) {
|
|
352 recognized_headers <- list(
|
|
353 c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey")
|
|
354 )
|
|
355 header_translators <- list(
|
|
356 hmdb_header_translator
|
|
357 )
|
|
358 for (index in seq_along(recognized_headers)) {
|
|
359 headers <- recognized_headers[[index]]
|
|
360 if (identical(colnames(compounds), headers)) {
|
|
361 return (header_translators[[index]](compounds))
|
|
362 }
|
|
363 }
|
|
364 if (is.null(translator <- guess_translator(colnames(compounds)))) {
|
|
365 return (NULL)
|
|
366 }
|
|
367 return (csv_header_translator(translator, compounds))
|
|
368 }
|
|
369
|
|
370 guess_translator <- function(header) {
|
|
371 result <- list(
|
11
|
372 # HMDB_ID=NULL,
|
0
|
373 mz=NULL,
|
|
374 name=NULL,
|
|
375 common_name=NULL,
|
|
376 formula=NULL,
|
|
377 # inchi_key=NULL
|
|
378 )
|
|
379 asked_cols <- names(result)
|
|
380 for (asked_col in asked_cols) {
|
|
381 for (col in header) {
|
|
382 if ((twisted <- tolower(col)) == asked_col
|
|
383 || gsub("-", "_", twisted) == asked_col
|
|
384 || gsub(" ", "_", twisted) == asked_col
|
|
385 || tolower(gsub("(.)([A-Z])", "\\1_\\2", col)) == asked_col
|
|
386 ) {
|
|
387 result[[asked_col]] <- col
|
|
388 next
|
|
389 }
|
|
390 }
|
|
391 }
|
|
392 if (any(mapply(is.null, result))) {
|
|
393 return (NULL)
|
|
394 }
|
|
395 return (result)
|
|
396 }
|
|
397
|
|
398 hmdb_header_translator <- function(compounds) {
|
|
399 return (csv_header_translator(
|
|
400 list(
|
|
401 HMDB_ID="HMDB_ID",
|
|
402 mz="MzBank",
|
|
403 name="MetName",
|
|
404 common_name="MetName",
|
|
405 formula="ChemFormula",
|
|
406 inchi_key="INChIkey"
|
|
407 ), compounds
|
|
408 ))
|
|
409 }
|
|
410
|
|
411 csv_header_translator <- function(translation_table, csv) {
|
|
412 header_names <- names(translation_table)
|
|
413 result <- data.frame(1:nrow(csv))
|
|
414 for (i in seq_along(header_names)) {
|
|
415 result[, header_names[[i]]] <- csv[, translation_table[[i]]]
|
|
416 }
|
|
417 result[, "mz"] <- as.numeric(result[, "mz"])
|
|
418 return (result)
|
|
419 }
|
|
420
|
|
421 set_database_version <- function(orm, version) {
|
|
422 orm$set_tag(
|
|
423 version,
|
|
424 tag_name="database_version",
|
|
425 tag_table_name="XSeeker_tagging_table"
|
|
426 )
|
|
427 }
|
|
428
|
|
429 process_rdata <- function(orm, rdata, options) {
|
|
430 mzml_tmp_dir <- gather_mzml_files(rdata)
|
|
431 samples <- names(rdata$singlefile)
|
|
432 if (!is.null(options$samples)) {
|
|
433 samples <- samples[options$samples %in% samples]
|
|
434 }
|
|
435 show_percent <- (
|
|
436 is.null(options$`not-show-percent`)
|
|
437 || options$`not-show-percent` == FALSE
|
|
438 )
|
|
439 error <- tryCatch({
|
|
440 process_sample_list(
|
|
441 orm, rdata, samples,
|
|
442 show_percent=show_percent
|
|
443 )
|
|
444 NULL
|
|
445 }, error=function(e) {
|
|
446 message(e)
|
|
447 e
|
|
448 })
|
|
449 if (!is.null(mzml_tmp_dir)) {
|
|
450 unlink(mzml_tmp_dir, recursive=TRUE)
|
|
451 }
|
|
452 if (!is.null(error)) {
|
|
453 stop(error)
|
|
454 }
|
|
455 }
|
|
456
|
|
457 gather_mzml_files <- function(rdata) {
|
|
458 if (is.null(rdata$singlefile)) {
|
|
459 message("Extracting mxml files")
|
|
460 tmp <- tempdir()
|
|
461 rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp)
|
|
462 names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile))
|
|
463 message("Extracted")
|
|
464 return (tmp)
|
|
465 } else {
|
|
466 message(sprintf("Not a zip file, loading files directly from path: %s", paste(rdata$singlefile, collapse=" ; ")))
|
|
467 }
|
|
468 return (NULL)
|
|
469 }
|
|
470
|
|
471 process_sample_list <- function(orm, radta, sample_names, show_percent) {
|
|
472 file_grouping_var <- find_grouping_var(rdata$variableMetadata)
|
|
473 message("Processing samples.")
|
|
474 message(sprintf("File grouping variable: %s", file_grouping_var))
|
|
475 if(is.null(file_grouping_var)) {
|
|
476 stop("Malformed variableMetada.")
|
|
477 }
|
|
478
|
11
|
479 context <- new.env()
|
|
480 context$samples <- list()
|
|
481 context$peaks <- rdata$xa@xcmsSet@peaks
|
|
482 context$groupidx <- rdata$xa@xcmsSet@groupidx
|
|
483 xcms_set <- rdata$xa@xcmsSet
|
|
484 singlefile <- rdata$singlefile
|
0
|
485 process_arg_list <- rdata$listOFlistArguments
|
11
|
486 var_meta <- rdata$variableMetadata
|
|
487
|
0
|
488 process_params <- list()
|
12
|
489 if (is.null(process_arg_list)) {
|
|
490 histories <- list()
|
|
491 for (history in xcms_set@.processHistory) {
|
|
492 if (
|
|
493 class(history@param) == "CentWaveParam"
|
|
494 && history@type == "Peak detection"
|
|
495 ) {
|
|
496 params <- history@param
|
|
497 process_params <- list(list(
|
|
498 xfunction="annotatediff",
|
|
499 ppm=params@ppm,
|
|
500 peakwidth=sprintf("%s - %s", params@peakwidth[[1]], params@peakwidth[[2]]),
|
|
501 snthresh=params@snthresh,
|
|
502 prefilterStep=params@prefilter[[1]],
|
|
503 prefilterLevel=params@prefilter[[2]],
|
|
504 mzdiff=params@mzdiff,
|
|
505 fitgauss=params@fitgauss,
|
|
506 noise=params@noise,
|
|
507 mzCenterFun=params@mzCenterFun,
|
|
508 integrate=params@integrate,
|
|
509 firstBaselineCheck=params@firstBaselineCheck,
|
|
510 snthreshIsoROIs=!identical(params@roiScales, numeric(0))
|
|
511 ))
|
|
512 break
|
|
513 }
|
0
|
514 }
|
12
|
515 } else {
|
|
516 for (list_name in names(process_arg_list)) {
|
|
517 param_list <- list()
|
|
518 for (param_name in names(process_arg_list[[list_name]])) {
|
|
519 param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]]
|
|
520 }
|
|
521 process_params[[length(process_params)+1]] <- param_list
|
|
522 }
|
0
|
523 }
|
12
|
524
|
0
|
525 message("Parameters from previous processes extracted.")
|
|
526
|
|
527
|
|
528 indices <- as.numeric(unique(var_meta[, file_grouping_var]))
|
|
529 smol_xcms_set <- orm$smol_xcms_set()
|
|
530 mz_tab_info <- new.env()
|
|
531 g <- xcms::groups(xcms_set)
|
|
532 mz_tab_info$group_length <- nrow(g)
|
|
533 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set)
|
|
534 mz_tab_info$sampnames <- xcms::sampnames(xcms_set)
|
|
535 mz_tab_info$sampclass <- xcms::sampclass(xcms_set)
|
|
536 mz_tab_info$rtmed <- g[,"rtmed"]
|
|
537 mz_tab_info$mzmed <- g[,"mzmed"]
|
|
538 mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into")
|
|
539 blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100))
|
11
|
540 rm(mz_tab_info)
|
|
541
|
|
542 invisible(smol_xcms_set$set_raw(blogified)$save())
|
|
543 smol_xcms_set_id <- smol_xcms_set$get_id()
|
|
544 rm(smol_xcms_set)
|
|
545
|
0
|
546 for (no in indices) {
|
11
|
547 sample_name <- names(singlefile)[[no]]
|
|
548 sample_path <- singlefile[[no]]
|
0
|
549 if (
|
|
550 is.na(no)
|
|
551 || is.null(sample_path)
|
|
552 || !(sample_name %in% sample_names)
|
|
553 ) {
|
|
554 next
|
|
555 }
|
|
556 env <- new.env()
|
11
|
557 ms_file <- xcms::xcmsRaw(sample_path)
|
0
|
558 env$tic <- ms_file@tic
|
|
559 env$mz <- ms_file@env$mz
|
|
560 env$scanindex <- ms_file@scanindex
|
12
|
561 env$scantime <- ms_file@scantime * 60
|
0
|
562 env$intensity <- ms_file@env$intensity
|
|
563 env$polarity <- as.character(ms_file@polarity[[1]])
|
11
|
564
|
|
565 ## Again, ms file is huge, so we get rid of it quickly.
|
|
566 rm(ms_file)
|
|
567
|
0
|
568 env$sample_name <- sample_name
|
|
569 env$dataset_path <- sample_path
|
|
570 env$process_params <- process_params
|
|
571 env$enriched_rdata <- TRUE
|
|
572 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
|
|
573 env$tool_name <- TOOL_NAME
|
|
574 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
|
11
|
575 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
|
|
576 rm (env)
|
|
577 context$samples[no] <- sample$get_id()
|
|
578 rm (sample)
|
0
|
579 }
|
11
|
580 context$clusters <- list()
|
|
581 context$show_percent <- show_percent
|
|
582 context$cluster_mean_rt_abundance <- list()
|
|
583 context$central_feature <- list()
|
12
|
584 context$adducts <- list()
|
11
|
585 load_variable_metadata(orm, var_meta, context)
|
|
586 clusters <- context$clusters
|
|
587 rm(context)
|
0
|
588 message("Features enrichment")
|
11
|
589 complete_features(orm, clusters, show_percent)
|
0
|
590 message("Features enrichment done.")
|
|
591 return (NULL)
|
|
592 }
|
|
593
|
|
594 find_grouping_var <- function(var_meta) {
|
6
|
595 known_colnames = c(
|
|
596 "name", "namecustom", "mz", "mzmin", "mzmax",
|
|
597 "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct", "pcgroup"
|
|
598 )
|
|
599 col_names <- colnames(var_meta)
|
|
600 classes = list()
|
|
601 for (name in col_names) {
|
|
602 if (!(name %in% known_colnames)) {
|
|
603 classes[[length(classes)+1]] = name
|
0
|
604 }
|
|
605 }
|
6
|
606 if (length(classes) > 1) {
|
|
607 stop(sprintf("Only one class expected in the variable metadata. Found %d .", length(classes)))
|
|
608 }
|
7
|
609 if (length(classes) == 0) {
|
6
|
610 stop("Could not find any class column in your variableMetadata.")
|
|
611 }
|
|
612 return (classes[[1]])
|
0
|
613 }
|
|
614
|
11
|
615 add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) {
|
0
|
616 message(sprintf("Processing sample %s", env$sample_name))
|
|
617 sample <- (
|
|
618 orm$sample()
|
|
619 $set_name(env$sample_name)
|
|
620 $set_path(env$dataset_path)
|
|
621 $set_kind("enriched_rdata")
|
|
622 $set_polarity(
|
|
623 if (is.null(env$polarity) || identical(env$polarity, character(0))) ""
|
|
624 else env$polarity
|
|
625 )
|
|
626 $set_raw(blob::blob(fst::compress_fst(
|
|
627 serialize(env, NULL),
|
|
628 compression=100
|
|
629 )))
|
|
630 )
|
11
|
631 sample[["smol_xcms_set_id"]] <- smol_xcms_set_id
|
|
632 sample$modified__[["smol_xcms_set_id"]] <- smol_xcms_set_id
|
|
633 sample <- sample$save()
|
0
|
634 load_process_params(orm, sample, env$process_params)
|
|
635 message(sprintf("Sample %s inserted.", env$sample_name))
|
|
636 return (sample)
|
|
637 }
|
|
638
|
|
639
|
11
|
640 load_variable_metadata <- function(orm, var_meta, context) {
|
0
|
641 all_clusters <- orm$cluster()$all()
|
|
642
|
11
|
643 next_feature_id <- get_next_id(orm$feature()$all(), "featureID") + 1
|
|
644 next_cluster_id <- 0
|
0
|
645 next_pc_group <- get_next_id(all_clusters, "pc_group")
|
11
|
646 next_align_group <- get_next_id(all_clusters, "align_group") + 1
|
0
|
647 message("Extracting features")
|
|
648 invisible(create_features(
|
11
|
649 orm, var_meta, context,
|
0
|
650 next_feature_id, next_cluster_id,
|
|
651 next_pc_group, next_align_group
|
|
652 ))
|
|
653 message("Extracting features done.")
|
|
654 return (NULL)
|
|
655 }
|
|
656
|
|
657 get_next_id <- function(models, attribute) {
|
|
658 if ((id <- models$max(attribute)) == Inf || id == -Inf) {
|
11
|
659 return (0)
|
0
|
660 }
|
11
|
661 return (id)
|
0
|
662 }
|
|
663
|
|
664 create_features <- function(
|
11
|
665 orm, var_meta, context,
|
0
|
666 next_feature_id, next_cluster_id,
|
|
667 next_pc_group, next_align_group
|
|
668 ) {
|
|
669 field_names <- as.list(names(orm$feature()$fields__))
|
|
670 field_names[field_names=="id"] <- NULL
|
|
671
|
|
672 features <- list()
|
|
673 dummy_feature <- orm$feature()
|
|
674
|
|
675 if (show_percent <- context$show_percent) {
|
|
676 percent <- -1
|
|
677 total <- nrow(var_meta)
|
|
678 }
|
|
679 for (row in seq_len(nrow(var_meta))) {
|
|
680 if (show_percent && (row / total) * 100 > percent) {
|
|
681 percent <- percent + 1
|
|
682 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
|
|
683 }
|
|
684
|
|
685 curent_var_meta <- var_meta[row, ]
|
|
686
|
11
|
687
|
|
688 set_feature_fields_from_var_meta(dummy_feature, curent_var_meta)
|
|
689
|
|
690 dummy_feature$set_featureID(next_feature_id)
|
|
691 next_feature_id <- next_feature_id + 1
|
|
692 fake_iso <- dummy_feature$get_iso()
|
|
693 iso <- extract_iso(fake_iso)
|
|
694 clusterID <- extract_clusterID(fake_iso, next_cluster_id)
|
|
695 context$clusterID <- clusterID
|
|
696 dummy_feature$set_iso(iso)
|
|
697
|
|
698
|
0
|
699 peak_list <- context$peaks[context$groupidx[[row]], ]
|
10
|
700 if (! ("matrix" %in% class(peak_list))) {
|
|
701 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list)))
|
|
702 }
|
11
|
703
|
|
704 clusterID <- as.character(clusterID)
|
|
705 if (is.null(context$central_feature[[clusterID]])) {
|
|
706 int_o <- extract_peak_var(peak_list, "into")
|
|
707 context$central_feature[[clusterID]] <- (
|
|
708 peak_list[peak_list[, "into"] == int_o,]["sample"]
|
|
709 )
|
|
710 }
|
|
711
|
|
712 sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE]
|
0
|
713 if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) {
|
|
714 if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) {
|
|
715 dummy_feature$set_int_o(int_o)
|
|
716 }
|
|
717 if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) {
|
|
718 dummy_feature$set_int_b(int_b)
|
|
719 }
|
|
720 if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) {
|
|
721 dummy_feature$set_max_o(max_o)
|
|
722 }
|
|
723 }
|
|
724 create_associated_cluster(
|
12
|
725 orm,
|
11
|
726 context$central_feature[[clusterID]],
|
|
727 dummy_feature, clusterID,
|
0
|
728 context, curent_var_meta, next_pc_group,
|
|
729 next_align_group
|
|
730 )
|
|
731 next_align_group <- next_align_group + 1
|
|
732 features[[length(features)+1]] <- as.list(dummy_feature, field_names)
|
|
733 dummy_feature$clear()
|
|
734 }
|
|
735 message("")## +\n for previous message
|
|
736 message("Saving features")
|
11
|
737 rm(var_meta)
|
|
738 invisible(dummy_feature$save(bulk=features))
|
0
|
739 message("Saved.")
|
|
740 return (context$clusters)
|
|
741 }
|
|
742
|
|
743 extract_peak_var <- function(peak_list, var_name, selector=max) {
|
|
744 value <- peak_list[, var_name]
|
|
745 names(value) <- NULL
|
|
746 return (selector(value))
|
|
747 }
|
|
748
|
|
749 set_feature_fields_from_var_meta <- function(feature, var_meta) {
|
|
750 if (!is.null(mz <- var_meta[["mz"]]) && !is.na(mz)) {
|
|
751 feature$set_mz(mz)
|
|
752 }
|
|
753 if (!is.null(mzmin <- var_meta[["mzmin"]]) && !is.na(mzmin)) {
|
|
754 feature$set_mz_min(mzmin)
|
|
755 }
|
|
756 if (!is.null(mzmax <- var_meta[["mzmax"]]) && !is.na(mzmax)) {
|
|
757 feature$set_mz_max(mzmax)
|
|
758 }
|
|
759 if (!is.null(rt <- var_meta[["rt"]]) && !is.na(rt)) {
|
|
760 feature$set_rt(rt)
|
|
761 }
|
|
762 if (!is.null(rtmin <- var_meta[["rtmin"]]) && !is.na(rtmin)) {
|
|
763 feature$set_rt_min(rtmin)
|
|
764 }
|
|
765 if (!is.null(rtmax <- var_meta[["rtmax"]]) && !is.na(rtmax)) {
|
|
766 feature$set_rt_max(rtmax)
|
|
767 }
|
|
768 if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) {
|
|
769 feature$set_iso(isotopes)
|
|
770 }
|
|
771 return (feature)
|
|
772 }
|
|
773
|
|
774 extract_iso <- function(weird_data) {
|
|
775 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
|
|
776 return (sub("^\\[\\d+\\]", "", weird_data, perl=TRUE))
|
|
777 }
|
|
778 return (weird_data)
|
|
779 }
|
|
780
|
|
781 extract_clusterID <- function(weird_data, next_cluster_id){
|
|
782 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
|
|
783 clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]")
|
|
784 clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+"))
|
|
785 } else {
|
|
786 clusterID <- 0
|
|
787 }
|
|
788 return (clusterID + next_cluster_id)
|
|
789 }
|
|
790
|
|
791 create_associated_cluster <- function(
|
12
|
792 orm,
|
11
|
793 sample_no, feature, clusterID,
|
0
|
794 context, curent_var_meta, next_pc_group, next_align_group
|
|
795 ) {
|
11
|
796 clusterID <- as.character(clusterID)
|
|
797 if (is.null(cluster <- context$clusters[[clusterID]])) {
|
12
|
798 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]])
|
|
799 adduct_name <- as.character(curent_var_meta[["adduct"]])
|
|
800 annotation <- curent_var_meta[["isotopes"]]
|
11
|
801 cluster <- context$clusters[[clusterID]] <- orm$cluster(
|
0
|
802 pc_group=pcgroup + next_pc_group,
|
12
|
803 # adduct=adduct,
|
0
|
804 align_group=next_align_group,
|
|
805 # curent_group=curent_group,
|
|
806 clusterID=context$clusterID,
|
|
807 annotation=annotation
|
11
|
808 )
|
12
|
809 if (is.null(adduct <- context$adducts[[adduct_name]])) {
|
|
810 context$adducts[[adduct_name]] <- orm$adduct()$load_by(name=adduct_name)$first()
|
|
811 if (is.null(adduct <- context$adducts[[adduct_name]])) {
|
|
812 adduct <- context$adducts[[adduct_name]] <- orm$adduct(name=adduct_name, charge=0)
|
|
813 adduct$save()
|
|
814 }
|
|
815 }
|
|
816 cluster$set_adduct(adduct)
|
11
|
817 ## Crappy hack to assign sample id to cluster without loading the sample.
|
|
818 ## Samples are too big (their sample$env) and slows the process, and eat all the menory
|
|
819 ## so we dont't want to load them.
|
|
820 cluster[["sample_id"]] <- context$samples[sample_no][[1]]
|
|
821 cluster$modified__[["sample_id"]] <- cluster[["sample_id"]]
|
0
|
822 } else {
|
|
823 if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
|
|
824 cluster$set_clusterID(context$clusterID)
|
|
825 }
|
|
826 }
|
|
827 cluster$save()
|
|
828 feature$set_cluster(cluster)
|
|
829 return (feature)
|
|
830 }
|
|
831
|
11
|
832 complete_features <- function(orm, clusters, show_percent) {
|
|
833 total <- length(clusters)
|
|
834 percent <- -1
|
|
835 i <- 0
|
|
836 for (cluster in clusters) {
|
|
837 i <- i+1
|
|
838 if (show_percent && (i / total) * 100 > percent) {
|
|
839 percent <- percent + 1
|
|
840 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE)
|
|
841 }
|
0
|
842 features <- orm$feature()$load_by(cluster_id=cluster$get_id())
|
|
843 if (features$any()) {
|
|
844 if (!is.null(rt <- features$mean("rt"))) {
|
|
845 cluster$set_mean_rt(rt)$save()
|
|
846 }
|
|
847 features_df <- as.data.frame(features)
|
|
848 central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ]
|
|
849 central_feature_into <- central_feature[["int_o"]]
|
|
850 if (!identical(central_feature_into, numeric(0)) && central_feature_into != 0) {
|
|
851 for (feature in as.vector(features)) {
|
|
852 feature$set_abundance(
|
|
853 feature$get_int_o() / central_feature_into * 100
|
|
854 )$save()
|
|
855 }
|
|
856 }
|
|
857 }
|
|
858 }
|
|
859 return (NULL)
|
|
860 }
|
|
861
|
|
862 load_process_params <- function(orm, sample, params) {
|
|
863 for (param_list in params) {
|
|
864 if (is.null(param_list[["xfunction"]])) {
|
|
865 next
|
|
866 }
|
|
867 if (param_list[["xfunction"]] == "annotatediff") {
|
|
868 load_process_params_peak_picking(orm, sample, param_list)
|
|
869 }
|
|
870 }
|
|
871 return (sample)
|
|
872 }
|
|
873
|
|
874 load_process_params_peak_picking <- function(orm, sample, peak_picking_params) {
|
|
875 return (add_sample_process_parameters(
|
|
876 params=peak_picking_params,
|
|
877 params_translation=list(
|
|
878 ppm="ppm",
|
|
879 maxcharge="maxCharge",
|
|
880 maxiso="maxIso"
|
|
881 ),
|
|
882 param_model_generator=orm$peak_picking_parameters,
|
|
883 sample_param_setter=sample$set_peak_picking_parameters
|
|
884 ))
|
|
885 }
|
|
886
|
|
887 add_sample_process_parameters <- function(
|
|
888 params,
|
|
889 params_translation,
|
|
890 param_model_generator,
|
|
891 sample_param_setter
|
|
892 ) {
|
|
893 model_params <- list()
|
|
894 for (rdata_param_name in names(params_translation)) {
|
|
895 database_param_name <- params_translation[[rdata_param_name]]
|
|
896 if (is.null(rdata_param <- params[[rdata_param_name]])) {
|
|
897 next
|
|
898 }
|
|
899 model_params[[database_param_name]] <- rdata_param
|
|
900 }
|
|
901 params_models <- do.call(param_model_generator()$load_by, model_params)
|
|
902 if (params_models$any()) {
|
|
903 params_model <- params_models$first()
|
|
904 } else {
|
|
905 params_model <- do.call(param_model_generator, model_params)
|
|
906 params_model$save()
|
|
907 }
|
|
908 return (sample_param_setter(params_model)$save())
|
|
909 }
|
|
910
|
|
911
|
|
912 library(optparse)
|
|
913
|
|
914 option_list <- list(
|
|
915 optparse::make_option(
|
|
916 c("-v", "--version"),
|
|
917 action="store_true",
|
|
918 help="Display this tool's version and exits"
|
|
919 ),
|
|
920 optparse::make_option(
|
|
921 c("-i", "--input"),
|
|
922 type="character",
|
|
923 help="The rdata path to import in XSeeker"
|
|
924 ),
|
|
925 optparse::make_option(
|
|
926 c("-s", "--samples"),
|
|
927 type="character",
|
|
928 help="Samples to visualise in XSeeker"
|
|
929 ),
|
|
930 optparse::make_option(
|
|
931 c("-B", "--archetype"),
|
|
932 type="character",
|
|
933 help="The name of the base database"
|
|
934 ),
|
|
935 optparse::make_option(
|
|
936 c("-b", "--database"),
|
|
937 type="character",
|
|
938 help="The base database's path"
|
|
939 ),
|
|
940 optparse::make_option(
|
|
941 c("-c", "--compounds-csv"),
|
|
942 type="character",
|
|
943 help="The csv containing compounds"
|
|
944 ),
|
|
945 optparse::make_option(
|
|
946 c("-m", "--models"),
|
|
947 type="character",
|
|
948 help="The path or url (must begin with http[s]:// or git@) to the database's models"
|
|
949 ),
|
|
950 optparse::make_option(
|
|
951 c("-o", "--output"),
|
|
952 type="character",
|
|
953 help="The path where to output sqlite"
|
|
954 ),
|
|
955 optparse::make_option(
|
|
956 c("-P", "--not-show-percent"),
|
|
957 action="store_true",
|
|
958 help="Flag not to show the percents",
|
|
959 default=FALSE
|
|
960 )
|
|
961 )
|
|
962
|
|
963 options(error=function(){traceback(3)})
|
|
964
|
|
965 parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
|
|
966 args <- parse_args(parser, positional_arguments=0)
|
|
967
|
|
968 err_code <- 0
|
|
969
|
|
970 if (!is.null(args$options$version)) {
|
|
971 message(sprintf("%s %s", TOOL_NAME, VERSION))
|
|
972 quit()
|
|
973 }
|
|
974
|
|
975 models <- get_models(args$options$models)
|
|
976 orm <- DBModelR::ORM(
|
|
977 connection_params=list(dbname=args$options$output),
|
|
978 dbms="SQLite"
|
|
979 )
|
|
980
|
|
981 invisible(orm$models(models))
|
|
982 invisible(create_database(orm))
|
|
983
|
|
984 message("Database model created")
|
|
985
|
|
986 insert_adducts(orm)
|
|
987
|
|
988 if (!is.null(args$options$database)) {
|
|
989 insert_base_data(orm, args$options$database)
|
|
990 }
|
|
991 message(sprintf("Base data inserted using %s.", args$options$database))
|
|
992
|
|
993 if (!is.null(args$options$archetype)) {
|
|
994 insert_base_data(orm, args$options$archetype, archetype=TRUE)
|
|
995 }
|
|
996 if (!is.null(args$options$`compounds-csv`)) {
|
|
997 insert_compounds(orm, args$options$`compounds-csv`)
|
|
998 }
|
|
999
|
|
1000 # if (!is.null(args$options$rdata)) {
|
|
1001 # load_rdata_in_base(args$options$rdata, args$options$samples, args$options$`not-show-percent`)
|
|
1002 # }
|
|
1003
|
|
1004
|
|
1005 load(args$options$input, rdata <- new.env())
|
|
1006
|
|
1007 process_rdata(orm, rdata, args$options)
|
|
1008
|
|
1009 quit(status=err_code)
|
|
1010
|
|
1011
|