Mercurial > repos > computational-metabolomics > mspurity_createdatabase
comparison spectralMatching.R @ 0:f52287a06c02 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc-dirty"
| author | computational-metabolomics |
|---|---|
| date | Wed, 27 Nov 2019 13:44:58 -0500 |
| parents | |
| children | 3d36a391612c |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f52287a06c02 |
|---|---|
| 1 library(msPurity) | |
| 2 library(msPurityData) | |
| 3 library(optparse) | |
| 4 print(sessionInfo()) | |
| 5 # load in library spectra config | |
| 6 source_local <- function(fname){ argv <- commandArgs(trailingOnly=FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } | |
| 7 source_local("dbconfig.R") | |
| 8 | |
| 9 option_list <- list( | |
| 10 make_option(c("-o", "--outDir"), type="character"), | |
| 11 make_option("--q_dbPth", type="character"), | |
| 12 make_option("--l_dbPth", type="character"), | |
| 13 | |
| 14 make_option("--q_dbType", type="character", default=NA), | |
| 15 make_option("--l_dbType", type="character", default=NA), | |
| 16 | |
| 17 make_option("--q_msp", type="character", default=NA), | |
| 18 make_option("--l_msp", type="character", default=NA), | |
| 19 | |
| 20 make_option("--q_defaultDb", action="store_true"), | |
| 21 make_option("--l_defaultDb", action="store_true"), | |
| 22 | |
| 23 make_option("--q_ppmPrec", type="double"), | |
| 24 make_option("--l_ppmPrec", type="double"), | |
| 25 | |
| 26 make_option("--q_ppmProd", type="double"), | |
| 27 make_option("--l_ppmProd", type="double"), | |
| 28 | |
| 29 make_option("--q_raThres", type="double", default=NA), | |
| 30 make_option("--l_raThres", type="double", default=NA), | |
| 31 | |
| 32 make_option("--q_polarity", type="character", default=NA), | |
| 33 make_option("--l_polarity", type="character", default=NA), | |
| 34 | |
| 35 make_option("--q_purity", type="double", default=NA), | |
| 36 make_option("--l_purity", type="double", default=NA), | |
| 37 | |
| 38 make_option("--q_xcmsGroups", type="character", default=NA), | |
| 39 make_option("--l_xcmsGroups", type="character", default=NA), | |
| 40 | |
| 41 make_option("--q_pids", type="character", default=NA), | |
| 42 make_option("--l_pids", type="character", default=NA), | |
| 43 | |
| 44 make_option("--q_rtrangeMin", type="double", default=NA), | |
| 45 make_option("--l_rtrangeMin", type="double", default=NA), | |
| 46 | |
| 47 make_option("--q_rtrangeMax", type="double", default=NA), | |
| 48 make_option("--l_rtrangeMax", type="double", default=NA), | |
| 49 | |
| 50 make_option("--q_accessions", type="character", default=NA), | |
| 51 make_option("--l_accessions", type="character", default=NA), | |
| 52 | |
| 53 make_option("--q_sources", type="character", default=NA), | |
| 54 make_option("--l_sources", type="character", default=NA), | |
| 55 | |
| 56 make_option("--q_sourcesUser", type="character", default=NA), | |
| 57 make_option("--l_sourcesUser", type="character", default=NA), | |
| 58 | |
| 59 make_option("--q_instrumentTypes", type="character", default=NA), | |
| 60 make_option("--l_instrumentTypes", type="character", default=NA), | |
| 61 | |
| 62 make_option("--q_instrumentTypesUser", type="character", default=NA), | |
| 63 make_option("--l_instrumentTypesUser", type="character", default=NA), | |
| 64 | |
| 65 make_option("--q_instruments", type="character", default=NA), | |
| 66 make_option("--l_instruments", type="character", default=NA), | |
| 67 | |
| 68 make_option("--q_spectraTypes", type="character", default=NA), | |
| 69 make_option("--l_spectraTypes", type="character", default=NA), | |
| 70 | |
| 71 make_option("--q_spectraFilter", action="store_true"), | |
| 72 make_option("--l_spectraFilter", action="store_true"), | |
| 73 | |
| 74 make_option("--usePrecursors", action="store_true"), | |
| 75 | |
| 76 make_option("--mzW", type="double"), | |
| 77 make_option("--raW", type="double"), | |
| 78 | |
| 79 make_option("--rttol", type="double", default=NA), | |
| 80 | |
| 81 make_option("--updateDb", action="store_true"), | |
| 82 make_option("--copyDb", action="store_true"), | |
| 83 make_option("--cores", default=1) | |
| 84 | |
| 85 | |
| 86 ) | |
| 87 | |
| 88 # store options | |
| 89 opt<- parse_args(OptionParser(option_list=option_list)) | |
| 90 | |
| 91 print(opt) | |
| 92 | |
| 93 # check if the sqlite databases have any spectra | |
| 94 checkSPeakMeta <- function(dbPth, nme){ | |
| 95 if(is.null(dbPth)){ | |
| 96 return(TRUE) | |
| 97 }else if ((file.exists(dbPth)) & (file.info(dbPth)$size>0)){ | |
| 98 con <- DBI::dbConnect(RSQLite::SQLite(), dbPth) | |
| 99 if (DBI::dbExistsTable(con, "s_peak_meta")){ | |
| 100 spm <- DBI::dbGetQuery(con, 'SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1') | |
| 101 return(TRUE) | |
| 102 }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ | |
| 103 spm <- DBI::dbGetQuery(con, 'SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1') | |
| 104 return(TRUE) | |
| 105 }else{ | |
| 106 print(paste("No spectra available for ",nme)) | |
| 107 return(FALSE) | |
| 108 } | |
| 109 }else{ | |
| 110 print(paste("file empty or does not exist for", nme)) | |
| 111 return(FALSE) | |
| 112 } | |
| 113 | |
| 114 | |
| 115 } | |
| 116 | |
| 117 | |
| 118 addQueryNameColumn <- function(sm){ | |
| 119 if (is.null(sm$matchedResults) || length(sm$matchedResults)==1 || nrow(sm$matchedResults)==0){ | |
| 120 return(sm) | |
| 121 } | |
| 122 | |
| 123 con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) | |
| 124 if (DBI::dbExistsTable(con, "s_peak_meta")){ | |
| 125 spm <- DBI::dbGetQuery(con, 'SELECT pid, name AS query_entry_name FROM s_peak_meta') | |
| 126 }else if(DBI::dbExistsTable(con, "library_spectra_meta")){ | |
| 127 spm <- DBI::dbGetQuery(con, 'SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta') | |
| 128 } | |
| 129 print(sm$matchedResults) | |
| 130 if ('pid' %in% colnames(sm$matchedResults)){ | |
| 131 sm$matchedResults <- merge(sm$matchedResults, spm, by.x='pid', by.y='pid') | |
| 132 }else{ | |
| 133 sm$matchedResults <- merge(sm$matchedResults, spm, by.x='qpid', by.y='pid') | |
| 134 } | |
| 135 | |
| 136 print(sm$xcmsMatchedResults) | |
| 137 if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults)==1 || nrow(sm$xcmsMatchedResults)==0){ | |
| 138 return(sm) | |
| 139 }else{ | |
| 140 if ('pid' %in% colnames(sm$xcmsMatchedResults)){ | |
| 141 sm$xcmsMatchedResults<- merge(sm$xcmsMatchedResults, spm, by.x='pid', by.y='pid') | |
| 142 }else{ | |
| 143 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x='qpid', by.y='pid') | |
| 144 } | |
| 145 } | |
| 146 | |
| 147 return(sm) | |
| 148 | |
| 149 } | |
| 150 | |
| 151 | |
| 152 updateDbF <- function(q_con, l_con){ | |
| 153 message('Adding extra details to database') | |
| 154 q_con <- DBI::dbConnect(RSQLite::SQLite(),sm$q_dbPth) | |
| 155 if (DBI::dbExistsTable(q_con, "l_s_peak_meta")){ | |
| 156 l_s_peak_meta <- DBI::dbGetQuery(q_con, 'SELECT * FROM l_s_peak_meta') | |
| 157 colnames(l_s_peak_meta)[1] <- 'pid' | |
| 158 } | |
| 159 | |
| 160 l_con <- DBI::dbConnect(RSQLite::SQLite(),l_dbPth) | |
| 161 if (DBI::dbExistsTable(l_con, "s_peaks")){ | |
| 162 l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) | |
| 163 | |
| 164 }else if(DBI::dbExistsTable(l_con, "library_spectra")){ | |
| 165 l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra | |
| 166 WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse=','))) | |
| 167 }else{ | |
| 168 l_s_peaks = NULL | |
| 169 } | |
| 170 | |
| 171 if (DBI::dbExistsTable(l_con, "source")){ | |
| 172 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM source') | |
| 173 }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) { | |
| 174 l_source <- DBI::dbGetQuery(l_con, 'SELECT * FROM library_spectra_source') | |
| 175 }else{ | |
| 176 l_source = NULL | |
| 177 } | |
| 178 | |
| 179 if (!is.null(l_s_peaks)){ | |
| 180 DBI::dbWriteTable(q_con, name='l_s_peaks', value=l_s_peaks, row.names=FALSE, append=TRUE) | |
| 181 } | |
| 182 | |
| 183 if (!is.null(l_source)){ | |
| 184 DBI::dbWriteTable(q_con, name='l_source', value=l_source, row.names=FALSE, append=TRUE) | |
| 185 } | |
| 186 | |
| 187 | |
| 188 } | |
| 189 | |
| 190 | |
| 191 extractMultiple <- function(optParam){ | |
| 192 if (!is.na(optParam)){ | |
| 193 param <- trimws(strsplit(optParam, ',')[[1]]) | |
| 194 param <- param[param != ""] | |
| 195 }else{ | |
| 196 param <- NA | |
| 197 } | |
| 198 return(param) | |
| 199 | |
| 200 } | |
| 201 | |
| 202 if(!is.null(opt$q_defaultDb)){ | |
| 203 q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData") | |
| 204 q_dbType <- 'sqlite' | |
| 205 }else if (!opt$q_dbType=='local_config'){ | |
| 206 q_dbType <- opt$q_dbType | |
| 207 q_dbPth <- opt$q_dbPth | |
| 208 } | |
| 209 | |
| 210 if(!is.null(opt$l_defaultDb)){ | |
| 211 l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package="msPurityData") | |
| 212 l_dbType <- 'sqlite' | |
| 213 }else if (!opt$l_dbType=='local_config'){ | |
| 214 l_dbType <- opt$l_dbType | |
| 215 l_dbPth <- opt$l_dbPth | |
| 216 } | |
| 217 | |
| 218 | |
| 219 q_polarity <- extractMultiple(opt$q_polarity) | |
| 220 l_polarity <- extractMultiple(opt$l_polarity) | |
| 221 | |
| 222 q_xcmsGroups <- extractMultiple(opt$q_xcmsGroups) | |
| 223 l_xcmsGroups <- extractMultiple(opt$l_xcmsGroups) | |
| 224 | |
| 225 q_pids <- extractMultiple(opt$q_pids) | |
| 226 l_pids <- extractMultiple(opt$l_pids) | |
| 227 | |
| 228 q_sources <- extractMultiple(opt$q_sources) | |
| 229 l_sources <- extractMultiple(opt$l_sources) | |
| 230 | |
| 231 q_sourcesUser <- extractMultiple(opt$q_sourcesUser) | |
| 232 l_sourcesUser <- extractMultiple(opt$l_sourcesUser) | |
| 233 | |
| 234 q_sources <-c(q_sources, q_sourcesUser) | |
| 235 l_sources <-c(l_sources, l_sourcesUser) | |
| 236 | |
| 237 q_instrumentTypes <- extractMultiple(opt$q_instrumentTypes) | |
| 238 l_instrumentTypes <- extractMultiple(opt$l_instrumentTypes) | |
| 239 | |
| 240 q_instrumentTypes <-c(q_instrumentTypes, q_instrumentTypes) | |
| 241 l_instrumentTypes <-c(l_instrumentTypes, l_instrumentTypes) | |
| 242 | |
| 243 | |
| 244 if(!is.null(opt$l_spectraFilter)){ | |
| 245 l_spectraFilter <- TRUE | |
| 246 }else{ | |
| 247 l_spectraFilter <- FALSE | |
| 248 } | |
| 249 | |
| 250 if(!is.null(opt$q_spectraFilter)){ | |
| 251 q_spectraFilter <- TRUE | |
| 252 }else{ | |
| 253 q_spectraFilter <- FALSE | |
| 254 } | |
| 255 | |
| 256 if(!is.null(opt$updateDb)){ | |
| 257 updateDb <- TRUE | |
| 258 }else{ | |
| 259 updateDb <- FALSE | |
| 260 } | |
| 261 | |
| 262 if(!is.null(opt$copyDb)){ | |
| 263 copyDb <- TRUE | |
| 264 }else{ | |
| 265 copyDb <- FALSE | |
| 266 } | |
| 267 | |
| 268 if(!is.null(opt$l_rtrangeMax)){ | |
| 269 l_rtrangeMax <- opt$l_rtrangeMax | |
| 270 }else{ | |
| 271 l_rtrangeMax <- NA | |
| 272 } | |
| 273 | |
| 274 if(!is.null(opt$q_rtrangeMax)){ | |
| 275 q_rtrangeMax <- opt$q_rtrangeMax | |
| 276 }else{ | |
| 277 q_rtrangeMax <- NA | |
| 278 } | |
| 279 | |
| 280 if(!is.null(opt$l_rtrangeMin)){ | |
| 281 l_rtrangeMin <- opt$l_rtrangeMin | |
| 282 }else{ | |
| 283 l_rtrangeMin <- NA | |
| 284 } | |
| 285 | |
| 286 if(!is.null(opt$q_rtrangeMin)){ | |
| 287 q_rtrangeMin <- opt$q_rtrangeMin | |
| 288 }else{ | |
| 289 q_rtrangeMin <- NA | |
| 290 } | |
| 291 | |
| 292 q_check <- checkSPeakMeta(opt$q_dbPth, 'query') | |
| 293 l_check <- checkSPeakMeta(opt$l_dbPth, 'library') | |
| 294 | |
| 295 if (q_check && l_check){ | |
| 296 sm <- msPurity::spectralMatching( | |
| 297 q_purity = opt$q_purity, | |
| 298 l_purity = opt$l_purity, | |
| 299 | |
| 300 q_ppmProd = opt$q_ppmProd, | |
| 301 l_ppmProd = opt$l_ppmProd, | |
| 302 | |
| 303 q_ppmPrec = opt$q_ppmPrec, | |
| 304 l_ppmPrec = opt$l_ppmPrec, | |
| 305 | |
| 306 q_raThres = opt$q_raThres, | |
| 307 l_raThres = opt$l_raThres, | |
| 308 | |
| 309 q_pol = q_polarity, | |
| 310 l_pol = l_polarity, | |
| 311 | |
| 312 q_xcmsGroups = q_xcmsGroups, | |
| 313 l_xcmsGroups = l_xcmsGroups, | |
| 314 | |
| 315 q_pids = q_pids, | |
| 316 l_pids = l_pids, | |
| 317 | |
| 318 q_sources = q_sources, | |
| 319 l_sources = l_sources, | |
| 320 | |
| 321 q_instrumentTypes = q_instrumentTypes, | |
| 322 l_instrumentTypes = l_instrumentTypes, | |
| 323 | |
| 324 q_spectraFilter= q_spectraFilter, | |
| 325 l_spectraFilter= l_spectraFilter, | |
| 326 | |
| 327 l_rtrange=c(l_rtrangeMin, l_rtrangeMax), | |
| 328 q_rtrange=c(q_rtrangeMin, q_rtrangeMax), | |
| 329 | |
| 330 q_accessions = opt$q_accessions, | |
| 331 l_accessions= opt$l_accessions, | |
| 332 | |
| 333 raW = opt$raW, | |
| 334 mzW = opt$mzW, | |
| 335 rttol=opt$rttol, | |
| 336 cores=opt$cores, | |
| 337 | |
| 338 copyDb=copyDb, | |
| 339 updateDb=updateDb, | |
| 340 outPth = "db_with_spectral_matching.sqlite", | |
| 341 | |
| 342 q_dbPth = q_dbPth, | |
| 343 q_dbType = q_dbType, | |
| 344 q_dbName = q_dbName, | |
| 345 q_dbHost = q_dbHost, | |
| 346 q_dbUser = q_dbUser, | |
| 347 q_dbPass = q_dbPass, | |
| 348 q_dbPort = q_dbPort, | |
| 349 | |
| 350 l_dbPth = l_dbPth, | |
| 351 l_dbType = l_dbType, | |
| 352 l_dbName = l_dbName, | |
| 353 l_dbHost = l_dbHost, | |
| 354 l_dbUser = l_dbUser, | |
| 355 l_dbPass = l_dbPass, | |
| 356 l_dbPort = l_dbPort | |
| 357 | |
| 358 ) | |
| 359 | |
| 360 sm <- addQueryNameColumn(sm) | |
| 361 # Get name of the query results (and merged with the data frames) | |
| 362 write.table(sm$matchedResults, 'matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) | |
| 363 write.table(sm$xcmsMatchedResults, 'xcms_matched_results.tsv', sep = '\t', row.names = FALSE, col.names = TRUE) | |
| 364 | |
| 365 if(updateDb){ | |
| 366 updateDbF(q_con, l_con) | |
| 367 } | |
| 368 } |
