Mercurial > repos > workflow4metabolomics > camera_combinexsannos
comparison lib.r @ 0:139ff66b0b5d draft
planemo upload commit f69695e76674862ed9c77c1c127f459b4df42464
author | workflow4metabolomics |
---|---|
date | Fri, 26 Jul 2019 16:49:18 -0400 |
parents | |
children | ea15115a5b3f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:139ff66b0b5d |
---|---|
1 # lib.r | |
2 | |
3 #@author G. Le Corguille | |
4 # solve an issue with batch if arguments are logical TRUE/FALSE | |
5 parseCommandArgs <- function(...) { | |
6 args <- batch::parseCommandArgs(...) | |
7 for (key in names(args)) { | |
8 if (args[key] %in% c("TRUE","FALSE")) | |
9 args[key] = as.logical(args[key]) | |
10 } | |
11 return(args) | |
12 } | |
13 | |
14 #@author G. Le Corguille | |
15 # This function will | |
16 # - load the packages | |
17 # - display the sessionInfo | |
18 loadAndDisplayPackages <- function(pkgs) { | |
19 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | |
20 | |
21 sessioninfo = sessionInfo() | |
22 cat(sessioninfo$R.version$version.string,"\n") | |
23 cat("Main packages:\n") | |
24 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
25 cat("Other loaded packages:\n") | |
26 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
27 } | |
28 | |
29 # This function retrieve a xset like object | |
30 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
31 getxcmsSetObject <- function(xobject) { | |
32 # XCMS 1.x | |
33 if (class(xobject) == "xcmsSet") | |
34 return (xobject) | |
35 # XCMS 3.x | |
36 if (class(xobject) == "XCMSnExp") { | |
37 # Get the legacy xcmsSet object | |
38 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | |
39 if (is.null(xset@phenoData$sample_group)) | |
40 sampclass(xset) = "." | |
41 else | |
42 sampclass(xset) <- xset@phenoData$sample_group | |
43 if (!is.null(xset@phenoData$sample_name)) | |
44 rownames(xset@phenoData) = xset@phenoData$sample_name | |
45 return (xset) | |
46 } | |
47 } | |
48 | |
49 #@author G. Le Corguille | |
50 #The function create a pdf from the different png generated by diffreport | |
51 diffreport_png2pdf <- function(filebase) { | |
52 dir.create("pdf") | |
53 | |
54 pdfEicOutput = paste0("pdf/",filebase,"-eic_pdf.pdf") | |
55 pdfBoxOutput = paste0("pdf/",filebase,"-box_pdf.pdf") | |
56 | |
57 system(paste0("gm convert ",filebase,"_eic/*.png ",pdfEicOutput)) | |
58 system(paste0("gm convert ",filebase,"_box/*.png ",pdfBoxOutput)) | |
59 | |
60 } | |
61 | |
62 #@author G. Le Corguille | |
63 #The function create a zip archive from the different png generated by diffreport | |
64 diffreport_png2zip <- function() { | |
65 zip("eic.zip", dir(pattern="_eic"), zip=Sys.which("zip")) | |
66 zip("box.zip", dir(pattern="_box"), zip=Sys.which("zip")) | |
67 } | |
68 | |
69 #The function create a zip archive from the different tabular generated by diffreport | |
70 diffreport_tabular2zip <- function() { | |
71 zip("tabular.zip", dir(pattern="tabular/*"), zip=Sys.which("zip")) | |
72 } | |
73 | |
74 #@author G. Le Corguille | |
75 #This function convert if it is required the Retention Time in minutes | |
76 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | |
77 if (convertRTMinute){ | |
78 #converting the retention times (seconds) into minutes | |
79 print("converting the retention times into minutes in the variableMetadata") | |
80 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60 | |
81 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60 | |
82 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60 | |
83 } | |
84 return (variableMetadata) | |
85 } | |
86 | |
87 #@author G. Le Corguille | |
88 #This function format ions identifiers | |
89 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { | |
90 splitDeco = strsplit(as.character(variableMetadata$name),"_") | |
91 idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | |
92 namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | |
93 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | |
94 return(variableMetadata) | |
95 } | |
96 | |
97 #The function annotateDiffreport without the corr function which bugs | |
98 annotatediff <- function(xset=xset, args=args, variableMetadataOutput="variableMetadata.tsv") { | |
99 # Resolve the bug with x11, with the function png | |
100 options(bitmapType='cairo') | |
101 | |
102 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. | |
103 res=try(is.null(xset@filled)) | |
104 | |
105 # ------ annot ------- | |
106 args$calcCiS=as.logical(args$calcCiS) | |
107 args$calcIso=as.logical(args$calcIso) | |
108 args$calcCaS=as.logical(args$calcCaS) | |
109 | |
110 # common parameters | |
111 args4annotate = list(object=xset, | |
112 nSlaves=args$nSlaves,sigma=args$sigma,perfwhm=args$perfwhm, | |
113 maxcharge=args$maxcharge,maxiso=args$maxiso,minfrac=args$minfrac, | |
114 ppm=args$ppm,mzabs=args$mzabs,quick=args$quick, | |
115 polarity=args$polarity,max_peaks=args$max_peaks,intval=args$intval) | |
116 | |
117 # quick == FALSE | |
118 if(args$quick==FALSE) { | |
119 args4annotate = append(args4annotate, | |
120 list(graphMethod=args$graphMethod,cor_eic_th=args$cor_eic_th,pval=args$pval, | |
121 calcCiS=args$calcCiS,calcIso=args$calcIso,calcCaS=args$calcCaS)) | |
122 # no ruleset | |
123 if (!is.null(args$multiplier)) { | |
124 args4annotate = append(args4annotate, | |
125 list(multiplier=args$multiplier)) | |
126 } | |
127 # ruleset | |
128 else { | |
129 rulset=read.table(args$rules, h=T, sep=";") | |
130 if (ncol(rulset) < 4) rulset=read.table(args$rules, h=T, sep="\t") | |
131 if (ncol(rulset) < 4) rulset=read.table(args$rules, h=T, sep=",") | |
132 if (ncol(rulset) < 4) { | |
133 error_message="Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation" | |
134 print(error_message) | |
135 stop(error_message) | |
136 } | |
137 | |
138 args4annotate = append(args4annotate, | |
139 list(rules=rulset)) | |
140 } | |
141 } | |
142 | |
143 | |
144 # launch annotate | |
145 xa = do.call("annotate", args4annotate) | |
146 peakList=getPeaklist(xa,intval=args$intval) | |
147 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); | |
148 | |
149 # --- Multi condition : diffreport --- | |
150 diffrepOri=NULL | |
151 if (!is.null(args$runDiffreport) & nlevels(sampclass(xset))>=2) { | |
152 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. | |
153 res=try(is.null(xset@filled)) | |
154 classes=levels(sampclass(xset)) | |
155 x=1:(length(classes)-1) | |
156 for (i in seq(along=x) ) { | |
157 y=1:(length(classes)) | |
158 for (n in seq(along=y)){ | |
159 if(i+n <= length(classes)){ | |
160 filebase=paste(classes[i],class2=classes[i+n],sep="-vs-") | |
161 | |
162 diffrep=diffreport( | |
163 object=xset,class1=classes[i],class2=classes[i+n], | |
164 filebase=filebase,eicmax=args$eicmax,eicwidth=args$eicwidth, | |
165 sortpval=TRUE,value=args$value,h=args$h,w=args$w,mzdec=args$mzdec,missing=0) | |
166 | |
167 diffrepOri = diffrep | |
168 | |
169 # renamming of the column rtmed to rt to fit with camera peaklist function output | |
170 colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt" | |
171 colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz" | |
172 | |
173 # combines results and reorder columns | |
174 diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F) | |
175 diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))]) | |
176 | |
177 diffrep = RTSecondToMinute(diffrep, args$convertRTMinute) | |
178 diffrep = formatIonIdentifiers(diffrep, numDigitsRT=args$numDigitsRT, numDigitsMZ=args$numDigitsMZ) | |
179 | |
180 if(args$sortpval){ | |
181 diffrep=diffrep[order(diffrep$pvalue), ] | |
182 } | |
183 | |
184 dir.create("tabular", showWarnings = FALSE) | |
185 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep="")) | |
186 | |
187 if (args$eicmax != 0) { | |
188 if (args$png2 == "pdf") | |
189 diffreport_png2pdf(filebase) | |
190 } | |
191 } | |
192 } | |
193 } | |
194 if (args$png2 == "zip") | |
195 diffreport_png2zip() | |
196 if (args$tabular2 == "zip") | |
197 diffreport_tabular2zip() | |
198 } | |
199 | |
200 # --- variableMetadata --- | |
201 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] | |
202 variableMetadata = RTSecondToMinute(variableMetadata, args$convertRTMinute) | |
203 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=args$numDigitsRT, numDigitsMZ=args$numDigitsMZ) | |
204 # if we have 2 conditions, we keep stat of diffrep | |
205 if (!is.null(args$runDiffreport) & nlevels(sampclass(xset))==2) { | |
206 variableMetadata = merge(variableMetadata, diffrep[,c("name","fold","tstat","pvalue")],by.x="name", by.y="name", sort=F) | |
207 if(exists("args[[\"sortpval\"]]")){ | |
208 variableMetadata=variableMetadata[order(variableMetadata$pvalue), ] | |
209 } | |
210 } | |
211 | |
212 variableMetadataOri=variableMetadata | |
213 write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput) | |
214 | |
215 return(list("xa"=xa,"diffrep"=diffrepOri,"variableMetadata"=variableMetadataOri)); | |
216 | |
217 } | |
218 | |
219 | |
220 combinexsAnnos_function <- function(xaP, xaN, diffrepP=NULL,diffrepN=NULL, | |
221 pos=TRUE,tol=2,ruleset=NULL,keep_meta=TRUE, convertRTMinute=F, numDigitsMZ=0, | |
222 numDigitsRT=0, variableMetadataOutput="variableMetadata.tsv"){ | |
223 | |
224 #Load the two Rdata to extract the xset objects from positive and negative mode | |
225 cat("\tObject xset from positive mode\n") | |
226 print(xaP) | |
227 cat("\n") | |
228 | |
229 cat("\tObject xset from negative mode\n") | |
230 print(xaN) | |
231 cat("\n") | |
232 | |
233 cat("\n") | |
234 cat("\tCombining...\n") | |
235 #Convert the string to numeric for creating matrix | |
236 row=as.numeric(strsplit(ruleset,",")[[1]][1]) | |
237 column=as.numeric(strsplit(ruleset,",")[[1]][2]) | |
238 ruleset=cbind(row,column) | |
239 #Test if the file comes from an older version tool | |
240 if ((!is.null(xaP)) & (!is.null(xaN))) { | |
241 #Launch the combinexsannos function from CAMERA | |
242 cAnnot=combinexsAnnos(xaP, xaN,pos=pos,tol=tol,ruleset=ruleset) | |
243 } else { | |
244 stop("You must relauch the CAMERA.annotate step with the lastest version.") | |
245 } | |
246 | |
247 if(pos){ | |
248 xa=xaP | |
249 mode="neg. Mode" | |
250 } else { | |
251 xa=xaN | |
252 mode="pos. Mode" | |
253 } | |
254 | |
255 peakList=getPeaklist(xa) | |
256 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); | |
257 variableMetadata=cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup",mode)]); | |
258 variableMetadata=variableMetadata[,!(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))] | |
259 | |
260 #Test if there are more than two classes (conditions) | |
261 if ( nlevels(sampclass(xaP@xcmsSet))==2 & (!is.null(diffrepN)) & (!is.null(diffrepP))) { | |
262 diffrepP = diffrepP[,c("name","fold","tstat","pvalue")]; colnames(diffrepP) = paste("P.",colnames(diffrepP),sep="") | |
263 diffrepN = diffrepN[,c("name","fold","tstat","pvalue")]; colnames(diffrepN) = paste("N.",colnames(diffrepN),sep="") | |
264 | |
265 variableMetadata = merge(variableMetadata, diffrepP, by.x="name", by.y="P.name") | |
266 variableMetadata = merge(variableMetadata, diffrepN, by.x="name", by.y="N.name") | |
267 } | |
268 | |
269 rownames(variableMetadata) = NULL | |
270 #TODO: checker | |
271 #colnames(variableMetadata)[1:2] = c("name","mz/rt"); | |
272 | |
273 variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute) | |
274 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | |
275 | |
276 #If the user want to keep only the metabolites which match a difference | |
277 if(keep_meta){ | |
278 variableMetadata=variableMetadata[variableMetadata[,c(mode)]!="",] | |
279 } | |
280 | |
281 #Write the output into a tsv file | |
282 write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput) | |
283 return(variableMetadata); | |
284 | |
285 } | |
286 | |
287 # This function get the raw file path from the arguments | |
288 getRawfilePathFromArguments <- function(singlefile, zipfile, args) { | |
289 if (!is.null(args$zipfile)) zipfile = args$zipfile | |
290 if (!is.null(args$zipfilePositive)) zipfile = args$zipfilePositive | |
291 if (!is.null(args$zipfileNegative)) zipfile = args$zipfileNegative | |
292 | |
293 if (!is.null(args$singlefile_galaxyPath)) { | |
294 singlefile_galaxyPaths = args$singlefile_galaxyPath; | |
295 singlefile_sampleNames = args$singlefile_sampleName | |
296 } | |
297 if (!is.null(args$singlefile_galaxyPathPositive)) { | |
298 singlefile_galaxyPaths = args$singlefile_galaxyPathPositive; | |
299 singlefile_sampleNames = args$singlefile_sampleNamePositive | |
300 } | |
301 if (!is.null(args$singlefile_galaxyPathNegative)) { | |
302 singlefile_galaxyPaths = args$singlefile_galaxyPathNegative; | |
303 singlefile_sampleNames = args$singlefile_sampleNameNegative | |
304 } | |
305 if (exists("singlefile_galaxyPaths")){ | |
306 singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,",")) | |
307 singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,",")) | |
308 | |
309 singlefile=NULL | |
310 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
311 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
312 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] | |
313 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath | |
314 } | |
315 } | |
316 for (argument in c("zipfile", "zipfilePositive", "zipfileNegative", | |
317 "singlefile_galaxyPath", "singlefile_sampleName", | |
318 "singlefile_galaxyPathPositive", "singlefile_sampleNamePositive", | |
319 "singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { | |
320 args[[argument]]=NULL | |
321 } | |
322 return(list(zipfile=zipfile, singlefile=singlefile, args=args)) | |
323 } | |
324 | |
325 | |
326 # This function retrieve the raw file in the working directory | |
327 # - if zipfile: unzip the file with its directory tree | |
328 # - if singlefiles: set symlink with the good filename | |
329 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | |
330 if(!is.null(singlefile) && (length("singlefile")>0)) { | |
331 for (singlefile_sampleName in names(singlefile)) { | |
332 singlefile_galaxyPath = singlefile[[singlefile_sampleName]] | |
333 if(!file.exists(singlefile_galaxyPath)){ | |
334 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | |
335 print(error_message); stop(error_message) | |
336 } | |
337 | |
338 file.symlink(singlefile_galaxyPath,singlefile_sampleName) | |
339 } | |
340 directory = "." | |
341 | |
342 } | |
343 if(!is.null(zipfile) && (zipfile!="")) { | |
344 if(!file.exists(zipfile)){ | |
345 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | |
346 print(error_message) | |
347 stop(error_message) | |
348 } | |
349 | |
350 #list all file in the zip file | |
351 #zip_files=unzip(zipfile,list=T)[,"Name"] | |
352 | |
353 #unzip | |
354 suppressWarnings(unzip(zipfile, unzip="unzip")) | |
355 | |
356 #get the directory name | |
357 filesInZip=unzip(zipfile, list=T); | |
358 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); | |
359 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | |
360 directory = "." | |
361 if (length(directories) == 1) directory = directories | |
362 | |
363 cat("files_root_directory\t",directory,"\n") | |
364 | |
365 } | |
366 return (directory) | |
367 } | |
368 | |
369 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
370 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524 | |
371 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd | |
372 ############################################################ | |
373 ## getEIC | |
374 getEIC <- function(object, mzrange, rtrange = 200, | |
375 groupidx, sampleidx = sampnames(object), | |
376 rt = c("corrected", "raw")) { | |
377 | |
378 files <- filepaths(object) | |
379 grp <- groups(object) | |
380 samp <- sampnames(object) | |
381 prof <- profinfo(object) | |
382 | |
383 rt <- match.arg(rt) | |
384 | |
385 if (is.numeric(sampleidx)) | |
386 sampleidx <- sampnames(object)[sampleidx] | |
387 sampidx <- match(sampleidx, sampnames(object)) | |
388 | |
389 if (!missing(groupidx)) { | |
390 if (is.numeric(groupidx)) | |
391 groupidx <- groupnames(object)[unique(as.integer(groupidx))] | |
392 grpidx <- match(groupidx, groupnames(object, template = groupidx)) | |
393 } | |
394 | |
395 if (missing(mzrange)) { | |
396 if (missing(groupidx)) | |
397 stop("No m/z range or groups specified") | |
398 if (any(is.na(groupval(object, value = "mz")))) | |
399 warning( | |
400 "`NA` values in xcmsSet. Use fillPeaks() on the object to fill", | |
401 "-in missing peak values. Note however that this will also ", | |
402 "insert intensities of 0 for peaks that can not be filled in.") | |
403 mzmin <- apply(groupval(object, value = "mzmin"), 1, min, na.rm = TRUE) | |
404 mzmax <- apply(groupval(object, value = "mzmax"), 1, max, na.rm = TRUE) | |
405 mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2) | |
406 ## if (any(is.na(groupval(object, value = "mz")))) | |
407 ## stop('Please use fillPeaks() to fill up NA values !') | |
408 ## mzmin <- -rowMax(-groupval(object, value = "mzmin")) | |
409 ## mzmax <- rowMax(groupval(object, value = "mzmax")) | |
410 ## mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2) | |
411 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange))) | |
412 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE] | |
413 else if (is.null(dim(mzrange))) | |
414 stop("mzrange must be a matrix") | |
415 colnames(mzrange) <- c("mzmin", "mzmax") | |
416 | |
417 if (length(rtrange) == 1) { | |
418 if (missing(groupidx)) | |
419 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)), | |
420 ncol = 2, byrow = TRUE) | |
421 else { | |
422 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange) | |
423 } | |
424 } else if (is.null(dim(rtrange))) | |
425 stop("rtrange must be a matrix or single number") | |
426 colnames(rtrange) <- c("rtmin", "rtmax") | |
427 | |
428 ## Ensure that we've got corrected retention time if requested. | |
429 if (is.null(object@rt[[rt]])) | |
430 stop(rt, " retention times not present in 'object'!") | |
431 | |
432 ## Ensure that the defined retention time range is within the rtrange of the | |
433 ## object: we're using the max minimal rt of all files and the min maximal rt | |
434 rtrs <- lapply(object@rt[[rt]], range) | |
435 rtr <- c(max(unlist(lapply(rtrs, "[", 1))), | |
436 min(unlist(lapply(rtrs, "[", 2)))) | |
437 ## Check if we've got a range which is completely off: | |
438 if (any(rtrange[, "rtmin"] >= rtr[2] | rtrange[, "rtmax"] <= rtr[1])) { | |
439 outs <- which(rtrange[, "rtmin"] >= rtr[2] | | |
440 rtrange[, "rtmax"] <= rtr[1]) | |
441 stop(length(outs), " of the specified 'rtrange' are completely outside ", | |
442 "of the retention time range of 'object' which is (", rtr[1], ", ", | |
443 rtr[2], "). The first was: (", rtrange[outs[1], "rtmin"], ", ", | |
444 rtrange[outs[1], "rtmax"], "!") | |
445 } | |
446 lower_rt_outside <- rtrange[, "rtmin"] < rtr[1] | |
447 upper_rt_outside <- rtrange[, "rtmax"] > rtr[2] | |
448 if (any(lower_rt_outside) | any(upper_rt_outside)) { | |
449 ## Silently fix these ranges. | |
450 rtrange[lower_rt_outside, "rtmin"] <- rtr[1] | |
451 rtrange[upper_rt_outside, "rtmax"] <- rtr[2] | |
452 } | |
453 | |
454 if (missing(groupidx)) | |
455 gnames <- character(0) | |
456 else | |
457 gnames <- groupidx | |
458 | |
459 eic <- vector("list", length(sampleidx)) | |
460 names(eic) <- sampleidx | |
461 | |
462 for (i in seq(along = sampidx)) { | |
463 | |
464 ## cat(sampleidx[i], "") | |
465 flush.console() | |
466 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other | |
467 ## stuff. | |
468 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt) | |
469 currenteic <- xcms::getEIC(lcraw, mzrange, rtrange, step = prof$step) | |
470 eic[[i]] <- currenteic@eic[[1]] | |
471 rm(lcraw) | |
472 gc() | |
473 } | |
474 ## cat("\n") | |
475 | |
476 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange, | |
477 rt = rt, groupnames = gnames)) | |
478 } | |
479 | |
480 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
481 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524 | |
482 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd | |
483 ############################################################ | |
484 ## diffreport | |
485 diffreport = function(object, | |
486 class1 = levels(sampclass(object))[1], | |
487 class2 = levels(sampclass(object))[2], | |
488 filebase = character(), | |
489 eicmax = 0, eicwidth = 200, | |
490 sortpval = TRUE, | |
491 classeic = c(class1,class2), | |
492 value = c("into","maxo","intb"), | |
493 metlin = FALSE, | |
494 h = 480, w = 640, mzdec=2, | |
495 missing = numeric(), ...) { | |
496 | |
497 if ( nrow(object@groups)<1 || length(object@groupidx) <1) { | |
498 stop("No group information. Use group().") | |
499 } | |
500 | |
501 if (!is.numeric(w) || !is.numeric(h)) | |
502 stop("'h' and 'w' have to be numeric") | |
503 ## require(multtest) || stop("Couldn't load multtest") | |
504 | |
505 value <- match.arg(value) | |
506 groupmat <- groups(object) | |
507 if (length(groupmat) == 0) | |
508 stop("No group information found") | |
509 samples <- sampnames(object) | |
510 n <- length(samples) | |
511 classlabel <- sampclass(object) | |
512 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))] | |
513 | |
514 values <- groupval(object, "medret", value=value) | |
515 indecies <- groupval(object, "medret", value = "index") | |
516 | |
517 if (!all(c(class1,class2) %in% classlabel)) | |
518 stop("Incorrect Class Labels") | |
519 | |
520 ## c1 and c2 are column indices of class1 and class2 resp. | |
521 c1 <- which(classlabel %in% class1) | |
522 c2 <- which(classlabel %in% class2) | |
523 ceic <- which(classlabel %in% classeic) | |
524 if (length(intersect(c1, c2)) > 0) | |
525 stop("Intersecting Classes") | |
526 | |
527 ## Optionally replace NA values with the value provided with missing | |
528 if (length(missing)) { | |
529 if (is.numeric(missing)) { | |
530 ## handles NA, Inf and -Inf | |
531 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1] | |
532 } else | |
533 stop("'missing' should be numeric") | |
534 } | |
535 ## Check against missing Values | |
536 if (any(is.na(values[, c(c1, c2)]))) | |
537 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill", | |
538 "-in missing peak values. Note however that this will also ", | |
539 "insert intensities of 0 for peaks that can not be filled in.") | |
540 | |
541 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE) | |
542 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE) | |
543 | |
544 ## Calculate fold change. | |
545 ## For foldchange <1 set fold to 1/fold | |
546 ## See tstat to check which was higher | |
547 fold <- mean2 / mean1 | |
548 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1] | |
549 | |
550 testval <- values[,c(c1,c2)] | |
551 ## Replace eventual infinite values with NA (CAMERA issue #33) | |
552 testval[is.infinite(testval)] <- NA | |
553 testclab <- c(rep(0,length(c1)),rep(1,length(c2))) | |
554 | |
555 if (min(length(c1), length(c2)) >= 2) { | |
556 tstat <- mt.teststat(testval, testclab, ...) | |
557 pvalue <- xcms:::pval(testval, testclab, tstat) | |
558 } else { | |
559 message("Too few samples per class, skipping t-test.") | |
560 tstat <- pvalue <- rep(NA,nrow(testval)) | |
561 } | |
562 stat <- data.frame(fold = fold, tstat = tstat, pvalue = pvalue) | |
563 if (length(levels(sampclass(object))) >2) { | |
564 pvalAnova<-c() | |
565 for(i in 1:nrow(values)){ | |
566 var<-as.numeric(values[i,]) | |
567 ano<-summary(aov(var ~ sampclass(object)) ) | |
568 pvalAnova<-append(pvalAnova, unlist(ano)["Pr(>F)1"]) | |
569 } | |
570 stat<-cbind(stat, anova= pvalAnova) | |
571 } | |
572 if (metlin) { | |
573 neutralmass <- groupmat[,"mzmed"] + ifelse(metlin < 0, 1, -1) | |
574 metlin <- abs(metlin) | |
575 digits <- ceiling(-log10(metlin))+1 | |
576 metlinurl <- | |
577 paste("http://metlin.scripps.edu/simple_search_result.php?mass_min=", | |
578 round(neutralmass - metlin, digits), "&mass_max=", | |
579 round(neutralmass + metlin, digits), sep="") | |
580 values <- cbind(metlin = metlinurl, values) | |
581 } | |
582 twosamp <- cbind(name = groupnames(object), stat, groupmat, values) | |
583 if (sortpval) { | |
584 tsidx <- order(twosamp[,"pvalue"]) | |
585 twosamp <- twosamp[tsidx,] | |
586 rownames(twosamp) <- 1:nrow(twosamp) | |
587 values<-values[tsidx,] | |
588 } else | |
589 tsidx <- 1:nrow(values) | |
590 | |
591 if (length(filebase)) | |
592 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA) | |
593 | |
594 if (eicmax > 0) { | |
595 if (length(unique(peaks(object)[,"rt"])) > 1) { | |
596 ## This looks like "normal" LC data | |
597 | |
598 eicmax <- min(eicmax, length(tsidx)) | |
599 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic, | |
600 groupidx = tsidx[seq(length = eicmax)]) | |
601 | |
602 if (length(filebase)) { | |
603 eicdir <- paste(filebase, "_eic", sep="") | |
604 boxdir <- paste(filebase, "_box", sep="") | |
605 dir.create(eicdir) | |
606 dir.create(boxdir) | |
607 if (capabilities("png")){ | |
608 xcms:::xcmsBoxPlot(values[seq(length = eicmax),], | |
609 sampclass(object), dirpath=boxdir, pic="png", width=w, height=h) | |
610 png(file.path(eicdir, "%003d.png"), width = w, height = h) | |
611 } else { | |
612 xcms:::xcmsBoxPlot(values[seq(length = eicmax),], | |
613 sampclass(object), dirpath=boxdir, pic="pdf", width=w, height=h) | |
614 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, | |
615 height = h/72, onefile = FALSE) | |
616 } | |
617 } | |
618 plot(eics, object, rtrange = eicwidth, mzdec=mzdec) | |
619 | |
620 if (length(filebase)) | |
621 dev.off() | |
622 } else { | |
623 ## This looks like a direct-infusion single spectrum | |
624 if (length(filebase)) { | |
625 eicdir <- paste(filebase, "_eic", sep="") | |
626 boxdir <- paste(filebase, "_box", sep="") | |
627 dir.create(eicdir) | |
628 dir.create(boxdir) | |
629 if (capabilities("png")){ | |
630 xcmsBoxPlot(values[seq(length = eicmax),], | |
631 sampclass(object), dirpath=boxdir, pic="png", | |
632 width=w, height=h) | |
633 png(file.path(eicdir, "%003d.png"), width = w, height = h, | |
634 units = "px") | |
635 } else { | |
636 xcmsBoxPlot(values[seq(length = eicmax),], | |
637 sampclass(object), dirpath=boxdir, pic="pdf", | |
638 width=w, height=h) | |
639 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, | |
640 height = h/72, onefile = FALSE) | |
641 } | |
642 } | |
643 | |
644 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1) | |
645 | |
646 if (length(filebase)) | |
647 dev.off() | |
648 } | |
649 } | |
650 | |
651 invisible(twosamp) | |
652 } |