comparison NmrBucketing_script.R @ 9:62c62e31fc80 draft

planemo upload for repository https://github.com/workflow4metabolomics/nmr_bucketing commit fc2be0f9fa66f830d592d74d14b47a63e761647b
author lecorguille
date Fri, 21 Apr 2017 08:53:40 -0400
parents
children
comparison
equal deleted inserted replaced
8:c54c70af216b 9:62c62e31fc80
1 ################################################################################################
2 # SPECTRA BUCKETING AND INTEGRATION FROM RAW BRUKER FILES #
3 # User : Galaxy #
4 # Original data : -- #
5 # Starting date : 20-10-2014 #
6 # Version 1 : 18-12-2014 #
7 # Version 2 : 07-01-2015 #
8 # Version 3 : 24-10-2016 #
9 # #
10 # Input files : modification on october 2016 #
11 # - Raw bruker files included in user-defined fileName #
12 # - Preprocessed files (alignment, ...) included in p x n dataframe #
13 ################################################################################################
14 NmrBucketing <- function(fileType,fileName,leftBorder = 10.0,rightBorder = 0.5,bucketSize = 0.04,exclusionZones,
15 exclusionZonesBorders=NULL,graph=c("None","Overlay","One_per_individual"),
16 nomFichier,savLog.txtC = NULL)
17 {
18 ## Option
19 ##---------------
20 strAsFacL <- options()$stringsAsFactors
21 options(stingsAsFactors = FALSE)
22 options(warn = -1)
23
24
25 ## Constants
26 ##---------------
27 topEnvC <- environment()
28 flgC <- "\n"
29
30 ## Log file (in case of integration into Galaxy)
31 ##----------------------------------------------
32 if(!is.null(savLog.txtC))
33 sink(savLog.txtC, append = TRUE)
34
35 ## Functions definition
36 ##---------------------
37 ## RAW BRUKER FILE READING FUNCTION
38 NmRBrucker_read <- function(DataDir,SampleSpectrum)
39 {
40
41 bruker.get_param <- function (ACQ,paramStr)
42 {
43 regexpStr <- paste("^...",paramStr,"=",sep="")
44 as.numeric(gsub("^[^=]+= ","" ,ACQ[which(simplify2array(regexpr(regexpStr,ACQ))>0)]))
45 }
46
47 ACQFILE <- "acqus"
48 SPECFILE <- paste(DataDir,"/1r",sep="")
49 PROCFILE <- paste(DataDir,"/procs",sep="")
50
51 ACQ <- readLines(ACQFILE)
52 TD <- bruker.get_param(ACQ,"TD")
53 SW <- bruker.get_param(ACQ,"SW")
54 SWH <- bruker.get_param(ACQ,"SW_h")
55 DTYPA <- bruker.get_param(ACQ,"DTYPA")
56 BYTORDA <- bruker.get_param(ACQ,"BYTORDA")
57 #ENDIAN = ifelse( BYTORDA==0, "little", "big")
58 ENDIAN <- "little"
59 SIZE = ifelse( DTYPA==0, 4, 8)
60
61 PROC <- readLines(PROCFILE)
62 OFFSET <- bruker.get_param(PROC,"OFFSET")
63 SI <- bruker.get_param(PROC,"SI")
64
65 to.read = file(SPECFILE,"rb")
66 maxTDSI = max(TD,SI)
67 # signal<-rev(readBin(to.read, what="int",size=SIZE, n=TD, signed = TRUE, endian = ENDIAN))
68 signal<-rev(readBin(to.read, what="int",size=SIZE, n=maxTDSI, signed = TRUE, endian = ENDIAN))
69 close(to.read)
70
71 td <- length(signal)
72
73 # dppm <- SW/(TD-1)
74 dppm <- SW/(td-1)
75 pmax <- OFFSET
76 pmin <- OFFSET - SW
77 ppmseq <- seq(from=pmin, to=pmax, by=dppm)
78 signal <- 100*signal/max(signal)
79
80 SampleSpectrum <- cbind(ppmseq,signal)
81 return(SampleSpectrum)
82 }
83
84 ## SPECTRUM BUCKETING
85 NmrBrucker_bucket <- function(spectrum)
86 {
87 # Initialisations
88 b <- 1
89 j <- 1
90 # Variable number
91 J <- round((spectrum[1,1]-spectrum[dim(spectrum)[1],1])/bucketSize)
92 f.bucket <- matrix(rep(0,J*2),ncol=2)
93 colnames(f.bucket) <- c("Bucket",FileNames[i])
94
95
96 # Data bucketing
97 while (j < dim(spectrum)[1])
98 {
99 # chemical shift
100 BUB <- spectrum[j,1]
101
102 # In zone exclusion?
103 exclusion.in <- FALSE
104 if (!is.null(exclusionZonesBorders))
105 {
106 for (k in 1:nrow(exclusion.zone.m))
107 if (BUB <= exclusion.zone.m[k,1] && exclusion.zone.m[k,2] < BUB)
108 exclusion.in <- TRUE
109 }
110
111 if (exclusion.in)
112 {
113 # Bucketing
114 {
115 BLB <- BUB - bucketSize
116 bucket <- spectrum[j,]
117 while (j < dim(spectrum)[1] && spectrum[j,1] > BLB)
118 {
119 j <- j + 1
120 if (spectrum[j,1] > BLB)
121 bucket <- rbind(bucket,spectrum[j,])
122 }
123
124 f.bucket[b,] <- c(round(mean(bucket[,1]),3),0)
125
126 # Next bucket boundary
127 BUB <- spectrum[j,1]
128 b <- b + 1
129 }
130 }
131
132 if (!exclusion.in)
133 # Bucketing
134 {
135 BLB <- BUB - bucketSize
136 bucket <- spectrum[j,]
137 while (j < dim(spectrum)[1] && spectrum[j,1] > BLB)
138 {
139 j <- j + 1
140 if (spectrum[j,1] > BLB)
141 bucket <- rbind(bucket,spectrum[j,])
142 }
143
144 # Integration (trapezoid method)
145 s <- cumtrapz(bucket[,1],bucket[,2])
146 f.bucket[b,] <- c(round(mean(bucket[,1]),3),abs(s[length(s)][[1]]))
147
148 # Next bucket boundary
149 BUB <- spectrum[j,1]
150 b <- b + 1
151 }
152 }
153 return(f.bucket)
154 }
155
156 # Exclusion zones
157 if (!is.null(exclusionZonesBorders))
158 {
159 exclusion.zone.m <- matrix(exclusionZonesBorders[[1]],nrow=1)
160 if (length(exclusionZonesBorders) > 1)
161 for (k in 2:length(exclusionZonesBorders))
162 exclusion.zone.m <- rbind(exclusion.zone.m,exclusionZonesBorders[[k]])
163 }
164
165 ## CHANGES
166 ## Inputs from zip or library (raw files)
167 if (fileType == "zip")
168 {
169 # File names
170 FileNames <- list.files(fileName)
171 n <- length(FileNames)
172
173 # Reading and Bucketing
174 fileName <- paste(fileName,"/",sep="")
175
176 i <- 1
177 while (i <= n)
178 {
179 # File reading
180 SampleDir <- paste(fileName,FileNames[i],"/1/",sep="")
181 setwd(SampleDir)
182 DataDir <- "pdata/1"
183
184 rawSpectrum <- NmRBrucker_read(DataDir,rawSpectrum)
185
186 orderedSpectrum <- rawSpectrum[order(rawSpectrum[,1],decreasing=T), ]
187
188 # Removal of chemical shifts > leftBorder or < rightBorder boundaries
189 truncatedSpectrum <- orderedSpectrum[orderedSpectrum[,1] < leftBorder & orderedSpectrum[,1] > rightBorder, ]
190 truncatedSpectrum[,1] <- round(truncatedSpectrum[,1],3)
191
192 # Bucketing
193 spectrum.bucket <- NmrBrucker_bucket(truncatedSpectrum)
194 ppm <- spectrum.bucket[,1]
195
196 # spectrum Concatenation
197 if (i == 1)
198 bucketedSpectra <- spectrum.bucket
199 if (i > 1)
200 bucketedSpectra <- cbind(bucketedSpectra,spectrum.bucket[,2])
201 colnames(bucketedSpectra)[i+1] <- FileNames[i]
202
203 # Next sample
204 rm(spectrum.bucket)
205 i <- i +1
206 }
207 # Directory
208 cd(fileName)
209 }
210
211 ## Inputs from dataset (preprocessed files)
212 if (fileType=="tsv")
213 {
214 FileNames <- colnames(fileName)
215 n <- length(FileNames)
216
217 for (i in 1:ncol(fileName))
218 {
219 orderedSpectrum <- cbind(as.numeric(rownames(fileName)),fileName[,i])
220 orderedSpectrum <- orderedSpectrum[order(orderedSpectrum[,1],decreasing=T), ]
221
222 truncatedSpectrum <- orderedSpectrum[orderedSpectrum[,1] < leftBorder & orderedSpectrum[,1] > rightBorder, ]
223 truncatedSpectrum[,1] <- round(truncatedSpectrum[,1],3)
224
225 # Bucketing
226 spectrum.bucket <- NmrBrucker_bucket(truncatedSpectrum)
227 ppm <- spectrum.bucket[,1]
228
229 # spectrum Concatenation
230 if (i == 1)
231 bucketedSpectra <- spectrum.bucket
232 if (i > 1)
233 bucketedSpectra <- cbind(bucketedSpectra,spectrum.bucket[,2])
234 colnames(bucketedSpectra)[i+1] <- colnames(fileName)[i]
235 }
236 }
237
238 identifiants <- gsub("([- , * { } | \\[ ])","_",colnames(bucketedSpectra)[-1])
239 colnames(bucketedSpectra) <- c(colnames(bucketedSpectra)[1],identifiants)
240
241 bucketedSpectra <- bucketedSpectra[bucketedSpectra[,1]!=0,]
242 rownames(bucketedSpectra) <- paste("B",bucketedSpectra[,1],sep="")
243 bucketedSpectra <- bucketedSpectra[,-1]
244
245 # Metadata matrice outputs
246 sampleMetadata <- data.frame(1:n)
247 rownames(sampleMetadata) <- colnames(bucketedSpectra)
248 colnames(sampleMetadata) <- "SampleOrder"
249
250 variableMetadata <- data.frame(1:nrow(bucketedSpectra))
251 rownames(variableMetadata) <- rownames(bucketedSpectra)
252 colnames(variableMetadata) <- "VariableOrder"
253
254
255 return(list(bucketedSpectra,sampleMetadata,variableMetadata,ppm)) # ,truncatedSpectrum_matrice
256 }
257
258
259 #################################################################################################################
260 ## Typical function call
261 #################################################################################################################
262 ## StudyDir <- "K:/PROJETS/Metabohub/Bruker/Tlse_BPASourisCerveau/"
263 ## upper <- 9.5
264 ## lower <- 0.8
265 ## bucket.width <- 0.01
266 ## exclusion <- TRUE
267 ## exclusion.zone <- list(c(5.1,4.5))
268 ## graphique <- "Overlay"
269 ## nomFichier <- "Tlse_BPASourisCerveau_NmrBucketing_graph.pdf"
270 ## tlse_cerveaupnd21.bucket <- NmrBucketing(StudyDir,upper,lower,bucket.width,exclusion,exclusion.zone,graphique,nomFichier)
271 ## write.table(tlse_cerveaupnd21.bucket,file=paste(StudyDir,"Tlse_BPASourisCerveau_NmrBucketing_dataMatrix.tsv",sep=""),
272 ## quote=FALSE,row.nmaes=FALSE,sep="\t")
273 #################################################################################################################