comparison NmrBucketing_script.R @ 0:a99a6026c972 draft

planemo upload for repository https://github.com/workflow4metabolomics/nmr_bucketing commit 44fd4fc42930d2e9ad7b77cf575f2231547de15c
author marie-tremblay-metatoul
date Fri, 08 Apr 2016 10:56:14 -0400
parents
children 301d7adf862e
comparison
equal deleted inserted replaced
-1:000000000000 0:a99a6026c972
1 ################################################################################################
2 # SPECTRA BUCKETING AND INTEGRATION FROM RAW BRUKER FILES #
3 # User : Galaxy #
4 # Original data : -- #
5 # Starting date : 20-10-2014 #
6 # Version 1 : 18-12-2014 #
7 # Version 2 : 07-01-2015 #
8 # #
9 # Input files : files in included in user-defined directory #
10 ################################################################################################
11 NmrBucketing <- function(directory,leftBorder = 10.0,rightBorder = 0.5,bucketSize = 0.04,exclusionZones,exclusionZonesBorders=NULL,
12 graph=c("None","Overlay","One_per_individual"),nomFichier,savLog.txtC = NULL)
13 {
14 ## Option
15 ##---------------
16 strAsFacL <- options()$stringsAsFactors
17 options(stingsAsFactors = FALSE)
18 options(warn = -1)
19
20
21 ## Constants
22 ##---------------
23 topEnvC <- environment()
24 flgC <- "\n"
25
26 ## Log file (in case of integration into Galaxy)
27 ##----------------------------------------------
28 if(!is.null(savLog.txtC))
29 sink(savLog.txtC, append = TRUE)
30
31 ## Functions definition
32 ##---------------------
33 ## RAW BRUKER FILE READING FUNCTION
34 NmRBrucker_read <- function(DataDir,SampleSpectrum)
35 {
36
37 bruker.get_param <- function (ACQ,paramStr)
38 {
39 regexpStr <- paste("^...",paramStr,"=",sep="")
40 as.numeric(gsub("^[^=]+= ","" ,ACQ[which(simplify2array(regexpr(regexpStr,ACQ))>0)]))
41 }
42
43 ACQFILE <- "acqus"
44 SPECFILE <- paste(DataDir,"/1r",sep="")
45 PROCFILE <- paste(DataDir,"/procs",sep="")
46
47 ACQ <- readLines(ACQFILE)
48 TD <- bruker.get_param(ACQ,"TD")
49 SW <- bruker.get_param(ACQ,"SW")
50 SWH <- bruker.get_param(ACQ,"SW_h")
51 DTYPA <- bruker.get_param(ACQ,"DTYPA")
52 BYTORDA <- bruker.get_param(ACQ,"BYTORDA")
53 #ENDIAN = ifelse( BYTORDA==0, "little", "big")
54 ENDIAN <- "little"
55 SIZE = ifelse( DTYPA==0, 4, 8)
56
57 PROC <- readLines(PROCFILE)
58 OFFSET <- bruker.get_param(PROC,"OFFSET")
59 SI <- bruker.get_param(PROC,"SI")
60
61 to.read = file(SPECFILE,"rb")
62 maxTDSI = max(TD,SI)
63 # signal<-rev(readBin(to.read, what="int",size=SIZE, n=TD, signed = TRUE, endian = ENDIAN))
64 signal<-rev(readBin(to.read, what="int",size=SIZE, n=maxTDSI, signed = TRUE, endian = ENDIAN))
65 close(to.read)
66
67 td <- length(signal)
68
69 # dppm <- SW/(TD-1)
70 dppm <- SW/(td-1)
71 pmax <- OFFSET
72 pmin <- OFFSET - SW
73 ppmseq <- seq(from=pmin, to=pmax, by=dppm)
74 signal <- 100*signal/max(signal)
75
76 SampleSpectrum <- cbind(ppmseq,signal)
77 return(SampleSpectrum)
78 }
79
80 ## SPECTRUM BUCKETING
81 NmrBrucker_bucket <- function(spectrum)
82 {
83 # Initialisations
84 b <- 1
85 j <- 1
86 # Variable number
87 J <- round((spectrum[1,1]-spectrum[dim(spectrum)[1],1])/bucketSize)
88 f.bucket <- matrix(rep(0,J*2),ncol=2)
89 colnames(f.bucket) <- c("Bucket",FileNames[i])
90
91
92 # Data bucketing
93 while (j < dim(spectrum)[1])
94 {
95 # chemical shift
96 BUB <- spectrum[j,1]
97
98 # In zone exclusion?
99 exclusion.in <- FALSE
100 if (!is.null(exclusionZonesBorders))
101 {
102 for (k in 1:nrow(exclusion.zone.m))
103 if (BUB <= exclusion.zone.m[k,1] && exclusion.zone.m[k,2] < BUB)
104 exclusion.in <- TRUE
105 }
106
107 if (exclusion.in)
108 j <- j + 1
109
110 if (!exclusion.in)
111 # Bucketing
112 {
113 BLB <- BUB - bucketSize
114 bucket <- spectrum[j,]
115 while (j < dim(spectrum)[1] && spectrum[j,1] > BLB)
116 {
117 j <- j + 1
118 if (spectrum[j,1] > BLB)
119 bucket <- rbind(bucket,spectrum[j,])
120 }
121
122 # Integration (trapezoid method)
123 s <- cumtrapz(bucket[,1],bucket[,2])
124 f.bucket[b,] <- c(round(mean(bucket[,1]),3),abs(s[length(s)][[1]]))
125
126 # Next bucket boundary
127 BUB <- spectrum[j,1]
128 b <- b + 1
129 }
130 }
131 return(f.bucket)
132 }
133
134
135 # File names
136 FileNames <- list.files(directory)
137 n <- length(FileNames)
138
139 # Exclusion zones
140 ## if (exclusionZones == "yes")
141 if (!is.null(exclusionZonesBorders))
142 {
143 exclusion.zone.m <- matrix(exclusionZonesBorders[[1]],nrow=1)
144 if (length(exclusionZonesBorders) > 1)
145 for (k in 2:length(exclusionZonesBorders))
146 exclusion.zone.m <- rbind(exclusion.zone.m,exclusionZonesBorders[[k]])
147 }
148
149 # Reading and Bucketing
150 directory <- paste(directory,"/",sep="")
151
152 i <- 1
153 while (i <= n)
154 {
155 # File reading
156 SampleDir <- paste(directory,FileNames[i],"/1/",sep="")
157 setwd(SampleDir)
158 DataDir <- "pdata/1"
159
160 rawSpectrum <- NmRBrucker_read(DataDir,rawSpectrum)
161
162 orderedSpectrum <- rawSpectrum[order(rawSpectrum[,1],decreasing=T), ]
163
164 # Removal of chemical shifts > leftBorder or < rightBorder boundaries
165 truncatedSpectrum <- orderedSpectrum[orderedSpectrum[,1] < leftBorder & orderedSpectrum[,1] > rightBorder, ]
166 truncatedSpectrum[,1] <- round(truncatedSpectrum[,1],3)
167
168 # Bucketing
169 spectrum.bucket <- NmrBrucker_bucket(truncatedSpectrum)
170
171 # spectrum Concatenation
172 if (i == 1)
173 bucketedSpectra <- spectrum.bucket
174 if (i > 1)
175 bucketedSpectra <- cbind(bucketedSpectra,spectrum.bucket[,2])
176 colnames(bucketedSpectra)[i+1] <- FileNames[i]
177
178 # Next sample
179 rm(spectrum.bucket)
180 i <- i +1
181 }
182 identifiants <- gsub("([- , * { } | \\[ ])","_",colnames(bucketedSpectra)[-1])
183 colnames(bucketedSpectra) <- c(colnames(bucketedSpectra)[1],identifiants)
184
185 bucketedSpectra <- bucketedSpectra[bucketedSpectra[,1]!=0,]
186 rownames(bucketedSpectra) <- paste("B",bucketedSpectra[,1],sep="")
187 bucketedSpectra <- bucketedSpectra[,-1]
188
189 # Metadata matrice outputs
190 sampleMetadata <- data.frame(1:n)
191 rownames(sampleMetadata) <- colnames(bucketedSpectra)
192 colnames(sampleMetadata) <- "SampleOrder"
193
194 variableMetadata <- data.frame(1:nrow(bucketedSpectra))
195 rownames(variableMetadata) <- rownames(bucketedSpectra)
196 colnames(variableMetadata) <- "VariableOrder"
197
198 # Directory
199 cd(directory)
200
201 # Bucketed spectra graph
202 if (graph != "None")
203 {
204 # Graphic Device opening
205 pdf(nomFichier,onefile=TRUE)
206
207 if (graph == "Overlay")
208 {
209 ymax <- max(bucketedSpectra)
210 plot(1:length(bucketedSpectra[,1]),bucketedSpectra[,1],ylim=c(0,ymax),type='l',col=1,xlab="Chemical shift",ylab="Intensity")
211 for (i in 2:ncol(bucketedSpectra))
212 {
213 spectre <- bucketedSpectra[,i]
214 lines(spectre,col=i)
215 }
216 # legend(0,ymax,lty=c(1,1),legend=colnames(bucketedSpectra),col=1:ncol(bucketedSpectra))
217 }
218 else
219 {
220 for (i in 1:ncol(bucketedSpectra))
221 {
222 plot(1:length(bucketedSpectra[,i]),bucketedSpectra[,i],type='l',col=1,xlab="Chemical shift",ylab="Intensity")
223 }
224 }
225 dev.off()
226 }
227 return(list(bucketedSpectra,sampleMetadata,variableMetadata)) # ,truncatedSpectrum_matrice
228 }
229
230
231 #################################################################################################################
232 ## Typical function call
233 #################################################################################################################
234 ## StudyDir <- "K:/PROJETS/Metabohub/Bruker/Tlse_BPASourisCerveau/"
235 ## upper <- 9.5
236 ## lower <- 0.8
237 ## bucket.width <- 0.01
238 ## exclusion <- TRUE
239 ## exclusion.zone <- list(c(5.1,4.5))
240 ## graphique <- "Overlay"
241 ## nomFichier <- "Tlse_BPASourisCerveau_NmrBucketing_graph.pdf"
242 ## tlse_cerveaupnd21.bucket <- NmrBucketing(StudyDir,upper,lower,bucket.width,exclusion,exclusion.zone,graphique,nomFichier)
243 ## write.table(tlse_cerveaupnd21.bucket,file=paste(StudyDir,"Tlse_BPASourisCerveau_NmrBucketing_dataMatrix.tsv",sep=""),
244 ## quote=FALSE,row.nmaes=FALSE,sep="\t")
245 #################################################################################################################