Mercurial > repos > marie-tremblay-metatoul > nmr_bucketing
comparison NmrBucketing_script.R @ 0:a99a6026c972 draft
planemo upload for repository https://github.com/workflow4metabolomics/nmr_bucketing commit 44fd4fc42930d2e9ad7b77cf575f2231547de15c
author | marie-tremblay-metatoul |
---|---|
date | Fri, 08 Apr 2016 10:56:14 -0400 |
parents | |
children | 301d7adf862e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a99a6026c972 |
---|---|
1 ################################################################################################ | |
2 # SPECTRA BUCKETING AND INTEGRATION FROM RAW BRUKER FILES # | |
3 # User : Galaxy # | |
4 # Original data : -- # | |
5 # Starting date : 20-10-2014 # | |
6 # Version 1 : 18-12-2014 # | |
7 # Version 2 : 07-01-2015 # | |
8 # # | |
9 # Input files : files in included in user-defined directory # | |
10 ################################################################################################ | |
11 NmrBucketing <- function(directory,leftBorder = 10.0,rightBorder = 0.5,bucketSize = 0.04,exclusionZones,exclusionZonesBorders=NULL, | |
12 graph=c("None","Overlay","One_per_individual"),nomFichier,savLog.txtC = NULL) | |
13 { | |
14 ## Option | |
15 ##--------------- | |
16 strAsFacL <- options()$stringsAsFactors | |
17 options(stingsAsFactors = FALSE) | |
18 options(warn = -1) | |
19 | |
20 | |
21 ## Constants | |
22 ##--------------- | |
23 topEnvC <- environment() | |
24 flgC <- "\n" | |
25 | |
26 ## Log file (in case of integration into Galaxy) | |
27 ##---------------------------------------------- | |
28 if(!is.null(savLog.txtC)) | |
29 sink(savLog.txtC, append = TRUE) | |
30 | |
31 ## Functions definition | |
32 ##--------------------- | |
33 ## RAW BRUKER FILE READING FUNCTION | |
34 NmRBrucker_read <- function(DataDir,SampleSpectrum) | |
35 { | |
36 | |
37 bruker.get_param <- function (ACQ,paramStr) | |
38 { | |
39 regexpStr <- paste("^...",paramStr,"=",sep="") | |
40 as.numeric(gsub("^[^=]+= ","" ,ACQ[which(simplify2array(regexpr(regexpStr,ACQ))>0)])) | |
41 } | |
42 | |
43 ACQFILE <- "acqus" | |
44 SPECFILE <- paste(DataDir,"/1r",sep="") | |
45 PROCFILE <- paste(DataDir,"/procs",sep="") | |
46 | |
47 ACQ <- readLines(ACQFILE) | |
48 TD <- bruker.get_param(ACQ,"TD") | |
49 SW <- bruker.get_param(ACQ,"SW") | |
50 SWH <- bruker.get_param(ACQ,"SW_h") | |
51 DTYPA <- bruker.get_param(ACQ,"DTYPA") | |
52 BYTORDA <- bruker.get_param(ACQ,"BYTORDA") | |
53 #ENDIAN = ifelse( BYTORDA==0, "little", "big") | |
54 ENDIAN <- "little" | |
55 SIZE = ifelse( DTYPA==0, 4, 8) | |
56 | |
57 PROC <- readLines(PROCFILE) | |
58 OFFSET <- bruker.get_param(PROC,"OFFSET") | |
59 SI <- bruker.get_param(PROC,"SI") | |
60 | |
61 to.read = file(SPECFILE,"rb") | |
62 maxTDSI = max(TD,SI) | |
63 # signal<-rev(readBin(to.read, what="int",size=SIZE, n=TD, signed = TRUE, endian = ENDIAN)) | |
64 signal<-rev(readBin(to.read, what="int",size=SIZE, n=maxTDSI, signed = TRUE, endian = ENDIAN)) | |
65 close(to.read) | |
66 | |
67 td <- length(signal) | |
68 | |
69 # dppm <- SW/(TD-1) | |
70 dppm <- SW/(td-1) | |
71 pmax <- OFFSET | |
72 pmin <- OFFSET - SW | |
73 ppmseq <- seq(from=pmin, to=pmax, by=dppm) | |
74 signal <- 100*signal/max(signal) | |
75 | |
76 SampleSpectrum <- cbind(ppmseq,signal) | |
77 return(SampleSpectrum) | |
78 } | |
79 | |
80 ## SPECTRUM BUCKETING | |
81 NmrBrucker_bucket <- function(spectrum) | |
82 { | |
83 # Initialisations | |
84 b <- 1 | |
85 j <- 1 | |
86 # Variable number | |
87 J <- round((spectrum[1,1]-spectrum[dim(spectrum)[1],1])/bucketSize) | |
88 f.bucket <- matrix(rep(0,J*2),ncol=2) | |
89 colnames(f.bucket) <- c("Bucket",FileNames[i]) | |
90 | |
91 | |
92 # Data bucketing | |
93 while (j < dim(spectrum)[1]) | |
94 { | |
95 # chemical shift | |
96 BUB <- spectrum[j,1] | |
97 | |
98 # In zone exclusion? | |
99 exclusion.in <- FALSE | |
100 if (!is.null(exclusionZonesBorders)) | |
101 { | |
102 for (k in 1:nrow(exclusion.zone.m)) | |
103 if (BUB <= exclusion.zone.m[k,1] && exclusion.zone.m[k,2] < BUB) | |
104 exclusion.in <- TRUE | |
105 } | |
106 | |
107 if (exclusion.in) | |
108 j <- j + 1 | |
109 | |
110 if (!exclusion.in) | |
111 # Bucketing | |
112 { | |
113 BLB <- BUB - bucketSize | |
114 bucket <- spectrum[j,] | |
115 while (j < dim(spectrum)[1] && spectrum[j,1] > BLB) | |
116 { | |
117 j <- j + 1 | |
118 if (spectrum[j,1] > BLB) | |
119 bucket <- rbind(bucket,spectrum[j,]) | |
120 } | |
121 | |
122 # Integration (trapezoid method) | |
123 s <- cumtrapz(bucket[,1],bucket[,2]) | |
124 f.bucket[b,] <- c(round(mean(bucket[,1]),3),abs(s[length(s)][[1]])) | |
125 | |
126 # Next bucket boundary | |
127 BUB <- spectrum[j,1] | |
128 b <- b + 1 | |
129 } | |
130 } | |
131 return(f.bucket) | |
132 } | |
133 | |
134 | |
135 # File names | |
136 FileNames <- list.files(directory) | |
137 n <- length(FileNames) | |
138 | |
139 # Exclusion zones | |
140 ## if (exclusionZones == "yes") | |
141 if (!is.null(exclusionZonesBorders)) | |
142 { | |
143 exclusion.zone.m <- matrix(exclusionZonesBorders[[1]],nrow=1) | |
144 if (length(exclusionZonesBorders) > 1) | |
145 for (k in 2:length(exclusionZonesBorders)) | |
146 exclusion.zone.m <- rbind(exclusion.zone.m,exclusionZonesBorders[[k]]) | |
147 } | |
148 | |
149 # Reading and Bucketing | |
150 directory <- paste(directory,"/",sep="") | |
151 | |
152 i <- 1 | |
153 while (i <= n) | |
154 { | |
155 # File reading | |
156 SampleDir <- paste(directory,FileNames[i],"/1/",sep="") | |
157 setwd(SampleDir) | |
158 DataDir <- "pdata/1" | |
159 | |
160 rawSpectrum <- NmRBrucker_read(DataDir,rawSpectrum) | |
161 | |
162 orderedSpectrum <- rawSpectrum[order(rawSpectrum[,1],decreasing=T), ] | |
163 | |
164 # Removal of chemical shifts > leftBorder or < rightBorder boundaries | |
165 truncatedSpectrum <- orderedSpectrum[orderedSpectrum[,1] < leftBorder & orderedSpectrum[,1] > rightBorder, ] | |
166 truncatedSpectrum[,1] <- round(truncatedSpectrum[,1],3) | |
167 | |
168 # Bucketing | |
169 spectrum.bucket <- NmrBrucker_bucket(truncatedSpectrum) | |
170 | |
171 # spectrum Concatenation | |
172 if (i == 1) | |
173 bucketedSpectra <- spectrum.bucket | |
174 if (i > 1) | |
175 bucketedSpectra <- cbind(bucketedSpectra,spectrum.bucket[,2]) | |
176 colnames(bucketedSpectra)[i+1] <- FileNames[i] | |
177 | |
178 # Next sample | |
179 rm(spectrum.bucket) | |
180 i <- i +1 | |
181 } | |
182 identifiants <- gsub("([- , * { } | \\[ ])","_",colnames(bucketedSpectra)[-1]) | |
183 colnames(bucketedSpectra) <- c(colnames(bucketedSpectra)[1],identifiants) | |
184 | |
185 bucketedSpectra <- bucketedSpectra[bucketedSpectra[,1]!=0,] | |
186 rownames(bucketedSpectra) <- paste("B",bucketedSpectra[,1],sep="") | |
187 bucketedSpectra <- bucketedSpectra[,-1] | |
188 | |
189 # Metadata matrice outputs | |
190 sampleMetadata <- data.frame(1:n) | |
191 rownames(sampleMetadata) <- colnames(bucketedSpectra) | |
192 colnames(sampleMetadata) <- "SampleOrder" | |
193 | |
194 variableMetadata <- data.frame(1:nrow(bucketedSpectra)) | |
195 rownames(variableMetadata) <- rownames(bucketedSpectra) | |
196 colnames(variableMetadata) <- "VariableOrder" | |
197 | |
198 # Directory | |
199 cd(directory) | |
200 | |
201 # Bucketed spectra graph | |
202 if (graph != "None") | |
203 { | |
204 # Graphic Device opening | |
205 pdf(nomFichier,onefile=TRUE) | |
206 | |
207 if (graph == "Overlay") | |
208 { | |
209 ymax <- max(bucketedSpectra) | |
210 plot(1:length(bucketedSpectra[,1]),bucketedSpectra[,1],ylim=c(0,ymax),type='l',col=1,xlab="Chemical shift",ylab="Intensity") | |
211 for (i in 2:ncol(bucketedSpectra)) | |
212 { | |
213 spectre <- bucketedSpectra[,i] | |
214 lines(spectre,col=i) | |
215 } | |
216 # legend(0,ymax,lty=c(1,1),legend=colnames(bucketedSpectra),col=1:ncol(bucketedSpectra)) | |
217 } | |
218 else | |
219 { | |
220 for (i in 1:ncol(bucketedSpectra)) | |
221 { | |
222 plot(1:length(bucketedSpectra[,i]),bucketedSpectra[,i],type='l',col=1,xlab="Chemical shift",ylab="Intensity") | |
223 } | |
224 } | |
225 dev.off() | |
226 } | |
227 return(list(bucketedSpectra,sampleMetadata,variableMetadata)) # ,truncatedSpectrum_matrice | |
228 } | |
229 | |
230 | |
231 ################################################################################################################# | |
232 ## Typical function call | |
233 ################################################################################################################# | |
234 ## StudyDir <- "K:/PROJETS/Metabohub/Bruker/Tlse_BPASourisCerveau/" | |
235 ## upper <- 9.5 | |
236 ## lower <- 0.8 | |
237 ## bucket.width <- 0.01 | |
238 ## exclusion <- TRUE | |
239 ## exclusion.zone <- list(c(5.1,4.5)) | |
240 ## graphique <- "Overlay" | |
241 ## nomFichier <- "Tlse_BPASourisCerveau_NmrBucketing_graph.pdf" | |
242 ## tlse_cerveaupnd21.bucket <- NmrBucketing(StudyDir,upper,lower,bucket.width,exclusion,exclusion.zone,graphique,nomFichier) | |
243 ## write.table(tlse_cerveaupnd21.bucket,file=paste(StudyDir,"Tlse_BPASourisCerveau_NmrBucketing_dataMatrix.tsv",sep=""), | |
244 ## quote=FALSE,row.nmaes=FALSE,sep="\t") | |
245 ################################################################################################################# |