Mercurial > repos > marie-tremblay-metatoul > nmr_bucketing
comparison NmrBucketing_script.R @ 9:62c62e31fc80 draft
planemo upload for repository https://github.com/workflow4metabolomics/nmr_bucketing commit fc2be0f9fa66f830d592d74d14b47a63e761647b
author | lecorguille |
---|---|
date | Fri, 21 Apr 2017 08:53:40 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
8:c54c70af216b | 9:62c62e31fc80 |
---|---|
1 ################################################################################################ | |
2 # SPECTRA BUCKETING AND INTEGRATION FROM RAW BRUKER FILES # | |
3 # User : Galaxy # | |
4 # Original data : -- # | |
5 # Starting date : 20-10-2014 # | |
6 # Version 1 : 18-12-2014 # | |
7 # Version 2 : 07-01-2015 # | |
8 # Version 3 : 24-10-2016 # | |
9 # # | |
10 # Input files : modification on october 2016 # | |
11 # - Raw bruker files included in user-defined fileName # | |
12 # - Preprocessed files (alignment, ...) included in p x n dataframe # | |
13 ################################################################################################ | |
14 NmrBucketing <- function(fileType,fileName,leftBorder = 10.0,rightBorder = 0.5,bucketSize = 0.04,exclusionZones, | |
15 exclusionZonesBorders=NULL,graph=c("None","Overlay","One_per_individual"), | |
16 nomFichier,savLog.txtC = NULL) | |
17 { | |
18 ## Option | |
19 ##--------------- | |
20 strAsFacL <- options()$stringsAsFactors | |
21 options(stingsAsFactors = FALSE) | |
22 options(warn = -1) | |
23 | |
24 | |
25 ## Constants | |
26 ##--------------- | |
27 topEnvC <- environment() | |
28 flgC <- "\n" | |
29 | |
30 ## Log file (in case of integration into Galaxy) | |
31 ##---------------------------------------------- | |
32 if(!is.null(savLog.txtC)) | |
33 sink(savLog.txtC, append = TRUE) | |
34 | |
35 ## Functions definition | |
36 ##--------------------- | |
37 ## RAW BRUKER FILE READING FUNCTION | |
38 NmRBrucker_read <- function(DataDir,SampleSpectrum) | |
39 { | |
40 | |
41 bruker.get_param <- function (ACQ,paramStr) | |
42 { | |
43 regexpStr <- paste("^...",paramStr,"=",sep="") | |
44 as.numeric(gsub("^[^=]+= ","" ,ACQ[which(simplify2array(regexpr(regexpStr,ACQ))>0)])) | |
45 } | |
46 | |
47 ACQFILE <- "acqus" | |
48 SPECFILE <- paste(DataDir,"/1r",sep="") | |
49 PROCFILE <- paste(DataDir,"/procs",sep="") | |
50 | |
51 ACQ <- readLines(ACQFILE) | |
52 TD <- bruker.get_param(ACQ,"TD") | |
53 SW <- bruker.get_param(ACQ,"SW") | |
54 SWH <- bruker.get_param(ACQ,"SW_h") | |
55 DTYPA <- bruker.get_param(ACQ,"DTYPA") | |
56 BYTORDA <- bruker.get_param(ACQ,"BYTORDA") | |
57 #ENDIAN = ifelse( BYTORDA==0, "little", "big") | |
58 ENDIAN <- "little" | |
59 SIZE = ifelse( DTYPA==0, 4, 8) | |
60 | |
61 PROC <- readLines(PROCFILE) | |
62 OFFSET <- bruker.get_param(PROC,"OFFSET") | |
63 SI <- bruker.get_param(PROC,"SI") | |
64 | |
65 to.read = file(SPECFILE,"rb") | |
66 maxTDSI = max(TD,SI) | |
67 # signal<-rev(readBin(to.read, what="int",size=SIZE, n=TD, signed = TRUE, endian = ENDIAN)) | |
68 signal<-rev(readBin(to.read, what="int",size=SIZE, n=maxTDSI, signed = TRUE, endian = ENDIAN)) | |
69 close(to.read) | |
70 | |
71 td <- length(signal) | |
72 | |
73 # dppm <- SW/(TD-1) | |
74 dppm <- SW/(td-1) | |
75 pmax <- OFFSET | |
76 pmin <- OFFSET - SW | |
77 ppmseq <- seq(from=pmin, to=pmax, by=dppm) | |
78 signal <- 100*signal/max(signal) | |
79 | |
80 SampleSpectrum <- cbind(ppmseq,signal) | |
81 return(SampleSpectrum) | |
82 } | |
83 | |
84 ## SPECTRUM BUCKETING | |
85 NmrBrucker_bucket <- function(spectrum) | |
86 { | |
87 # Initialisations | |
88 b <- 1 | |
89 j <- 1 | |
90 # Variable number | |
91 J <- round((spectrum[1,1]-spectrum[dim(spectrum)[1],1])/bucketSize) | |
92 f.bucket <- matrix(rep(0,J*2),ncol=2) | |
93 colnames(f.bucket) <- c("Bucket",FileNames[i]) | |
94 | |
95 | |
96 # Data bucketing | |
97 while (j < dim(spectrum)[1]) | |
98 { | |
99 # chemical shift | |
100 BUB <- spectrum[j,1] | |
101 | |
102 # In zone exclusion? | |
103 exclusion.in <- FALSE | |
104 if (!is.null(exclusionZonesBorders)) | |
105 { | |
106 for (k in 1:nrow(exclusion.zone.m)) | |
107 if (BUB <= exclusion.zone.m[k,1] && exclusion.zone.m[k,2] < BUB) | |
108 exclusion.in <- TRUE | |
109 } | |
110 | |
111 if (exclusion.in) | |
112 { | |
113 # Bucketing | |
114 { | |
115 BLB <- BUB - bucketSize | |
116 bucket <- spectrum[j,] | |
117 while (j < dim(spectrum)[1] && spectrum[j,1] > BLB) | |
118 { | |
119 j <- j + 1 | |
120 if (spectrum[j,1] > BLB) | |
121 bucket <- rbind(bucket,spectrum[j,]) | |
122 } | |
123 | |
124 f.bucket[b,] <- c(round(mean(bucket[,1]),3),0) | |
125 | |
126 # Next bucket boundary | |
127 BUB <- spectrum[j,1] | |
128 b <- b + 1 | |
129 } | |
130 } | |
131 | |
132 if (!exclusion.in) | |
133 # Bucketing | |
134 { | |
135 BLB <- BUB - bucketSize | |
136 bucket <- spectrum[j,] | |
137 while (j < dim(spectrum)[1] && spectrum[j,1] > BLB) | |
138 { | |
139 j <- j + 1 | |
140 if (spectrum[j,1] > BLB) | |
141 bucket <- rbind(bucket,spectrum[j,]) | |
142 } | |
143 | |
144 # Integration (trapezoid method) | |
145 s <- cumtrapz(bucket[,1],bucket[,2]) | |
146 f.bucket[b,] <- c(round(mean(bucket[,1]),3),abs(s[length(s)][[1]])) | |
147 | |
148 # Next bucket boundary | |
149 BUB <- spectrum[j,1] | |
150 b <- b + 1 | |
151 } | |
152 } | |
153 return(f.bucket) | |
154 } | |
155 | |
156 # Exclusion zones | |
157 if (!is.null(exclusionZonesBorders)) | |
158 { | |
159 exclusion.zone.m <- matrix(exclusionZonesBorders[[1]],nrow=1) | |
160 if (length(exclusionZonesBorders) > 1) | |
161 for (k in 2:length(exclusionZonesBorders)) | |
162 exclusion.zone.m <- rbind(exclusion.zone.m,exclusionZonesBorders[[k]]) | |
163 } | |
164 | |
165 ## CHANGES | |
166 ## Inputs from zip or library (raw files) | |
167 if (fileType == "zip") | |
168 { | |
169 # File names | |
170 FileNames <- list.files(fileName) | |
171 n <- length(FileNames) | |
172 | |
173 # Reading and Bucketing | |
174 fileName <- paste(fileName,"/",sep="") | |
175 | |
176 i <- 1 | |
177 while (i <= n) | |
178 { | |
179 # File reading | |
180 SampleDir <- paste(fileName,FileNames[i],"/1/",sep="") | |
181 setwd(SampleDir) | |
182 DataDir <- "pdata/1" | |
183 | |
184 rawSpectrum <- NmRBrucker_read(DataDir,rawSpectrum) | |
185 | |
186 orderedSpectrum <- rawSpectrum[order(rawSpectrum[,1],decreasing=T), ] | |
187 | |
188 # Removal of chemical shifts > leftBorder or < rightBorder boundaries | |
189 truncatedSpectrum <- orderedSpectrum[orderedSpectrum[,1] < leftBorder & orderedSpectrum[,1] > rightBorder, ] | |
190 truncatedSpectrum[,1] <- round(truncatedSpectrum[,1],3) | |
191 | |
192 # Bucketing | |
193 spectrum.bucket <- NmrBrucker_bucket(truncatedSpectrum) | |
194 ppm <- spectrum.bucket[,1] | |
195 | |
196 # spectrum Concatenation | |
197 if (i == 1) | |
198 bucketedSpectra <- spectrum.bucket | |
199 if (i > 1) | |
200 bucketedSpectra <- cbind(bucketedSpectra,spectrum.bucket[,2]) | |
201 colnames(bucketedSpectra)[i+1] <- FileNames[i] | |
202 | |
203 # Next sample | |
204 rm(spectrum.bucket) | |
205 i <- i +1 | |
206 } | |
207 # Directory | |
208 cd(fileName) | |
209 } | |
210 | |
211 ## Inputs from dataset (preprocessed files) | |
212 if (fileType=="tsv") | |
213 { | |
214 FileNames <- colnames(fileName) | |
215 n <- length(FileNames) | |
216 | |
217 for (i in 1:ncol(fileName)) | |
218 { | |
219 orderedSpectrum <- cbind(as.numeric(rownames(fileName)),fileName[,i]) | |
220 orderedSpectrum <- orderedSpectrum[order(orderedSpectrum[,1],decreasing=T), ] | |
221 | |
222 truncatedSpectrum <- orderedSpectrum[orderedSpectrum[,1] < leftBorder & orderedSpectrum[,1] > rightBorder, ] | |
223 truncatedSpectrum[,1] <- round(truncatedSpectrum[,1],3) | |
224 | |
225 # Bucketing | |
226 spectrum.bucket <- NmrBrucker_bucket(truncatedSpectrum) | |
227 ppm <- spectrum.bucket[,1] | |
228 | |
229 # spectrum Concatenation | |
230 if (i == 1) | |
231 bucketedSpectra <- spectrum.bucket | |
232 if (i > 1) | |
233 bucketedSpectra <- cbind(bucketedSpectra,spectrum.bucket[,2]) | |
234 colnames(bucketedSpectra)[i+1] <- colnames(fileName)[i] | |
235 } | |
236 } | |
237 | |
238 identifiants <- gsub("([- , * { } | \\[ ])","_",colnames(bucketedSpectra)[-1]) | |
239 colnames(bucketedSpectra) <- c(colnames(bucketedSpectra)[1],identifiants) | |
240 | |
241 bucketedSpectra <- bucketedSpectra[bucketedSpectra[,1]!=0,] | |
242 rownames(bucketedSpectra) <- paste("B",bucketedSpectra[,1],sep="") | |
243 bucketedSpectra <- bucketedSpectra[,-1] | |
244 | |
245 # Metadata matrice outputs | |
246 sampleMetadata <- data.frame(1:n) | |
247 rownames(sampleMetadata) <- colnames(bucketedSpectra) | |
248 colnames(sampleMetadata) <- "SampleOrder" | |
249 | |
250 variableMetadata <- data.frame(1:nrow(bucketedSpectra)) | |
251 rownames(variableMetadata) <- rownames(bucketedSpectra) | |
252 colnames(variableMetadata) <- "VariableOrder" | |
253 | |
254 | |
255 return(list(bucketedSpectra,sampleMetadata,variableMetadata,ppm)) # ,truncatedSpectrum_matrice | |
256 } | |
257 | |
258 | |
259 ################################################################################################################# | |
260 ## Typical function call | |
261 ################################################################################################################# | |
262 ## StudyDir <- "K:/PROJETS/Metabohub/Bruker/Tlse_BPASourisCerveau/" | |
263 ## upper <- 9.5 | |
264 ## lower <- 0.8 | |
265 ## bucket.width <- 0.01 | |
266 ## exclusion <- TRUE | |
267 ## exclusion.zone <- list(c(5.1,4.5)) | |
268 ## graphique <- "Overlay" | |
269 ## nomFichier <- "Tlse_BPASourisCerveau_NmrBucketing_graph.pdf" | |
270 ## tlse_cerveaupnd21.bucket <- NmrBucketing(StudyDir,upper,lower,bucket.width,exclusion,exclusion.zone,graphique,nomFichier) | |
271 ## write.table(tlse_cerveaupnd21.bucket,file=paste(StudyDir,"Tlse_BPASourisCerveau_NmrBucketing_dataMatrix.tsv",sep=""), | |
272 ## quote=FALSE,row.nmaes=FALSE,sep="\t") | |
273 ################################################################################################################# |