Mercurial > repos > marie-tremblay-metatoul > normalization
comparison NmrNormalization_script.R @ 3:966fcf7ae66e draft
planemo upload for repository https://github.com/workflow4metabolomics/normalization commit 9ca88a22e9b9394bfa00ea383fbb2b78ef05f990
author | lecorguille |
---|---|
date | Thu, 26 Oct 2017 06:01:14 -0400 |
parents | |
children | 221cbd549c40 |
comparison
equal
deleted
inserted
replaced
2:3cd762aac7a4 | 3:966fcf7ae66e |
---|---|
1 ################################################################################################################# | |
2 # SPECTRA NORMALIZATION FROM SPECTRAL DATA # | |
3 # User : Galaxy # | |
4 # Original data : -- # | |
5 # Starting date : 20-10-2014 # | |
6 # Version 1 : 27-01-2015 # | |
7 # Version 2 : 27-02-2015 # | |
8 # # | |
9 # Input files: # | |
10 # - Data matrix containing bucketed and integrated spectra to normalize # | |
11 # - Sample metadata matrix containing at least biological factor of interest # | |
12 # - Scaling method: Total intensity/Probabilistic Quotient Normalization # | |
13 # - Control group: name of control to compute median reference spectra # | |
14 # - Graph: normalization result representation # | |
15 ################################################################################################################# | |
16 NmrNormalization <- function(dataMatrix,scalingMethod=c("None","Total","PQN","BioFactor"),sampleMetadata=NULL, | |
17 bioFactor=NULL,ControlGroup=NULL,graph=c("None","Overlay","One_per_individual"), | |
18 nomFichier=NULL,savLog.txtC=NULL) | |
19 { | |
20 | |
21 ## Option | |
22 ##--------------- | |
23 strAsFacL <- options()$stringsAsFactors | |
24 options(stingsAsFactors = FALSE) | |
25 options(warn = -1) | |
26 | |
27 | |
28 ## Constants | |
29 ##--------------- | |
30 topEnvC <- environment() | |
31 flgC <- "\n" | |
32 | |
33 ## Log file (in case of integration into Galaxy) | |
34 ##---------------------------------------------- | |
35 if(!is.null(savLog.txtC)) | |
36 sink(savLog.txtC, append = TRUE) | |
37 | |
38 ## Functions definition | |
39 ##--------------------- | |
40 ################################################################################################################# | |
41 # Total intensity normalization | |
42 # Input parameters | |
43 # - data : bucketed spectra (rows=buckets; columns=samples) | |
44 ################################################################################################################# | |
45 NmrBrucker_total <- function(data) | |
46 { | |
47 # Total intensity normalization | |
48 data.total <- apply(data,2,sum) | |
49 data.normalized <- data[,1]/data.total[1] | |
50 for (i in 2:ncol(data)) | |
51 data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) | |
52 colnames(data.normalized) <- colnames(data) | |
53 rownames(data.normalized) <- rownames(data) | |
54 return(data.normalized) | |
55 } | |
56 | |
57 | |
58 ################################################################################################################# | |
59 # Biological factor normalization | |
60 # Input parameters | |
61 # - data : bucketed spectra (rows=buckets; columns=samples) | |
62 # - sampleMetadata : dataframe with biological factor of interest measured for each invidual | |
63 # - bioFactor : name of the column cotaining the biological factor of interest | |
64 ################################################################################################################# | |
65 NmrBrucker_bioFact <- function(data,sampleMetadata,bioFactor) | |
66 { | |
67 # Total intensity normalization | |
68 data.normalized <- data[,1]/bioFactor[1] | |
69 for (i in 2:ncol(data)) | |
70 data.normalized <- cbind(data.normalized,data[,i]/bioFactor[i]) | |
71 colnames(data.normalized) <- colnames(data) | |
72 rownames(data.normalized) <- rownames(data) | |
73 return(data.normalized) | |
74 } | |
75 | |
76 | |
77 ################################################################################################################# | |
78 # Probabilistic quotient normalization (PQN) | |
79 # Input parameters | |
80 # - data : bucketed spectra (rows=buckets; columns=samples) | |
81 # - sampleMetadata : dataframe with treatment group of inviduals | |
82 # - pqnFactor : number of the column cotaining the biological facor of interest | |
83 # - nomControl : name of the treatment group | |
84 ################################################################################################################# | |
85 NmrBrucker_pqn <- function(data,sampleMetadata,pqnFactor,nomControl) | |
86 { | |
87 # Total intensity normalization | |
88 data.total <- apply(data,2,sum) | |
89 data.normalized <- data[,1]/data.total[1] | |
90 for (i in 2:ncol(data)) | |
91 data.normalized <- cbind(data.normalized,data[,i]/data.total[i]) | |
92 colnames(data.normalized) <- colnames(data) | |
93 rownames(data.normalized) <- rownames(data) | |
94 | |
95 # Reference spectrum | |
96 # Recuperation spectres individus controle | |
97 control.spectra <- data.normalized[,sampleMetadata[,pqnFactor]==nomControl] | |
98 spectrum.ref <- apply(control.spectra,1,median) | |
99 | |
100 # Ratio between normalized and reference spectra | |
101 data.normalized.ref <- data.normalized/spectrum.ref | |
102 | |
103 # Median ratio | |
104 data.normalized.ref.median <- apply(data.normalized.ref,1,median) | |
105 | |
106 # Normalization | |
107 data.normalizedPQN <- data.normalized[,1]/data.normalized.ref.median | |
108 for (i in 2:ncol(data)) | |
109 data.normalizedPQN <- cbind(data.normalizedPQN,data.normalized[,i]/data.normalized.ref.median) | |
110 colnames(data.normalizedPQN) <- colnames(data) | |
111 rownames(data.normalizedPQN) <- rownames(data) | |
112 | |
113 return(data.normalizedPQN) | |
114 } | |
115 | |
116 | |
117 ## Tests | |
118 if (scalingMethod=="QuantitativeVariable") | |
119 { | |
120 if(mode(sampleMetadata[,bioFactor]) == "character") | |
121 bioFact <- factor(sampleMetadata[,bioFactor]) | |
122 else | |
123 bioFact <- sampleMetadata[,bioFactor] | |
124 } | |
125 | |
126 ## Spectra scaling depending on the user choice | |
127 if (scalingMethod == "None") | |
128 { | |
129 NormalizedBucketedSpectra <- dataMatrix | |
130 } | |
131 else if (scalingMethod == "Total") | |
132 { | |
133 NormalizedBucketedSpectra <- NmrBrucker_total(dataMatrix) | |
134 } | |
135 else if (scalingMethod == "PQN") | |
136 { | |
137 NormalizedBucketedSpectra <- NmrBrucker_pqn(dataMatrix,sampleMetadata,bioFactor,ControlGroup) | |
138 } | |
139 else if (scalingMethod == "QuantitativeVariable") | |
140 { | |
141 NormalizedBucketedSpectra <- NmrBrucker_bioFact(dataMatrix,sampleMetadata,bioFact) | |
142 } | |
143 | |
144 ## OUTPUTS | |
145 return(list(NormalizedBucketedSpectra)) | |
146 | |
147 } |