0
|
1 ################################################################################################
|
|
2 # GENERIC FILTERS #
|
|
3 # #
|
|
4 # User: Galaxy #
|
|
5 # Starting date: 03-09-2014 #
|
|
6 # V-1.0: Restriction of old filter script to Filter according to factors #
|
|
7 # V-1.1: Choice of metadata table for filtering added ; data check added ; handling of NA ; #
|
|
8 # check for minimum remaining data #
|
|
9 # V-1.2: Minor modifications in script layout #
|
|
10 # V-2.0: Addition of numerical filter #
|
|
11 # V-2.1: Handling special characters #
|
|
12 # #
|
|
13 # #
|
|
14 # Input files: dataMatrix ; sampleMetadata ; variableMetadata #
|
|
15 # Output files: dataMatrix ; sampleMetadata ; variableMetadata #
|
|
16 # #
|
|
17 ################################################################################################
|
|
18
|
|
19 # Parameters (for dev)
|
|
20 if(FALSE){
|
|
21
|
|
22 ion.file.in <- "test/ressources/inputs/ex_data_IONS.txt" #tab file
|
|
23 meta.samp.file.in <- "test/ressources/inputs/ex_data_PROTOCOLE1.txt" #tab file
|
|
24 meta.ion.file.in <- "test/ressources/inputs/ex_data_METAION.txt" #tab file
|
|
25
|
|
26 ion.file.out <- "test/ressources/outputs/ex_data_IONS_fl.txt" #tab file
|
|
27 meta.samp.file.out <- "test/ressources/outputs/ex_data_PROTOCOLE1_fl.txt" #tab file
|
|
28 meta.ion.file.out <- "test/ressources/outputs/ex_data_METAION_fl.txt" #tab file
|
|
29
|
|
30 NUM <- TRUE ; if(NUM){ls.num<-list(c("sample","injectionOrder","upper","20"),c("variable","var1","extremity","0.12","500"))}else{ls.num<-NULL}
|
|
31
|
|
32 FACT <- TRUE ; if(FACT){ls.fact<-list(c("centre","C","sample"),c("var2","A","variable"))}else{ls.fact<-NULL}
|
|
33
|
|
34 }
|
|
35
|
|
36 filters <- function(ion.file.in, meta.samp.file.in, meta.ion.file.in,
|
|
37 NUM, ls.num, FACT, ls.fact,
|
|
38 ion.file.out, meta.samp.file.out, meta.ion.file.out){
|
|
39 # This function allows to filter variables and samples according to factors or numerical values.
|
|
40 # It needs 3 datasets: the data matrix, the variables' metadata, the samples' metadata.
|
|
41 # It generates 3 new datasets corresponding to the 3 inputs filtered.
|
|
42 #
|
|
43 # Parameters:
|
|
44 # - xxx.in: input files' access
|
|
45 # - xxx.out: output files' access
|
|
46 # - NUM: filter according to numerical variables yes/no
|
|
47 # | > ls.num: numerical variables' list for filter
|
|
48 # - FACT: filter according to factors yes/no
|
|
49 # | > ls.fact: factors' list for filter
|
|
50
|
|
51
|
|
52 # Input -----------------------------------------------------------------------------------
|
|
53
|
|
54 ion.data <- read.table(ion.file.in,sep="\t",header=TRUE,check.names=FALSE)
|
|
55 meta.samp.data <- read.table(meta.samp.file.in,sep="\t",header=TRUE,check.names=FALSE)
|
|
56 meta.ion.data <- read.table(meta.ion.file.in,sep="\t",header=TRUE,check.names=FALSE)
|
|
57
|
|
58 # Error vector
|
|
59 err.stock <- "\n"
|
|
60
|
|
61
|
|
62 # Table match check
|
|
63 table.check <- match3(ion.data,meta.samp.data,meta.ion.data)
|
|
64 check.err(table.check)
|
|
65
|
|
66 # StockID
|
|
67 samp.id <- stockID(ion.data,meta.samp.data,"sample")
|
|
68 ion.data <- samp.id$dataMatrix
|
|
69 meta.samp.data <- samp.id$Metadata
|
|
70 samp.id <- samp.id$id.match
|
|
71
|
|
72
|
|
73
|
|
74 # Function 1: Filter according to numerical variables -------------------------------------
|
|
75 # Allows to delete all elements corresponding to defined values of designated variables.
|
|
76 if(NUM){
|
|
77
|
|
78 # For each numerical variable to filter
|
|
79 for(i in 1:length(ls.num)){
|
|
80
|
|
81 # Which metadata table is concerned
|
|
82 if(ls.num[[i]][1]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data}
|
|
83
|
|
84 # Checking the columns and factors variables
|
|
85 numcol <- which(colnames(metadata)==ls.num[[i]][2])
|
|
86 if(length(numcol)==0) {
|
|
87 err.stock <- c(err.stock,"\n-------",
|
|
88 "\nWarning: no '",ls.num[[i]][2],"' column detected in ",ls.num[[i]][1],
|
|
89 " metadata!","\nFiltering impossible for this variable.\n-------\n")
|
|
90 }else{
|
|
91 if(!is.numeric(metadata[,numcol])){
|
|
92 err.stock <- c(err.stock,"\n-------",
|
|
93 "\nWarning: column '",ls.num[[i]][2],"' in ",ls.num[[i]][1],
|
|
94 " metadata is not a numerical variable!",
|
|
95 "\nNumerical filtering impossible for this variable.\n-------\n")
|
|
96 }else{
|
|
97
|
|
98 # Filtering
|
|
99 if(ls.num[[i]][3]=="lower"){
|
|
100 toremove <- which(metadata[,numcol]<as.numeric(ls.num[[i]][4]))
|
|
101 if(length(toremove)!=0){
|
|
102 metadata <- metadata[-c(toremove),]
|
|
103 }
|
|
104 }else{if(ls.num[[i]][3]=="upper"){
|
|
105 toremove <- which(metadata[,numcol]>as.numeric(ls.num[[i]][4]))
|
|
106 if(length(toremove)!=0){
|
|
107 metadata <- metadata[-c(toremove),]
|
|
108 }
|
|
109 }else{if(ls.num[[i]][3]=="between"){
|
|
110 toremove <- (metadata[,numcol]>as.numeric(ls.num[[i]][4]))+(metadata[,numcol]<as.numeric(ls.num[[i]][5]))
|
|
111 toremove <- which(toremove==2)
|
|
112 if(length(toremove)!=0){
|
|
113 metadata <- metadata[-c(toremove),]
|
|
114 }
|
|
115 }else{if(ls.num[[i]][3]=="extremity"){
|
|
116 toremove <- c(which(metadata[,numcol]<as.numeric(ls.num[[i]][4])),
|
|
117 which(metadata[,numcol]>as.numeric(ls.num[[i]][5])))
|
|
118 if(length(toremove)!=0){
|
|
119 metadata <- metadata[-c(toremove),]
|
|
120 }
|
|
121 }}}}
|
|
122
|
|
123 # Extension to the tables
|
|
124 if(ls.num[[i]][1]=="sample"){
|
|
125 meta.samp.data <- metadata
|
|
126 ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))]
|
|
127 }else{
|
|
128 meta.ion.data <- metadata
|
|
129 ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),]
|
|
130 }
|
|
131
|
|
132 }}}
|
|
133
|
|
134 } # end if(NUM)
|
|
135 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - -
|
|
136
|
|
137
|
|
138
|
|
139 # Function 2: Filter according to factors -------------------------------------------------
|
|
140 # Allows to delete all elements corresponding to selected value of designated factor.
|
|
141 if(FACT){
|
|
142
|
|
143 # For each factor to filter
|
|
144 for(i in 1:length(ls.fact)){
|
|
145
|
|
146 # Which metadata table is concerned
|
|
147 if(ls.fact[[i]][3]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data}
|
|
148
|
|
149 # Checking the columns and factors variables
|
|
150 numcol <- which(colnames(metadata)==ls.fact[[i]][1])
|
|
151 if(length(numcol)==0) {
|
|
152 err.stock <- c(err.stock,"\n-------",
|
|
153 "\nWarning: no '",ls.fact[[i]][1],"' column detected in ",ls.fact[[i]][3],
|
|
154 " metadata!","\nFiltering impossible for this factor.\n-------\n")
|
|
155 }else{
|
|
156 if((!(ls.fact[[i]][2]%in%levels(as.factor(metadata[,numcol]))))&((ls.fact[[i]][2]!="NA")|(length(which(is.na(metadata[,numcol])))==0))){
|
|
157 err.stock <- c(err.stock,"\n-------",
|
|
158 "\nWarning: no '",ls.fact[[i]][2],"' level detected in '",
|
|
159 ls.fact[[i]][1],"' column (",ls.fact[[i]][3]," metadata)!\n",
|
|
160 "Filtering impossible for this factor.\n-------\n")
|
|
161 }else{
|
|
162
|
|
163 # Filtering
|
|
164 if(length(which(metadata[,numcol]==ls.fact[[i]][2]))!=0){ #if the level still exists in the data
|
|
165 metadata <- metadata[-c(which(metadata[,numcol]==ls.fact[[i]][2])),]
|
|
166 }else{ #to treat the special case of "NA" level
|
|
167 if(ls.fact[[i]][2]=="NA"){metadata <- metadata[-c(which(is.na(metadata[,numcol]))),]}
|
|
168 }
|
|
169
|
|
170 # Extension to the tables
|
|
171 if(ls.fact[[i]][3]=="sample"){
|
|
172 meta.samp.data <- metadata
|
|
173 ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))]
|
|
174 }else{
|
|
175 meta.ion.data <- metadata
|
|
176 ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),]
|
|
177 }
|
|
178
|
|
179 }}}
|
|
180
|
|
181 } # end if(FACT)
|
|
182 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - -
|
|
183
|
|
184
|
|
185
|
|
186
|
|
187 # Check if at least one sample and one variable remain ------------------------------------
|
|
188
|
|
189 if(nrow(meta.samp.data)==0){
|
|
190 stop("\n /!\\ Your filtering options lead to no more sample in your data matrix!\n",
|
|
191 "Think about reducing your number of filter.")
|
|
192 }
|
|
193
|
|
194 if(nrow(meta.ion.data)==0){
|
|
195 stop("\n /!\\ Your filtering options lead to no more variable in your data matrix!\n",
|
|
196 "Think about reducing your number of filter.")
|
|
197 }
|
|
198
|
|
199 # Output ----------------------------------------------------------------------------------
|
|
200
|
|
201 # Getting back original identifiers
|
|
202 id.ori <- reproduceID(ion.data,meta.samp.data,"sample",samp.id)
|
|
203 ion.data <- id.ori$dataMatrix
|
|
204 meta.samp.data <- id.ori$Metadata
|
|
205
|
|
206
|
|
207 # Error checking
|
|
208 if(length(err.stock)>1){
|
|
209 stop(err.stock)
|
|
210 }else{
|
|
211
|
|
212 write.table(ion.data, ion.file.out, sep="\t", row.names=FALSE, quote=FALSE)
|
|
213 write.table(meta.samp.data, meta.samp.file.out, sep="\t", row.names=FALSE, quote=FALSE)
|
|
214 write.table(meta.ion.data, meta.ion.file.out, sep="\t", row.names=FALSE, quote=FALSE)
|
|
215
|
|
216 }
|
|
217
|
|
218
|
|
219 } # end of filters function
|
|
220
|
|
221
|
|
222 # Typical function call
|
|
223 #filters(ion.file.in, meta.samp.file.in, meta.ion.file.in,
|
|
224 # NUM, ls.num, FACT, ls.fact,
|
|
225 # ion.file.out, meta.samp.file.out, meta.ion.file.out)
|
|
226
|