Mercurial > repos > melpetera > generic_filter
comparison GalFilter/filter_script.R @ 0:2c9afaf849ad draft
Uploaded
author | melpetera |
---|---|
date | Thu, 23 Feb 2017 04:39:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2c9afaf849ad |
---|---|
1 ################################################################################################ | |
2 # GENERIC FILTERS # | |
3 # # | |
4 # User: Galaxy # | |
5 # Starting date: 03-09-2014 # | |
6 # V-1.0: Restriction of old filter script to Filter according to factors # | |
7 # V-1.1: Choice of metadata table for filtering added ; data check added ; handling of NA ; # | |
8 # check for minimum remaining data # | |
9 # V-1.2: Minor modifications in script layout # | |
10 # V-2.0: Addition of numerical filter # | |
11 # V-2.1: Handling special characters # | |
12 # # | |
13 # # | |
14 # Input files: dataMatrix ; sampleMetadata ; variableMetadata # | |
15 # Output files: dataMatrix ; sampleMetadata ; variableMetadata # | |
16 # # | |
17 ################################################################################################ | |
18 | |
19 # Parameters (for dev) | |
20 if(FALSE){ | |
21 | |
22 ion.file.in <- "test/ressources/inputs/ex_data_IONS.txt" #tab file | |
23 meta.samp.file.in <- "test/ressources/inputs/ex_data_PROTOCOLE1.txt" #tab file | |
24 meta.ion.file.in <- "test/ressources/inputs/ex_data_METAION.txt" #tab file | |
25 | |
26 ion.file.out <- "test/ressources/outputs/ex_data_IONS_fl.txt" #tab file | |
27 meta.samp.file.out <- "test/ressources/outputs/ex_data_PROTOCOLE1_fl.txt" #tab file | |
28 meta.ion.file.out <- "test/ressources/outputs/ex_data_METAION_fl.txt" #tab file | |
29 | |
30 NUM <- TRUE ; if(NUM){ls.num<-list(c("sample","injectionOrder","upper","20"),c("variable","var1","extremity","0.12","500"))}else{ls.num<-NULL} | |
31 | |
32 FACT <- TRUE ; if(FACT){ls.fact<-list(c("centre","C","sample"),c("var2","A","variable"))}else{ls.fact<-NULL} | |
33 | |
34 } | |
35 | |
36 filters <- function(ion.file.in, meta.samp.file.in, meta.ion.file.in, | |
37 NUM, ls.num, FACT, ls.fact, | |
38 ion.file.out, meta.samp.file.out, meta.ion.file.out){ | |
39 # This function allows to filter variables and samples according to factors or numerical values. | |
40 # It needs 3 datasets: the data matrix, the variables' metadata, the samples' metadata. | |
41 # It generates 3 new datasets corresponding to the 3 inputs filtered. | |
42 # | |
43 # Parameters: | |
44 # - xxx.in: input files' access | |
45 # - xxx.out: output files' access | |
46 # - NUM: filter according to numerical variables yes/no | |
47 # | > ls.num: numerical variables' list for filter | |
48 # - FACT: filter according to factors yes/no | |
49 # | > ls.fact: factors' list for filter | |
50 | |
51 | |
52 # Input ----------------------------------------------------------------------------------- | |
53 | |
54 ion.data <- read.table(ion.file.in,sep="\t",header=TRUE,check.names=FALSE) | |
55 meta.samp.data <- read.table(meta.samp.file.in,sep="\t",header=TRUE,check.names=FALSE) | |
56 meta.ion.data <- read.table(meta.ion.file.in,sep="\t",header=TRUE,check.names=FALSE) | |
57 | |
58 # Error vector | |
59 err.stock <- "\n" | |
60 | |
61 | |
62 # Table match check | |
63 table.check <- match3(ion.data,meta.samp.data,meta.ion.data) | |
64 check.err(table.check) | |
65 | |
66 # StockID | |
67 samp.id <- stockID(ion.data,meta.samp.data,"sample") | |
68 ion.data <- samp.id$dataMatrix | |
69 meta.samp.data <- samp.id$Metadata | |
70 samp.id <- samp.id$id.match | |
71 | |
72 | |
73 | |
74 # Function 1: Filter according to numerical variables ------------------------------------- | |
75 # Allows to delete all elements corresponding to defined values of designated variables. | |
76 if(NUM){ | |
77 | |
78 # For each numerical variable to filter | |
79 for(i in 1:length(ls.num)){ | |
80 | |
81 # Which metadata table is concerned | |
82 if(ls.num[[i]][1]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data} | |
83 | |
84 # Checking the columns and factors variables | |
85 numcol <- which(colnames(metadata)==ls.num[[i]][2]) | |
86 if(length(numcol)==0) { | |
87 err.stock <- c(err.stock,"\n-------", | |
88 "\nWarning: no '",ls.num[[i]][2],"' column detected in ",ls.num[[i]][1], | |
89 " metadata!","\nFiltering impossible for this variable.\n-------\n") | |
90 }else{ | |
91 if(!is.numeric(metadata[,numcol])){ | |
92 err.stock <- c(err.stock,"\n-------", | |
93 "\nWarning: column '",ls.num[[i]][2],"' in ",ls.num[[i]][1], | |
94 " metadata is not a numerical variable!", | |
95 "\nNumerical filtering impossible for this variable.\n-------\n") | |
96 }else{ | |
97 | |
98 # Filtering | |
99 if(ls.num[[i]][3]=="lower"){ | |
100 toremove <- which(metadata[,numcol]<as.numeric(ls.num[[i]][4])) | |
101 if(length(toremove)!=0){ | |
102 metadata <- metadata[-c(toremove),] | |
103 } | |
104 }else{if(ls.num[[i]][3]=="upper"){ | |
105 toremove <- which(metadata[,numcol]>as.numeric(ls.num[[i]][4])) | |
106 if(length(toremove)!=0){ | |
107 metadata <- metadata[-c(toremove),] | |
108 } | |
109 }else{if(ls.num[[i]][3]=="between"){ | |
110 toremove <- (metadata[,numcol]>as.numeric(ls.num[[i]][4]))+(metadata[,numcol]<as.numeric(ls.num[[i]][5])) | |
111 toremove <- which(toremove==2) | |
112 if(length(toremove)!=0){ | |
113 metadata <- metadata[-c(toremove),] | |
114 } | |
115 }else{if(ls.num[[i]][3]=="extremity"){ | |
116 toremove <- c(which(metadata[,numcol]<as.numeric(ls.num[[i]][4])), | |
117 which(metadata[,numcol]>as.numeric(ls.num[[i]][5]))) | |
118 if(length(toremove)!=0){ | |
119 metadata <- metadata[-c(toremove),] | |
120 } | |
121 }}}} | |
122 | |
123 # Extension to the tables | |
124 if(ls.num[[i]][1]=="sample"){ | |
125 meta.samp.data <- metadata | |
126 ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))] | |
127 }else{ | |
128 meta.ion.data <- metadata | |
129 ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),] | |
130 } | |
131 | |
132 }}} | |
133 | |
134 } # end if(NUM) | |
135 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - - | |
136 | |
137 | |
138 | |
139 # Function 2: Filter according to factors ------------------------------------------------- | |
140 # Allows to delete all elements corresponding to selected value of designated factor. | |
141 if(FACT){ | |
142 | |
143 # For each factor to filter | |
144 for(i in 1:length(ls.fact)){ | |
145 | |
146 # Which metadata table is concerned | |
147 if(ls.fact[[i]][3]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data} | |
148 | |
149 # Checking the columns and factors variables | |
150 numcol <- which(colnames(metadata)==ls.fact[[i]][1]) | |
151 if(length(numcol)==0) { | |
152 err.stock <- c(err.stock,"\n-------", | |
153 "\nWarning: no '",ls.fact[[i]][1],"' column detected in ",ls.fact[[i]][3], | |
154 " metadata!","\nFiltering impossible for this factor.\n-------\n") | |
155 }else{ | |
156 if((!(ls.fact[[i]][2]%in%levels(as.factor(metadata[,numcol]))))&((ls.fact[[i]][2]!="NA")|(length(which(is.na(metadata[,numcol])))==0))){ | |
157 err.stock <- c(err.stock,"\n-------", | |
158 "\nWarning: no '",ls.fact[[i]][2],"' level detected in '", | |
159 ls.fact[[i]][1],"' column (",ls.fact[[i]][3]," metadata)!\n", | |
160 "Filtering impossible for this factor.\n-------\n") | |
161 }else{ | |
162 | |
163 # Filtering | |
164 if(length(which(metadata[,numcol]==ls.fact[[i]][2]))!=0){ #if the level still exists in the data | |
165 metadata <- metadata[-c(which(metadata[,numcol]==ls.fact[[i]][2])),] | |
166 }else{ #to treat the special case of "NA" level | |
167 if(ls.fact[[i]][2]=="NA"){metadata <- metadata[-c(which(is.na(metadata[,numcol]))),]} | |
168 } | |
169 | |
170 # Extension to the tables | |
171 if(ls.fact[[i]][3]=="sample"){ | |
172 meta.samp.data <- metadata | |
173 ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))] | |
174 }else{ | |
175 meta.ion.data <- metadata | |
176 ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),] | |
177 } | |
178 | |
179 }}} | |
180 | |
181 } # end if(FACT) | |
182 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - - | |
183 | |
184 | |
185 | |
186 | |
187 # Check if at least one sample and one variable remain ------------------------------------ | |
188 | |
189 if(nrow(meta.samp.data)==0){ | |
190 stop("\n /!\\ Your filtering options lead to no more sample in your data matrix!\n", | |
191 "Think about reducing your number of filter.") | |
192 } | |
193 | |
194 if(nrow(meta.ion.data)==0){ | |
195 stop("\n /!\\ Your filtering options lead to no more variable in your data matrix!\n", | |
196 "Think about reducing your number of filter.") | |
197 } | |
198 | |
199 # Output ---------------------------------------------------------------------------------- | |
200 | |
201 # Getting back original identifiers | |
202 id.ori <- reproduceID(ion.data,meta.samp.data,"sample",samp.id) | |
203 ion.data <- id.ori$dataMatrix | |
204 meta.samp.data <- id.ori$Metadata | |
205 | |
206 | |
207 # Error checking | |
208 if(length(err.stock)>1){ | |
209 stop(err.stock) | |
210 }else{ | |
211 | |
212 write.table(ion.data, ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) | |
213 write.table(meta.samp.data, meta.samp.file.out, sep="\t", row.names=FALSE, quote=FALSE) | |
214 write.table(meta.ion.data, meta.ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) | |
215 | |
216 } | |
217 | |
218 | |
219 } # end of filters function | |
220 | |
221 | |
222 # Typical function call | |
223 #filters(ion.file.in, meta.samp.file.in, meta.ion.file.in, | |
224 # NUM, ls.num, FACT, ls.fact, | |
225 # ion.file.out, meta.samp.file.out, meta.ion.file.out) | |
226 |