comparison GalFilter/filter_script.R @ 0:2c9afaf849ad draft

Uploaded
author melpetera
date Thu, 23 Feb 2017 04:39:36 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2c9afaf849ad
1 ################################################################################################
2 # GENERIC FILTERS #
3 # #
4 # User: Galaxy #
5 # Starting date: 03-09-2014 #
6 # V-1.0: Restriction of old filter script to Filter according to factors #
7 # V-1.1: Choice of metadata table for filtering added ; data check added ; handling of NA ; #
8 # check for minimum remaining data #
9 # V-1.2: Minor modifications in script layout #
10 # V-2.0: Addition of numerical filter #
11 # V-2.1: Handling special characters #
12 # #
13 # #
14 # Input files: dataMatrix ; sampleMetadata ; variableMetadata #
15 # Output files: dataMatrix ; sampleMetadata ; variableMetadata #
16 # #
17 ################################################################################################
18
19 # Parameters (for dev)
20 if(FALSE){
21
22 ion.file.in <- "test/ressources/inputs/ex_data_IONS.txt" #tab file
23 meta.samp.file.in <- "test/ressources/inputs/ex_data_PROTOCOLE1.txt" #tab file
24 meta.ion.file.in <- "test/ressources/inputs/ex_data_METAION.txt" #tab file
25
26 ion.file.out <- "test/ressources/outputs/ex_data_IONS_fl.txt" #tab file
27 meta.samp.file.out <- "test/ressources/outputs/ex_data_PROTOCOLE1_fl.txt" #tab file
28 meta.ion.file.out <- "test/ressources/outputs/ex_data_METAION_fl.txt" #tab file
29
30 NUM <- TRUE ; if(NUM){ls.num<-list(c("sample","injectionOrder","upper","20"),c("variable","var1","extremity","0.12","500"))}else{ls.num<-NULL}
31
32 FACT <- TRUE ; if(FACT){ls.fact<-list(c("centre","C","sample"),c("var2","A","variable"))}else{ls.fact<-NULL}
33
34 }
35
36 filters <- function(ion.file.in, meta.samp.file.in, meta.ion.file.in,
37 NUM, ls.num, FACT, ls.fact,
38 ion.file.out, meta.samp.file.out, meta.ion.file.out){
39 # This function allows to filter variables and samples according to factors or numerical values.
40 # It needs 3 datasets: the data matrix, the variables' metadata, the samples' metadata.
41 # It generates 3 new datasets corresponding to the 3 inputs filtered.
42 #
43 # Parameters:
44 # - xxx.in: input files' access
45 # - xxx.out: output files' access
46 # - NUM: filter according to numerical variables yes/no
47 # | > ls.num: numerical variables' list for filter
48 # - FACT: filter according to factors yes/no
49 # | > ls.fact: factors' list for filter
50
51
52 # Input -----------------------------------------------------------------------------------
53
54 ion.data <- read.table(ion.file.in,sep="\t",header=TRUE,check.names=FALSE)
55 meta.samp.data <- read.table(meta.samp.file.in,sep="\t",header=TRUE,check.names=FALSE)
56 meta.ion.data <- read.table(meta.ion.file.in,sep="\t",header=TRUE,check.names=FALSE)
57
58 # Error vector
59 err.stock <- "\n"
60
61
62 # Table match check
63 table.check <- match3(ion.data,meta.samp.data,meta.ion.data)
64 check.err(table.check)
65
66 # StockID
67 samp.id <- stockID(ion.data,meta.samp.data,"sample")
68 ion.data <- samp.id$dataMatrix
69 meta.samp.data <- samp.id$Metadata
70 samp.id <- samp.id$id.match
71
72
73
74 # Function 1: Filter according to numerical variables -------------------------------------
75 # Allows to delete all elements corresponding to defined values of designated variables.
76 if(NUM){
77
78 # For each numerical variable to filter
79 for(i in 1:length(ls.num)){
80
81 # Which metadata table is concerned
82 if(ls.num[[i]][1]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data}
83
84 # Checking the columns and factors variables
85 numcol <- which(colnames(metadata)==ls.num[[i]][2])
86 if(length(numcol)==0) {
87 err.stock <- c(err.stock,"\n-------",
88 "\nWarning: no '",ls.num[[i]][2],"' column detected in ",ls.num[[i]][1],
89 " metadata!","\nFiltering impossible for this variable.\n-------\n")
90 }else{
91 if(!is.numeric(metadata[,numcol])){
92 err.stock <- c(err.stock,"\n-------",
93 "\nWarning: column '",ls.num[[i]][2],"' in ",ls.num[[i]][1],
94 " metadata is not a numerical variable!",
95 "\nNumerical filtering impossible for this variable.\n-------\n")
96 }else{
97
98 # Filtering
99 if(ls.num[[i]][3]=="lower"){
100 toremove <- which(metadata[,numcol]<as.numeric(ls.num[[i]][4]))
101 if(length(toremove)!=0){
102 metadata <- metadata[-c(toremove),]
103 }
104 }else{if(ls.num[[i]][3]=="upper"){
105 toremove <- which(metadata[,numcol]>as.numeric(ls.num[[i]][4]))
106 if(length(toremove)!=0){
107 metadata <- metadata[-c(toremove),]
108 }
109 }else{if(ls.num[[i]][3]=="between"){
110 toremove <- (metadata[,numcol]>as.numeric(ls.num[[i]][4]))+(metadata[,numcol]<as.numeric(ls.num[[i]][5]))
111 toremove <- which(toremove==2)
112 if(length(toremove)!=0){
113 metadata <- metadata[-c(toremove),]
114 }
115 }else{if(ls.num[[i]][3]=="extremity"){
116 toremove <- c(which(metadata[,numcol]<as.numeric(ls.num[[i]][4])),
117 which(metadata[,numcol]>as.numeric(ls.num[[i]][5])))
118 if(length(toremove)!=0){
119 metadata <- metadata[-c(toremove),]
120 }
121 }}}}
122
123 # Extension to the tables
124 if(ls.num[[i]][1]=="sample"){
125 meta.samp.data <- metadata
126 ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))]
127 }else{
128 meta.ion.data <- metadata
129 ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),]
130 }
131
132 }}}
133
134 } # end if(NUM)
135 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - -
136
137
138
139 # Function 2: Filter according to factors -------------------------------------------------
140 # Allows to delete all elements corresponding to selected value of designated factor.
141 if(FACT){
142
143 # For each factor to filter
144 for(i in 1:length(ls.fact)){
145
146 # Which metadata table is concerned
147 if(ls.fact[[i]][3]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data}
148
149 # Checking the columns and factors variables
150 numcol <- which(colnames(metadata)==ls.fact[[i]][1])
151 if(length(numcol)==0) {
152 err.stock <- c(err.stock,"\n-------",
153 "\nWarning: no '",ls.fact[[i]][1],"' column detected in ",ls.fact[[i]][3],
154 " metadata!","\nFiltering impossible for this factor.\n-------\n")
155 }else{
156 if((!(ls.fact[[i]][2]%in%levels(as.factor(metadata[,numcol]))))&((ls.fact[[i]][2]!="NA")|(length(which(is.na(metadata[,numcol])))==0))){
157 err.stock <- c(err.stock,"\n-------",
158 "\nWarning: no '",ls.fact[[i]][2],"' level detected in '",
159 ls.fact[[i]][1],"' column (",ls.fact[[i]][3]," metadata)!\n",
160 "Filtering impossible for this factor.\n-------\n")
161 }else{
162
163 # Filtering
164 if(length(which(metadata[,numcol]==ls.fact[[i]][2]))!=0){ #if the level still exists in the data
165 metadata <- metadata[-c(which(metadata[,numcol]==ls.fact[[i]][2])),]
166 }else{ #to treat the special case of "NA" level
167 if(ls.fact[[i]][2]=="NA"){metadata <- metadata[-c(which(is.na(metadata[,numcol]))),]}
168 }
169
170 # Extension to the tables
171 if(ls.fact[[i]][3]=="sample"){
172 meta.samp.data <- metadata
173 ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))]
174 }else{
175 meta.ion.data <- metadata
176 ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),]
177 }
178
179 }}}
180
181 } # end if(FACT)
182 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - -
183
184
185
186
187 # Check if at least one sample and one variable remain ------------------------------------
188
189 if(nrow(meta.samp.data)==0){
190 stop("\n /!\\ Your filtering options lead to no more sample in your data matrix!\n",
191 "Think about reducing your number of filter.")
192 }
193
194 if(nrow(meta.ion.data)==0){
195 stop("\n /!\\ Your filtering options lead to no more variable in your data matrix!\n",
196 "Think about reducing your number of filter.")
197 }
198
199 # Output ----------------------------------------------------------------------------------
200
201 # Getting back original identifiers
202 id.ori <- reproduceID(ion.data,meta.samp.data,"sample",samp.id)
203 ion.data <- id.ori$dataMatrix
204 meta.samp.data <- id.ori$Metadata
205
206
207 # Error checking
208 if(length(err.stock)>1){
209 stop(err.stock)
210 }else{
211
212 write.table(ion.data, ion.file.out, sep="\t", row.names=FALSE, quote=FALSE)
213 write.table(meta.samp.data, meta.samp.file.out, sep="\t", row.names=FALSE, quote=FALSE)
214 write.table(meta.ion.data, meta.ion.file.out, sep="\t", row.names=FALSE, quote=FALSE)
215
216 }
217
218
219 } # end of filters function
220
221
222 # Typical function call
223 #filters(ion.file.in, meta.samp.file.in, meta.ion.file.in,
224 # NUM, ls.num, FACT, ls.fact,
225 # ion.file.out, meta.samp.file.out, meta.ion.file.out)
226