diff GalFilter/filter_script.R @ 0:2c9afaf849ad draft

Uploaded
author melpetera
date Thu, 23 Feb 2017 04:39:36 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GalFilter/filter_script.R	Thu Feb 23 04:39:36 2017 -0500
@@ -0,0 +1,226 @@
+################################################################################################
+# GENERIC FILTERS                                                                              #
+#                                                                                              #
+# User: Galaxy                                                                                 #
+# Starting date: 03-09-2014                                                                    #
+# V-1.0: Restriction of old filter script to Filter according to factors                       #
+# V-1.1: Choice of metadata table for filtering added ; data check added ; handling of NA ;    #
+#        check for minimum remaining data                                                      #
+# V-1.2: Minor modifications in script layout                                                  #
+# V-2.0: Addition of numerical filter                                                          #
+# V-2.1: Handling special characters                                                           #
+#                                                                                              #
+#                                                                                              #
+# Input files: dataMatrix ; sampleMetadata ; variableMetadata                                  #
+# Output files: dataMatrix ; sampleMetadata ; variableMetadata                                 #
+#                                                                                              #
+################################################################################################
+
+# Parameters (for dev)
+if(FALSE){
+  
+  ion.file.in <- "test/ressources/inputs/ex_data_IONS.txt"  #tab file
+  meta.samp.file.in <- "test/ressources/inputs/ex_data_PROTOCOLE1.txt"  #tab file
+  meta.ion.file.in <- "test/ressources/inputs/ex_data_METAION.txt"  #tab file
+  
+  ion.file.out <- "test/ressources/outputs/ex_data_IONS_fl.txt"  #tab file
+  meta.samp.file.out <- "test/ressources/outputs/ex_data_PROTOCOLE1_fl.txt"  #tab file
+  meta.ion.file.out <- "test/ressources/outputs/ex_data_METAION_fl.txt"  #tab file
+
+NUM <- TRUE ; if(NUM){ls.num<-list(c("sample","injectionOrder","upper","20"),c("variable","var1","extremity","0.12","500"))}else{ls.num<-NULL}
+  
+FACT <- TRUE ; if(FACT){ls.fact<-list(c("centre","C","sample"),c("var2","A","variable"))}else{ls.fact<-NULL}
+  
+}
+
+filters <- function(ion.file.in, meta.samp.file.in, meta.ion.file.in,
+                    NUM, ls.num, FACT, ls.fact,
+                    ion.file.out, meta.samp.file.out, meta.ion.file.out){
+  # This function allows to filter variables and samples according to factors or numerical values. 
+  # It needs 3 datasets: the data matrix, the variables' metadata, the samples' metadata. 
+  # It generates 3 new datasets corresponding to the 3 inputs filtered. 
+  #
+  # Parameters:
+  # - xxx.in: input files' access
+  # - xxx.out: output files' access
+  # - NUM: filter according to numerical variables yes/no
+  # | > ls.num: numerical variables' list for filter
+  # - FACT: filter according to factors yes/no
+  # | > ls.fact: factors' list for filter
+  
+  
+# Input -----------------------------------------------------------------------------------
+
+ion.data <- read.table(ion.file.in,sep="\t",header=TRUE,check.names=FALSE)
+meta.samp.data <- read.table(meta.samp.file.in,sep="\t",header=TRUE,check.names=FALSE)
+meta.ion.data <- read.table(meta.ion.file.in,sep="\t",header=TRUE,check.names=FALSE)
+
+# Error vector
+err.stock <- "\n"
+
+
+# Table match check 
+table.check <- match3(ion.data,meta.samp.data,meta.ion.data)
+check.err(table.check)
+
+# StockID
+samp.id <- stockID(ion.data,meta.samp.data,"sample")
+ion.data <- samp.id$dataMatrix
+meta.samp.data <- samp.id$Metadata
+samp.id <- samp.id$id.match
+
+
+
+# Function 1: Filter according to numerical variables -------------------------------------
+# Allows to delete all elements corresponding to defined values of designated variables.
+if(NUM){
+  
+  # For each numerical variable to filter
+  for(i in 1:length(ls.num)){
+    
+    # Which metadata table is concerned
+    if(ls.num[[i]][1]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data}
+    
+    # Checking the columns and factors variables
+    numcol <- which(colnames(metadata)==ls.num[[i]][2])
+    if(length(numcol)==0) {
+      err.stock <- c(err.stock,"\n-------",
+                     "\nWarning: no '",ls.num[[i]][2],"' column detected in ",ls.num[[i]][1],
+                     " metadata!","\nFiltering impossible for this variable.\n-------\n") 
+    }else{
+      if(!is.numeric(metadata[,numcol])){
+        err.stock <- c(err.stock,"\n-------",
+                       "\nWarning: column '",ls.num[[i]][2],"' in ",ls.num[[i]][1],
+                       " metadata is not a numerical variable!",
+                       "\nNumerical filtering impossible for this variable.\n-------\n")
+      }else{
+        
+        # Filtering
+        if(ls.num[[i]][3]=="lower"){
+          toremove <- which(metadata[,numcol]<as.numeric(ls.num[[i]][4]))
+          if(length(toremove)!=0){
+            metadata <- metadata[-c(toremove),]
+          }
+        }else{if(ls.num[[i]][3]=="upper"){
+          toremove <- which(metadata[,numcol]>as.numeric(ls.num[[i]][4]))
+          if(length(toremove)!=0){
+            metadata <- metadata[-c(toremove),]
+          }
+        }else{if(ls.num[[i]][3]=="between"){
+          toremove <- (metadata[,numcol]>as.numeric(ls.num[[i]][4]))+(metadata[,numcol]<as.numeric(ls.num[[i]][5]))
+          toremove <- which(toremove==2)
+          if(length(toremove)!=0){
+            metadata <- metadata[-c(toremove),]
+          }
+        }else{if(ls.num[[i]][3]=="extremity"){
+          toremove <- c(which(metadata[,numcol]<as.numeric(ls.num[[i]][4])),
+                        which(metadata[,numcol]>as.numeric(ls.num[[i]][5])))
+          if(length(toremove)!=0){
+            metadata <- metadata[-c(toremove),]
+          }
+        }}}}
+        
+        # Extension to the tables
+        if(ls.num[[i]][1]=="sample"){
+          meta.samp.data <- metadata
+          ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))]
+        }else{
+          meta.ion.data <- metadata
+          ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),]
+        }
+        
+      }}}
+  
+} # end if(NUM)
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - -
+
+
+
+# Function 2: Filter according to factors -------------------------------------------------
+# Allows to delete all elements corresponding to selected value of designated factor.
+if(FACT){
+
+  # For each factor to filter
+  for(i in 1:length(ls.fact)){
+    
+	# Which metadata table is concerned
+	if(ls.fact[[i]][3]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data}
+	
+    # Checking the columns and factors variables
+    numcol <- which(colnames(metadata)==ls.fact[[i]][1])
+    if(length(numcol)==0) {
+    err.stock <- c(err.stock,"\n-------",
+                   "\nWarning: no '",ls.fact[[i]][1],"' column detected in ",ls.fact[[i]][3],
+                   " metadata!","\nFiltering impossible for this factor.\n-------\n") 
+    }else{
+    if((!(ls.fact[[i]][2]%in%levels(as.factor(metadata[,numcol]))))&((ls.fact[[i]][2]!="NA")|(length(which(is.na(metadata[,numcol])))==0))){
+      err.stock <- c(err.stock,"\n-------",
+                     "\nWarning: no '",ls.fact[[i]][2],"' level detected in '",
+                     ls.fact[[i]][1],"' column (",ls.fact[[i]][3]," metadata)!\n",
+					 "Filtering impossible for this factor.\n-------\n")
+    }else{
+      
+    # Filtering
+    if(length(which(metadata[,numcol]==ls.fact[[i]][2]))!=0){ #if the level still exists in the data
+      metadata <- metadata[-c(which(metadata[,numcol]==ls.fact[[i]][2])),]
+	}else{ #to treat the special case of "NA" level
+	  if(ls.fact[[i]][2]=="NA"){metadata <- metadata[-c(which(is.na(metadata[,numcol]))),]}
+	}
+	
+	# Extension to the tables
+	if(ls.fact[[i]][3]=="sample"){
+	  meta.samp.data <- metadata
+      ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))]
+	}else{
+	  meta.ion.data <- metadata
+      ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),]
+	}
+
+  }}}
+
+} # end if(FACT)
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - -
+
+
+
+
+# Check if at least one sample and one variable remain ------------------------------------
+
+if(nrow(meta.samp.data)==0){
+  stop("\n /!\\ Your filtering options lead to no more sample in your data matrix!\n",
+       "Think about reducing your number of filter.")
+}
+
+if(nrow(meta.ion.data)==0){
+  stop("\n /!\\ Your filtering options lead to no more variable in your data matrix!\n",
+       "Think about reducing your number of filter.")
+}
+
+# Output ----------------------------------------------------------------------------------
+
+# Getting back original identifiers
+id.ori <- reproduceID(ion.data,meta.samp.data,"sample",samp.id)
+ion.data <- id.ori$dataMatrix
+meta.samp.data <- id.ori$Metadata
+
+
+# Error checking
+if(length(err.stock)>1){
+  stop(err.stock)
+}else{
+
+write.table(ion.data, ion.file.out, sep="\t", row.names=FALSE, quote=FALSE)
+write.table(meta.samp.data, meta.samp.file.out, sep="\t", row.names=FALSE, quote=FALSE)
+write.table(meta.ion.data, meta.ion.file.out, sep="\t", row.names=FALSE, quote=FALSE)
+
+}
+
+
+} # end of filters function
+
+
+# Typical function call
+#filters(ion.file.in, meta.samp.file.in, meta.ion.file.in, 
+#        NUM, ls.num, FACT, ls.fact,
+#        ion.file.out, meta.samp.file.out, meta.ion.file.out)
+