Mercurial > repos > melpetera > generic_filter
changeset 0:2c9afaf849ad draft
Uploaded
author | melpetera |
---|---|
date | Thu, 23 Feb 2017 04:39:36 -0500 |
parents | |
children | b1fa45bd2b44 |
files | GalFilter/README.txt GalFilter/RcheckLibrary.R GalFilter/filter_script.R GalFilter/filter_wrap.R GalFilter/generic_filter.xml GalFilter/miniTools.R GalFilter/static/images/Thumbs.db GalFilter/static/images/filter.png |
diffstat | 8 files changed, 839 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GalFilter/README.txt Thu Feb 23 04:39:36 2017 -0500 @@ -0,0 +1,36 @@ +## ****** global_filter environnemnt : ****** ## +# version December 2014 M Landi / M Petera + +## --- PERL compilator / libraries : --- ## +NA +-- + +## --- R bin and Packages : --- ## +$ R --version +R version 3.0.2 (2013-05-16) -- "Good Sport" +Platform: x86_64-redhat-linux-gnu (64-bit) + +The dependent libs are : +>install.packages("batch", dep=TRUE) +-- + +## --- Binary dependencies --- ## +NA +-- + +## --- Config : --- ## +NA +-- + +## --- XML HELP PART --- ## +one image : +filter.png +-- + +## --- DATASETS --- ## +No data set ! waiting for galaxy pages +-- + +## --- ??? COMMENTS ??? --- ## +!WARNING! : Its existence made obsolete the old "Filters" tool +-- \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GalFilter/RcheckLibrary.R Thu Feb 23 04:39:36 2017 -0500 @@ -0,0 +1,124 @@ +###################################################### +# R check library +# Coded by: M.Petera, +# - - +# R functions to use in R scripts +# (management of various generic subroutines) +# - - +# V0: script structure + first functions +# V1: More detailed error messages in match functions +###################################################### + + +# Generic function to return an error if problems have been encountered - - - - + +check.err <- function(err.stock){ + + # err.stock = vector of results returned by check functions + + if(length(err.stock)!=0){ stop("\n- - - - - - - - -\n",err.stock,"\n- - - - - - - - -\n") } + +} + + + + +# Table match check functions - - - - - - - - - - - - - - - - - - - - - - - - - + +# To check if dataMatrix and (variable or sample)Metadata match regarding identifiers +match2 <- function(dataMatrix, Metadata, Mtype){ + + # dataMatrix = data.frame containing dataMatrix + # Metadata = data.frame containing sampleMetadata or variableMetadata + # Mtype = "sample" or "variable" depending on Metadata content + + err.stock <- NULL # error vector + + id2 <- Metadata[,1] + if(Mtype=="sample"){ id1 <- colnames(dataMatrix)[-1] } + if(Mtype=="variable"){ id1 <- dataMatrix[,1] } + + if( length(which(id1%in%id2))!=length(id1) || length(which(id2%in%id1))!=length(id2) ){ + err.stock <- c("\nData matrix and ",Mtype," metadata do not match regarding ",Mtype," identifiers.") + if(length(which(id1%in%id2))!=length(id1)){ + if(length(which(!(id1%in%id2)))<4){ err.stock <- c(err.stock,"\n The ") + }else{ err.stock <- c(err.stock,"\n For example, the ") } + err.stock <- c(err.stock,"following identifiers found in the data matrix\n", + " do not appear in the ",Mtype," metadata file:\n") + identif <- id1[which(!(id1%in%id2))][1:min(3,length(which(!(id1%in%id2))))] + err.stock <- c(err.stock," ",paste(identif,collapse="\n "),"\n") + } + if(length(which(id2%in%id1))!=length(id2)){ + if(length(which(!(id2%in%id1)))<4){ err.stock <- c(err.stock,"\n The ") + }else{ err.stock <- c(err.stock,"\n For example, the ") } + err.stock <- c(err.stock,"following identifiers found in the ",Mtype," metadata file\n", + " do not appear in the data matrix:\n") + identif <- id2[which(!(id2%in%id1))][1:min(3,length(which(!(id2%in%id1))))] + err.stock <- c(err.stock," ",paste(identif,collapse="\n "),"\n") + } + err.stock <- c(err.stock,"\nPlease check your data.\n") + } + + return(err.stock) + +} + +# To check if the 3 standard tables match regarding identifiers +match3 <- function(dataMatrix, sampleMetadata, variableMetadata){ + + # dataMatrix = data.frame containing dataMatrix + # sampleMetadata = data.frame containing sampleMetadata + # variableMetadata = data.frame containing variableMetadata + + err.stock <- NULL # error vector + + id1 <- colnames(dataMatrix)[-1] + id2 <- sampleMetadata[,1] + id3 <- dataMatrix[,1] + id4 <- variableMetadata[,1] + + if( length(which(id1%in%id2))!=length(id1) || length(which(id2%in%id1))!=length(id2) ){ + err.stock <- c(err.stock,"\nData matrix and sample metadata do not match regarding sample identifiers.") + if(length(which(id1%in%id2))!=length(id1)){ + if(length(which(!(id1%in%id2)))<4){ err.stock <- c(err.stock,"\n The ") + }else{ err.stock <- c(err.stock,"\n For example, the ") } + err.stock <- c(err.stock,"following identifiers found in the data matrix\n", + " do not appear in the sample metadata file:\n") + identif <- id1[which(!(id1%in%id2))][1:min(3,length(which(!(id1%in%id2))))] + err.stock <- c(err.stock," ",paste(identif,collapse="\n "),"\n") + } + if(length(which(id2%in%id1))!=length(id2)){ + if(length(which(!(id2%in%id1)))<4){ err.stock <- c(err.stock,"\n The ") + }else{ err.stock <- c(err.stock,"\n For example, the ") } + err.stock <- c(err.stock,"following identifiers found in the sample metadata file\n", + " do not appear in the data matrix:\n") + identif <- id2[which(!(id2%in%id1))][1:min(3,length(which(!(id2%in%id1))))] + err.stock <- c(err.stock," ",paste(identif,collapse="\n "),"\n") + } + } + + if( length(which(id3%in%id4))!=length(id3) || length(which(id4%in%id3))!=length(id4) ){ + err.stock <- c(err.stock,"\nData matrix and variable metadata do not match regarding variable identifiers.") + if(length(which(id3%in%id4))!=length(id3)){ + if(length(which(!(id3%in%id4)))<4){ err.stock <- c(err.stock,"\n The ") + }else{ err.stock <- c(err.stock,"\n For example, the ") } + err.stock <- c(err.stock,"following identifiers found in the data matrix\n", + " do not appear in the variable metadata file:\n") + identif <- id3[which(!(id3%in%id4))][1:min(3,length(which(!(id3%in%id4))))] + err.stock <- c(err.stock," ",paste(identif,collapse="\n "),"\n") + } + if(length(which(id4%in%id3))!=length(id4)){ + if(length(which(!(id4%in%id3)))<4){ err.stock <- c(err.stock,"\n The ") + }else{ err.stock <- c(err.stock,"\n For example, the ") } + err.stock <- c(err.stock,"following identifiers found in the variable metadata file\n", + " do not appear in the data matrix:\n") + identif <- id4[which(!(id4%in%id3))][1:min(3,length(which(!(id4%in%id3))))] + err.stock <- c(err.stock," ",paste(identif,collapse="\n "),"\n") + } + } + + if(length(err.stock)!=0){ err.stock <- c(err.stock,"\nPlease check your data.\n") } + + return(err.stock) + +} \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GalFilter/filter_script.R Thu Feb 23 04:39:36 2017 -0500 @@ -0,0 +1,226 @@ +################################################################################################ +# GENERIC FILTERS # +# # +# User: Galaxy # +# Starting date: 03-09-2014 # +# V-1.0: Restriction of old filter script to Filter according to factors # +# V-1.1: Choice of metadata table for filtering added ; data check added ; handling of NA ; # +# check for minimum remaining data # +# V-1.2: Minor modifications in script layout # +# V-2.0: Addition of numerical filter # +# V-2.1: Handling special characters # +# # +# # +# Input files: dataMatrix ; sampleMetadata ; variableMetadata # +# Output files: dataMatrix ; sampleMetadata ; variableMetadata # +# # +################################################################################################ + +# Parameters (for dev) +if(FALSE){ + + ion.file.in <- "test/ressources/inputs/ex_data_IONS.txt" #tab file + meta.samp.file.in <- "test/ressources/inputs/ex_data_PROTOCOLE1.txt" #tab file + meta.ion.file.in <- "test/ressources/inputs/ex_data_METAION.txt" #tab file + + ion.file.out <- "test/ressources/outputs/ex_data_IONS_fl.txt" #tab file + meta.samp.file.out <- "test/ressources/outputs/ex_data_PROTOCOLE1_fl.txt" #tab file + meta.ion.file.out <- "test/ressources/outputs/ex_data_METAION_fl.txt" #tab file + +NUM <- TRUE ; if(NUM){ls.num<-list(c("sample","injectionOrder","upper","20"),c("variable","var1","extremity","0.12","500"))}else{ls.num<-NULL} + +FACT <- TRUE ; if(FACT){ls.fact<-list(c("centre","C","sample"),c("var2","A","variable"))}else{ls.fact<-NULL} + +} + +filters <- function(ion.file.in, meta.samp.file.in, meta.ion.file.in, + NUM, ls.num, FACT, ls.fact, + ion.file.out, meta.samp.file.out, meta.ion.file.out){ + # This function allows to filter variables and samples according to factors or numerical values. + # It needs 3 datasets: the data matrix, the variables' metadata, the samples' metadata. + # It generates 3 new datasets corresponding to the 3 inputs filtered. + # + # Parameters: + # - xxx.in: input files' access + # - xxx.out: output files' access + # - NUM: filter according to numerical variables yes/no + # | > ls.num: numerical variables' list for filter + # - FACT: filter according to factors yes/no + # | > ls.fact: factors' list for filter + + +# Input ----------------------------------------------------------------------------------- + +ion.data <- read.table(ion.file.in,sep="\t",header=TRUE,check.names=FALSE) +meta.samp.data <- read.table(meta.samp.file.in,sep="\t",header=TRUE,check.names=FALSE) +meta.ion.data <- read.table(meta.ion.file.in,sep="\t",header=TRUE,check.names=FALSE) + +# Error vector +err.stock <- "\n" + + +# Table match check +table.check <- match3(ion.data,meta.samp.data,meta.ion.data) +check.err(table.check) + +# StockID +samp.id <- stockID(ion.data,meta.samp.data,"sample") +ion.data <- samp.id$dataMatrix +meta.samp.data <- samp.id$Metadata +samp.id <- samp.id$id.match + + + +# Function 1: Filter according to numerical variables ------------------------------------- +# Allows to delete all elements corresponding to defined values of designated variables. +if(NUM){ + + # For each numerical variable to filter + for(i in 1:length(ls.num)){ + + # Which metadata table is concerned + if(ls.num[[i]][1]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data} + + # Checking the columns and factors variables + numcol <- which(colnames(metadata)==ls.num[[i]][2]) + if(length(numcol)==0) { + err.stock <- c(err.stock,"\n-------", + "\nWarning: no '",ls.num[[i]][2],"' column detected in ",ls.num[[i]][1], + " metadata!","\nFiltering impossible for this variable.\n-------\n") + }else{ + if(!is.numeric(metadata[,numcol])){ + err.stock <- c(err.stock,"\n-------", + "\nWarning: column '",ls.num[[i]][2],"' in ",ls.num[[i]][1], + " metadata is not a numerical variable!", + "\nNumerical filtering impossible for this variable.\n-------\n") + }else{ + + # Filtering + if(ls.num[[i]][3]=="lower"){ + toremove <- which(metadata[,numcol]<as.numeric(ls.num[[i]][4])) + if(length(toremove)!=0){ + metadata <- metadata[-c(toremove),] + } + }else{if(ls.num[[i]][3]=="upper"){ + toremove <- which(metadata[,numcol]>as.numeric(ls.num[[i]][4])) + if(length(toremove)!=0){ + metadata <- metadata[-c(toremove),] + } + }else{if(ls.num[[i]][3]=="between"){ + toremove <- (metadata[,numcol]>as.numeric(ls.num[[i]][4]))+(metadata[,numcol]<as.numeric(ls.num[[i]][5])) + toremove <- which(toremove==2) + if(length(toremove)!=0){ + metadata <- metadata[-c(toremove),] + } + }else{if(ls.num[[i]][3]=="extremity"){ + toremove <- c(which(metadata[,numcol]<as.numeric(ls.num[[i]][4])), + which(metadata[,numcol]>as.numeric(ls.num[[i]][5]))) + if(length(toremove)!=0){ + metadata <- metadata[-c(toremove),] + } + }}}} + + # Extension to the tables + if(ls.num[[i]][1]=="sample"){ + meta.samp.data <- metadata + ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))] + }else{ + meta.ion.data <- metadata + ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),] + } + + }}} + +} # end if(NUM) +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - - + + + +# Function 2: Filter according to factors ------------------------------------------------- +# Allows to delete all elements corresponding to selected value of designated factor. +if(FACT){ + + # For each factor to filter + for(i in 1:length(ls.fact)){ + + # Which metadata table is concerned + if(ls.fact[[i]][3]=="sample"){metadata <- meta.samp.data}else{metadata <- meta.ion.data} + + # Checking the columns and factors variables + numcol <- which(colnames(metadata)==ls.fact[[i]][1]) + if(length(numcol)==0) { + err.stock <- c(err.stock,"\n-------", + "\nWarning: no '",ls.fact[[i]][1],"' column detected in ",ls.fact[[i]][3], + " metadata!","\nFiltering impossible for this factor.\n-------\n") + }else{ + if((!(ls.fact[[i]][2]%in%levels(as.factor(metadata[,numcol]))))&((ls.fact[[i]][2]!="NA")|(length(which(is.na(metadata[,numcol])))==0))){ + err.stock <- c(err.stock,"\n-------", + "\nWarning: no '",ls.fact[[i]][2],"' level detected in '", + ls.fact[[i]][1],"' column (",ls.fact[[i]][3]," metadata)!\n", + "Filtering impossible for this factor.\n-------\n") + }else{ + + # Filtering + if(length(which(metadata[,numcol]==ls.fact[[i]][2]))!=0){ #if the level still exists in the data + metadata <- metadata[-c(which(metadata[,numcol]==ls.fact[[i]][2])),] + }else{ #to treat the special case of "NA" level + if(ls.fact[[i]][2]=="NA"){metadata <- metadata[-c(which(is.na(metadata[,numcol]))),]} + } + + # Extension to the tables + if(ls.fact[[i]][3]=="sample"){ + meta.samp.data <- metadata + ion.data <- ion.data[,c(1,which(colnames(ion.data)%in%meta.samp.data[,1]))] + }else{ + meta.ion.data <- metadata + ion.data <- ion.data[which(ion.data[,1]%in%meta.ion.data[,1]),] + } + + }}} + +} # end if(FACT) +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -- - - - - - - - - - + + + + +# Check if at least one sample and one variable remain ------------------------------------ + +if(nrow(meta.samp.data)==0){ + stop("\n /!\\ Your filtering options lead to no more sample in your data matrix!\n", + "Think about reducing your number of filter.") +} + +if(nrow(meta.ion.data)==0){ + stop("\n /!\\ Your filtering options lead to no more variable in your data matrix!\n", + "Think about reducing your number of filter.") +} + +# Output ---------------------------------------------------------------------------------- + +# Getting back original identifiers +id.ori <- reproduceID(ion.data,meta.samp.data,"sample",samp.id) +ion.data <- id.ori$dataMatrix +meta.samp.data <- id.ori$Metadata + + +# Error checking +if(length(err.stock)>1){ + stop(err.stock) +}else{ + +write.table(ion.data, ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) +write.table(meta.samp.data, meta.samp.file.out, sep="\t", row.names=FALSE, quote=FALSE) +write.table(meta.ion.data, meta.ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) + +} + + +} # end of filters function + + +# Typical function call +#filters(ion.file.in, meta.samp.file.in, meta.ion.file.in, +# NUM, ls.num, FACT, ls.fact, +# ion.file.out, meta.samp.file.out, meta.ion.file.out) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GalFilter/filter_wrap.R Thu Feb 23 04:39:36 2017 -0500 @@ -0,0 +1,63 @@ +#!/usr/bin/Rscript --vanilla --slave --no-site-file + +################################################################################################ +# WRAPPER FOR filter_script.R (GENERIC FILTERS) # +# # +# Author: Melanie PETERA based on Marion LANDI's filters' wrapper # +# User: Galaxy # +# Original data: used with filter_script.R # +# Starting date: 04-09-2014 # +# V-1: Restriction of old filter wrapper to Filter according to factors # +# V-1.1: Modification to allow the choice of meta-data table for filtering # +# V-2: Addition of numerical filter # +# # +# # +# Input files: dataMatrix.txt ; sampleMetadata.txt ; variableMetadata.txt # +# Output files: dataMatrix.txt ; sampleMetadata.txt ; variableMetadata.txt # +# # +################################################################################################ + + +library(batch) #necessary for parseCommandArgs function +args = parseCommandArgs(evaluate=FALSE) #interpretation of arguments given in command line as an R list of objects + +source_local <- function(...){ + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + for(i in 1:length(list(...))){source(paste(base_dir, list(...)[[i]], sep="/"))} +} +#Import the different functions +source_local("filter_script.R","RcheckLibrary.R","miniTools.R") + + +if(length(args) < 8){ stop("NOT enough argument !!!") } + +list_num <- NULL +if(!is.null(args$parm_col)){ + for( i in which(names(args)=="num_file") ){ + if(args[[i+2]] %in% c("lower","upper")){ + list_num <- c(list_num, list(c(args[[i]], args[[i+1]], args[[i+2]],args[[i+3]]))) + } + if(args[[i+2]] %in% c("between","extremity")){ + list_num <- c(list_num, list(c(args[[i]], args[[i+1]], args[[i+2]],args[[i+3]],args[[i+4]]))) + } + } +} + +list_fact <- NULL +if(!is.null(args$factor_col)){ + for( i in which(names(args)=="qual_file") ){ + list_fact <- c(list_fact, list(c(args[[i+1]], args[[i+2]], args[[i]]))) + } +} + +filters(args$dataMatrix_in, args$sampleMetadata_in, args$variableMetadata_in, + args$Numeric, list_num, args$Factors, list_fact, + args$dataMatrix_out, args$sampleMetadata_out, args$variableMetadata_out) + +#filters(ion.file.in, meta.samp.file.in, meta.ion.file.in, +# NUM, ls.num, FACT, ls.fact, +# ion.file.out, meta.samp.file.out, meta.ion.file.out) + +#delete the parameters to avoid the passage to the next tool in .RData image +rm(args)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GalFilter/generic_filter.xml Thu Feb 23 04:39:36 2017 -0500 @@ -0,0 +1,257 @@ +<tool id="generic_filter" name="Generic_Filter" version="2016.03.03"> + <description>Removes elements according to numerical or qualitative values</description> + <command interpreter="Rscript"> + filter_wrap.R + dataMatrix_in "$dataMatrix_in" + sampleMetadata_in "$sampleMetadata_in" + variableMetadata_in "$variableMetadata_in" + + Numeric "${numeric_condition.Factors}" + #if str($numeric_condition.Factors) == 'TRUE': + #for $i in $numeric_condition.numeric_repeat: + num_file "${i.num_file}" + parm_col "${i.parm_col_num}" + Interval "${i.interval_condition.interval}" + #if str($i.interval_condition.interval) == 'lower': + low_value "${i.interval_condition.low_value}" + #elif str($i.interval_condition.interval) == 'upper': + up_value "${i.interval_condition.up_value}" + #elif str($i.interval_condition.interval) == 'between': + low_value "${i.interval_condition.low_value}" + up_value "${i.interval_condition.up_value}" + #elif str($i.interval_condition.interval) == 'extremity': + low_value "${i.interval_condition.low_value}" + up_value "${i.interval_condition.up_value}" + #end if + #end for + #end if + + Factors "${qualitative_condition.Factors}" + #if str($qualitative_condition.Factors) == 'TRUE': + #for $i in $qualitative_condition.qualitative_repeat: + qual_file "${i.qual_file}" + factor_col "${i.factor_col}" + factors_value "${i.factors_value}" + #end for + #end if + + dataMatrix_out "$dataMatrix_out" + sampleMetadata_out "$sampleMetadata_out" + variableMetadata_out "$variableMetadata_out" + </command> + + <inputs> + <param name="dataMatrix_in" type="data" label="Data Matrix file" help="" format="tabular" /> + <param name="sampleMetadata_in" type="data" label="Sample metadata file" help="" format="tabular" /> + <param name="variableMetadata_in" type="data" label="Variable metadata file" help="" format="tabular" /> + + <conditional name="numeric_condition"> + <param name="Factors" type="select" label="Deleting samples and/or variables according to Numerical values" help=""> + <option value="FALSE">no</option> + <option value="TRUE">yes</option> + </param> + <when value="TRUE"> + <repeat name="numeric_repeat" title="Identify the parameter to filter " min="1"> + <param name="num_file" label="On file" type="select" display="radio" help=""> + <option value="sample">Sample metadata</option> + <option value="variable">Variable metadata</option> + </param> + <param name="parm_col_num" label="Name of the column to filter" type="text" value="" help="" /> + <conditional name="interval_condition"> + <param name="interval" type="select" label="Interval of values to remove" help=""> + <option value="lower">lower</option> + <option value="upper">upper</option> + <option value="between">between</option> + <option value="extremity">extremity</option> + </param> + <when value="lower"> + <param name="low_value" type="float" label="Remove all values lower than" optional="true" value="0" help=""/> + </when> + <when value="upper"> + <param name="up_value" type="float" label="Remove all values upper than" optional="true" value="0" help=""/> + </when> + <when value="between"> + <param name="low_value" type="float" label="Remove all values between" optional="true" value="0" help=""/> + <param name="up_value" type="float" label="And" optional="true" value="1" help=""/> + </when> + <when value="extremity"> + <param name="low_value" type="float" label="Remove all values lower than" optional="true" value="0" help=""/> + <param name="up_value" type="float" label="And upper than" optional="true" value="1" help=""/> + </when> + </conditional> + </repeat> + </when> + <when value="FALSE"> + </when> + </conditional> + + <conditional name="qualitative_condition"> + <param name="Factors" type="select" label="Deleting samples and/or variables according to Qualitative values" help=""> + <option value="FALSE">no</option> + <option value="TRUE">yes</option> + </param> + <when value="TRUE"> + <repeat name="qualitative_repeat" title="Removing a level in factor" min="1"> + <param name="qual_file" label="On file" type="select" display="radio" help=""> + <option value="sample">Sample metadata</option> + <option value="variable">Variable metadata</option> + </param> + <param name="factor_col" label="Name of the column to filter" type="text" value="" help="" /> + <param name="factors_value" type="text" label="Remove factor when" help="" value="" /> + </repeat> + </when> + <when value="FALSE"> + </when> + </conditional> + </inputs> + + <outputs> + <data name="dataMatrix_out" label="${tool.name}_${dataMatrix_in.name}" format="tabular" ></data> + <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data> + <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data> + </outputs> + + <help> + +.. class:: infomark + +**Authors** Marion Landi and Melanie Petera + +--------------------------------------------------- + +============== +Generic_Filter +============== + +----------- +Description +----------- + + | Allows to remove all samples and/or variables corresponding to specific values regarding designated factors or numerical variables. + | + + +----------------- +Workflow position +----------------- + + +.. image:: filter.png + :width: 800 + + + +----------- +Input files +----------- + ++----------------------------+------------+ +| Parameter : num + label | Format | ++============================+============+ +| 1 : Data Matrix file | tabular | ++----------------------------+------------+ +| 2 : Sample metadata file | tabular | ++----------------------------+------------+ +| 3 : Variable metadata file | tabular | ++----------------------------+------------+ + +| +| Missing values in numerical columns of data must be coded NA if you want to use the Numerical filter on them. + + +---------- +Parameters +---------- + +Deleting samples and/or variables according to Numerical values: + | If 'yes' (not default): execution deletes all samples or variables (according to selection) for which the designated + | numerical parameter ("Name of the column to remove" field) equals the selected range of values (depending on the "Interval of values to remove" chosen). + | To delete multiple independent intervals of value from the same parameter, it is necessary to add as many times as necessary + | the corresponding column via the button "Add new Identify the parameter to filter". + +| *On file (only if 'yes')* +| Choice between sample and variable filtering (referring to the input file containing the factor). +| + +| *Name of the column to filter (only if 'yes')* +| Column name of the numerical parameter from which we want to remove a range of values. +| + +| *Interval of values to remove (only if 'yes')* +| Allows to choose which type of interval should be removed: +| - lower: removes all values below a specified value +| - upper: removes all values above a specified value +| - between: removes all values both greater than a specified value and lower than another (upper) one +| - extremity: removes all values that are either lower than a specified value or greater than another (upper) one +| + +| *'Remove all values' + interval chosen (only if 'yes')* +| Numerical value(s) used as threshold, depending on the type of interval chosen above. +| Only one value for 'lower' and 'upper'; two values for 'between' and 'extremity'. +| + +| *Notes:* +| - these parameters can be used several times using the "Add new Identify the parameter to filter" button +| - Numerical filter does not remove NA values; if you want to suppress them, use the Qualitative filter below +| + + +Deleting samples and/or variables according to Qualitative values + | If 'yes' (not default): execution deletes all data samples or variables (according to selection) for which the designated + | factor ("Name of the column to remove" field) equals the selected level ("Remove factor when" field). + | To delete multiple values of the same factor, it is necessary to add as many times as necessary the corresponding factor + | via the button "Add new Removing a level in factor". + +| *On file (only if 'yes')* +| Choice between sample and variable filtering (referring to the input file containing the factor). +| + +| *Name of the column to filter (only if 'yes')* +| Column name of the factor from which we want to remove a level. +| + +| *Remove factor when (only if 'yes')* +| Value that you want to delete from the selected column. +| Each line with that value of designated factor will be deleted. +| + +| *Notes:* +| - these parameters can be used several times using the "Add new Removing a level in factor" button +| - this filter can be used to filter NA in numerical parameters, or to remove a specific numerical value +| + + +------------ +Output files +------------ + + +dataMatrix + | tabular output + | Identical to the input Data Matrix file with the sample columns and variable lines deleted according to specified filters + | + +sampleMetadata + | tabular output + | Identical to the input Sample metadata file with the sample lines deleted according to specified filters + | + +variableMetadata + | tabular output + | Identical to the input Variable metadata file with the variable lines deleted according to specified filters + | + + +--------------------------------------------------- + +--------------- +Working example +--------------- + + +.. class:: warningmark + +Soon see the corresponding "Page" + + </help> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GalFilter/miniTools.R Thu Feb 23 04:39:36 2017 -0500 @@ -0,0 +1,133 @@ +##################################################### +# Mini tools for Galaxy scripting +# Coded by: M.Petera, +# - - +# R functions to use in R scripts and wrappers +# to make things easier (lightening code, reducing verbose...) +# - - +# V0: script structure + first functions +# V1: addition of functions to handle special characters in identifiers +##################################################### + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +# Function to call packages without printing all the verbose +# (only getting the essentials, like warning messages for example) + +shyLib <- function(...){ + for(i in 1:length(list(...))){ + suppressPackageStartupMessages(library(list(...)[[i]],character.only=TRUE)) + } +} + +#example: shyLib("xcms","pcaMethods") + + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +# Fonction pour sourcer les scripts R requis +# /!\ ATTENTION : actuellement la fonction n'est pas chargee au lancement du script, +# il faut donc la copier-coller dans le wrapper R pour pouvoir l'utiliser. + +if(FALSE){ +source_local <- function(...){ + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + for(i in 1:length(list(...))){ + source(paste(base_dir, list(...)[[i]], sep="/")) + } +} +} + +#example: source_local("filter_script.R","RcheckLibrary.R") + + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +# Functions to stock identifiers before applying make.names() and +# to reinject it into final matrices +# Note: it reproduces the original order of datasets' identifiers +# - - - +# stockID: stocks original identifiers and original order +# -> needs checked data regarding table match +# reproduceID: reinjects original identifiers and original order into final tables +# -> function to be used at the very end, when exporting tables + +stockID <- function(dataMatrix, Metadata, Mtype){ + # dataMatrix = data.frame containing dataMatrix + # Metadata = data.frame containing sampleMetadata or variableMetadata + # Mtype = "sample" or "variable" depending on Metadata content + cname <- colnames(dataMatrix)[1] + # dataMatrix temporary-stock + transfo - - - - + if(Mtype=="sample"){ + id.ori <- colnames(dataMatrix)[-1] + colnames(dataMatrix) <- make.names(colnames(dataMatrix)) + } + if(Mtype=="variable"){ + id.ori <- dataMatrix[,1] + dataMatrix[,1] <- make.names(dataMatrix[,1]) + } + # global stock - - - - - - - - - - - - - - - - + id.new <- data.frame(order.ori=c(1:length(Metadata[,1])),Metadata[,1], + id.new=make.names(Metadata[,1]),id.ori, + id.new.DM=make.names(id.ori),stringsAsFactors=FALSE) + colnames(id.new)[c(2,4)] <- c(colnames(Metadata)[1],cname) + # Metadata transfo + returning data - - - - - + Metadata[,1] <- make.names(Metadata[,1]) + return(list(id.match=id.new, dataMatrix=dataMatrix, Metadata=Metadata)) +} +#example: A<-stockID(myDM,mysM,"sample") ; myDM<-A$dataMatrix ; mysM<-A$Metadata ; A<-A$id.match + +reproduceID <- function(dataMatrix, Metadata, Mtype, id.match){ + # dataMatrix = data.frame containing dataMatrix + # Metadata = data.frame containing sampleMetadata or variableMetadata + # Mtype = "sample" or "variable" depending on Metadata content + # id.match = 'id.match' element produced by stockID + #Metadada - - - - - - - - - - - - - - + temp.table <- id.match[,c(1,2,3)] + ## Removing deleted rows + for(i in 1:(dim(id.match)[1])){ + if(!(temp.table[i,3]%in%Metadata[,1])){temp.table[i,1] <- 0} + } + if(length(which(temp.table[,1]==0))!=0){ + temp.table <- temp.table[-c(which(temp.table[,1]==0)),] + } + ## Restoring original identifiers and order + temp.table <- merge(x=temp.table,y=Metadata,by.x=3,by.y=1) + temp.table <- temp.table[order(temp.table$order.ori),] + Metadata <- temp.table[,-c(1,2)] + rownames(Metadata) <- NULL + #dataMatrix - - - - - - - - - - - - - + rownames(dataMatrix)<-dataMatrix[,1] + if(Mtype=="sample"){ + dataMatrix <- t(dataMatrix[,-1]) + } + temp.table <- id.match[,c(1,4,5)] + ## Removing deleted rows + for(i in 1:(dim(id.match)[1])){ + if(!(temp.table[i,3]%in%rownames(dataMatrix))){temp.table[i,1] <- 0} + } + if(length(which(temp.table[,1]==0))!=0){ + temp.table <- temp.table[-c(which(temp.table[,1]==0)),] + } + ## Restoring original identifiers and order + temp.table <- merge(x=temp.table,y=dataMatrix,by.x=3,by.y=0) + temp.table <- temp.table[order(temp.table$order.ori),] + if(Mtype=="variable"){ + dataMatrix <- temp.table[,-c(1,2,4)] + colnames(dataMatrix)[1] <- colnames(id.match)[4] + } else { + rownames(temp.table) <- temp.table[,3] + temp.table <- t(temp.table[,-c(1,2,3)]) + dataMatrix <- data.frame(rownames(temp.table),temp.table) + colnames(dataMatrix)[1] <- colnames(id.match)[4] + } + rownames(dataMatrix) <- NULL + # return datasets - - - - - - - - - - - + return(list(dataMatrix=dataMatrix, Metadata=Metadata)) +} +#example: B<-reproduceID(myDM,mysM,"sample",A) ; myDM<-B$dataMatrix ; mysM<-B$Metadata + + + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -