# HG changeset patch
# User proteomisc
# Date 1701424698 0
# Node ID fda6b789e26727bbef8865a551f8665f3e0b5086
Uploaded

diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Make_Design.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Make_Design.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,88 @@
+# Make_Design.R
+#
+# Collects the technology name, the sample/group design and the raw-file
+# paths given on the command line and saves them, with the rest of the
+# workspace, in "Imported.Project.Information.RData".
+#
+# Command-line arguments (parsed by batch::parseCommandArgs, not evaluated):
+#   technology  kept unchanged in the saved image
+#   designway   "makeit" builds the design from the group*/rank* arguments;
+#               any other value reads it from the file given as designfile
+#   designfile  design table with a header row
+#   group*      label applied to every rank* argument that follows it
+#   rank*       sample file; its basename is kept with and without extension
+#   file*       path of a raw data file, kept unchanged
+#
+# save.image() persists every global object, so the variable names used
+# below are part of this script's output and must not be renamed.
+options(show.error.messages = FALSE, error = function() {
+  cat(geterrmessage(), file = stderr())
+  q("no", 1, FALSE)
+})
+sink(stdout(), type = "message")
+suppressWarnings(suppressMessages(library("batch")))
+suppressWarnings(suppressMessages(library(tools)))
+
+listArguments <- parseCommandArgs(evaluate = FALSE)
+print(listArguments)
+technology <- listArguments[["technology"]]
+designway <- listArguments[["designway"]]
+# Fail with a readable message instead of R's "argument is of length zero".
+if (is.null(designway)) {
+  stop("Missing required argument 'designway'", call. = FALSE)
+}
+listArguments[["designway"]] <- NULL
+listArguments[["technology"]] <- NULL
+
+listcelsfullpath <- c()
+listfullnames <- c()
+# The first element of these two vectors is the header row of the design table.
+listcelsnames <- c("sample")
+listgroup <- c("group")
+designpath <- ""
+designo <- c()
+currentgroup <- ""
+ingroup <- FALSE
+names(listArguments)  # top-level expression: its value is printed to the log
+
+if (designway == "makeit") {
+  for (name in names(listArguments)) {
+    print(name)
+    # Arguments are recognised by the prefix of their name.
+    if (startsWith(name, "group")) {
+      currentgroup <- listArguments[[name]]
+    }
+    if (startsWith(name, "rank")) {
+      listcelsnames <- c(listcelsnames,
+                         file_path_sans_ext(basename(listArguments[[name]])))
+      listfullnames <- c(listfullnames, basename(listArguments[[name]]))
+      listgroup <- c(listgroup, currentgroup)
+    }
+    if (startsWith(name, "file")) {
+      listcelsfullpath <- c(listcelsfullpath, listArguments[[name]])
+    }
+  }
+  write.table(format(cbind(listcelsnames, listgroup), justify = "right"),
+              sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE,
+              file = "Imported.DataSet.Design.tsv")
+  # Drop the header element; the column names are set explicitly instead.
+  designo <- as.data.frame(cbind(listcelsnames[-1], listgroup[-1],
+                                 deparse.level = 0))
+  colnames(designo) <- c("sample", "group")
+} else {
+  designpath <- listArguments[["designfile"]]
+  listArguments[["designfile"]] <- NULL
+  designo <- read.table(designpath, header = TRUE)
+  for (name in names(listArguments)) {
+    if (startsWith(name, "rank")) {
+      listcelsnames <- c(listcelsnames,
+                         file_path_sans_ext(basename(listArguments[[name]])))
+      listfullnames <- c(listfullnames, basename(listArguments[[name]]))
+    }
+    if (startsWith(name, "file")) {
+      listcelsfullpath <- c(listcelsfullpath, listArguments[[name]])
+    }
+  }
+}
+rm(listArguments)
+save.image(paste("Imported.Project.Information", "RData", sep = "."))
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Make_Design_Read_Datasets.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Make_Design_Read_Datasets.xml Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,145 @@
+
+ Create a design file and collect technology information and read datasets.
+
+ citations.xml
+
+
+ r-base
+ r-batch
+ bioconductor-affy
+ bioconductor-affyPLM
+ bioconductor-limma
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ' ' not in value
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Authors** T.Bensellak, B.Ettetuani.
+
+---------------------------------------------------
+
+========================================================
+Make design and read dataSets
+========================================================
+
+-----------
+Description
+-----------
+
+Creates the design and stores the needed information about the datasets; it also reads the chosen dataset so that it can be processed. The dataset can be imported or selected from the available datasets.
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
++---------------------------+--------------------+-----------+
+| Name                      | output file        |format     |
++===========================+====================+===========+
+| Query GEO or upload files | Raw files          | Cel,Gpr,Gz|
++---------------------------+--------------------+-----------+
+
+
+
+**Downstream tools**
+
++-------------------------------------------+----------------------------------+----------+
+| Name                                      | Output file                      | Format   |
++===========================================+==================================+==========+
+|Preprocess.DataSet.Microarray              | Preprocess.Project.Data.RData    | RData    |
++-------------------------------------------+----------------------------------+----------+
+
+
+-----------
+Input files
+-----------
+
++---------------------------+-----------------+
+| Parameter : num + label   | Format          |
++===========================+=================+
+| Raw Files                 | CEL,GPR,GZ      |
++---------------------------+-----------------+
+| Design                    | Tabular         |
++---------------------------+-----------------+
+| Technology                | String          |
++---------------------------+-----------------+
+
+------------
+Output files
+------------
+
+**DataSet.Design.tsv**
+
+**Read.Project.Data.RData**
+
+------------------------------
+General schema of the workflow
+------------------------------
+
+https://bensellak.github.io/microarrays-galaxy/
+
+
+
+
+
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/ReadDataSet.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/ReadDataSet.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,91 @@
+# ReadDataSet.R
+#
+# Reads the raw files of one project with the reader that matches its
+# technology and saves the workspace as "MicroArrayObject.RData".
+#
+# Command-line arguments (parsed by batch::parseCommandArgs, not evaluated):
+#   datasetsource  "intern": read entry number projectchoice of listdataset;
+#                  any other value: read the files in listcelsfullpath
+#   image          .RData file loaded first when datasetsource is "intern"
+#   imageimported  .RData file loaded first otherwise
+#   projectchoice  index into listdataset ("intern" only)
+#   anything else  is forwarded unchanged to the reader function
+#
+# technology, listdataset, listcelsfullpath and listfullnames are not set in
+# this script; they have to come from the loaded image.
+#
+# save.image() persists every global object (including those the readers
+# assign with <<-), so global names must stay as they are.
+options(show.error.messages = FALSE, error = function() {
+  cat(geterrmessage(), file = stderr())
+  q("no", 1, FALSE)
+})
+sink(stdout(), type = "message")
+suppressWarnings(suppressMessages(library("batch")))
+suppressWarnings(suppressMessages(library(affy)))
+suppressWarnings(suppressMessages(library(affyPLM)))
+suppressWarnings(suppressMessages(library(limma)))
+
+# Source a file from the directory of this script (taken from --file=).
+source_local <- function(fname) {
+  argv <- commandArgs(trailingOnly = FALSE)
+  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
+  source(paste(base_dir, fname, sep = "/"))
+}
+source_local("Read_GenePix_Functions.R")
+source_local("Read_Affymetrix_Functions.R")
+source_local("Read_Agilent_Functions.R")
+
+listArguments <- parseCommandArgs(evaluate = FALSE)
+datasetsource <- listArguments[["datasetsource"]]
+listArguments[["datasetsource"]] <- NULL
+
+if (datasetsource == "intern") {
+  if (!is.null(listArguments[["image"]])) {
+    load(listArguments[["image"]])
+    listArguments[["image"]] <- NULL
+  }
+  dataset <- listArguments[["projectchoice"]]
+  colus <- as.integer(dataset)
+  datachosen <- listdataset[colus]
+  listArguments[["projectchoice"]] <- NULL
+  # Fail on an unknown technology rather than saving an image that lacks
+  # MicroArray_Object.
+  reader <- switch(technology,
+    Affymetrix = "ReadAffymetrix",
+    Agilent_One_Color = "ReadAgilentOneChannel",
+    Agilent_Two_Colors = "ReadAgilentTwoChannels",
+    GenePix_One_Color = "ReadGenePixOneChannel",
+    GenePix_Two_Colors = "ReadGenePixTwoChannels",
+    stop("Unsupported technology: ", technology, call. = FALSE)
+  )
+  # The chosen dataset is passed first, i.e. as the reader's path argument.
+  listArguments <- append(list(datachosen), listArguments)
+  MicroArray_Object <- do.call(reader, listArguments)
+} else {
+  if (!is.null(listArguments[["imageimported"]])) {
+    load(listArguments[["imageimported"]])
+    listArguments[["imageimported"]] <- NULL
+  }
+  reader <- switch(technology,
+    Affymetrix = "ReadAffymetrixImported",
+    Agilent_One_Color = "ReadAgilentOneChannelImported",
+    Agilent_Two_Colors = "ReadAgilentTwoChannelsImported",
+    GenePix_One_Color = "ReadGenePixOneChannelImported",
+    GenePix_Two_Colors = "ReadGenePixTwoChannelsImported",
+    stop("Unsupported technology: ", technology, call. = FALSE)
+  )
+  listArguments <- append(
+    listArguments,
+    list(listcelsfullpath = listcelsfullpath, listfullnames = listfullnames)
+  )
+  if (technology == "Affymetrix") {
+    # ReadAffymetrixImported() has no listfullnames parameter.
+    listArguments[["listfullnames"]] <- NULL
+  }
+  MicroArray_Object <- do.call(reader, listArguments)
+}
+
+# reader is a helper of this script only; keep it out of the saved image.
+rm(listArguments, reader)
+save.image(paste("MicroArrayObject", "RData", sep = "."))
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Read_Affymetrix_Functions.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Read_Affymetrix_Functions.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,26 @@
+# Readers for Affymetrix CEL files, built on ReadAffy() from the affy package.
+
+# Read a dataset stored under <path>/data.
+#   path        dataset directory: CEL files are read from <path>/data/rawfiles,
+#               the design table from <path>/data/design/design.txt
+#   compressed  passed to ReadAffy() as compress
+# Writes the design table to "design.tsv" in the working directory and
+# returns list(affy_object, designo).
+ReadAffymetrix <- function(path = "", compressed = TRUE) {
+  affy_object <- ReadAffy(celfile.path = paste0(path, "/data/rawfiles"),
+                          compress = compressed)
+  designo <- read.table(paste(path, "data/design/design.txt", sep = "/"),
+                        header = TRUE)
+  write.table(format(designo, justify = "right"), sep = "\t", quote = FALSE,
+              row.names = FALSE, col.names = TRUE, file = "design.tsv")
+  list(affy_object = affy_object, designo = designo)
+}
+
+# Read the CEL files given by their full paths.
+#   listcelsfullpath  file paths, passed to ReadAffy() as filenames
+#   compressed        passed to ReadAffy() as compress
+# Returns list(affy_object).
+ReadAffymetrixImported <- function(listcelsfullpath = "", compressed = TRUE) {
+  affy_object <- ReadAffy(filenames = listcelsfullpath, compress = compressed)
+  list(affy_object = affy_object)
+}
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Read_Agilent_Functions.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Read_Agilent_Functions.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,66 @@
+# Readers for Agilent files, built on read.maimages() from limma.
+#
+# AgilFiles and files are assigned in the global environment (<<-). This is
+# kept because ReadDataSet.R saves the whole global environment, so both
+# objects are part of its saved image.
+
+# List the Agilent raw files of a dataset and read its design table.
+#   path  dataset directory (raw files in <path>/data/rawfiles, design in
+#         <path>/data/design/design.txt)
+# Files are selected by extension and returned grouped in this order:
+# .TXT.GZ, .txt.gz, .txt, .TXT. Writes the design to "design.tsv".
+# Returns list(AgilFiles, designo).
+getAgilFiles <- function(path) {
+  files <- list.files(paste0(path, "/data/rawfiles"))
+  # Anchored, escaped regular expressions: the dot must be a literal dot.
+  AgilFiles <<- files[c(grep("\\.TXT\\.GZ$", files), grep("\\.txt\\.gz$", files),
+                        grep("\\.txt$", files), grep("\\.TXT$", files))]
+  designo <- read.table(paste(path, "data/design/design.txt", sep = "/"),
+                        header = TRUE)
+  write.table(format(designo, justify = "right"), sep = "\t", quote = FALSE,
+              row.names = FALSE, col.names = TRUE, file = "design.tsv")
+  list(AgilFiles = AgilFiles, designo = designo)
+}
+
+# Read the Agilent files of a dataset with green.only = TRUE.
+# Returns list(RFile, designo).
+ReadAgilentOneChannel <- function(path) {
+  files <<- getAgilFiles(path)
+  RFile <- read.maimages(files[[1]], source = "agilent",
+                         path = paste0(path, "/data/rawfiles"), green.only = TRUE)
+  list(RFile = RFile, designo = files[[2]])
+}
+
+# Read the Agilent files of a dataset (both channels).
+# Returns list(RFile, designo).
+ReadAgilentTwoChannels <- function(path) {
+  files <<- getAgilFiles(path)
+  RFile <- read.maimages(files[[1]], source = "agilent",
+                         path = paste0(path, "/data/rawfiles"))
+  list(RFile = RFile, designo = files[[2]])
+}
+
+# Imported files are used as given; no filtering by extension is applied.
+#   listcelsfullpath  paths of the files to read
+#   listfullnames     accepted for interface compatibility, not used
+# Returns list(AgilFiles).
+getAgilFilesImported <- function(listcelsfullpath, listfullnames) {
+  AgilFiles <<- listcelsfullpath
+  list(AgilFiles = AgilFiles)
+}
+
+# Read imported Agilent files with green.only = TRUE.
+# Returns list(RFile).
+ReadAgilentOneChannelImported <- function(listcelsfullpath, listfullnames) {
+  files <<- getAgilFilesImported(listcelsfullpath, listfullnames)
+  RFile <- read.maimages(files[[1]], source = "agilent", green.only = TRUE)
+  list(RFile = RFile)
+}
+
+# Read imported Agilent files (both channels).
+# Returns list(RFile).
+ReadAgilentTwoChannelsImported <- function(listcelsfullpath, listfullnames) {
+  files <<- getAgilFilesImported(listcelsfullpath, listfullnames)
+  RFile <- read.maimages(files[[1]], source = "agilent")
+  list(RFile = RFile)
+}
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Read_GenePix_Functions.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Read_GenePix_Functions.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,77 @@
+# Readers for GenePix (.gpr) files, built on read.maimages() from limma.
+#
+# gprFiles and files are assigned in the global environment (<<-). This is
+# kept because ReadDataSet.R saves the whole global environment, so both
+# objects are part of its saved image.
+
+# List the .gpr files of a dataset and read its design table.
+#   path  dataset directory (raw files in <path>/data/rawfiles, design in
+#         <path>/data/design/design.txt)
+# Files are selected by extension and returned grouped in this order:
+# .GPR.GZ, .gpr.gz, .GPR, .gpr. Writes the design to "design.tsv".
+# Returns list(gprFiles, designo).
+getGprFiles <- function(path) {
+  files <- dir(paste0(path, "/data/rawfiles"))
+  # Anchored, escaped regular expressions: the dot must be a literal dot.
+  gprFiles <<- files[c(grep("\\.GPR\\.GZ$", files), grep("\\.gpr\\.gz$", files),
+                       grep("\\.GPR$", files), grep("\\.gpr$", files))]
+  designo <- read.table(paste(path, "data/design/design.txt", sep = "/"),
+                        header = TRUE)
+  write.table(format(designo, justify = "right"), sep = "\t", quote = FALSE,
+              row.names = FALSE, col.names = TRUE, file = "design.tsv")
+  list(gprFiles = gprFiles, designo = designo)
+}
+
+# Names of the .GAL / .gal files found in <path>/data/rawfiles.
+getGalFiles <- function(path) {
+  files <- dir(paste0(path, "/data/rawfiles"))
+  files[c(grep("\\.GAL$", files), grep("\\.gal$", files))]
+}
+
+# Read the GenePix files of a dataset (both channels).
+# Returns list(RFile, designo).
+ReadGenePixTwoChannels <- function(path) {
+  files <<- getGprFiles(path)
+  RFile <- read.maimages(files[[1]], source = "genepix",
+                         path = paste0(path, "/data/rawfiles"))
+  list(RFile = RFile, designo = files[[2]])
+}
+
+# Read the GenePix files of a dataset with green.only = TRUE.
+# Returns list(RFile, designo).
+ReadGenePixOneChannel <- function(path) {
+  files <<- getGprFiles(path)
+  RFile <- read.maimages(files[[1]], source = "genepix",
+                         path = paste0(path, "/data/rawfiles"), green.only = TRUE)
+  list(RFile = RFile, designo = files[[2]])
+}
+
+# Imported files are used as given; no filtering by extension is applied.
+#   listcelsfullpath  paths of the files to read
+#   listfullnames     accepted for interface compatibility, not used
+# Returns list(gprFiles).
+getGprFilesImported <- function(listcelsfullpath, listfullnames) {
+  gprFiles <<- listcelsfullpath
+  list(gprFiles = gprFiles)
+}
+
+# Imported counterpart of getGalFiles(): returns listcelsfullpath unchanged.
+getGalFilesImported <- function(listcelsfullpath, listfullnames) {
+  listcelsfullpath
+}
+
+# Read imported GenePix files (both channels).
+# Returns list(RFile).
+ReadGenePixTwoChannelsImported <- function(listcelsfullpath, listfullnames) {
+  files <<- getGprFilesImported(listcelsfullpath, listfullnames)
+  RFile <- read.maimages(files[[1]], source = "genepix")
+  list(RFile = RFile)
+}
+
+# Read imported GenePix files with green.only = TRUE.
+# Returns list(RFile).
+ReadGenePixOneChannelImported <- function(listcelsfullpath, listfullnames) {
+  files <<- getGprFilesImported(listcelsfullpath, listfullnames)
+  RFile <- read.maimages(files[[1]], source = "genepix", green.only = TRUE)
+  list(RFile = RFile)
+}
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Readme.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Readme.txt Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,1 @@
+Galaxy tool for making a design file and reading datasets as a binary rdata for the preprocessing step
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/citations.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/citations.xml Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,66 @@
+
+ 1.0
+
+
+
+ @Manual{,
+ title = {R: A Language and Environment for Statistical Computing},
+ author = {{R Core Team}},
+ organization = {R Foundation for Statistical Computing},
+ address = {Vienna, Austria},
+ year = {2017},
+ url = {https://www.R-project.org/},
+ }
+
+
+ @Article{,
+ title = {Passing in Command Line Arguments and Parallel Cluster/Multicore Batching in {R} with {batch}},
+ author = {Thomas J. Hoffmann},
+ journal = {Journal of Statistical Software, Code Snippets},
+ year = {2011},
+ volume = {39},
+ number = {1},
+ pages = {1--11},
+ url = {http://www.jstatsoft.org/v39/c01/},
+ }
+
+
+ @Article{,
+ author = {Laurent Gautier and Leslie Cope and Benjamin M. Bolstad and Rafael A.
Irizarry}, + title = {affy---analysis of Affymetrix GeneChip data at the probe level}, + journal = {Bioinformatics}, + volume = {20}, + number = {3}, + year = {2004}, + issn = {1367-4803}, + pages = {307--315}, + doi = {10.1093/bioinformatics/btg405}, + publisher = {Oxford University Press}, + address = {Oxford, UK}, + } + + + @Article{, + author = {Matthew E Ritchie and Belinda Phipson and Di Wu and Yifang Hu and Charity W Law and Wei Shi and Gordon K Smyth}, + title = {{limma} powers differential expression analyses for {RNA}-sequencing and microarray studies}, + journal = {Nucleic Acids Research}, + year = {2015}, + volume = {43}, + number = {7}, + pages = {e47}, + } + + + @Article{, + title = {Quality assessment for short oligonucleotide arrays.}, + author = {Julia Brettschneider and Francois Collin and Benjamin M Bolstad and Terence P Speed}, + journal = {Technometrics}, + year = {2007}, + volume = {In press}, + } + + + + + + diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/images/workflow.jpg Binary file read_make_design_datasets/images/workflow.jpg has changed diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/DataSet.Design.GenePix_Two_Colors.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read_make_design_datasets/test-data/DataSet.Design.GenePix_Two_Colors.tsv Fri Dec 01 09:58:18 2023 +0000 @@ -0,0 +1,9 @@ + sample group + GSM103772_1 Col-0_MgSO4_24hpi_C_miniarray + GSM103772_2 Col-0_MgSO4_24hpi_C_miniarray + GSM103773_1 Col-0_MgSO4_24hpi_C_miniarray + GSM103773_2 Col-0_MgSO4_24hpi_C_miniarray + GSM103774_1 Col-0_PsmES4326_24hpi_C_miniarray + GSM103774_2 Col-0_PsmES4326_24hpi_C_miniarray + GSM103775_1 Col-0_PsmES4326_24hpi_C_miniarray + GSM103775_2 Col-0_PsmES4326_24hpi_C_miniarray diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103772_1 Binary file read_make_design_datasets/test-data/GSM103772_1 has changed diff -r 000000000000 -r fda6b789e267 
read_make_design_datasets/test-data/GSM103772_2 Binary file read_make_design_datasets/test-data/GSM103772_2 has changed diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103773_1 Binary file read_make_design_datasets/test-data/GSM103773_1 has changed diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103773_2 Binary file read_make_design_datasets/test-data/GSM103773_2 has changed diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103774_1 Binary file read_make_design_datasets/test-data/GSM103774_1 has changed diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103774_2 Binary file read_make_design_datasets/test-data/GSM103774_2 has changed diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103775_1 Binary file read_make_design_datasets/test-data/GSM103775_1 has changed diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103775_2 Binary file read_make_design_datasets/test-data/GSM103775_2 has changed diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/Read.Project.Data.RData Binary file read_make_design_datasets/test-data/Read.Project.Data.RData has changed