# HG changeset patch
# User proteomisc
# Date 1701424698 0
# Node ID fda6b789e26727bbef8865a551f8665f3e0b5086
Uploaded
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Make_Design.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Make_Design.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,63 @@
+# Collect the technology, sample names, groups and raw-file paths given on the
+# command line, then store every remaining global with save.image().
+options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()); q("no", 1, FALSE) })
+sink(stdout(), type = "message")
+suppressWarnings(suppressMessages(library("batch")))
+suppressWarnings(suppressMessages(library(tools)))
+listArguments <- parseCommandArgs(evaluate = FALSE)
+print(listArguments)
+technology <- listArguments[["technology"]]
+designway <- listArguments[["designway"]]
+# Fail with a readable message rather than "argument is of length zero" below.
+if (is.null(designway)) stop("missing required argument: designway")
+listArguments[["designway"]] <- NULL
+listArguments[["technology"]] <- NULL
+listcelsfullpath <- c()
+listfullnames <- c()
+listcelsnames <- c("sample")  # first element is the header written to the design table
+listgroup <- c("group")       # first element is the header written to the design table
+designpath <- ""
+designo <- c()
+currentgroup <- ""
+ingroup <- FALSE
+names(listArguments)
+if (designway == "makeit") {
+  for (name in names(listArguments)) {
+    print(name)
+    # A "group*" argument sets the group recorded for the "rank*" arguments after it.
+    if (startsWith(name, "group")) {
+      currentgroup <- listArguments[[name]]
+    }
+    if (startsWith(name, "rank")) {
+      listcelsnames <- c(listcelsnames, file_path_sans_ext(basename(listArguments[[name]])))
+      listfullnames <- c(listfullnames, basename(listArguments[[name]]))
+      listgroup <- c(listgroup, currentgroup)
+    }
+    if (startsWith(name, "file")) {
+      listcelsfullpath <- c(listcelsfullpath, listArguments[[name]])
+    }
+  }
+  write.table(format(cbind(listcelsnames, listgroup), justify = "right"), sep = "\t",
+              quote = FALSE, row.names = FALSE, col.names = FALSE,
+              file = "Imported.DataSet.Design.tsv")
+  # Drop the leading header elements; column names are set on the next line.
+  designo <- as.data.frame(cbind(listcelsnames[-1], listgroup[-1], deparse.level = 0))
+  colnames(designo) <- c("sample", "group")
+} else {
+  # The design is read from the supplied file; only names and paths are collected.
+  designpath <- listArguments[["designfile"]]
+  listArguments[["designfile"]] <- NULL
+  designo <- read.table(designpath, header = TRUE)
+  for (name in names(listArguments)) {
+    if (startsWith(name, "rank")) {
+      listcelsnames <- c(listcelsnames, file_path_sans_ext(basename(listArguments[[name]])))
+      listfullnames <- c(listfullnames, basename(listArguments[[name]]))
+    }
+    if (startsWith(name, "file")) {
+      listcelsfullpath <- c(listcelsfullpath, listArguments[[name]])
+    }
+  }
+}
+rm(listArguments)
+save.image(paste("Imported.Project.Information", "RData", sep = "."))
+
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Make_Design_Read_Datasets.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Make_Design_Read_Datasets.xml Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,145 @@
+
+ Create a design file and collect technology information and read datasets.
+
+ citations.xml
+
+
+ r-base
+ r-batch
+ bioconductor-affy
+ bioconductor-affyPLM
+ bioconductor-limma
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ' ' not in value
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**Authors** T.Bensellak, B.Ettetuani.
+
+---------------------------------------------------
+
+========================================================
+Make design and read dataSets
+========================================================
+
+-----------
+Description
+-----------
+
+Creates the design and stores the needed information about the datasets; it also reads the chosen dataset so that it can be processed. The dataset can be imported or selected from the available datasets.
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
++---------------------------+--------------------+-----------+
+| Name | output file |format |
++===========================+====================+===========+
+| Query GEO or upload files | Raw files | Cel,Gpr,Gz|
++---------------------------+--------------------+-----------+
+
+
+
+**Downstream tools**
+
++-------------------------------------------+----------------------------------+----------+
+| Name | Output file | Format |
++===========================================+==================================+==========+
+|Preprocess.DataSet.Microarray | Preprocess.Project.Data.RData | RData |
++-------------------------------------------+----------------------------------+----------+
+
+
+-----------
+Input files
+-----------
+
++---------------------------+-----------------+
+| Parameter : num + label | Format |
++===========================+=================+
+| Raw Files | CEL,GPR,GZ |
++---------------------------+-----------------+
+| Design | Tabular |
++---------------------------+-----------------+
+| Technology | String |
++---------------------------+-----------------+
+
+------------
+Output files
+------------
+
+**DataSet.Design.tsv**
+
+**Read.Project.Data.RData**
+
+------------------------------
+General schema of the workflow
+------------------------------
+
+https://bensellak.github.io/microarrays-galaxy/
+
+
+
+
+
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/ReadDataSet.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/ReadDataSet.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,81 @@
+options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)})
+sink(stdout(), type = "message")
+suppressWarnings(suppressMessages(library("batch")))
+suppressWarnings(suppressMessages(library(affy)))
+suppressWarnings(suppressMessages(library(affyPLM)))
+suppressWarnings(suppressMessages(library(limma)))
+# Source `fname` from the directory of the script named by the --file= argument.
+source_local <- function(fname) {
+  script_arg <- grep("--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
+  source(paste(dirname(substring(script_arg, 8)), fname, sep = "/"))
+}
+# Read the selected dataset with the reader matching `technology` and save the
+# resulting workspace to MicroArrayObject.RData.
+source_local("Read_GenePix_Functions.R")
+source_local("Read_Affymetrix_Functions.R")
+source_local("Read_Agilent_Functions.R")
+
+listArguments <- parseCommandArgs(evaluate = FALSE)
+datasetsource <- listArguments[["datasetsource"]]
+listArguments[["datasetsource"]] <- NULL
+
+if (datasetsource == "intern") {
+  # `technology` and `listdataset` are not defined in this script; presumably
+  # they come from the image loaded here.
+  if (!is.null(listArguments[["image"]])) {
+    load(listArguments[["image"]])
+    listArguments[["image"]] <- NULL
+  }
+  dataset <- listArguments[["projectchoice"]]
+  colus <- as.integer(dataset)
+  datachosen <- listdataset[colus]
+  listArguments[["projectchoice"]] <- NULL
+  # An unknown technology stops the script instead of silently saving an image
+  # that has no MicroArray_Object.
+  MicroArray_Object <- do.call(
+    switch(as.character(technology),
+      Affymetrix = "ReadAffymetrix",
+      Agilent_One_Color = "ReadAgilentOneChannel",
+      Agilent_Two_Colors = "ReadAgilentTwoChannels",
+      GenePix_One_Color = "ReadGenePixOneChannel",
+      GenePix_Two_Colors = "ReadGenePixTwoChannels",
+      stop("unsupported technology: ", technology)
+    ),
+    # The chosen dataset is passed as the first positional argument.
+    append(list(datachosen), listArguments)
+  )
+} else {
+  # `technology`, `listcelsfullpath` and `listfullnames` are not defined in this
+  # script; presumably they come from the image loaded here.
+  if (!is.null(listArguments[["imageimported"]])) {
+    load(listArguments[["imageimported"]])
+    listArguments[["imageimported"]] <- NULL
+  }
+  listArguments <- append(listArguments, list(listcelsfullpath = listcelsfullpath, listfullnames = listfullnames))
+  if (technology == "Affymetrix") {
+    # The Affymetrix reader is called without the listfullnames argument.
+    listArguments[["listfullnames"]] <- NULL
+  }
+  MicroArray_Object <- do.call(
+    switch(as.character(technology),
+      Affymetrix = "ReadAffymetrixImported",
+      Agilent_One_Color = "ReadAgilentOneChannelImported",
+      Agilent_Two_Colors = "ReadAgilentTwoChannelsImported",
+      GenePix_One_Color = "ReadGenePixOneChannelImported",
+      GenePix_Two_Colors = "ReadGenePixTwoChannelsImported",
+      stop("unsupported technology: ", technology)
+    ),
+    listArguments
+  )
+}
+
+rm(listArguments)
+save.image(paste("MicroArrayObject", "RData", sep = "."))
+
+
+
+
+
+
+
+
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Read_Affymetrix_Functions.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Read_Affymetrix_Functions.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,13 @@
+# Read CEL files from <path>/data/rawfiles plus <path>/data/design/design.txt,
+# copy the design to design.tsv, and return list(affy_object, designo).
+ReadAffymetrix <- function(path = "", compressed = TRUE) {
+  affy_object <- ReadAffy(celfile.path = paste0(path, "/data/rawfiles"), compress = compressed)
+  designo <- read.table(paste(path, "data/design/design.txt", sep = "/"), header = TRUE)
+  write.table(format(designo, justify = "right"), sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE, file = "design.tsv")
+  list(affy_object = affy_object, designo = designo)
+}
+# Read the given CEL files with ReadAffy() and return list(affy_object = ...).
+ReadAffymetrixImported <- function(listcelsfullpath = "", compressed = TRUE) {
+  cel_data <- ReadAffy(filenames = listcelsfullpath, compress = compressed)
+  list(affy_object = cel_data)
+}
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Read_Agilent_Functions.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Read_Agilent_Functions.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,42 @@
+# List the raw files under <path>/data/rawfiles ending in .txt or .txt.gz (dots
+# escaped so they match literally), read the design table and copy it to design.tsv.
+getAgilFiles <- function(path) {
+  files <- list.files(paste0(path, "/data/rawfiles"))
+  AgilFiles <<- files[unlist(lapply(c("\\.TXT\\.GZ$", "\\.txt\\.gz$", "\\.txt$", "\\.TXT$"), grep, x = files))]  # global copy kept
+  designo <- read.table(paste(path, "data/design/design.txt", sep = "/"), header = TRUE)
+  write.table(format(designo, justify = "right"), sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE, file = "design.tsv")
+  list(AgilFiles = AgilFiles, designo = designo)
+}
+# Read the files found by getAgilFiles() via read.maimages(green.only = TRUE).
+ReadAgilentOneChannel <- function(path) {
+  files <<- getAgilFiles(path)  # global assignment kept from the original
+  raw_data <- read.maimages(files[[1]], source = "agilent", path = paste0(path, "/data/rawfiles"), green.only = TRUE)
+  list(RFile = raw_data, designo = files[[2]])
+}
+
+# Read the files found by getAgilFiles() via read.maimages(source = "agilent").
+ReadAgilentTwoChannels <- function(path) {
+  files <<- getAgilFiles(path)  # global assignment kept from the original
+  raw_data <- read.maimages(files[[1]], source = "agilent", path = paste0(path, "/data/rawfiles"))
+  list(RFile = raw_data, designo = files[[2]])
+}
+# Return the imported raw-file paths unchanged as list(AgilFiles = ...).
+# No extension filtering is applied; `listfullnames` is accepted but not used.
+getAgilFilesImported <- function(listcelsfullpath, listfullnames) {
+  # The paths are also assigned to the global `AgilFiles`, as before.
+  AgilFiles <<- listcelsfullpath
+  list(AgilFiles = listcelsfullpath)
+}
+# Read the imported files via read.maimages(source = "agilent", green.only = TRUE).
+ReadAgilentOneChannelImported <- function(listcelsfullpath, listfullnames) {
+  files <<- getAgilFilesImported(listcelsfullpath, listfullnames)  # global kept
+  raw_data <- read.maimages(files[[1]], source = "agilent", green.only = TRUE)
+  list(RFile = raw_data)
+}
+
+# Read the imported files via read.maimages(source = "agilent").
+ReadAgilentTwoChannelsImported <- function(listcelsfullpath, listfullnames) {
+  files <<- getAgilFilesImported(listcelsfullpath, listfullnames)  # global kept
+  raw_data <- read.maimages(files[[1]], source = "agilent")
+  list(RFile = raw_data)
+}
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Read_GenePix_Functions.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Read_GenePix_Functions.R Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,63 @@
+# List the raw files under <path>/data/rawfiles ending in .gpr or .gpr.gz (dots
+# escaped so they match literally), read the design table and copy it to design.tsv.
+getGprFiles <- function(path) {
+  files <- list.files(paste0(path, "/data/rawfiles"))
+  gprFiles <<- files[unlist(lapply(c("\\.GPR\\.GZ$", "\\.gpr\\.gz$", "\\.GPR$", "\\.gpr$"), grep, x = files))]  # global copy kept
+  designo <- read.table(paste(path, "data/design/design.txt", sep = "/"), header = TRUE)
+  write.table(format(designo, justify = "right"), sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE, file = "design.tsv")
+  list(gprFiles = gprFiles, designo = designo)
+}
+
+
+# List the .GAL/.gal file names found under <path>/data/rawfiles.
+getGalFiles <- function(path) {
+  files <- list.files(paste0(path, "/data/rawfiles"))
+  # Dots are escaped so "." only matches a literal dot.
+  files[c(grep("\\.GAL$", files), grep("\\.gal$", files))]
+}
+
+# Read the files found by getGprFiles() via read.maimages(source = "genepix").
+ReadGenePixTwoChannels <- function(path) {
+  files <<- getGprFiles(path)  # global assignment kept from the original
+  raw_data <- read.maimages(files[[1]], source = "genepix", path = paste0(path, "/data/rawfiles"))
+  list(RFile = raw_data, designo = files[[2]])
+}
+
+# Read the files found by getGprFiles() via
+# read.maimages(source = "genepix", green.only = TRUE).
+ReadGenePixOneChannel <- function(path) {
+  files <<- getGprFiles(path)  # global assignment kept from the original
+  raw_data <- read.maimages(files[[1]], source = "genepix", path = paste0(path, "/data/rawfiles"), green.only = TRUE)
+  list(RFile = raw_data, designo = files[[2]])
+}
+# Return the imported raw-file paths unchanged as list(gprFiles = ...).
+# No extension filtering is applied; `listfullnames` is accepted but not used.
+getGprFilesImported <- function(listcelsfullpath, listfullnames) {
+  # The paths are also assigned to the global `gprFiles`, as before.
+  gprFiles <<- listcelsfullpath
+  list(gprFiles = listcelsfullpath)
+}
+
+
+# Return the imported file paths unchanged.
+#
+# No extension filtering is applied; `listfullnames` is accepted but is not
+# used by this function.
+getGalFilesImported <- function(listcelsfullpath, listfullnames) {
+  listcelsfullpath
+}
+
+# Read the imported files via read.maimages(source = "genepix").
+ReadGenePixTwoChannelsImported <- function(listcelsfullpath, listfullnames) {
+  files <<- getGprFilesImported(listcelsfullpath, listfullnames)  # global kept
+  raw_data <- read.maimages(files[[1]], source = "genepix")
+  list(RFile = raw_data)
+}
+
+# Read the imported files via
+# read.maimages(source = "genepix", green.only = TRUE).
+ReadGenePixOneChannelImported <- function(listcelsfullpath, listfullnames) {
+  files <<- getGprFilesImported(listcelsfullpath, listfullnames)  # global kept
+  raw_data <- read.maimages(files[[1]], source = "genepix", green.only = TRUE)
+  list(RFile = raw_data)
+}
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/Readme.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Readme.txt Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,1 @@
+Galaxy tool for making a design file and reading datasets into a binary RData file for the preprocessing step.
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/citations.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/citations.xml Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,66 @@
+
+ 1.0
+
+
+
+ @Manual{,
+ title = {R: A Language and Environment for Statistical Computing},
+ author = {{R Core Team}},
+ organization = {R Foundation for Statistical Computing},
+ address = {Vienna, Austria},
+ year = {2017},
+ url = {https://www.R-project.org/},
+ }
+
+
+ @Article{,
+ title = {Passing in Command Line Arguments and Parallel Cluster/Multicore Batching in {R} with {batch}},
+ author = {Thomas J. Hoffmann},
+ journal = {Journal of Statistical Software, Code Snippets},
+ year = {2011},
+ volume = {39},
+ number = {1},
+ pages = {1--11},
+ url = {http://www.jstatsoft.org/v39/c01/},
+ }
+
+
+ @Article{,
+ author = {Laurent Gautier and Leslie Cope and Benjamin M. Bolstad and Rafael A. Irizarry},
+ title = {affy---analysis of Affymetrix GeneChip data at the probe level},
+ journal = {Bioinformatics},
+ volume = {20},
+ number = {3},
+ year = {2004},
+ issn = {1367-4803},
+ pages = {307--315},
+ doi = {10.1093/bioinformatics/btg405},
+ publisher = {Oxford University Press},
+ address = {Oxford, UK},
+ }
+
+
+ @Article{,
+ author = {Matthew E Ritchie and Belinda Phipson and Di Wu and Yifang Hu and Charity W Law and Wei Shi and Gordon K Smyth},
+ title = {{limma} powers differential expression analyses for {RNA}-sequencing and microarray studies},
+ journal = {Nucleic Acids Research},
+ year = {2015},
+ volume = {43},
+ number = {7},
+ pages = {e47},
+ }
+
+
+ @Article{,
+ title = {Quality assessment for short oligonucleotide arrays.},
+ author = {Julia Brettschneider and Francois Collin and Benjamin M Bolstad and Terence P Speed},
+ journal = {Technometrics},
+ year = {2007},
+ volume = {In press},
+ }
+
+
+
+
+
+
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/images/workflow.jpg
Binary file read_make_design_datasets/images/workflow.jpg has changed
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/DataSet.Design.GenePix_Two_Colors.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/test-data/DataSet.Design.GenePix_Two_Colors.tsv Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,9 @@
+ sample group
+ GSM103772_1 Col-0_MgSO4_24hpi_C_miniarray
+ GSM103772_2 Col-0_MgSO4_24hpi_C_miniarray
+ GSM103773_1 Col-0_MgSO4_24hpi_C_miniarray
+ GSM103773_2 Col-0_MgSO4_24hpi_C_miniarray
+ GSM103774_1 Col-0_PsmES4326_24hpi_C_miniarray
+ GSM103774_2 Col-0_PsmES4326_24hpi_C_miniarray
+ GSM103775_1 Col-0_PsmES4326_24hpi_C_miniarray
+ GSM103775_2 Col-0_PsmES4326_24hpi_C_miniarray
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103772_1
Binary file read_make_design_datasets/test-data/GSM103772_1 has changed
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103772_2
Binary file read_make_design_datasets/test-data/GSM103772_2 has changed
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103773_1
Binary file read_make_design_datasets/test-data/GSM103773_1 has changed
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103773_2
Binary file read_make_design_datasets/test-data/GSM103773_2 has changed
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103774_1
Binary file read_make_design_datasets/test-data/GSM103774_1 has changed
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103774_2
Binary file read_make_design_datasets/test-data/GSM103774_2 has changed
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103775_1
Binary file read_make_design_datasets/test-data/GSM103775_1 has changed
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/GSM103775_2
Binary file read_make_design_datasets/test-data/GSM103775_2 has changed
diff -r 000000000000 -r fda6b789e267 read_make_design_datasets/test-data/Read.Project.Data.RData
Binary file read_make_design_datasets/test-data/Read.Project.Data.RData has changed