Mercurial > repos > proteomisc > read_make_design_datasets
changeset 0:fda6b789e267 draft
Uploaded
line wrap: on
line diff
# File: read_make_design_datasets/Make_Design.R
#
# Builds the sample/group design for a set of raw microarray files from the
# name/value pairs given on the command line, then saves the whole workspace
# to "Imported.Project.Information.RData".
#
# Recognised arguments (parsed with batch::parseCommandArgs):
#   technology    platform label, stored as-is in the saved workspace
#   designway     "makeit" builds the design from group.*/rank.*/file.*
#                 arguments; any other value reads it from `designfile`
#   designfile    path of an existing design table (non-"makeit" mode only)
#   group.<n>     group label applied to the rank.* entries that follow it
#   rank.<n>      display name of a raw file
#   file.<n>      path of that raw file on disk
#
# NOTE: save.image() persists every global defined here, so variable names are
# part of this script's output. Values that look unused in this script
# (ingroup, currentgroup, name, ...) are deliberately left in place; whether a
# later step reads them cannot be verified from this file.

# On error, print the message to stderr and exit with status 1.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)
# Route R messages to stdout.
sink(stdout(), type = "message")

suppressWarnings(suppressMessages(library("batch")))
suppressWarnings(suppressMessages(library(tools)))

listArguments <- parseCommandArgs(evaluate = FALSE)
print(listArguments)

technology <- listArguments[["technology"]]
designway <- listArguments[["designway"]]
listArguments[["designway"]] <- NULL
listArguments[["technology"]] <- NULL

# Fail with an explicit message instead of "argument is of length zero".
if (is.null(technology)) {
  stop("Missing required argument 'technology'.", call. = FALSE)
}
if (is.null(designway)) {
  stop("Missing required argument 'designway'.", call. = FALSE)
}

listcelsfullpath <- c()
listfullnames <- c()
# The first element of each vector is the header row of the written design.
listcelsnames <- c("sample")
listgroup <- c("group")
designpath <- ""
designo <- c()
currentgroup <- ""
ingroup <- FALSE
print(names(listArguments))

if (designway == "makeit") {
  # Arguments are walked in command-line order: a group.* entry sets the
  # group for every rank.* entry that follows, until the next group.*.
  for (name in names(listArguments)) {
    print(name)
    if (startsWith(name, "group")) {
      currentgroup <- listArguments[[name]]
    }
    if (startsWith(name, "rank")) {
      listcelsnames <- c(
        listcelsnames,
        file_path_sans_ext(basename(listArguments[[name]]))
      )
      listfullnames <- c(listfullnames, basename(listArguments[[name]]))
      listgroup <- c(listgroup, currentgroup)
    }
    if (startsWith(name, "file")) {
      listcelsfullpath <- c(listcelsfullpath, listArguments[[name]])
    }
  }

  write.table(
    format(cbind(listcelsnames, listgroup), justify = "right"),
    file = "Imported.DataSet.Design.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
  )

  # Same table without the header row, kept in memory as a data frame.
  designo <- as.data.frame(
    cbind(listcelsnames[-1], listgroup[-1], deparse.level = 0)
  )
  colnames(designo) <- c("sample", "group")
} else {
  designpath <- listArguments[["designfile"]]
  listArguments[["designfile"]] <- NULL
  if (is.null(designpath) || !file.exists(designpath)) {
    stop(
      "Argument 'designfile' must name an existing file when 'designway' ",
      "is not \"makeit\".",
      call. = FALSE
    )
  }
  designo <- read.table(designpath, header = TRUE)

  for (name in names(listArguments)) {
    if (startsWith(name, "rank")) {
      listcelsnames <- c(
        listcelsnames,
        file_path_sans_ext(basename(listArguments[[name]]))
      )
      listfullnames <- c(listfullnames, basename(listArguments[[name]]))
    }
    if (startsWith(name, "file")) {
      listcelsfullpath <- c(listcelsfullpath, listArguments[[name]])
    }
  }
}

rm(listArguments)
save.image("Imported.Project.Information.RData")
<!-- File: read_make_design_datasets/Make_Design_Read_Datasets.xml -->
<tool id="Make_Design_Read_Datasets" name="Make Design and Read Datasets" version="1.0">
    <description>Create a design file, collect technology information and read datasets.</description>
    <macros>
        <import>citations.xml</import>
    </macros>
    <requirements>
        <requirement type="package">r-base</requirement>
        <requirement type="package">r-batch</requirement>
        <requirement type="package">bioconductor-affy</requirement>
        <requirement type="package">bioconductor-affyPLM</requirement>
        <requirement type="package">bioconductor-limma</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>
    <command>
        <![CDATA[
        Rscript '$__tool_directory__/Make_Design.R'
            technology '$technology'
            designway "makeit"
            ## One running counter numbers every group/rank/file argument, so the
            ## argument names stay unique and keep their command-line order.
            #set compteur=0
            #for $i, $s in enumerate( $filesmake)
                group.${$compteur} '${s.group}'
                #set compteur=compteur+1
                #for $fichero in $s.inputmake:
                    rank.${$compteur} '${fichero.name}'
                    #set compteur=compteur+1
                    file.${$compteur} '${fichero.file_name}'
                    #set compteur=compteur+1
                #end for
                ##rank.${i} ${s.inputmake.name}
                ##file.${i} ${s.inputmake.file_name}
                ##group.${i} ${s.group}
            #end for
        ## The second script loads the image written by the first one, so it must
        ## only start after the first has finished successfully.
        &&
        Rscript '$__tool_directory__/ReadDataSet.R'
            datasetsource "extern"
            imageimported "Imported.Project.Information.RData"
        ]]>
    </command>
    <inputs>
        <param name="technology" type="select" label="Select the technology">
            <option value="Affymetrix" selected="true">Affymetrix</option>
            <option value="Agilent_One_Color">Agilent One Color</option>
            <option value="Agilent_Two_Colors">Agilent Two Colors</option>
            <option value="GenePix_One_Color">GenePix One Color</option>
            <option value="GenePix_Two_Colors">GenePix Two Colors</option>
        </param>
        <repeat name="filesmake" title="File">
            <param name="inputmake" type="data" multiple="True" format="cel,gpr,gz,txt,binary" label="Select a raw data file.">
            </param>
            <param name="group" type="text" label="Group">
                <validator type="empty_field" message="You must specify a group"/>
                <validator type="expression" message="No space allowed">' ' not in value</validator>
            </param>
        </repeat>
    </inputs>
    <outputs>
        <data name="Imported.DataSet.Design" format="tabular" from_work_dir="Imported.DataSet.Design.tsv" label="DataSet.Design.${technology}.tsv"/>
        <data name="ReadDataSetRData" format="rdata" from_work_dir="MicroArrayObject.RData" label="Read.Project.Data.RData" />
    </outputs>
    <tests>
        <test>
            <!-- Select parameters are addressed by option value, not by display label. -->
            <param name="technology" value="GenePix_Two_Colors" />
            <param name="filesmake_0|inputmake" value="GSM103772_1,GSM103772_2,GSM103773_1,GSM103773_2" ftype="gpr" />
            <param name="filesmake_0|group" value="Col-0_MgSO4_24hpi_C_miniarray" ftype="gpr" />
            <param name="filesmake_1|inputmake" value="GSM103774_1,GSM103774_2,GSM103775_1,GSM103775_2" ftype="gpr" />
            <param name="filesmake_1|group" value="Col-0_PsmES4326_24hpi_C_miniarray" ftype="gpr" />
            <output name="Imported.DataSet.Design" ftype="tabular" file="DataSet.Design.GenePix_Two_Colors.tsv"/>
            <output name="ReadDataSetRData" file="Read.Project.Data.RData" ftype="rdata" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**Authors** T.Bensellak, B.Ettetuani.

---------------------------------------------------

========================================================
Make design and read dataSets
========================================================

-----------
Description
-----------

Creates a design and stores the needed information about the datasets; it also reads the chosen dataset so that it can be processed. Datasets can be imported or selected from the available datasets.

-----------------
Workflow position
-----------------

**Upstream tools**

+---------------------------+--------------------+------------+
| Name                      | Output file        | Format     |
+===========================+====================+============+
| Query GEO or upload files | Raw files          | Cel,Gpr,Gz |
+---------------------------+--------------------+------------+

**Downstream tools**

+-------------------------------+-------------------------------+--------+
| Name                          | Output file                   | Format |
+===============================+===============================+========+
| Preprocess.DataSet.Microarray | Preprocess.Project.Data.RData | RData  |
+-------------------------------+-------------------------------+--------+

-----------
Input files
-----------

+-------------------------+------------+
| Parameter : num + label | Format     |
+=========================+============+
| Raw Files               | CEL,GPR,GZ |
+-------------------------+------------+
| Design                  | Tabular    |
+-------------------------+------------+
| Technology              | String     |
+-------------------------+------------+

------------
Output files
------------

**DataSet.Design.tsv**

**Read.Project.Data.RData**

------------------------------
General schema of the workflow
------------------------------

https://bensellak.github.io/microarrays-galaxy/
    ]]></help>
    <expand macro="R_citation"/>
</tool>
# File: read_make_design_datasets/ReadDataSet.R
#
# Reads the raw microarray files of a project with the reader that matches the
# project's technology and saves the whole workspace, including the result in
# `MicroArray_Object`, to "MicroArrayObject.RData".
#
# Recognised arguments (parsed with batch::parseCommandArgs):
#   datasetsource   "intern" reads a project selected from `listdataset`;
#                   any other value reads the files listed in a previously
#                   saved workspace (see `imageimported`)
#   image           ("intern") workspace to load before reading
#   projectchoice   ("intern") integer index into `listdataset`
#   imageimported   (other modes) workspace to load before reading
# Any remaining arguments are forwarded to the reader function.
#
# `technology`, `listdataset`, `listcelsfullpath` and `listfullnames` are not
# defined in this script; it expects them to come from the loaded workspace.

# On error, print the message to stderr and exit with status 1.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)
# Route R messages to stdout.
sink(stdout(), type = "message")

suppressWarnings(suppressMessages(library("batch")))
suppressWarnings(suppressMessages(library(affy)))
suppressWarnings(suppressMessages(library(affyPLM)))
suppressWarnings(suppressMessages(library(limma)))

# Source a file that sits next to this script, located through the --file=
# entry of the R command line.
source_local <- function(fname) {
  argv <- commandArgs(trailingOnly = FALSE)
  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
  source(paste(base_dir, fname, sep = "/"))
}
source_local("Read_GenePix_Functions.R")
source_local("Read_Affymetrix_Functions.R")
source_local("Read_Agilent_Functions.R")

listArguments <- parseCommandArgs(evaluate = FALSE)
datasetsource <- listArguments[["datasetsource"]]
listArguments[["datasetsource"]] <- NULL
if (is.null(datasetsource)) {
  stop("Missing required argument 'datasetsource'.", call. = FALSE)
}

if (datasetsource == "intern") {
  if (!is.null(listArguments[["image"]])) {
    load(listArguments[["image"]])
    listArguments[["image"]] <- NULL
  }
  dataset <- listArguments[["projectchoice"]]
  colus <- as.integer(dataset)
  datachosen <- listdataset[colus]
  listArguments[["projectchoice"]] <- NULL
  # The chosen project goes first so it binds to the reader's first parameter.
  listArguments <- append(list(datachosen), listArguments)
  reader_suffix <- ""
} else {
  if (!is.null(listArguments[["imageimported"]])) {
    load(listArguments[["imageimported"]])
    listArguments[["imageimported"]] <- NULL
  }
  listArguments <- append(
    listArguments,
    list(listcelsfullpath = listcelsfullpath, listfullnames = listfullnames)
  )
  reader_suffix <- "Imported"
}

if (!exists("technology")) {
  stop(
    "No 'technology' is defined; it is expected in the loaded workspace.",
    call. = FALSE
  )
}

# Each technology has one reader per mode; the imported-mode reader has the
# same name with an "Imported" suffix.
reader_name <- switch(
  as.character(technology),
  Affymetrix = "ReadAffymetrix",
  Agilent_One_Color = "ReadAgilentOneChannel",
  Agilent_Two_Colors = "ReadAgilentTwoChannels",
  GenePix_One_Color = "ReadGenePixOneChannel",
  GenePix_Two_Colors = "ReadGenePixTwoChannels"
)
if (is.null(reader_name)) {
  # Previously an unknown technology was skipped silently and an image
  # without MicroArray_Object was saved.
  stop("Unsupported technology: ", technology, call. = FALSE)
}
reader_name <- paste0(reader_name, reader_suffix)

# ReadAffymetrixImported has no `listfullnames` parameter.
if (reader_name == "ReadAffymetrixImported") {
  listArguments[["listfullnames"]] <- NULL
}

MicroArray_Object <- do.call(reader_name, listArguments)

# Drop the temporaries so the saved workspace holds the same objects as before.
rm(listArguments, reader_suffix, reader_name)
save.image("MicroArrayObject.RData")
# File: read_make_design_datasets/Read_Affymetrix_Functions.R

# Read an Affymetrix project stored under `path`.
#
# CEL files are read with ReadAffy() from "<path>/data/rawfiles" and the design
# table from "<path>/data/design/design.txt". The design is also written,
# right-justified and tab-separated, to "design.tsv" in the working directory.
#
# path        project root directory
# compressed  forwarded to ReadAffy() as `compress`
#
# Returns list(affy_object = <ReadAffy result>, designo = <design data frame>).
ReadAffymetrix <- function(path = "", compressed = TRUE) {
  raw_dir <- paste0(path, "/data/rawfiles")
  design_file <- paste0(path, "/", "data/design/design.txt")

  affy_object <- ReadAffy(celfile.path = raw_dir, compress = compressed)
  designo <- read.table(design_file, header = TRUE)

  write.table(
    format(designo, justify = "right"),
    file = "design.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
  )

  list(affy_object = affy_object, designo = designo)
}

# Read an explicit set of CEL files.
#
# listcelsfullpath  paths forwarded to ReadAffy() as `filenames`
# compressed        forwarded to ReadAffy() as `compress`
#
# Returns list(affy_object = <ReadAffy result>).
ReadAffymetrixImported <- function(listcelsfullpath = "", compressed = TRUE) {
  list(
    affy_object = ReadAffy(filenames = listcelsfullpath, compress = compressed)
  )
}
# File: read_make_design_datasets/Read_Agilent_Functions.R
#
# NOTE: several functions below assign to `AgilFiles` / `files` with `<<-`.
# Those assignments are kept as they are; they are visible outside the
# functions once these run.

# List the Agilent raw files of a project and read its design table.
#
# File names under "<path>/data/rawfiles" are matched against four suffix
# patterns in turn; matches are concatenated pattern by pattern. The design is
# read from "<path>/data/design/design.txt" and written, right-justified and
# tab-separated, to "design.tsv" in the working directory.
#
# Side effect: stores the selected file names in `AgilFiles` via `<<-`.
# Returns list(AgilFiles = <file names>, designo = <design data frame>).
getAgilFiles <- function(path) {
  raw_listing <- list.files(paste0(path, "/data/rawfiles"))
  suffix_patterns <- c("*.TXT.GZ$", "*.txt.gz$", "*.txt$", "*.TXT$")
  hits <- unlist(lapply(suffix_patterns, grep, x = raw_listing))
  selected <- raw_listing[hits]
  AgilFiles <<- selected

  designo <- read.table(
    paste0(path, "/", "data/design/design.txt"),
    header = TRUE
  )
  write.table(
    format(designo, justify = "right"),
    file = "design.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
  )

  list(AgilFiles = selected, designo = designo)
}

# Read the one-channel Agilent files of a project (green.only = TRUE).
#
# Side effect: stores the getAgilFiles() result in `files` via `<<-`.
# Returns list(RFile = <read.maimages result>, designo = <design data frame>).
ReadAgilentOneChannel <- function(path) {
  found <- getAgilFiles(path)
  files <<- found
  RFile <- read.maimages(
    found[[1]],
    source = "agilent",
    path = paste0(path, "/data/rawfiles"),
    green.only = TRUE
  )
  list(RFile = RFile, designo = found[[2]])
}

# Read the two-channel Agilent files of a project.
#
# Side effect: stores the getAgilFiles() result in `files` via `<<-`.
# Returns list(RFile = <read.maimages result>, designo = <design data frame>).
ReadAgilentTwoChannels <- function(path) {
  found <- getAgilFiles(path)
  files <<- found
  RFile <- read.maimages(
    found[[1]],
    source = "agilent",
    path = paste0(path, "/data/rawfiles")
  )
  list(RFile = RFile, designo = found[[2]])
}

# Wrap an explicit list of file paths for the imported-mode readers.
#
# `listfullnames` is evaluated but not used for filtering: every path in
# `listcelsfullpath` is kept.
#
# Side effect: stores `listcelsfullpath` in `AgilFiles` via `<<-`.
# Returns list(AgilFiles = listcelsfullpath).
getAgilFilesImported <- function(listcelsfullpath, listfullnames) {
  force(listfullnames)
  AgilFiles <<- listcelsfullpath
  list(AgilFiles = listcelsfullpath)
}

# Read explicitly listed one-channel Agilent files (green.only = TRUE).
#
# Side effect: stores the getAgilFilesImported() result in `files` via `<<-`.
# Returns list(RFile = <read.maimages result>).
ReadAgilentOneChannelImported <- function(listcelsfullpath, listfullnames) {
  found <- getAgilFilesImported(listcelsfullpath, listfullnames)
  files <<- found
  list(
    RFile = read.maimages(found[[1]], source = "agilent", green.only = TRUE)
  )
}

# Read explicitly listed two-channel Agilent files.
#
# Side effect: stores the getAgilFilesImported() result in `files` via `<<-`.
# Returns list(RFile = <read.maimages result>).
ReadAgilentTwoChannelsImported <- function(listcelsfullpath, listfullnames) {
  found <- getAgilFilesImported(listcelsfullpath, listfullnames)
  files <<- found
  list(RFile = read.maimages(found[[1]], source = "agilent"))
}
# File: read_make_design_datasets/Read_GenePix_Functions.R
#
# NOTE: several functions below assign to `gprFiles` / `files` with `<<-`.
# Those assignments are kept as they are; they are visible outside the
# functions once these run.

# List the GenePix (*.gpr) raw files of a project and read its design table.
#
# File names under "<path>/data/rawfiles" are matched against four suffix
# patterns in turn; matches are concatenated pattern by pattern. The design is
# read from "<path>/data/design/design.txt" and written, right-justified and
# tab-separated, to "design.tsv" in the working directory.
#
# Side effect: stores the selected file names in `gprFiles` via `<<-`.
# Returns list(gprFiles = <file names>, designo = <design data frame>).
getGprFiles <- function(path) {
  raw_listing <- list.files(paste0(path, "/data/rawfiles"))
  suffix_patterns <- c("*.GPR.GZ$", "*.gpr.gz$", "*.GPR$", "*.gpr$")
  hits <- unlist(lapply(suffix_patterns, grep, x = raw_listing))
  selected <- raw_listing[hits]
  gprFiles <<- selected

  designo <- read.table(
    paste0(path, "/", "data/design/design.txt"),
    header = TRUE
  )
  write.table(
    format(designo, justify = "right"),
    file = "design.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
  )

  list(gprFiles = selected, designo = designo)
}

# Return the names of the *.GAL / *.gal files under "<path>/data/rawfiles",
# upper-case matches first.
getGalFiles <- function(path) {
  raw_listing <- list.files(paste0(path, "/data/rawfiles"))
  upper_hits <- grep("*.GAL$", raw_listing)
  lower_hits <- grep("*.gal$", raw_listing)
  raw_listing[c(upper_hits, lower_hits)]
}

# Read the two-channel GenePix files of a project.
#
# Side effect: stores the getGprFiles() result in `files` via `<<-`.
# Returns list(RFile = <read.maimages result>, designo = <design data frame>).
ReadGenePixTwoChannels <- function(path) {
  found <- getGprFiles(path)
  files <<- found
  RFile <- read.maimages(
    found[[1]],
    source = "genepix",
    path = paste0(path, "/data/rawfiles")
  )
  list(RFile = RFile, designo = found[[2]])
}

# Read the one-channel GenePix files of a project (green.only = TRUE).
#
# Side effect: stores the getGprFiles() result in `files` via `<<-`.
# Returns list(RFile = <read.maimages result>, designo = <design data frame>).
ReadGenePixOneChannel <- function(path) {
  found <- getGprFiles(path)
  files <<- found
  RFile <- read.maimages(
    found[[1]],
    source = "genepix",
    path = paste0(path, "/data/rawfiles"),
    green.only = TRUE
  )
  list(RFile = RFile, designo = found[[2]])
}

# Wrap an explicit list of file paths for the imported-mode readers.
#
# `listfullnames` is evaluated but not used for filtering: every path in
# `listcelsfullpath` is kept.
#
# Side effect: stores `listcelsfullpath` in `gprFiles` via `<<-`.
# Returns list(gprFiles = listcelsfullpath).
getGprFilesImported <- function(listcelsfullpath, listfullnames) {
  force(listfullnames)
  gprFiles <<- listcelsfullpath
  list(gprFiles = listcelsfullpath)
}

# Imported-mode counterpart of getGalFiles(): returns `listcelsfullpath`
# unchanged. `listfullnames` is evaluated but not used for filtering.
getGalFilesImported <- function(listcelsfullpath, listfullnames) {
  force(listfullnames)
  listcelsfullpath
}

# Read explicitly listed two-channel GenePix files.
#
# Side effect: stores the getGprFilesImported() result in `files` via `<<-`.
# Returns list(RFile = <read.maimages result>).
ReadGenePixTwoChannelsImported <- function(listcelsfullpath, listfullnames) {
  found <- getGprFilesImported(listcelsfullpath, listfullnames)
  files <<- found
  list(RFile = read.maimages(found[[1]], source = "genepix"))
}

# Read explicitly listed one-channel GenePix files (green.only = TRUE).
#
# Side effect: stores the getGprFilesImported() result in `files` via `<<-`.
# Returns list(RFile = <read.maimages result>).
ReadGenePixOneChannelImported <- function(listcelsfullpath, listfullnames) {
  found <- getGprFilesImported(listcelsfullpath, listfullnames)
  files <<- found
  list(
    RFile = read.maimages(found[[1]], source = "genepix", green.only = TRUE)
  )
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read_make_design_datasets/Readme.txt Fri Dec 01 09:58:18 2023 +0000 @@ -0,0 +1,1 @@ +Galaxy tool for making a design file and reading datasets as a binary rdata for the preprocessing step
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read_make_design_datasets/citations.xml Fri Dec 01 09:58:18 2023 +0000 @@ -0,0 +1,66 @@ +<macros> + <token name="@VERSION@">1.0</token> + <xml name="R_citation"> + <citations> + <citation type="bibtex"> + @Manual{, + title = {R: A Language and Environment for Statistical Computing}, + author = {{R Core Team}}, + organization = {R Foundation for Statistical Computing}, + address = {Vienna, Austria}, + year = {2017}, + url = {https://www.R-project.org/}, + } + </citation> + <citation type="bibtex"> + @Article{, + title = {Passing in Command Line Arguments and Parallel Cluster/Multicore Batching in {R} with {batch}}, + author = {Thomas J. Hoffmann}, + journal = {Journal of Statistical Software, Code Snippets}, + year = {2011}, + volume = {39}, + number = {1}, + pages = {1--11}, + url = {http://www.jstatsoft.org/v39/c01/}, + } + </citation> + <citation type="bibtex"> + @Article{, + author = {Laurent Gautier and Leslie Cope and Benjamin M. Bolstad and Rafael A. 
Irizarry}, + title = {affy---analysis of Affymetrix GeneChip data at the probe level}, + journal = {Bioinformatics}, + volume = {20}, + number = {3}, + year = {2004}, + issn = {1367-4803}, + pages = {307--315}, + doi = {10.1093/bioinformatics/btg405}, + publisher = {Oxford University Press}, + address = {Oxford, UK}, + } + </citation> + <citation type="bibtex"> + @Article{, + author = {Matthew E Ritchie and Belinda Phipson and Di Wu and Yifang Hu and Charity W Law and Wei Shi and Gordon K Smyth}, + title = {{limma} powers differential expression analyses for {RNA}-sequencing and microarray studies}, + journal = {Nucleic Acids Research}, + year = {2015}, + volume = {43}, + number = {7}, + pages = {e47}, + } + </citation> + <citation type="bibtex"> + @Article{, + title = {Quality assessment for short oligonucleotide arrays.}, + author = {Julia Brettschneider and Francois Collin and Benjamin M Bolstad and Terence P Speed}, + journal = {Technometrics}, + year = {2007}, + volume = {In press}, + } + </citation> + + </citations> + </xml> + +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read_make_design_datasets/test-data/DataSet.Design.GenePix_Two_Colors.tsv Fri Dec 01 09:58:18 2023 +0000 @@ -0,0 +1,9 @@ + sample group + GSM103772_1 Col-0_MgSO4_24hpi_C_miniarray + GSM103772_2 Col-0_MgSO4_24hpi_C_miniarray + GSM103773_1 Col-0_MgSO4_24hpi_C_miniarray + GSM103773_2 Col-0_MgSO4_24hpi_C_miniarray + GSM103774_1 Col-0_PsmES4326_24hpi_C_miniarray + GSM103774_2 Col-0_PsmES4326_24hpi_C_miniarray + GSM103775_1 Col-0_PsmES4326_24hpi_C_miniarray + GSM103775_2 Col-0_PsmES4326_24hpi_C_miniarray