changeset 0:fda6b789e267 draft

Uploaded
author proteomisc
date Fri, 01 Dec 2023 09:58:18 +0000
parents
children af310a65590b
files read_make_design_datasets/Make_Design.R read_make_design_datasets/Make_Design_Read_Datasets.xml read_make_design_datasets/ReadDataSet.R read_make_design_datasets/Read_Affymetrix_Functions.R read_make_design_datasets/Read_Agilent_Functions.R read_make_design_datasets/Read_GenePix_Functions.R read_make_design_datasets/Readme.txt read_make_design_datasets/citations.xml read_make_design_datasets/images/workflow.jpg read_make_design_datasets/test-data/DataSet.Design.GenePix_Two_Colors.tsv read_make_design_datasets/test-data/GSM103772_1 read_make_design_datasets/test-data/GSM103772_2 read_make_design_datasets/test-data/GSM103773_1 read_make_design_datasets/test-data/GSM103773_2 read_make_design_datasets/test-data/GSM103774_1 read_make_design_datasets/test-data/GSM103774_2 read_make_design_datasets/test-data/GSM103775_1 read_make_design_datasets/test-data/GSM103775_2 read_make_design_datasets/test-data/Read.Project.Data.RData
diffstat 19 files changed, 483 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Make_Design.R	Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,63 @@
+options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)}) # on error: write the message to stderr and quit with status 1
+sink(stdout(), type = "message") # send the message stream to stdout
+suppressWarnings(suppressMessages(library("batch"))) # provides parseCommandArgs()
+suppressWarnings(suppressMessages(library(tools))) # provides file_path_sans_ext()
+listArguments = parseCommandArgs(evaluate=FALSE) # command-line name/value pairs, returned as a list
+print(listArguments)
+technology = listArguments[["technology"]] # not read again in this script; it stays in the workspace saved on the last line
+designway = listArguments[["designway"]] # "makeit" builds the design here; any other value reads it from a design file
+listArguments[["designway"]]<-NULL # handled entries are dropped so the loops below only see the remaining arguments
+listArguments[["technology"]]<-NULL
+listcelsfullpath=c() # values of the "file*" arguments
+listfullnames=c() # basenames of the "rank*" values, extension kept
+listcelsnames=c("sample") # header cell, followed by the "rank*" basenames without extension
+listgroup=c("group") # header cell, followed by one group label per "rank*" argument
+designpath =""
+designo=c()
+currentgroup="" # most recent "group*" value seen while looping
+ingroup=F # never read in this script
+names(listArguments)
+if(designway=="makeit"){
+	for (name in names(listArguments)) {
+        print(name) 
+        if(!is.na(pmatch("group",name))){ # argument name starts with "group"
+                currentgroup=listArguments[[name]]
+    	} 
+    	if(!is.na(pmatch("rank",name))){ # argument name starts with "rank": its value is labelled with the current group
+                listcelsnames=c(listcelsnames,file_path_sans_ext(basename(listArguments[[name]]))) 
+               listfullnames=c(listfullnames,basename(listArguments[[name]]))
+		  listgroup=c(listgroup,currentgroup)
+    	}
+        if(!is.na(pmatch("file",name))){
+                # argument name starts with "file": the value is collected as-is
+                listcelsfullpath=c(listcelsfullpath,listArguments[[name]]) 
+    	} 
+        
+	}
+	write.table(format(cbind(listcelsnames,listgroup), justify="right"),sep="\t", quote=FALSE, # two right-justified columns, header cells included
+            row.names=F, col.names=F,file="Imported.DataSet.Design.tsv")
+       designo <- as.data.frame(cbind(listcelsnames[-1],listgroup[-1],deparse.level = 0),colnames=c("sample","group")) # same two columns without the header cells
+        colnames(designo)<-c("sample","group") # column names are assigned here
+	
+
+}else{
+
+	designpath =listArguments[["designfile"]] 
+       listArguments[["designfile"]]<-NULL
+	designo <- read.table(designpath,header = TRUE)
+	for (name in names(listArguments)) { # same collection as in the first branch, but no group labels are gathered
+    	if(!is.na(pmatch("rank",name))){
+                listcelsnames=c(listcelsnames,file_path_sans_ext(basename(listArguments[[name]]))) 
+                listfullnames=c(listfullnames,basename(listArguments[[name]]))
+    	}
+        if(!is.na(pmatch("file",name))){
+                listcelsfullpath=c(listcelsfullpath,listArguments[[name]]) 
+    	} 
+	}
+}
+rm(listArguments)
+save.image(paste("Imported.Project.Information","RData",sep=".")) # writes every remaining global to Imported.Project.Information.RData
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Make_Design_Read_Datasets.xml	Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,145 @@
+<tool id="Make_Design_Read_Datasets" name="Make Design and Read Datasets" version="1.0">
+    <description> Create a design file and collect technology information and read datasets. </description>  
+    <macros>
+        <import>citations.xml</import>
+    </macros>   
+    <requirements>
+        <requirement type="package">r-base</requirement>
+        <requirement type="package">r-batch</requirement>
+        <requirement type="package">bioconductor-affy</requirement>
+        <requirement type="package">bioconductor-affyPLM</requirement>
+        <requirement type="package">bioconductor-limma</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    <command>
+    <![CDATA[
+    Rscript '$__tool_directory__/Make_Design.R' 
+    technology $technology
+    designway "makeit" 
+    #set compteur=0
+    #for $i, $s in enumerate( $filesmake)
+    	group.${$compteur} ${s.group}
+   	#set compteur=compteur+1   
+   	#for $fichero in $s.inputmake:
+   		rank.${$compteur} '${fichero.name}'
+   		#set compteur=compteur+1 
+   		file.${$compteur} '${fichero.file_name}'
+   		#set compteur=compteur+1 
+   	#end for
+   	## Emits one "group.N" per repeat block, then a "rank.N" (dataset name) and "file.N" (dataset path) pair per selected file.
+   	## Dataset names and paths are single-quoted so that a space in a name cannot split the argument.
+   	## The second script loads the image written by the first one, so the two are chained with a double ampersand (run in sequence, stop on failure).
+    #end for
+    &&
+    Rscript '$__tool_directory__/ReadDataSet.R'
+    datasetsource "extern"
+    imageimported "Imported.Project.Information.RData"  
+    ]]>
+    </command> 
+    <inputs>
+        <param name="technology" type="select" label="Select the technology">
+            <option value="Affymetrix" selected="true">Affymetrix</option>
+            <option value="Agilent_One_Color">Agilent One Color</option>
+            <option value="Agilent_Two_Colors">Agilent Two Colors</option>
+            <option value="GenePix_One_Color">GenePix One Color</option>
+            <option value="GenePix_Two_Colors">GenePix Two Colors</option>
+        </param>
+        <repeat name="filesmake" title="File">
+            <param name="inputmake" type="data" multiple="True" format="cel,gpr,gz,txt,binary" label="Select a raw data file.">
+            </param>
+            <param  name="group" type="text" label="Group"> 
+                 <validator type="empty_field" message="You must specify a group"/>
+                 <validator type="expression" message="No space allowed">' ' not in value</validator>
+            </param>
+        </repeat>
+    
+    </inputs>
+    <outputs>
+        <data name="Imported.DataSet.Design" format="tabular" from_work_dir="Imported.DataSet.Design.tsv" label="DataSet.Design.${technology}.tsv"/>
+        <data name="ReadDataSetRData" format="rdata" from_work_dir="MicroArrayObject.RData" label="Read.Project.Data.RData" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="technology" value="GenePix Two Colors" />
+            <param name="filesmake_0|inputmake" value="GSM103772_1,GSM103772_2,GSM103773_1,GSM103773_2" ftype="gpr" />
+            <param name="filesmake_0|group" value="Col-0_MgSO4_24hpi_C_miniarray" ftype="gpr" />
+            <param name="filesmake_1|inputmake" value="GSM103774_1,GSM103774_2,GSM103775_1,GSM103775_2" ftype="gpr" />
+            <param name="filesmake_1|group" value="Col-0_PsmES4326_24hpi_C_miniarray" ftype="gpr" />   
+            <output name="Imported.DataSet.Design" ftype="tabular" file="DataSet.Design.GenePix_Two_Colors.tsv"/>      
+            <output name="ReadDataSetRData" file="Read.Project.Data.RData" ftype="rdata" compare="sim_size"/>
+        </test>
+    </tests>
+    <help>
+.. class:: infomark
+
+**Authors**  T.Bensellak, B.Ettetuani.  
+
+---------------------------------------------------
+
+========================================================
+Make design and read dataSets 
+========================================================
+
+-----------
+Description
+-----------
+
+Creates the design file and stores the information needed about the datasets; it also reads the chosen dataset so that it can be processed. A dataset can be imported or selected from the available datasets.
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
++---------------------------+--------------------+-----------+
+| Name                      | output file        |format     |
++===========================+====================+===========+
+| Query GEO or upload files | Raw files          | Cel,Gpr,Gz|   
++---------------------------+--------------------+-----------+
+
+
+
+**Downstream tools**
+
++-------------------------------------------+----------------------------------+----------+
+| Name                                      | Output file                      | Format   |
++===========================================+==================================+==========+
+|Preprocess.DataSet.Microarray              | Preprocess.Project.Data.RData    | RData    |
++-------------------------------------------+----------------------------------+----------+
+
+
+-----------
+Input files
+-----------
+
++---------------------------+-----------------+
+| Parameter : num + label   |   Format        |
++===========================+=================+
+| Raw Files                 |   CEL,GPR,GZ    |
++---------------------------+-----------------+
+| Design                    |   Tabular       |
++---------------------------+-----------------+
+| Technology                |   String        |
++---------------------------+-----------------+
+
+------------
+Output files
+------------
+
+**DataSet.Design.tsv**
+
+**Read.Project.Data.RData** 
+
+------------------------------
+General schema of the workflow
+------------------------------
+
+https://bensellak.github.io/microarrays-galaxy/
+
+    </help>
+    <expand macro="R_citation">
+    </expand>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/ReadDataSet.R	Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,81 @@
+options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)}) # on error: write the message to stderr and quit with status 1
+sink(stdout(), type = "message") # send the message stream to stdout
+suppressWarnings(suppressMessages(library("batch"))) # provides parseCommandArgs()
+suppressWarnings(suppressMessages(library(affy))) # provides ReadAffy(), used by the Affymetrix helpers
+suppressWarnings(suppressMessages(library(affyPLM))) # not referenced by name in the code visible here
+suppressWarnings(suppressMessages(library(limma))) # provides read.maimages(), used by the Agilent and GenePix helpers
+source_local <- function(fname){ # source() a file that sits in the same directory as the running script
+    script_arg <- grep("--file=", commandArgs(trailingOnly = FALSE), value = TRUE) # the "--file=<script path>" argument
+    script_dir <- dirname(substring(script_arg, 8)) # characters 1-7 are the "--file=" prefix
+    source(paste(script_dir, fname, sep="/"))
+}
+source_local("Read_GenePix_Functions.R") # helper files are sourced from the script's own directory
+source_local("Read_Affymetrix_Functions.R")
+source_local("Read_Agilent_Functions.R")
+listArguments = parseCommandArgs(evaluate=FALSE) # command-line name/value pairs, returned as a list
+datasetsource = listArguments[["datasetsource"]] # "intern" selects the first branch, any other value the second
+listArguments[["datasetsource"]] = NULL
+if(datasetsource=="intern"){
+		if (!is.null(listArguments[["image"]])){
+    		load(listArguments[["image"]]) # restores a previously saved workspace
+    		listArguments[["image"]]=NULL
+		}
+		dataset=listArguments[["projectchoice"]]
+		colus=as.integer(dataset)
+		datachosen=listdataset[colus] # listdataset and technology are not defined in this script; presumably they come from the loaded image - TODO confirm
+		listArguments[["projectchoice"]]=NULL
+		if(technology=="Affymetrix"){
+		listArguments=append(list(datachosen), listArguments) # the chosen entry becomes the first, unnamed argument of the reader
+		MicroArray_Object<-do.call("ReadAffymetrix",listArguments)
+		}
+		if(technology=="Agilent_One_Color"){
+		listArguments=append(list(datachosen), listArguments)
+		MicroArray_Object<-do.call("ReadAgilentOneChannel",listArguments)
+		}
+		if(technology=="Agilent_Two_Colors"){
+		listArguments=append(list(datachosen), listArguments)
+		MicroArray_Object<-do.call("ReadAgilentTwoChannels",listArguments)
+		}
+		if(technology=="GenePix_One_Color"){
+		listArguments=append(list(datachosen), listArguments)
+		MicroArray_Object<-do.call("ReadGenePixOneChannel",listArguments)
+		}
+		if(technology=="GenePix_Two_Colors"){
+		listArguments=append(list(datachosen), listArguments)
+		MicroArray_Object<-do.call("ReadGenePixTwoChannels",listArguments)
+		}
+}else{
+		if (!is.null(listArguments[["imageimported"]])){
+    		load(listArguments[["imageimported"]]) # image written by Make_Design.R: supplies technology, listcelsfullpath and listfullnames
+    		listArguments[["imageimported"]]=NULL
+		}
+		listArguments<-append(listArguments,list(listcelsfullpath=listcelsfullpath,listfullnames=listfullnames)) # passed by name to the *Imported readers
+		if(technology=="Affymetrix"){
+                listArguments[["listfullnames"]]=NULL # ReadAffymetrixImported has no listfullnames parameter
+		MicroArray_Object<-do.call("ReadAffymetrixImported",listArguments)
+		}
+		if(technology=="Agilent_One_Color"){
+		MicroArray_Object<-do.call("ReadAgilentOneChannelImported",listArguments)
+		}
+		if(technology=="Agilent_Two_Colors"){
+		MicroArray_Object<-do.call("ReadAgilentTwoChannelsImported",listArguments)
+		}
+		if(technology=="GenePix_One_Color"){
+		MicroArray_Object<-do.call("ReadGenePixOneChannelImported",listArguments)
+		}
+		if(technology=="GenePix_Two_Colors"){
+		MicroArray_Object<-do.call("ReadGenePixTwoChannelsImported",listArguments)
+		}
+                # if technology matches none of the five values, MicroArray_Object is never assigned in this branch
+}
+
+
+rm(listArguments)
+save.image(paste("MicroArrayObject","RData",sep=".")) # writes every remaining global to MicroArrayObject.RData
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Read_Affymetrix_Functions.R	Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,13 @@
+ReadAffymetrix <- function(path = "", compressed = TRUE) {
+  # Point ReadAffy at <path>/data/rawfiles and read <path>/data/design/design.txt; the design is also written to design.tsv.
+  affy_object <- ReadAffy(celfile.path = file.path(path, "data", "rawfiles"), compress = compressed)
+  designo <- read.table(file.path(path, "data", "design", "design.txt"), header = TRUE)
+  write.table(format(designo, justify = "right"), sep = "\t", quote = FALSE,
+              row.names = FALSE, col.names = TRUE, file = "design.tsv") # TRUE/FALSE spelled out: T and F can be reassigned
+  return(list(affy_object = affy_object, designo = designo)) # the ReadAffy result and the design data frame
+}
+ReadAffymetrixImported <- function(listcelsfullpath = "", compressed = TRUE) {
+  # Hand the given file paths to ReadAffy and wrap the result in a one-element named list.
+  cel_data <- ReadAffy(filenames = listcelsfullpath, compress = compressed)
+  list(affy_object = cel_data)
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Read_Agilent_Functions.R	Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,42 @@
+getAgilFiles <- function(path)
+{ # List the Agilent raw files in <path>/data/rawfiles and read <path>/data/design/design.txt, which is also written to design.tsv.
+  files <- list.files(file.path(path, "data", "rawfiles"))
+  # Real regexes (escaped dots, no glob "*"), tried in the original order. `<<-` is retained so the global `AgilFiles` still exists after the call.
+  AgilFiles <<- files[unlist(lapply(c("\\.TXT\\.GZ$", "\\.txt\\.gz$", "\\.txt$", "\\.TXT$"), grep, x = files))]
+  designo <- read.table(file.path(path, "data", "design", "design.txt"), header = TRUE)
+  write.table(format(designo, justify = "right"), sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE, file = "design.tsv")
+  return(list(AgilFiles = AgilFiles, designo = designo)) # element 1: matching file names, element 2: design data frame
+} # reading agilent files
+ReadAgilentOneChannel <- function(path)
+{ # Read the files listed by getAgilFiles(path) as single-channel (green only) Agilent data; returns list(RFile, designo).
+  files <<- getAgilFiles(path) # global assignment retained: `files` still exists outside the function after the call
+  RFile <- read.maimages(files[[1]], source = "agilent", path = file.path(path, "data", "rawfiles"), green.only = TRUE)
+  return(list(RFile = RFile, designo = files[[2]])) # files[[2]] is the design data frame
+} # reading agilent one channel files 
+
+ReadAgilentTwoChannels <- function(path)
+{ # Read the files listed by getAgilFiles(path) with read.maimages(source = "agilent"), without green.only; the design is returned alongside.
+  files <<- getAgilFiles(path) # assigned outside the function scope, exactly as before
+  two_channel <- read.maimages(files$AgilFiles, source = "agilent", path = paste0(path, "/data/rawfiles"))
+  list(RFile = two_channel, designo = files$designo)
+} 
+getAgilFilesImported <- function(listcelsfullpath, listfullnames)
+{ # Wrap the imported file paths as list(AgilFiles = ...); no extension filtering is applied.
+  # `listfullnames` stays in the signature for existing callers but is not used (the unused local copy was dropped).
+  # The paths are also assigned to the global `AgilFiles`, as in the original.
+  AgilFiles <<- listcelsfullpath
+  return(list(AgilFiles = listcelsfullpath))
+} # wrapping imported agilent file paths
+ReadAgilentOneChannelImported <- function(listcelsfullpath, listfullnames)
+{ # Read the imported Agilent files as single-channel (green only) data; returns list(RFile = ...).
+  files <<- getAgilFilesImported(listcelsfullpath, listfullnames) # global assignment retained from the original
+  RFile <- read.maimages(files[[1]], source = "agilent", green.only = TRUE) # files[[1]] holds the full paths
+  return(list(RFile = RFile))
+} # reading agilent one channel files 
+
+ReadAgilentTwoChannelsImported <- function(listcelsfullpath, listfullnames)
+{ # Read the imported Agilent files with read.maimages(source = "agilent"), without green.only.
+  files <<- getAgilFilesImported(listcelsfullpath, listfullnames) # assigned outside the function scope, exactly as before
+  two_channel <- read.maimages(files$AgilFiles, source = "agilent")
+  list(RFile = two_channel)
+} 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Read_GenePix_Functions.R	Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,63 @@
+getGprFiles <- function(path)
+{ # List the GenePix result files in <path>/data/rawfiles and read <path>/data/design/design.txt, which is also written to design.tsv.
+  files <- dir(file.path(path, "data", "rawfiles"))
+  # Real regexes (escaped dots, no glob "*"), tried in the original order. `<<-` is retained so the global `gprFiles` still exists after the call.
+  gprFiles <<- files[unlist(lapply(c("\\.GPR\\.GZ$", "\\.gpr\\.gz$", "\\.GPR$", "\\.gpr$"), grep, x = files))]
+  designo <- read.table(file.path(path, "data", "design", "design.txt"), header = TRUE)
+  write.table(format(designo, justify = "right"), sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE, file = "design.tsv")
+  return(list(gprFiles = gprFiles, designo = designo)) # element 1: matching file names, element 2: design data frame
+} # reading (*.gpr) files
+
+
+getGalFiles <- function(path)
+{ # Return the names of the GAL files in <path>/data/rawfiles, ".GAL" matches first, then ".gal".
+  files <- dir(file.path(path, "data", "rawfiles"))
+  galFiles <- files[c(grep("\\.GAL$", files), grep("\\.gal$", files))] # escaped dot: a literal ".gal" suffix, not "any character + gal"
+  return(galFiles)
+} # extracting  (*.gal) files names  
+
+ReadGenePixTwoChannels <- function(path)
+{ # Read the files listed by getGprFiles(path) with read.maimages(source = "genepix"), without green.only; the design is returned alongside.
+  files <<- getGprFiles(path) # assigned outside the function scope, exactly as before
+  two_channel <- read.maimages(files$gprFiles, source = "genepix", path = paste0(path, "/data/rawfiles"))
+  list(RFile = two_channel, designo = files$designo)
+} # reading files 
+
+ReadGenePixOneChannel <- function(path)
+{ # Read the files listed by getGprFiles(path) as single-channel (green only) GenePix data; returns list(RFile, designo).
+  files <<- getGprFiles(path) # global assignment retained: `files` still exists outside the function after the call
+  RFile <- read.maimages(files[[1]], source = "genepix", path = file.path(path, "data", "rawfiles"), green.only = TRUE)
+  return(list(RFile = RFile, designo = files[[2]])) # files[[2]] is the design data frame
+  # TRUE is spelled out above because T can be reassigned.
+} 
+getGprFilesImported <- function(listcelsfullpath, listfullnames)
+{ # Wrap the imported file paths as list(gprFiles = ...); no extension filtering is applied.
+  # `listfullnames` stays in the signature for existing callers but is not used (the unused local copy was dropped).
+  # The paths are also assigned to the global `gprFiles`, as in the original.
+  gprFiles <<- listcelsfullpath
+  return(list(gprFiles = listcelsfullpath))
+} # wrapping imported (*.gpr) file paths
+
+
+getGalFilesImported <- function(listcelsfullpath, listfullnames)
+{ # Return the imported paths unchanged; no ".gal" filtering is applied.
+  # `listfullnames` stays in the signature for existing callers but is not used.
+  # The former local copy of it was never read and has been dropped.
+  galFiles <- listcelsfullpath
+  return(galFiles)
+} # returning imported file paths as-is
+
+ReadGenePixTwoChannelsImported <- function(listcelsfullpath, listfullnames)
+{ # Read the imported GenePix files with read.maimages(source = "genepix"), without green.only.
+  files <<- getGprFilesImported(listcelsfullpath, listfullnames) # assigned outside the function scope, exactly as before
+  two_channel <- read.maimages(files$gprFiles, source = "genepix")
+  list(RFile = two_channel)
+} # reading files 
+
+ReadGenePixOneChannelImported <- function(listcelsfullpath, listfullnames)
+{ # Read the imported GenePix files as single-channel (green only) data; returns list(RFile = ...).
+  files <<- getGprFilesImported(listcelsfullpath, listfullnames) # global assignment retained from the original
+  RFile <- read.maimages(files[[1]], source = "genepix", green.only = TRUE) # files[[1]] holds the full paths
+  return(list(RFile = RFile))
+  # TRUE is spelled out above because T can be reassigned.
+} 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/Readme.txt	Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,1 @@
+Galaxy tool that creates a design file and reads the datasets into a binary RData file for the preprocessing step.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/citations.xml	Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,66 @@
+<macros>
+  <token name="@VERSION@">1.0</token>
+  <xml name="R_citation">
+    <citations>
+        <citation type="bibtex">
+          @Manual{,
+    		title = {R: A Language and Environment for Statistical Computing},
+    		author = {{R Core Team}},
+    		organization = {R Foundation for Statistical Computing},
+    		address = {Vienna, Austria},
+    		year = {2017},
+    		url = {https://www.R-project.org/},
+  			}
+        </citation>
+        <citation type="bibtex">
+          @Article{,
+   		 title = {Passing in Command Line Arguments and Parallel Cluster/Multicore Batching in {R} with {batch}},
+    		author = {Thomas J. Hoffmann},
+    		journal = {Journal of Statistical Software, Code Snippets},
+    		year = {2011},
+    		volume = {39},
+    		number = {1},
+    		pages = {1--11},
+    		url = {http://www.jstatsoft.org/v39/c01/},
+  			}
+        </citation>
+        <citation type="bibtex">
+          @Article{,
+  		author = {Laurent Gautier and Leslie Cope and Benjamin M. Bolstad and Rafael A. Irizarry},
+  		title = {affy---analysis of Affymetrix GeneChip data at the probe level},
+  		journal = {Bioinformatics},
+  		volume = {20},
+  		number = {3},
+  		year = {2004},
+  		issn = {1367-4803},
+  		pages = {307--315},
+  		doi = {10.1093/bioinformatics/btg405},
+  		publisher = {Oxford University Press},
+  		address = {Oxford, UK},
+		}
+        </citation>
+        <citation type="bibtex">
+          @Article{,
+  		author = {Matthew E Ritchie and Belinda Phipson and Di Wu and Yifang Hu and Charity W Law and Wei Shi and Gordon K Smyth},
+  		title = {{limma} powers differential expression analyses for {RNA}-sequencing and microarray studies},
+  		journal = {Nucleic Acids Research},
+  		year = {2015},
+  		volume = {43},
+  		number = {7},
+  		pages = {e47},
+		}	
+        </citation>
+        <citation type="bibtex">
+          @Article{,
+  		title = {Quality assessment for short oligonucleotide arrays.},
+  		author = {Julia Brettschneider and Francois Collin and Benjamin M Bolstad and Terence P Speed},
+  		journal = {Technometrics},
+  		year = {2007},
+  		volume = {In press},
+		}
+        </citation>
+
+    </citations>
+  </xml>
+  
+</macros>
Binary file read_make_design_datasets/images/workflow.jpg has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_make_design_datasets/test-data/DataSet.Design.GenePix_Two_Colors.tsv	Fri Dec 01 09:58:18 2023 +0000
@@ -0,0 +1,9 @@
+                           sample	                            group
+                      GSM103772_1	    Col-0_MgSO4_24hpi_C_miniarray
+                      GSM103772_2	    Col-0_MgSO4_24hpi_C_miniarray
+                      GSM103773_1	    Col-0_MgSO4_24hpi_C_miniarray
+                      GSM103773_2	    Col-0_MgSO4_24hpi_C_miniarray
+                      GSM103774_1	Col-0_PsmES4326_24hpi_C_miniarray
+                      GSM103774_2	Col-0_PsmES4326_24hpi_C_miniarray
+                      GSM103775_1	Col-0_PsmES4326_24hpi_C_miniarray
+                      GSM103775_2	Col-0_PsmES4326_24hpi_C_miniarray
Binary file read_make_design_datasets/test-data/GSM103772_1 has changed
Binary file read_make_design_datasets/test-data/GSM103772_2 has changed
Binary file read_make_design_datasets/test-data/GSM103773_1 has changed
Binary file read_make_design_datasets/test-data/GSM103773_2 has changed
Binary file read_make_design_datasets/test-data/GSM103774_1 has changed
Binary file read_make_design_datasets/test-data/GSM103774_2 has changed
Binary file read_make_design_datasets/test-data/GSM103775_1 has changed
Binary file read_make_design_datasets/test-data/GSM103775_2 has changed
Binary file read_make_design_datasets/test-data/Read.Project.Data.RData has changed