changeset 0:4339db844d35 draft

Uploaded
author proteomisc
date Sun, 26 Nov 2023 18:42:20 +0000
parents
children 0e1a528e652b
files geo_query/GetDatasets.R geo_query/Get_Datasets.xml geo_query/Readme.txt geo_query/citations.xml geo_query/datatypetweek.sh geo_query/images/workflow.jpg geo_query/test-data/GSM103772_1.gpr.gz
diffstat 7 files changed, 216 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/geo_query/GetDatasets.R	Sun Nov 26 18:42:20 2023 +0000
@@ -0,0 +1,31 @@
+# Download the RAW supplementary archive of a GEO accession and unpack it into ./CEL.
+# Usage: Rscript GetDatasets.R GeoCode <code>; exit status 2 = download error, 3 = no raw data.
+options(show.error.messages=FALSE, error=function(){cat(geterrmessage(),file=stderr());q("no",1,FALSE)})
+sink(stdout(), type = "message")
+suppressWarnings(suppressMessages(library("batch")))
+suppressWarnings(suppressMessages(library(GEOquery)))
+listArguments = parseCommandArgs(evaluate=FALSE)
+GeoCode=toupper(listArguments[["GeoCode"]])
+# Quiet wrapper around getGEOSuppFiles(); any error is mapped to the sentinel string "skip".
+fetchSupp = function(code){
+  suppressMessages(suppressWarnings(tryCatch(getGEOSuppFiles(code), error = function(cond)"skip")))
+}
+gethelp.df = fetchSupp(GeoCode)
+if(is.null(gethelp.df)){
+  # A NULL result is retried once before it is reported as "no raw data".
+  gethelp.df = fetchSupp(GeoCode)
+}
+if(identical(gethelp.df, "skip")){
+  write("Network trouble, try again or Check your geoCode.", stderr())
+  quit(status=2)
+}
+# Row names of the result are the downloaded file paths; NA means no *RAW* archive among them.
+rawdata = if(is.data.frame(gethelp.df)) grep('RAW', rownames(gethelp.df), value=TRUE)[1] else NA
+if(is.na(rawdata)){
+  write("GeoCode with no raw data, choose another code or retry.", stderr())
+  quit(status=3)
+}
+untar(rawdata, exdir = 'CEL')
+if (file.exists(rawdata)) {
+  file.remove(rawdata)
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/geo_query/Get_Datasets.xml	Sun Nov 26 18:42:20 2023 +0000
@@ -0,0 +1,118 @@
+<tool id="Get_Datasets" name="Query GEO Database" version="20181028">
+    <description> Query Gene Expression Omnibus (GEO) Database using an accession code. </description> 
+    <macros>
+        <import>citations.xml</import>
+    </macros>   
+    <requirements>
+        <requirement type="package" >r-base</requirement>
+        <requirement type="package" >r-batch</requirement>
+        <requirement type="package" version="2.50.5">bioconductor-geoquery</requirement>
+        <requirement type="package" version="1.14">libiconv</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1" level="fatal" description="Wrong accession code" />
+        <exit_code range="2" level="fatal" description="Network trouble, try again or check your GeoCode" />
+        <exit_code range="3" level="fatal" description="GeoCode with no raw data, choose another code or retry" />
+    </stdio>
+    <command>
+    <![CDATA[
+    ## '&&' keeps the Rscript exit status (matched by <stdio>) from being masked by the shell step.
+    Rscript '$__tool_directory__/GetDatasets.R' GeoCode '$GeoCode' &&
+    sh '$__tool_directory__/datatypetweek.sh'
+    ]]>
+    </command> 
+    <inputs>
+    <param name="GeoCode"  type="text" value="GSE4632" label="GEO accession code" help="Gene Expression Omnibus (GEO) accession code">
+        <!-- Only letters and digits are kept; the value is interpolated into a shell command line. -->
+        <sanitizer>
+            <valid initial="string.ascii_letters,string.digits"/>
+        </sanitizer>
+    </param>
+    </inputs>
+    <outputs>
+    <data format="" name="Raw files" >
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.(gpr|GPR)(\.gz)?"  directory="CEL" visible="true"  ext="gpr" assign_primary_output="false"/>
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.(cel|CEL)(\.gz)?"  directory="CEL" visible="true"   ext="cel" assign_primary_output="false" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.(gal|GAL)(\.gz)?"  directory="CEL" visible="true"  ext="gal" assign_primary_output="false" />
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.(txt|TXT)(\.gz)?"  directory="CEL" visible="true"   ext="txt" assign_primary_output="false" />
+    </data>
+    </outputs>
+    <tests>
+    <test>
+        <param name="GeoCode" value="GSE4632" />
+        <output name="Raw files">
+            <discovered_dataset designation="GSM103772_1" ftype="gpr" file="GSM103772_1.gpr.gz" compare="sim_size"/>
+        </output>
+    </test>
+    </tests>
+    <help>
+		
+.. class:: infomark
+
+**Authors**  Bensellak Taoufik bensellak@ensat.ac.ma, Ahmed Moussa. 
+
+---------------------------------------------------
+
+==================================================================
+Query Gene Expression Omnibus (GEO) Database.
+==================================================================
+
+-----------
+Description
+-----------
+
+This tool is used for querying Gene Expression Omnibus Database.
+
+**GEO accession code**
+
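+Accession code of the GEO record to download (for example GSE4632); its RAW supplementary archive is fetched and unpacked.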
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
++------------------------+------------------+--------+-----------+
+| Name                   | output file      |format  | parameter |
++========================+==================+========+===========+
+| NA                     |  NA              | NA     | NA        |
++------------------------+------------------+--------+-----------+
+
+
+**Downstream tools**
+
++----------------------------------------------------------------+------------------------------------------------+--------------+
+| Name                                                           | Output file                                    | Format       |
++================================================================+================================================+==============+
+| Make design and read dataset                                   | Project information and design file            | RData,Tabular|
++----------------------------------------------------------------+------------------------------------------------+--------------+
+
+-----------
+Input files
+-----------
+
++---------------------------+------------+
+| Parameter : num + label   |   Format   |
++===========================+============+
+| GEO accession code        |   String   |
++---------------------------+------------+
+
+------------
+Output files
+------------
+
+**Set of raw samples**
+
+------------------------------
+General schema of the workflow
+------------------------------
+
+.. image:: ./workflow.jpg 
+          :height: 800 
+          :width: 700
+
+    </help>
+    <expand macro="R_citation">
+    </expand>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/geo_query/Readme.txt	Sun Nov 26 18:42:20 2023 +0000
@@ -0,0 +1,1 @@
+Galaxy tool to query Gene Expression Omnibus (GEO) Database using an accession code.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/geo_query/citations.xml	Sun Nov 26 18:42:20 2023 +0000
@@ -0,0 +1,40 @@
+<macros>
+  <token name="@VERSION@">1.0</token>
+  <xml name="R_citation">
+    <citations>
+        <citation type="bibtex">
+          @Manual{,
+    		title = {R: A Language and Environment for Statistical Computing},
+    		author = {{R Core Team}},
+    		organization = {R Foundation for Statistical Computing},
+    		address = {Vienna, Austria},
+    		year = {2017},
+    		url = {https://www.R-project.org/},
+  			}
+        </citation>
+        <citation type="bibtex">
+          @Article{,
+		author = {Sean Davis and Paul Meltzer},
+		title = {GEOquery: a bridge between the Gene Expression Omnibus (GEO) and BioConductor},
+		journal = {Bioinformatics},
+		year = {2007},
+		volume = {23}, number = {14},
+		pages = {1846--1847},
+   			}
+        </citation>
+        <citation type="bibtex">
+          @Article{,
+   		 title = {Passing in Command Line Arguments and Parallel Cluster/Multicore Batching in {R} with {batch}},
+    		author = {Thomas J. Hoffmann},
+    		journal = {Journal of Statistical Software, Code Snippets},
+    		year = {2011},
+    		volume = {39},
+    		number = {1},
+    		pages = {1--11},
+    		url = {http://www.jstatsoft.org/v39/c01/},
+  			}
+        </citation>
+    </citations>
+  </xml>
+  
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/geo_query/datatypetweek.sh	Sun Nov 26 18:42:20 2023 +0000
@@ -0,0 +1,26 @@
+#! /bin/bash
+# Re-encode every GPR file under CEL/ from ISO-8859-1 to UTF-8, in place.
+# Gzip-compressed files are decompressed, converted, and compressed again.
+NON_UTF_FILE_DIR="CEL/"
+
+# Convert one file; the original is replaced only when iconv succeeds.
+to_utf8() {
+	iconv -f ISO-8859-1 -t UTF-8 "$1" > "$1.utf8" && mv "$1.utf8" "$1" || rm -f "$1.utf8"
+}
+
+# Nothing to do when the directory does not exist.
+[ -d "$NON_UTF_FILE_DIR" ] || exit 0
+
+find "$NON_UTF_FILE_DIR" -type f \( -iname \*.gpr.gz -o -iname \*.gpr \) > utf8list
+# Read line by line so file names containing spaces stay intact.
+while IFS= read -r file; do
+	if file --mime-type "$file" | grep -q gzip$; then
+		# gunzip drops the .gz suffix, so the decompressed file is "${file%.*}".
+		plain="${file%.*}"
+		gunzip "$file" && to_utf8 "$plain" && gzip "$plain"
+	else
+		to_utf8 "$file"
+	fi
+done < utf8list
+
+rm utf8list
Binary file geo_query/images/workflow.jpg has changed
Binary file geo_query/test-data/GSM103772_1.gpr.gz has changed