Mercurial > repos > proteore > proteore_clusterprofiler

--- a/GO-enrich.R	Wed Feb 27 03:39:16 2019 -0500
+++ b/GO-enrich.R	Fri Jun 28 05:08:48 2019 -0400
@@ -44,7 +44,7 @@
   return (width)
 }

-repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
+repartition_GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
   ggo<-groupGO(gene=geneid,
                OrgDb = orgdb,
                ont=ontology,
@@ -66,7 +66,7 @@
 }

 # GO over-representation test
-enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) {
+enrich_GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) {
   ego<-enrichGO(gene=geneid,
                 universe=universe,
                 OrgDb=orgdb,
@@ -107,6 +107,15 @@
   }
 }

+clean_ids <- function(ids){
+  # Remove every space character inside each identifier.
+  stripped <- gsub(" ", "", ids)
+  # Keep entries that are neither missing, empty, nor the literal "NA".
+  keep <- !is.na(stripped) & stripped != "" & stripped != "NA"
+
+  return(stripped[keep])
+}
+
 check_ids <- function(vector,type) {
   uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
   entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
@@ -117,7 +126,7 @@
   }
 }

-clusterProfiler = function() {
+get_args <- function(){
   args <- commandArgs(TRUE)
   if(length(args)<1) {
     args <- c("--help")
@@ -153,10 +162,18 @@
   args <- as.list(as.character(argsDF$V2))
   names(args) <- argsDF$V1

+  return(args)
+}
+
+
+main <- function() {
+
+  #get args from command
+  args <- get_args()
+
   #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda")
   #load("/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda")

-
   go_represent=str2bool(args$go_represent)
   go_enrich=str2bool(args$go_enrich)
   if (go_enrich){
@@ -179,7 +196,7 @@
   id_type = args$id_type

   if (input_type == "text") {
-    input = strsplit(args$input, "[ \t\n]+")[[1]]
+    input = unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]],";"))
   } else if (input_type == "file") {
     filename = args$input
     ncol = args$ncol
@@ -193,7 +210,7 @@
     file = read_file(filename, header)              # Extract Protein IDs list
     input =  unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE))
   }
-
+  input = clean_ids(input)

   ## Get input gene list from input IDs
   #ID format Conversion
@@ -222,7 +239,7 @@
     if (!is.null(args$universe_type)) {
       universe_type = args$universe_type
       if (universe_type == "text") {
-        universe = strsplit(args$universe, "[ \t\n]+")[[1]]
+        universe = unlist(strsplit(strsplit(args$universe, "[ \t\n]+")[[1]],";"))  # background IDs are read from args$universe, not args$input
       } else if (universe_type == "file") {
         universe_filename = args$universe
         universe_ncol = args$uncol
@@ -238,6 +255,7 @@
         # Extract Protein IDs list
         universe <- unlist(sapply(universe_file[,universe_ncol], function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE))
       }
+      universe = clean_ids(universe)  # clean the background list itself; passing `input` would replace it with the query IDs
       universe_id_type = args$universe_id_type
       ##to initialize
       if (universe_id_type=="Uniprot" & any(check_ids(universe,"uniprot"))) {
@@ -265,14 +283,14 @@
   ##enrichGO : GO over-representation test
   for (onto in ontology) {
     if (go_represent) {
-      ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE)
+      ggo<-repartition_GO(gene, orgdb, onto, level, readable=TRUE)
       if (is.list(ggo)){ggo <- as.data.frame(apply(ggo, c(1,2), function(x) gsub("^$|^ $", NA, x)))}  #convert "" and " " to NA
       output_path = paste("cluster_profiler_GGO_",onto,".tsv",sep="")
       write.table(ggo, output_path, sep="\t", row.names = FALSE, quote = FALSE )
     }

     if (go_enrich) {
-      ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot)
+      ego<-enrich_GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot)
       if (is.list(ego)){ego <- as.data.frame(apply(ego, c(1,2), function(x) gsub("^$|^ $", NA, x)))}  #convert "" and " " to NA
       output_path = paste("cluster_profiler_EGO_",onto,".tsv",sep="")
       write.table(ego, output_path, sep="\t", row.names = FALSE, quote = FALSE )
@@ -280,4 +298,6 @@
   }
 }

-clusterProfiler()
+if(!interactive()) {  # run the analysis only in non-interactive sessions (e.g. Rscript), not when sourced at a prompt
+  main()
+}
--- a/README.rst	Wed Feb 27 03:39:16 2019 -0500
+++ b/README.rst	Fri Jun 28 05:08:48 2019 -0400
@@ -40,6 +40,13 @@

 Text (tables) and graphics representing the repartition and/or enrichment of GO categories.

+**Packages used**
+    - bioconductor-org.hs.eg.db v3.5.0
+    - bioconductor-org.mm.eg.db v3.5.0
+    - bioconductor-org.rn.eg.db v3.5.0
+    - dose v3.2.0
+    - clusterprofiler v3.4.4
+
 **User manual / Documentation** of the clusterProfiler R package (functions and parameters):
 https://bioconductor.org/packages/3.7/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html
 (Very well explained)
\ No newline at end of file
--- a/cluster_profiler.xml	Wed Feb 27 03:39:16 2019 -0500
+++ b/cluster_profiler.xml	Fri Jun 28 05:08:48 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="cluter_profiler" name="GO terms classification and enrichment analysis" version="2019.02.18">
+<tool id="cluter_profiler" name="GO terms classification and enrichment analysis" version="2019.06.27.1">
     <description>(Human, Mouse, Rat)[clusterProfiler]</description>
     <requirements>
         <requirement type="package" version="3.4.1">R</requirement>
@@ -56,7 +56,7 @@
     ]]></command>
     <inputs>
         <conditional name="input" >
-            <param name="ids" type="select" label="Enter your IDs (UniProt Accession numer or Gene ID)" help="Copy/paste or from a file (e.g. table)" >
+            <param name="ids" type="select" label="Enter your IDs (UniProt Accession number or Gene ID)" help="Copy/paste or from a file (e.g. table)" >
                 <option value="text">Copy/paste your IDs</option>
                 <option value="file" selected="true">Input file containing your IDs</option>
             </param>
@@ -82,8 +82,8 @@
         </conditional>
         <conditional name="idti" >
             <param name="idtypein" type="select" label="Select type/source of IDs" help="" >
-                <option value="Uniprot">UniProt accession number (e.g.:P31946)</option>
-                <option value="Entrez">Entrez Gene ID (e.g.:4151)</option>
+                <option value="Uniprot">UniProt accession number (e.g. P31946)</option>
+                <option value="Entrez">Entrez Gene ID (e.g. 4151)</option>
             </param>
             <when value="Uniprot"/>
             <when value="Entrez"/>
@@ -101,7 +101,7 @@
         <conditional name="ggo">
             <param name="go_represent" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Perform GO categories representation analysis?"/>
             <when value="true">
-                <param name="level" type="select" label="Ontology level (the higher this number, the deeper the GO level)">
+                <param name="level" type="select" label="Ontology level (the higher this number, the deeper the GO level, up to 3)">
 				    <option value="1">1</option>
 				    <option value="2" selected="True">2</option>
 				    <option value="3">3</option>
@@ -143,7 +143,7 @@
                             </when>
                         </conditional>
                         <conditional name="universe_idti" >
-                            <param name="universe_idtypein" type="select" label="Select type of background IDs" help="" >
+                            <param name="universe_idtypein" type="select" label="Select type of IDs of your background" help="" >
                                 <option value="Uniprot">UniProt Accession number</option>
                                 <option value="Entrez">Entrez Gene ID</option>
                             </param>
@@ -238,7 +238,11 @@

 Two modes are allowed: either by supplying a tabular file (.csv, .tsv, .txt, .tab) including your IDs (identifiers) or by copy/pasting your IDs (separated by a space).

-"Select type/source of IDs": only entrez gene ID (e.g : 4151, 7412) or Uniprot accession number (e.g. P31946) are allowed. If your list is not in this form, please use the ID_Converter tool of ProteoRE.
+"Select type/source of IDs": only Entrez Gene IDs (e.g. 4151, 7412) or UniProt accession numbers (e.g. P31946) are allowed. If your list is not in this form, please use the ID_Converter tool of ProteoRE.
+
+.. class:: warningmark
+
+In copy/paste mode, the number of IDs considered in input is limited to 5000.

 -----

@@ -272,6 +276,7 @@
 -----

 **Authors**
+
 G Yu, LG Wang, Y Han, QY He. clusterProfiler: an R package for comparing biological themes among gene clusters.
 OMICS: A Journal of Integrative Biology 2012, 16(5):284-287. doi:[10.1089/omi.2011.0118](http://dx.doi.org/10.1089/omi.2011.0118)

@@ -282,15 +287,25 @@

 .. class:: infomark

+Bioconductor Packages used:
+
+    - bioconductor-org.hs.eg.db v3.5.0
+    - bioconductor-org.mm.eg.db v3.5.0
+    - bioconductor-org.rn.eg.db v3.5.0
+    - dose v3.2.0
+    - clusterprofiler v3.4.4
+
+.. class:: infomark
+
 **Galaxy integration**

-T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

 This work has been partially funded through the French National Agency for Research (ANR) IFB project.

-Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
+Help: contact@proteore.org for any questions or concerns about this tool.


     ]]></help>