Mercurial > repos > proteore > proteore_clusterprofiler
changeset 10:d951677a50d4 draft
planemo upload commit 4ba1ebe7b3f5e3fabf78b5fed7ed0b92e2cbf9e5-dirty
author | proteore |
---|---|
date | Fri, 28 Jun 2019 05:08:48 -0400 |
parents | 2f67202ffdb3 |
children | cc2bd0d2afa2 |
files | GO-enrich.R README.rst cluster_profiler.xml |
diffstat | 3 files changed, 61 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/GO-enrich.R Wed Feb 27 03:39:16 2019 -0500 +++ b/GO-enrich.R Fri Jun 28 05:08:48 2019 -0400 @@ -44,7 +44,7 @@ return (width) } -repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) { +repartition_GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) { ggo<-groupGO(gene=geneid, OrgDb = orgdb, ont=ontology, @@ -66,7 +66,7 @@ } # GO over-representation test -enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) { +enrich_GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) { ego<-enrichGO(gene=geneid, universe=universe, OrgDb=orgdb, @@ -107,6 +107,15 @@ } } +clean_ids <- function(ids){ + ids = gsub(" ","",ids) + ids = ids[which(ids!="")] + ids = ids[which(ids!="NA")] + ids = ids[!is.na(ids)] + + return(ids) +} + check_ids <- function(vector,type) { uniprot_pattern = "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$" entrez_id = "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$" @@ -117,7 +126,7 @@ } } -clusterProfiler = function() { +get_args <- function(){ args <- commandArgs(TRUE) if(length(args)<1) { args <- c("--help") @@ -153,10 +162,18 @@ args <- as.list(as.character(argsDF$V2)) names(args) <- argsDF$V1 + return(args) +} + + +main <- function() { + + #get args from command + args <- get_args() + #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") #load("/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda") - go_represent=str2bool(args$go_represent) go_enrich=str2bool(args$go_enrich) if (go_enrich){ @@ -179,7 +196,7 @@ id_type = args$id_type if (input_type == "text") { - input = strsplit(args$input, "[ \t\n]+")[[1]] + input = unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]],";")) } else if (input_type == "file") { filename = args$input ncol = args$ncol @@ -193,7 +210,7 @@ file = read_file(filename, header) # Extract Protein IDs list input = 
unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) } - + input = clean_ids(input) ## Get input gene list from input IDs #ID format Conversion @@ -222,7 +239,7 @@ if (!is.null(args$universe_type)) { universe_type = args$universe_type if (universe_type == "text") { - universe = strsplit(args$universe, "[ \t\n]+")[[1]] + universe = unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]],";")) } else if (universe_type == "file") { universe_filename = args$universe universe_ncol = args$uncol @@ -238,6 +255,7 @@ # Extract Protein IDs list universe <- unlist(sapply(universe_file[,universe_ncol], function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE)) } + universe = clean_ids(input) universe_id_type = args$universe_id_type ##to initialize if (universe_id_type=="Uniprot" & any(check_ids(universe,"uniprot"))) { @@ -265,14 +283,14 @@ ##enrichGO : GO over-representation test for (onto in ontology) { if (go_represent) { - ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE) + ggo<-repartition_GO(gene, orgdb, onto, level, readable=TRUE) if (is.list(ggo)){ggo <- as.data.frame(apply(ggo, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA output_path = paste("cluster_profiler_GGO_",onto,".tsv",sep="") write.table(ggo, output_path, sep="\t", row.names = FALSE, quote = FALSE ) } if (go_enrich) { - ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot) + ego<-enrich_GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot) if (is.list(ego)){ego <- as.data.frame(apply(ego, c(1,2), function(x) gsub("^$|^ $", NA, x)))} #convert "" and " " to NA output_path = paste("cluster_profiler_EGO_",onto,".tsv",sep="") write.table(ego, output_path, sep="\t", row.names = FALSE, quote = FALSE ) @@ -280,4 +298,6 @@ } } -clusterProfiler() +if(!interactive()) { + main() +}
--- a/README.rst Wed Feb 27 03:39:16 2019 -0500 +++ b/README.rst Fri Jun 28 05:08:48 2019 -0400 @@ -40,6 +40,13 @@ Text (tables) and graphics representing the repartition and/or enrichment of GO categories. +**Packages used** + - bioconductor-org.hs.eg.db v3.5.0 + - bioconductor-org.mm.eg.db v3.5.0 + - bioconductor-org.rn.eg.db v3.5.0 + - dose v3.2.0 + - clusterprofiler v 3.4.4 + **User manual / Documentation** of the clusterProfiler R package (functions and parameters): https://bioconductor.org/packages/3.7/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html (Very well explained) \ No newline at end of file
--- a/cluster_profiler.xml Wed Feb 27 03:39:16 2019 -0500 +++ b/cluster_profiler.xml Fri Jun 28 05:08:48 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="cluter_profiler" name="GO terms classification and enrichment analysis" version="2019.02.18"> +<tool id="cluter_profiler" name="GO terms classification and enrichment analysis" version="2019.06.27.1"> <description>(Human, Mouse, Rat)[clusterProfiler]</description> <requirements> <requirement type="package" version="3.4.1">R</requirement> @@ -56,7 +56,7 @@ ]]></command> <inputs> <conditional name="input" > - <param name="ids" type="select" label="Enter your IDs (UniProt Accession numer or Gene ID)" help="Copy/paste or from a file (e.g. table)" > + <param name="ids" type="select" label="Enter your IDs (UniProt Accession number or Gene ID)" help="Copy/paste or from a file (e.g. table)" > <option value="text">Copy/paste your IDs</option> <option value="file" selected="true">Input file containing your IDs</option> </param> @@ -82,8 +82,8 @@ </conditional> <conditional name="idti" > <param name="idtypein" type="select" label="Select type/source of IDs" help="" > - <option value="Uniprot">UniProt accession number (e.g.:P31946)</option> - <option value="Entrez">Entrez Gene ID (e.g.:4151)</option> + <option value="Uniprot">UniProt accession number (e.g. P31946)</option> + <option value="Entrez">Entrez Gene ID (e.g. 
4151)</option> </param> <when value="Uniprot"/> <when value="Entrez"/> @@ -101,7 +101,7 @@ <conditional name="ggo"> <param name="go_represent" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Perform GO categories representation analysis?"/> <when value="true"> - <param name="level" type="select" label="Ontology level (the higher this number, the deeper the GO level)"> + <param name="level" type="select" label="Ontology level (the higher this number, the deeper the GO level, up to 3)"> <option value="1">1</option> <option value="2" selected="True">2</option> <option value="3">3</option> @@ -143,7 +143,7 @@ </when> </conditional> <conditional name="universe_idti" > - <param name="universe_idtypein" type="select" label="Select type of background IDs" help="" > + <param name="universe_idtypein" type="select" label="Select type of IDs of your background" help="" > <option value="Uniprot">UniProt Accession number</option> <option value="Entrez">Entrez Gene ID</option> </param> @@ -238,7 +238,11 @@ Two modes are allowed: either by supplying a tabular file (.csv, .tsv, .txt, .tab) including your IDs (identifiers) or by copy/pasting your IDs (separated by a space). -"Select type/source of IDs": only entrez gene ID (e.g : 4151, 7412) or Uniprot accession number (e.g. P31946) are allowed. If your list is not in this form, please use the ID_Converter tool of ProteoRE. +"Select type/source of IDs": only entrez gene ID (e.g. 4151, 7412) or Uniprot accession number (e.g. P31946) are allowed. If your list is not in this form, please use the ID_Converter tool of ProteoRE. + +.. class:: warningmark + +In copy/paste mode, the number of IDs considered in input is limited to 5000. ----- @@ -272,6 +276,7 @@ ----- **Authors** + G Yu, LG Wang, Y Han, QY He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology 2012, 16(5):284-287. 
doi:[10.1089/omi.2011.0118](http://dx.doi.org/10.1089/omi.2011.0118) @@ -282,15 +287,25 @@ .. class:: infomark +Bioconductor Packages used: + + - bioconductor-org.hs.eg.db v3.5.0 + - bioconductor-org.mm.eg.db v3.5.0 + - bioconductor-org.rn.eg.db v3.5.0 + - dose v3.2.0 + - clusterprofiler v 3.4.4 + +.. class:: infomark + **Galaxy integration** -T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR +Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR This work has been partially funded through the French National Agency for Research (ANR) IFB project. -Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool. +Help: contact@proteore.org for any questions or concerns about this tool. ]]></help>