proteore_clusterprofiler: GO-enrich.R comparison

comparison GO-enrich.R @ 10:d951677a50d4 draft

planemo upload commit 4ba1ebe7b3f5e3fabf78b5fed7ed0b92e2cbf9e5-dirty

author	proteore
date	Fri, 28 Jun 2019 05:08:48 -0400
parents	2f67202ffdb3
children	f6107b8ae8f8

comparison

equal deleted inserted replaced

-:2f67202ffdb3
+:d951677a50d4
 width=1000
 }
 return (width)
 }
-repartition.GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {
+repartition_GO <- function(geneid, orgdb, ontology, level=3, readable=TRUE) {  # thin wrapper: passes its arguments to groupGO() and returns the result
 ggo<-groupGO(gene=geneid,
 OrgDb = orgdb,
 ont=ontology,
 level=level,
 readable=readable)  # forward the caller's readable flag; it was previously ignored and always TRUE
 return(ggo)
 }
 }
 # GO over-representation test
-enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) {
+enrich_GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff,plot) {
 ego<-enrichGO(gene=geneid,
 universe=universe,
 OrgDb=orgdb,
 ont=ontology,
 pAdjustMethod="BH",
 } else {
 warning(paste("No Go terms enriched (EGO) found for ",ontology,"ontology"),immediate. = TRUE,noBreaks. = TRUE,call. = FALSE)
 }
 }
+clean_ids <- function(ids){  # tidy a vector of IDs and return only the usable entries
+ids = gsub(" ","",ids)  # delete every space character inside each ID
+ids = ids[which(ids!="")]  # drop entries that are empty strings
+ids = ids[which(ids!="NA")]  # drop entries that are the literal text NA
+ids = ids[!is.na(ids)]  # drop missing values
+return(ids)
+}
 check_ids <- function(vector,type) {
   # Test each element of `vector` against the identifier pattern selected by
   # `type` and return the logical vector produced by grepl().
   #   type == "entrez"  : digits only, or 1-2 capital letters, an underscore,
   #                       then digits (optionally preceded by 1-4 capital letters)
   #   type == "uniprot" : 6- or 10-character accession pattern
   # Any other `type` matches neither branch, so the function yields NULL.
   if (type == "entrez") {
     entrez_regex <- "^([0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+)$"
     return(grepl(entrez_regex, vector))
   }
   if (type == "uniprot") {
     uniprot_regex <- "^([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})$"
     return(grepl(uniprot_regex, vector))
   }
 }
-clusterProfiler = function() {
+get_args <- function(){  # collect the command-line arguments into a named list and return it
 args <- commandArgs(TRUE)  # trailing arguments only
 if(length(args)<1) {
 args <- c("--help")  # nothing supplied: fall back to a lone --help entry
 }
 parseArgs <- function(x) strsplit(sub("^--", "", x), "=")  # strip one leading -- and split each argument on =
 argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))  # one row per argument; V1 is read as the name, V2 as the value
 args <- as.list(as.character(argsDF$V2))  # NOTE(review): an argument without = gives no V2 column - confirm the --help fallback is handled
 names(args) <- argsDF$V1  # key each value by its argument name
+return(args)
+}
+main <- function() {
+#get args from command
+args <- get_args()
 #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda")
 #load("/home/dchristiany/proteore_project/ProteoRE/tools/cluster_profiler/args.Rda")
 go_represent=str2bool(args$go_represent)
 go_enrich=str2bool(args$go_enrich)
 if (go_enrich){
 plot = unlist(strsplit(args$plot,","))
 # Extract input IDs
 input_type = args$input_type
 id_type = args$id_type
 if (input_type == "text") {
-input = strsplit(args$input, "[ \t\n]+")[[1]]
+input = unlist(strsplit(strsplit(args$input, "[ \t\n]+")[[1]],";"))
 } else if (input_type == "file") {
 filename = args$input
 ncol = args$ncol
 # Check ncol
 if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
 }
 header = str2bool(args$header)                  # Get file content
 file = read_file(filename, header)              # Extract Protein IDs list
 input =  unlist(sapply(as.character(file[,ncol]),function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE))
 }
+input = clean_ids(input)
 ## Get input gene list from input IDs
 #ID format Conversion
 #This case : from UNIPROT (protein id) to ENTREZ (gene id)
 #bitr = conversion function from clusterProfiler
 qval_cutoff <- as.numeric(args$qval_cutoff)
 # Extract universe background genes (same as input file)
 if (!is.null(args$universe_type)) {
 universe_type = args$universe_type
 if (universe_type == "text") {
-universe = strsplit(args$universe, "[ \t\n]+")[[1]]
+universe = unlist(strsplit(strsplit(args$universe, "[ \t\n]+")[[1]],";"))  # split the universe text (not the input text) on whitespace, then on ;
 } else if (universe_type == "file") {
 universe_filename = args$universe
 universe_ncol = args$uncol
 # Check ncol
 if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) {
 # Get file content
 universe_file = read_file(universe_filename, universe_header)
 # Extract Protein IDs list
 universe <- unlist(sapply(universe_file[,universe_ncol], function(x) rapply(strsplit(x,";"),c),USE.NAMES = FALSE))
 }
+universe = clean_ids(universe)  # clean the background IDs themselves; cleaning input here replaced the universe with the input list
 universe_id_type = args$universe_id_type
 ##to initialize
 if (universe_id_type=="Uniprot" & any(check_ids(universe,"uniprot"))) {
 idFrom<-"UNIPROT"
 idTo<-"ENTREZID"
 }
 ##enrichGO : GO over-representation test
 for (onto in ontology) {
 if (go_represent) {
-ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE)
+ggo<-repartition_GO(gene, orgdb, onto, level, readable=TRUE)
 if (is.list(ggo)){ggo <- as.data.frame(apply(ggo, c(1,2), function(x) gsub("^$|^ $", NA, x)))}  #convert "" and " " to NA
 output_path = paste("cluster_profiler_GGO_",onto,".tsv",sep="")
 write.table(ggo, output_path, sep="\t", row.names = FALSE, quote = FALSE )
 }
 if (go_enrich) {
-ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot)
+ego<-enrich_GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff,plot)
 if (is.list(ego)){ego <- as.data.frame(apply(ego, c(1,2), function(x) gsub("^$|^ $", NA, x)))}  #convert "" and " " to NA
 output_path = paste("cluster_profiler_EGO_",onto,".tsv",sep="")
 write.table(ego, output_path, sep="\t", row.names = FALSE, quote = FALSE )
 }
 }
 }
-clusterProfiler()
+if(!interactive()) {  # call main() only when R is not running interactively
+main()
+}

Mercurial > repos > proteore > proteore_clusterprofiler

comparison GO-enrich.R @ 10:d951677a50d4 draft