# HG changeset patch # User proteore # Date 1522338208 14400 # Node ID 5e16cec551464f5a3cbc192d91b183593817b79e # Parent 8a91f58782df0c0f65bb20d94a77ba92729d85e2 planemo upload commit 2da0aec067fd35a8ec102ce27ec4bac8f54b1c30-dirty diff -r 8a91f58782df -r 5e16cec55146 GO-enrich.R --- a/GO-enrich.R Fri Mar 23 10:01:41 2018 -0400 +++ b/GO-enrich.R Thu Mar 29 11:43:28 2018 -0400 @@ -39,8 +39,9 @@ } # GO over-representation test -enrich.GO <- function(geneid, orgdb, ontology, pval_cutoff, qval_cutoff) { +enrich.GO <- function(geneid, universe, orgdb, ontology, pval_cutoff, qval_cutoff) { ego<-enrichGO(gene=geneid, + universe=universe, OrgDb=orgdb, keytype="ENTREZID", ont=ontology, @@ -48,6 +49,7 @@ pvalueCutoff=pval_cutoff, qvalueCutoff=qval_cutoff, readable=TRUE) + # Plot bar & dot plots bar_name <- paste("EGO.", ontology, ".bar.png", sep = "") png(bar_name) p <- barplot(ego) @@ -73,9 +75,14 @@ Arguments: --input_type: type of input (list of id or filename) --input: input - --ncol: the column number which you would like to apply... + --ncol: the column number which contains list of input IDs --header: true/false if your file contains a header --id_type: the type of input IDs (UniProt/EntrezID) + --universe_type: list or filename + --universe: background IDs list + --uncol: the column number which contains background IDs list + --uheader: true/false if the background IDs file contains header + --universe_id_type: the type of universe IDs (UniProt/EntrezID) --species --onto_opt: ontology options --go_function: groupGO/enrichGO @@ -90,7 +97,20 @@ argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) args <- as.list(as.character(argsDF$V2)) names(args) <- argsDF$V1 + #print(args) + # Extract OrgDb + if (args$species=="human") { + orgdb<-org.Hs.eg.db + } + else if (args$species=="mouse") { + orgdb<-org.Mm.eg.db + } + else if (args$species=="rat") { + orgdb<-org.Rn.eg.db + } + + # Extract input IDs input_type = args$input_type if (input_type == "text") { input = strsplit(args$input, "[ \t\n]+")[[1]] @@ -100,7 +120,7 @@ ncol = args$ncol # Check ncol if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { - stop("Please enter an integer for level") + stop("Please enter the right format for column number: c[number]") } else { ncol = as.numeric(gsub("c", "", ncol)) @@ -115,33 +135,22 @@ } } id_type = args$id_type - - + ## Get input gene list from input IDs #ID format Conversion #This case : from UNIPROT (protein id) to ENTREZ (gene id) #bitr = conversion function from clusterProfiler - - if (args$species=="human") { - orgdb<-org.Hs.eg.db - } - else if (args$species=="mouse") { - orgdb<-org.Mm.eg.db - } - else if (args$species=="rat") { - orgdb<-org.Rn.eg.db - } - - ##to initialize if (id_type=="Uniprot") { idFrom<-"UNIPROT" idTo<-"ENTREZID" gene<-bitr(input, fromType=idFrom, toType=idTo, OrgDb=orgdb) + gene<-unique(gene$ENTREZID) } else if (id_type=="Entrez") { - gene<-input + gene<-unique(input) } ontology <- strsplit(args$onto_opt, ",")[[1]] + ## Extract GGO/EGO arguments if (args$go_represent == "true") { go_represent <- args$go_represent level <- as.numeric(args$level) @@ -150,16 +159,56 @@ go_enrich <- args$go_enrich pval_cutoff <- as.numeric(args$pval_cutoff) qval_cutoff <- as.numeric(args$qval_cutoff) + # Extract universe background genes (same as input file) + if (!is.null(args$universe_type)) { + universe_type = args$universe_type + if (universe_type == "text") { + universe = strsplit(args$universe, "[ \t\n]+")[[1]] + } + else if (universe_type == "file") { + universe_filename = args$universe + universe_ncol = args$uncol + # Check ncol + if (! as.numeric(gsub("c", "", universe_ncol)) %% 1 == 0) { + stop("Please enter the right format for column number: c[number]") + } + else { + universe_ncol = as.numeric(gsub("c", "", universe_ncol)) + } + universe_header = args$uheader + # Get file content + universe_file = readfile(universe_filename, universe_header) + # Extract Protein IDs list + universe = c() + for (row in as.character(universe_file[,universe_ncol])) { + universe = c(universe, strsplit(row, ";")[[1]][1]) + } + } + universe_id_type = args$universe_id_type + ##to initialize + if (universe_id_type=="Uniprot") { + idFrom<-"UNIPROT" + idTo<-"ENTREZID" + universe_gene<-bitr(universe, fromType=idFrom, toType=idTo, OrgDb=orgdb) + universe_gene<-unique(universe_gene$ENTREZID) + } + else if (universe_id_type=="Entrez") { + universe_gene<-unique(universe) + } + } + else { + universe_gene = NULL + } } ##enrichGO : GO over-representation test for (onto in ontology) { if (args$go_represent == "true") { - ggo<-repartition.GO(gene$ENTREZID, orgdb, onto, level, readable=TRUE) + ggo<-repartition.GO(gene, orgdb, onto, level, readable=TRUE) write.table(ggo, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) } if (args$go_enrich == "true") { - ego<-enrich.GO(gene$ENTREZID, orgdb, onto, pval_cutoff, qval_cutoff) + ego<-enrich.GO(gene, universe_gene, orgdb, onto, pval_cutoff, qval_cutoff) write.table(ego, args$text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) } } diff -r 8a91f58782df -r 5e16cec55146 cluster_profiler.xml --- a/cluster_profiler.xml Fri Mar 23 10:01:41 2018 -0400 +++ b/cluster_profiler.xml Thu Mar 29 11:43:28 2018 -0400 @@ -36,6 +36,18 @@ --go_enrich="true" --pval_cutoff="$ego.pval" --qval_cutoff="$ego.qval" + #if $ego.universe.universe_option == "true" + #if $ego.universe.universe_input.universe_ids == "text" + --universe_type="text" + --universe="$ego.universe.universe_input.txt" + #else + --universe_type="file" + --universe="$ego.universe.universe_input.file" + --uncol="$ego.universe.universe_input.ncol" + --uheader="$ego.universe.universe_input.header" + #end if + --universe_id_type="$ego.universe.universe_idti.universe_idtypein" + #end if #else --go_enrich="false" #end if @@ -76,6 +88,7 @@ + @@ -97,6 +110,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +