Mercurial > repos > proteore > proteore_goprofiles
diff goprofiles.R @ 1:1236ee08ccb8 draft
planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
author | proteore |
---|---|
date | Fri, 16 Feb 2018 03:40:36 -0500 |
parents | d89c09253c8d |
children | 58a8ddd58dde |
line wrap: on
line diff
--- a/goprofiles.R Sun Nov 26 19:19:39 2017 -0500 +++ b/goprofiles.R Fri Feb 16 03:40:36 2018 -0500 @@ -5,16 +5,12 @@ # Read file and return file content as data.frame? readfile = function(filename, header) { if (header == "true") { - # Read only the first two lines of the files as data (without headers): + # Read only the first line of the files as data (without headers): headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) - #print("header") - #print(headers) - # Create the headers names with the two (or more) first rows, sappy allows to make operations over the columns (in this case paste) - read more about sapply here : - #headers_names <- sapply(headers, paste, collapse = "_") - #print(headers_names) - #Read the data of the files (skipping the first 2 rows): + #Read the data of the files (skipping the first row): file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) - #print(file[1,]) + # Remove empty rows + file <- file[!apply(is.na(file) | file == "", 1, all),] #And assign the headers of step two to the data: names(file) <- headers } @@ -24,10 +20,6 @@ return(file) } -#filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt" -#test = readfile(filename) -#str(test) -#str(test$Gene.names) getprofile = function(ids, id_type, level, duplicate) { #################################################################### # Arguments @@ -64,27 +56,6 @@ print("IDs unable to convert to ENTREZID: ") print(NAs) } - #print(genes_ids) - # Convert Protein IDs into entrez ids - - # for (i in 1:length(id$UNIPROT)) { - # print(i) - # if (is.na(id[[2]][i])) { - # print(id[[2]][i]) - # } - # } - # a = id[which(id$ENTREZID == "NA"),] - # print(a) - # print(a$UNIPROT) - #print(id[[1]][which(is.na(id$ENTREZID))]) - #print(genes_ids) - # for (gene in genes) { - # #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA)) - # id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT") - # print(id) - # genes_ids = append(genes_ids, id$ENTREZID) - # } - #print(genes_ids) # Create basic profiles profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T) @@ -172,103 +143,117 @@ } goprofiles = function() { - args = commandArgs(trailingOnly = TRUE) - #print(args) - # arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title" - if (length(args) != 9) { - stop("Not enough/Too many arguments", call. = FALSE) + args <- commandArgs(TRUE) + if(length(args)<1) { + args <- c("--help") } - else { - input_type = args[2] - if (input_type == "text") { - input = strsplit(args[1], "\\s+")[[1]] - } - else if (input_type == "file") { - filename = strsplit(args[1], ",")[[1]][1] - ncol = strsplit(args[1], ",")[[1]][2] - # Check ncol - if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { - stop("Please enter an integer for level") - } - else { - ncol = as.numeric(gsub("c", "", ncol)) - } - header = strsplit(args[1], ",")[[1]][3] - # Get file content - file = readfile(filename, header) - # Extract Protein IDs list - input = c() - for (row in as.character(file[,ncol])) { - input = c(input, strsplit(row, ";")[[1]][1]) - } - } - id_type = args[3] - ontoopt = strsplit(args[4], ",")[[1]] - #print(ontoopt) - #plotopt = strsplit(args[3], ",") - plotopt = args[5] - level = args[6] - per = as.logical(args[7]) - title = args[8] - duplicate = args[9] - - profiles = getprofile(input, id_type, level, duplicate) - profile.CC = profiles[1] - #print(profile.CC) - profile.MF = profiles[2] - #print(profile.MF) - profile.BP = profiles[3] - #print(profile.BP) - profile.ALL = profiles[-3:-1] - #print(profile.ALL) - #c(profile.ALL, profile.CC, profile.MF, profile.BP) - if ("CC" %in% ontoopt) { - if (grepl("PNG", plotopt)) { - plotPNG(profile.CC=profile.CC, per=per, title=title) - } - if (grepl("JPEG", plotopt)) { - plotJPEG(profile.CC = profile.CC, per=per, title=title) - } - if (grepl("PDF", plotopt)) { - plotPDF(profile.CC = profile.CC, per=per, title=title) - } - } - if ("MF" %in% ontoopt) { - if (grepl("PNG", plotopt)) { - plotPNG(profile.MF = profile.MF, per=per, title=title) - } - if (grepl("JPEG", plotopt)) { - plotJPEG(profile.MF = profile.MF, per=per, title=title) - } - if (grepl("PDF", plotopt)) { - plotPDF(profile.MF = profile.MF, per=per, title=title) - } - } - if ("BP" %in% ontoopt) { - if (grepl("PNG", plotopt)) { - plotPNG(profile.BP = profile.BP, per=per, title=title) - } - if (grepl("JPEG", plotopt)) { - plotJPEG(profile.BP = profile.BP, per=per, title=title) - } - if (grepl("PDF", plotopt)) { - plotPDF(profile.BP = profile.BP, per=per, title=title) - } - } - - #if (grepl("PNG", plotopt)) { - # plotPNG(profile.ALL = profile.ALL, per=per, title=title) - #} - #if (grepl("JPEG", plotopt)) { - # plotJPEG(profile.ALL = profile.ALL, per=per, title=title) - #} - #if (grepl("PDF", plotopt)) { - # plotPDF(profile.ALL = profile.ALL, per=per, title=title) - #} + + # Help section + if("--help" %in% args) { + cat("Selection and Annotation HPA + Arguments: + --input_type: type of input (list of id or filename) + --input: input + --ncol: the column number which you would like to apply... + --header: true/false if your file contains a header + --id_type: the type of input IDs (UniProt/EntrezID) + --onto_opt: ontology options + --plot_opt: plot extension options (PDF/JPEG/PNG) + --level: 1-3 + --per + --title: title of the plot + --duplicate: remove dupliate input IDs (true/false) + --text_output: text output filename \n") + q(save="no") } + # Parse arguments + parseArgs <- function(x) strsplit(sub("^--", "", x), "=") + argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) + args <- as.list(as.character(argsDF$V2)) + names(args) <- argsDF$V1 + + input_type = args$input_type + if (input_type == "text") { + input = strsplit(args$input, " ")[[1]] + } + else if (input_type == "file") { + filename = args$input + ncol = args$ncol + # Check ncol + if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { + stop("Please enter an integer for level") + } + else { + ncol = as.numeric(gsub("c", "", ncol)) + } + header = args$header + # Get file content + file = readfile(filename, header) + # Extract Protein IDs list + input = c() + for (row in as.character(file[,ncol])) { + input = c(input, strsplit(row, ";")[[1]][1]) + } + } + id_type = args$id_type + ontoopt = strsplit(args$onto_opt, ",")[[1]] + #print(ontoopt) + #plotopt = strsplit(args[3], ",") + plotopt = args$plot_opt + level = args$level + per = as.logical(args$per) + title = args$title + duplicate = args$duplicate + text_output = args$text_output + + profiles = getprofile(input, id_type, level, duplicate) + profile.CC = profiles[1] + #print(profile.CC) + profile.MF = profiles[2] + #print(profile.MF) + profile.BP = profiles[3] + #print(profile.BP) + profile.ALL = profiles[-3:-1] + #print(profile.ALL) + #c(profile.ALL, profile.CC, profile.MF, profile.BP) + + if ("CC" %in% ontoopt) { + write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) + if (grepl("PNG", plotopt)) { + plotPNG(profile.CC=profile.CC, per=per, title=title) + } + if (grepl("JPEG", plotopt)) { + plotJPEG(profile.CC = profile.CC, per=per, title=title) + } + if (grepl("PDF", plotopt)) { + plotPDF(profile.CC = profile.CC, per=per, title=title) + } + } + if ("MF" %in% ontoopt) { + write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) + if (grepl("PNG", plotopt)) { + plotPNG(profile.MF = profile.MF, per=per, title=title) + } + if (grepl("JPEG", plotopt)) { + plotJPEG(profile.MF = profile.MF, per=per, title=title) + } + if (grepl("PDF", plotopt)) { + plotPDF(profile.MF = profile.MF, per=per, title=title) + } + } + if ("BP" %in% ontoopt) { + write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE) + if (grepl("PNG", plotopt)) { + plotPNG(profile.BP = profile.BP, per=per, title=title) + } + if (grepl("JPEG", plotopt)) { + plotJPEG(profile.BP = profile.BP, per=per, title=title) + } + if (grepl("PDF", plotopt)) { + plotPDF(profile.BP = profile.BP, per=per, title=title) + } + } } goprofiles() - -#Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title"