proteore_goprofiles: goprofiles.R comparison

comparison goprofiles.R @ 1:1236ee08ccb8 draft

planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty

author	proteore
date	Fri, 16 Feb 2018 03:40:36 -0500
parents	d89c09253c8d
children	58a8ddd58dde

comparison

equal deleted inserted replaced

-:d89c09253c8d
+:1236ee08ccb8
 library("goProfiles", quietly=TRUE)
 # Read a tab-separated file and return its content as a data.frame
 readfile = function(filename, header) {
 if (header == "true") {
-# Read only the first two lines of the files as data (without headers):
+# Read only the first line of the files as data (without headers):
 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
-#print("header")
+#Read the data of the files (skipping the first row):
-#print(headers)
-# Create the header names from the first two (or more) rows; sapply applies an operation over the columns (in this case paste) - read more about sapply here :
-#headers_names <- sapply(headers, paste, collapse = "_")
-#print(headers_names)
-#Read the data of the files (skipping the first 2 rows):
 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
-#print(file[1,])
+# Remove empty rows
+file <- file[!apply(is.na(file) | file == "", 1, all),]
 # And assign the headers read above to the data:
 names(file) <- headers
 }
 else {
 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE)
 }
 return(file)
 }
-#filename = "/Users/LinCun/Documents/ProteoRE/usecase1/Check/HPA.Selection.134.txt"
-#test = readfile(filename)
-#str(test)
-#str(test$Gene.names)
 getprofile = function(ids, id_type, level, duplicate) {
 ####################################################################
 # Arguments
 #   - ids: list of input IDs
 #   - id_type: type of input IDs (UniProt/ENTREZID)
 # IDs that have NA ENTREZID
 NAs = id$UNIPROT[which(is.na(id$ENTREZID))]
 print("IDs unable to convert to ENTREZID: ")
 print(NAs)
 }
-#print(genes_ids)
-# Convert Protein IDs into entrez ids
-# for (i in 1:length(id$UNIPROT)) {
-#   print(i)
-#   if (is.na(id[[2]][i])) {
-#     print(id[[2]][i])
-#   }
-# }
-# a = id[which(id$ENTREZID == "NA"),]
-# print(a)
-# print(a$UNIPROT)
-#print(id[[1]][which(is.na(id$ENTREZID))])
-#print(genes_ids)
-# for (gene in genes) {
-#   #id = as.character(mget(gene, org.Hs.egALIAS2EG, ifnotfound = NA))
-#   id = select(org.Hs.eg.db, genes, "ENTREZID", "UNIPROT")
-#   print(id)
-#   genes_ids = append(genes_ids, id$ENTREZID)
-# }
-#print(genes_ids)
 # Create basic profiles
 profile.CC = basicProfile(genes_ids, onto='CC', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
 profile.BP = basicProfile(genes_ids, onto='BP', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
 profile.MF = basicProfile(genes_ids, onto='MF', level=level, orgPackage="org.Hs.eg.db", empty.cats=F, ord=T, na.rm=T)
 dev.off()
 }
 }
 goprofiles = function() {
-args = commandArgs(trailingOnly = TRUE)
+args <- commandArgs(TRUE)
-#print(args)
+if(length(args)<1) {
-# arguments: filename.R inputfile ncol "CC,MF,BP,ALL" "PNG,JPEG,PDF" level "TRUE"(percentage) "Title"
+args <- c("--help")
-if (length(args) != 9) {
+}
-stop("Not enough/Too many arguments", call. = FALSE)
-}
+# Help section
-else {
+if("--help" %in% args) {
-input_type = args[2]
+cat("Selection and Annotation HPA
-if (input_type == "text") {
+Arguments:
-input = strsplit(args[1], "\\s+")[[1]]
+--input_type: type of input (list of id or filename)
-}
+--input: input
-else if (input_type == "file") {
+--ncol: the column number which you would like to apply...
-filename = strsplit(args[1], ",")[[1]][1]
+--header: true/false if your file contains a header
-ncol = strsplit(args[1], ",")[[1]][2]
+--id_type: the type of input IDs (UniProt/EntrezID)
-# Check ncol
+--onto_opt: ontology options
-if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
+--plot_opt: plot extension options (PDF/JPEG/PNG)
-stop("Please enter an integer for level")
+--level: 1-3
-}
+--per
-else {
+--title: title of the plot
-ncol = as.numeric(gsub("c", "", ncol))
+--duplicate: remove dupliate input IDs (true/false)
-}
+--text_output: text output filename \n")
-header = strsplit(args[1], ",")[[1]][3]
+q(save="no")
-# Get file content
+}
-file = readfile(filename, header)
-# Extract Protein IDs list
+# Parse arguments
-input = c()
+parseArgs <- function(x) strsplit(sub("^--", "", x), "=")
-for (row in as.character(file[,ncol])) {
+argsDF <- as.data.frame(do.call("rbind", parseArgs(args)))
-input = c(input, strsplit(row, ";")[[1]][1])
+args <- as.list(as.character(argsDF$V2))
-}
+names(args) <- argsDF$V1
-}
-id_type = args[3]
+input_type = args$input_type
-ontoopt = strsplit(args[4], ",")[[1]]
+if (input_type == "text") {
-#print(ontoopt)
+input = strsplit(args$input, " ")[[1]]
-#plotopt = strsplit(args[3], ",")
+}
-plotopt = args[5]
+else if (input_type == "file") {
-level = args[6]
+filename = args$input
-per = as.logical(args[7])
+ncol = args$ncol
-title = args[8]
+# Check ncol
-duplicate = args[9]
+if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) {
+stop("Please enter an integer for level")
-profiles = getprofile(input, id_type, level, duplicate)
+}
-profile.CC = profiles[1]
+else {
-#print(profile.CC)
+ncol = as.numeric(gsub("c", "", ncol))
-profile.MF = profiles[2]
+}
-#print(profile.MF)
+header = args$header
-profile.BP = profiles[3]
+# Get file content
-#print(profile.BP)
+file = readfile(filename, header)
-profile.ALL = profiles[-3:-1]
+# Extract Protein IDs list
-#print(profile.ALL)
+input = c()
-#c(profile.ALL, profile.CC, profile.MF, profile.BP)
+for (row in as.character(file[,ncol])) {
-if ("CC" %in% ontoopt) {
+input = c(input, strsplit(row, ";")[[1]][1])
-if (grepl("PNG", plotopt)) {
+}
-plotPNG(profile.CC=profile.CC, per=per, title=title)
+}
-}
+id_type = args$id_type
-if (grepl("JPEG", plotopt)) {
+ontoopt = strsplit(args$onto_opt, ",")[[1]]
-plotJPEG(profile.CC = profile.CC, per=per, title=title)
+#print(ontoopt)
-}
+#plotopt = strsplit(args[3], ",")
-if (grepl("PDF", plotopt)) {
+plotopt = args$plot_opt
-plotPDF(profile.CC = profile.CC, per=per, title=title)
+level = args$level
-}
+per = as.logical(args$per)
-}
+title = args$title
-if ("MF" %in% ontoopt) {
+duplicate = args$duplicate
-if (grepl("PNG", plotopt)) {
+text_output = args$text_output
-plotPNG(profile.MF = profile.MF, per=per, title=title)
-}
+profiles = getprofile(input, id_type, level, duplicate)
-if (grepl("JPEG", plotopt)) {
+profile.CC = profiles[1]
-plotJPEG(profile.MF = profile.MF, per=per, title=title)
+#print(profile.CC)
-}
+profile.MF = profiles[2]
-if (grepl("PDF", plotopt)) {
+#print(profile.MF)
-plotPDF(profile.MF = profile.MF, per=per, title=title)
+profile.BP = profiles[3]
-}
+#print(profile.BP)
-}
+profile.ALL = profiles[-3:-1]
-if ("BP" %in% ontoopt) {
+#print(profile.ALL)
-if (grepl("PNG", plotopt)) {
+#c(profile.ALL, profile.CC, profile.MF, profile.BP)
-plotPNG(profile.BP = profile.BP, per=per, title=title)
-}
-if (grepl("JPEG", plotopt)) {
-plotJPEG(profile.BP = profile.BP, per=per, title=title)
-}
-if (grepl("PDF", plotopt)) {
-plotPDF(profile.BP = profile.BP, per=per, title=title)
-}
-}
-#if (grepl("PNG", plotopt)) {
+if ("CC" %in% ontoopt) {
-# plotPNG(profile.ALL = profile.ALL, per=per, title=title)
+write.table(profile.CC, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
-#}
+if (grepl("PNG", plotopt)) {
-#if (grepl("JPEG", plotopt)) {
+plotPNG(profile.CC=profile.CC, per=per, title=title)
-# plotJPEG(profile.ALL = profile.ALL, per=per, title=title)
+}
-#}
+if (grepl("JPEG", plotopt)) {
-#if (grepl("PDF", plotopt)) {
+plotJPEG(profile.CC = profile.CC, per=per, title=title)
-# plotPDF(profile.ALL = profile.ALL, per=per, title=title)
+}
-#}
+if (grepl("PDF", plotopt)) {
-}
+plotPDF(profile.CC = profile.CC, per=per, title=title)
+}
+}
+if ("MF" %in% ontoopt) {
+write.table(profile.MF, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+if (grepl("PNG", plotopt)) {
+plotPNG(profile.MF = profile.MF, per=per, title=title)
+}
+if (grepl("JPEG", plotopt)) {
+plotJPEG(profile.MF = profile.MF, per=per, title=title)
+}
+if (grepl("PDF", plotopt)) {
+plotPDF(profile.MF = profile.MF, per=per, title=title)
+}
+}
+if ("BP" %in% ontoopt) {
+write.table(profile.BP, text_output, append = TRUE, sep="\t", row.names = FALSE, quote=FALSE)
+if (grepl("PNG", plotopt)) {
+plotPNG(profile.BP = profile.BP, per=per, title=title)
+}
+if (grepl("JPEG", plotopt)) {
+plotJPEG(profile.BP = profile.BP, per=per, title=title)
+}
+if (grepl("PDF", plotopt)) {
+plotPDF(profile.BP = profile.BP, per=per, title=title)
+}
+}
 }
 goprofiles()
-#Rscript go.R ../proteinGroups_Maud.txt "1" "CC" "PDF" 2 "TRUE" "Title"

Mercurial > repos > proteore > proteore_goprofiles

comparison goprofiles.R @ 1:1236ee08ccb8 draft