Mercurial > repos > proteore > proteore_prot_features
comparison protein_features.R @ 6:fc0118aa432a draft
planemo upload commit 5221e042cb207f593b144ed857106235b8f5fbde-dirty
| author | proteore |
|---|---|
| date | Tue, 20 Mar 2018 11:13:49 -0400 |
| parents | 867d47ff782c |
| children | af7089d1c7c0 |
comparison
equal
deleted
inserted
replaced
| 5:867d47ff782c | 6:fc0118aa432a |
|---|---|
| 1 # Read file and return file content as data.frame | 1 # Read file and return file content as data.frame |
| 2 readfile = function(filename, header) { | 2 readfile = function(filename, header) { |
| 3 if (header == "true") { | 3 if (header == "true") { |
| 4 # Read only first line of the file as header: | 4 # Read only first line of the file as header: |
| 5 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) | 5 headers <- read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") |
| 6 #Read the data of the files (skipping the first row) | 6 #Read the data of the files (skipping the first row) |
| 7 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) | 7 file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") |
| 8 # Remove empty rows | 8 # Remove empty rows |
| 9 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | 9 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] |
| 10 #And assign the header to the data | 10 #And assign the header to the data |
| 11 names(file) <- headers | 11 names(file) <- headers |
| 12 } | 12 } |
| 13 else { | 13 else { |
| 14 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE) | 14 file <- read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") |
| 15 # Remove empty rows | 15 # Remove empty rows |
| 16 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] | 16 file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] |
| 17 } | 17 } |
| 18 return(file) | 18 return(file) |
| 19 } | 19 } |
| 47 args <- as.list(as.character(argsDF$V2)) | 47 args <- as.list(as.character(argsDF$V2)) |
| 48 names(args) <- argsDF$V1 | 48 names(args) <- argsDF$V1 |
| 49 | 49 |
| 50 inputtype = args$inputtype | 50 inputtype = args$inputtype |
| 51 if (inputtype == "copypaste") { | 51 if (inputtype == "copypaste") { |
| 52 input = strsplit(args$input, " ")[[1]] | 52 input = strsplit(args$input, "[ \t\n]+")[[1]] |
| 53 } | 53 } |
| 54 else if (inputtype == "tabfile") { | 54 else if (inputtype == "tabfile") { |
| 55 filename = args$input | 55 filename = args$input |
| 56 ncol = args$column | 56 ncol = args$column |
| 57 # Check ncol | 57 # Check ncol |
| 88 input = gsub("^","NX_",input) | 88 input = gsub("^","NX_",input) |
| 89 } | 89 } |
| 90 | 90 |
| 91 # Select user input protein ids in nextprot | 91 # Select user input protein ids in nextprot |
| 92 if ((length(input[input %in% nextprot[,1]]))==0){ | 92 if ((length(input[input %in% nextprot[,1]]))==0){ |
| 93 write.table("None of the input ids are can be found in Nextprot",file=filename,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) | 93 write.table("None of the input ids are can be found in Nextprot",file=output,sep="\t",quote=FALSE,col.names=TRUE,row.names=FALSE) |
| 94 } else { | 94 } else { |
| 95 names = c() | 95 names = c() |
| 96 res = matrix(nrow=length(input), ncol=0) | 96 res = matrix(nrow=length(input), ncol=0) |
| 97 | 97 |
| 98 # Get information from neXtProt | 98 # Get information from neXtProt |
| 115 names = c(names, arg) | 115 names = c(names, arg) |
| 116 info = nextprot[match(input, nextprot["NextprotID"][,]),][arg][,] | 116 info = nextprot[match(input, nextprot["NextprotID"][,]),][arg][,] |
| 117 res = cbind(res, info) | 117 res = cbind(res, info) |
| 118 } | 118 } |
| 119 } | 119 } |
| 120 --inputtype="tabfile" --input="Galaxy50-[ID_Converter_on_data_47].tabular" --header='true' --natlas="proteinatlas.csv" --column='c7' --select='Gene,Gene.description,Evidence,RNA.tissue.category,Reliability.IH,TPM.max.in.non.specific' --output="test.txt" | |
| 120 | 121 |
| 121 # Write output | 122 # Write output |
| 122 if (inputtype == "copypaste") { | 123 if (inputtype == "copypaste") { |
| 123 res = cbind(as.matrix(input), res) | 124 res = cbind(as.matrix(input), res) |
| 124 names = c(typeid, names) | 125 names = c(typeid, names) |
