comparison seek_otu.R @ 0:e889010415a1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
author iuc
date Mon, 04 Mar 2024 19:55:52 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e889010415a1
1 #!/usr/bin/env Rscript
2
3 ## Redirect R error handling to stderr.
4 options(show.error.messages = FALSE, error = function() {
5 cat(geterrmessage(), file = stderr())
6 q("no", 1, FALSE)
7 })
8
9 ## Avoid crashing Galaxy with a UTF8 error on German LC settings
10 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
11
12 args <- commandArgs(trailingOnly = TRUE)
13 if (length(args) == 0) {
14 stop("Arguments missing for Rscrpit", call. = FALSE)
15 } else {
16 # percentage of identity
17 id_threshold <- as.numeric(args[3])
18 # get input data (matrix)
19 data <- read.csv(args[1], header = FALSE, sep = ",", row.names = 1)
20 # remove last 2 columns
21 data_length <- length(data)
22 # create matrix
23 mat <- as.matrix(data[, 1:data_length], fill = TRUE)
24 # create coordinate matrix
25 d <- as.dist(1 - mat)
26 # create tree
27 hc <- hclust(d, method = "single")
28 # assign otu based on identity value
29 otu <- cutree(hc, h = -id_threshold)
30 # group contigs by otu
31 # Print results to output file
32 output <- args[2]
33 # unique is used to know the number of different otu
34 for (i in unique(otu)) {
35 # retrieve contigs belonging to the same otu
36 clust <- which(otu == i)
37 # write otu number and number of contigs in this otu
38 cat(
39 paste("OTU_", i, ",", length(clust), ",", sep = ""),
40 file = output, append = TRUE
41 )
42 for (n in names(clust)) {
43 # write contigs name
44 cat(paste(gsub(" ", "", n), ",", sep = ""), file = output, append = TRUE)
45 }
46 cat("\n", sep = "", file = output, append = TRUE)
47 }
48 }