Mercurial > repos > iuc > virannot_blast2tsv
diff seek_otu.R @ 0:e889010415a1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
author | iuc |
---|---|
date | Mon, 04 Mar 2024 19:55:52 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seek_otu.R Mon Mar 04 19:55:52 2024 +0000 @@ -0,0 +1,48 @@ +#!/usr/bin/env Rscript + +## Redirect R error handling to stderr. +options(show.error.messages = FALSE, error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) +}) + +## Avoid crashing Galaxy with a UTF8 error on German LC settings +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +args <- commandArgs(trailingOnly = TRUE) +if (length(args) == 0) { + stop("Arguments missing for Rscrpit", call. = FALSE) +} else { + # percentage of identity + id_threshold <- as.numeric(args[3]) + # get input data (matrix) + data <- read.csv(args[1], header = FALSE, sep = ",", row.names = 1) + # remove last 2 columns + data_length <- length(data) + # create matrix + mat <- as.matrix(data[, 1:data_length], fill = TRUE) + # create coordinate matrix + d <- as.dist(1 - mat) + # create tree + hc <- hclust(d, method = "single") + # assign otu based on identity value + otu <- cutree(hc, h = -id_threshold) + # group contigs by otu + # Print results to output file + output <- args[2] + # unique is used to know the number of different otu + for (i in unique(otu)) { + # retrieve contigs belonging to the same otu + clust <- which(otu == i) + # write otu number and number of contigs in this otu + cat( + paste("OTU_", i, ",", length(clust), ",", sep = ""), + file = output, append = TRUE + ) + for (n in names(clust)) { + # write contigs name + cat(paste(gsub(" ", "", n), ",", sep = ""), file = output, append = TRUE) + } + cat("\n", sep = "", file = output, append = TRUE) + } +}