view seek_otu.R @ 2:77c3ef9b0ed7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit ab5e1189217b6ed5f1c5d7c5ff6b79b6a4c18cff
author iuc
date Wed, 21 Aug 2024 13:13:39 +0000
parents e889010415a1
children
line wrap: on
line source

#!/usr/bin/env Rscript

## Redirect R error handling to stderr.
options(show.error.messages = FALSE, error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
})

## Avoid crashing Galaxy with a UTF8 error on German LC settings
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
    stop("Arguments missing for Rscrpit", call. = FALSE)
} else {
    # percentage of identity
    id_threshold <- as.numeric(args[3])
    # get input data (matrix)
    data <- read.csv(args[1], header = FALSE, sep = ",", row.names = 1)
    # remove last 2 columns
    data_length <- length(data)
    # create matrix
    mat <- as.matrix(data[, 1:data_length], fill = TRUE)
    # create coordinate matrix
    d <- as.dist(1 - mat)
    # create tree
    hc <- hclust(d, method = "single")
    # assign otu based on identity value
    otu <- cutree(hc, h = -id_threshold)
    # group contigs by otu
    # Print results to output file
    output <- args[2]
    # unique is used to know the number of different otu
    for (i in unique(otu)) {
        # retrieve contigs belonging to the same otu
        clust <- which(otu == i)
        # write otu number and number of contigs in this otu
        cat(
            paste("OTU_", i, ",", length(clust), ",", sep = ""),
            file = output, append = TRUE
        )
        for (n in names(clust)) {
            # write contigs name
            cat(paste(gsub(" ", "", n), ",", sep = ""), file = output, append = TRUE)
        }
        cat("\n", sep = "", file = output, append = TRUE)
    }
}