Mercurial > repos > dfornika > mentalist
view tools/mentalist_distance/mentalist_distance @ 5:2c742bf44180 draft
planemo upload for repository https://github.com/WGS-TB/MentaLiST/tree/master/galaxy commit e25241b08d12362f7cbd988ef1b67b8b01e0031e-dirty
author | dfornika |
---|---|
date | Thu, 24 May 2018 18:07:56 -0400 |
parents | af77acce864b |
children |
line wrap: on
line source
#!/usr/bin/env Rscript args = commandArgs(trailingOnly=TRUE) #' extract the MLST data #' @param call_file -> MLST call file #' @param exclude (optional) -> columns to exclude in the MLST call files. #' @return a MLST call matrix, with samples on the rows and loci on the columns. mlst_calls = function(call_file, exclude=c()){ mlst_calls = read.csv(call_file, row.names=1, sep="\t") # exclude non allele calls: exclude <- c(exclude, c("ST", "clonal_complex")) if(any(exclude %in% names(mlst_calls))) { return(mlst_calls[ , -which(names(mlst_calls) %in% exclude)]) } else { return(mlst_calls) } } #' ham_distance compute the hamming distance matrix (row by row) #' @param X matrix #' @return dist_matrix the distance matrix of the X's rows with the haming distance ham_distance = function(X){ X[is.na(X)] <- -1 X <- as.matrix(X) characs = unique(as.vector(X)) U = X == characs[1] Cumu = U %*% t(U) for (chara in characs[-1]){ U = X == chara Cumu = Cumu + U %*% t(U) } dist = ncol(X) - Cumu return(dist) } usage <- function() { cat("usage: mentalist_distance <input.tsv>\n") } main <- function(args) { if(is.na(args[1])){ usage() return() } mlst <- mlst_calls(args[1]) d_matrix <- ham_distance(mlst) cat('', colnames(d_matrix), sep='\t') cat('\n') write.table(d_matrix, quote=FALSE, sep='\t', col.names=FALSE) } if(!interactive()) { main(args) }