view tools/mentalist_distance/mentalist_distance @ 5:2c742bf44180 draft

planemo upload for repository https://github.com/WGS-TB/MentaLiST/tree/master/galaxy commit e25241b08d12362f7cbd988ef1b67b8b01e0031e-dirty
author dfornika
date Thu, 24 May 2018 18:07:56 -0400
parents af77acce864b
children
line wrap: on
line source

#!/usr/bin/env Rscript

args = commandArgs(trailingOnly=TRUE)

#' extract the MLST data
#' @param call_file -> MLST call file
#' @param exclude (optional) -> columns to exclude in the MLST call files.
#' @return a MLST call matrix, with samples on the rows and loci on the columns.
mlst_calls = function(call_file, exclude=c()){
  mlst_calls = read.csv(call_file, row.names=1, sep="\t")
  # exclude non allele calls:
  exclude <- c(exclude, c("ST", "clonal_complex"))
  if(any(exclude %in% names(mlst_calls))) {
    return(mlst_calls[ , -which(names(mlst_calls) %in% exclude)])
  } else {
    return(mlst_calls)
  }
}

#' ham_distance  compute the hamming distance matrix (row by row)
#' @param X matrix
#' @return dist_matrix the distance matrix of the X's rows with the haming distance
ham_distance = function(X){
  X[is.na(X)] <- -1
  X <- as.matrix(X)
  characs = unique(as.vector(X))
  U = X == characs[1]
  Cumu = U %*% t(U)
  for (chara in characs[-1]){
    U = X == chara
    Cumu = Cumu + U %*% t(U)
  }
  dist = ncol(X) - Cumu
  return(dist)
}

usage <- function() {
  cat("usage: mentalist_distance <input.tsv>\n")
}


main <- function(args) {
  if(is.na(args[1])){
    usage()
    return()
  }
  mlst <- mlst_calls(args[1])
  d_matrix <- ham_distance(mlst)
  cat('', colnames(d_matrix), sep='\t')
  cat('\n')
  write.table(d_matrix, quote=FALSE, sep='\t', col.names=FALSE)
}

if(!interactive()) {
    main(args)
}