diff chromeister/bin/compute_score.R @ 1:3d1fbde7e0cc draft default tip

Deleted selected files
author alvarofaure
date Thu, 13 Dec 2018 03:41:58 -0500
parents 7fdf47a0bae8
children
line wrap: on
line diff
--- a/chromeister/bin/compute_score.R	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,478 +0,0 @@
-
-#!/usr/bin/env Rscript
-args = commandArgs(trailingOnly=TRUE)
-
-if(length(args) < 2){
-  stop("USE: Rscript --vanilla plot.R <matrix> <matsize>")
-}
-
-
-
-
-
-
-growing_regions <- function(mat, reward = 6, penalty = 5, sidePenalty = 3, MAXHSPS = 500, TH = 10, WSIZE = 7){
-
-  #write(mat, file ="data.txt", ncolumns = 200)
-  
-  len <- min(length(mat[1,]), length(mat[,1]))
-  HSPS <- matrix(0, nrow=MAXHSPS, ncol=5)
-  idx <- 1
-  lH <- round(WSIZE/2) - 1
-  rH <- round(WSIZE/2) + 1
-  
-  
-  #print(paste("lH: ", lH, "rH: ", rH))
-  
-  if(WSIZE %% 2 == 0){
-    print("WSIZE MUST BE ODD")
-    stop()
-  }
-  
-  
-  
-  i <- 1
-  #readline(prompt="Press [enter] to continue")
-  while(i < len){
-
-    value <- max(mat[i,]) * reward
-    if(value == 0) i <- i + 1
-    pos <- which.max(mat[i,])
-    # these two hold ending frag
-    endfrag <- pos
-    j <- i
-    count_penalties <- 1
-
-    #print("-----------------")
-
-    while(value > 0 && j < len){
-
-      #print(paste("took values ", j, endfrag, "which have score of ", mat[j, endfrag], "current value is: ", value))
-      
-      
-      
-
-      # Reset position used
-      
-      mat[max(1,j-1), max(1,endfrag-2)] <- 0
-      mat[max(1,j-1), max(1,endfrag-1)] <- 0
-      mat[max(1,j-1), endfrag] <- 0
-      mat[max(1,j-1), min(len, endfrag+1)] <- 0
-      mat[max(1,j-1), min(len, endfrag+2)] <- 0
-      
-      mat[j, max(1,endfrag-2)] <- 0
-      mat[j, max(1,endfrag-1)] <- 0
-      mat[j, endfrag] <- 0
-      mat[j, min(len, endfrag+1)] <- 0
-      mat[j, min(len, endfrag+2)] <- 0
-      
-      #print(paste("Erasing: (",max(1,j-1), max(1,endfrag-2),")(",max(1,j-1), max(1,endfrag-1),")(",max(1,j-1), endfrag,
-      #            ")(",max(1,j-1), min(len, endfrag+1),")(",max(1,j-1), min(len, endfrag+2),")(",j, max(1,endfrag-2),
-      #            ")(",j, max(1,endfrag-1),")(",j, endfrag,")(",j, min(len, endfrag+1),")(",j, min(len, endfrag+2),")"))
-      
-      
-      # Go for next
-      j <- j + 1
-      # Check next, could be reverse or forward
-      mleft <- max(1, endfrag-lH)
-      mright <- min(len, endfrag+lH)
-      window <- mat[j, mleft:mright]
-      
-
-      #print(paste("mleft: ", mleft, "mright", mright, "j is: ", j))
-      #print(window)
-
-      v <- max(window)
-      selected <- which.max(window)
-      # Make it rather go diagonally
-      #print(paste("WIndow len: ", length(window)))
-      chose_diagonal <- FALSE
-      if(length(window) == WSIZE && v == window[lH]){
-        selected <- lH
-        chose_diagonal <- TRUE
-      }
-      
-      if(length(window) == WSIZE && v == window[rH]){
-        selected <- rH
-        chose_diagonal <- TRUE
-      }
-      
-      #print(paste("Selected value be like ", selected))
-      
-
-      if(v != 0){
-        
-        
-        
-        endfrag <- (mleft + selected ) # To make the indexing
-        if(length(window) == WSIZE) endfrag <- endfrag - 1
-        endfrag <- max(1, min(len, endfrag))
-        #print(paste("\t", " endfragnew = ", endfrag, "max of window: ", max(window), "on position", which.max(window)))
-      }
-
-      #print(paste("Chose diagonal is ", chose_diagonal))
-      
-      # If no similarity is found
-      if(v == 0){
-        value <- value - count_penalties * penalty
-        count_penalties <- count_penalties + 1
-        #print("Got penalty #########")
-        
-      }else{
-        
-        # Similarity is found
-        
-        if(!chose_diagonal){
-          value <- value + count_penalties * (-sidePenalty)
-          count_penalties <- count_penalties + 1
-          #print("Got SIDE penalty @@@ #########")
-        }else{
-          count_penalties <- 1
-          value <- value + reward
-          #print("Got reward thou #########")
-        }
-        
-      }
-      #readline(prompt="Press [enter] to continue")
-
-    }
-    # Check len of frag
-    if(j-i > TH){
-      # HSPS[idx, 1] <- i
-      # HSPS[idx, 2] <- pos
-      # HSPS[idx, 3] <- j
-      # HSPS[idx, 4] <- endfrag
-      # HSPS[idx, 5] <- abs(i-j)
-      
-      HSPS[idx, 1] <- pos
-      HSPS[idx, 2] <- i
-      HSPS[idx, 3] <- endfrag
-      HSPS[idx, 4] <- j
-      HSPS[idx, 5] <- abs(i-j)
-      
-      
-      
-      #print(paste("ACCEPT", i, pos, j, endfrag, "x-y", j-i, abs(endfrag-pos), sep = " "))
-      idx <- idx + 1
-    }else{
-      #print("REJECT")
-    }
-    # This will prevent overlappign lines, I think
-    #i <- j
-    
-    if(idx == MAXHSPS) break
-  }
-  
-  
-  
-  
-  return (HSPS)
-}
-
-detect_events <- function(HSPS, sampling){
-  
-  DIAG_SEPARATION <- 10
-  # same as HSPS but adding the event
-  output <- matrix(0, nrow=length(HSPS[,1]), ncol=1+length(HSPS[1,]))
-  colnames(output) <- c("x1", "y1", "x2", "y2", "len", "event")
-  j <- 0
-  for(i in 1:(length(HSPS[,1]))){
-    
-    if(sum(HSPS[i,]) > 0){
-      j <- j + 1
-      is_inverted = FALSE
-      is_diagonal = TRUE
-      
-      if(HSPS[i,1] > HSPS[i,3]) is_inverted = TRUE
-      if(abs(HSPS[i,1] - HSPS[i,2]) > DIAG_SEPARATION && abs(HSPS[i,3] - HSPS[i,4]) > DIAG_SEPARATION) is_diagonal = FALSE
-      
-      output[i,1] <- HSPS[i,1] * sampling
-      output[i,2] <- HSPS[i,2] * sampling
-      output[i,3] <- HSPS[i,3] * sampling
-      output[i,4] <- HSPS[i,4] * sampling
-      output[i,5] <- HSPS[i,5] * sampling
-      
-      if(is_diagonal) output[i,6] <- "synteny block"
-      if(is_diagonal && is_inverted) output[i,6] <- "inversion"
-      if(!is_diagonal && !is_inverted) output[i,6] <- "transposition"
-      if(!is_diagonal && is_inverted) output[i,6] <-"inverted transposition"
-    }
-    
-    
-  }
-  
-  return (output[1:j,])
-}
-
-
-
-paint_frags <- function(HSPS, l, sampling){
-  
-  plot(c(HSPS[1,1]*sampling, HSPS[1,3]*sampling), c(HSPS[1,2]*sampling, HSPS[1,4]*sampling), xlim = c(1,l*sampling), ylim = c(1,l*sampling), type="l", xlab="X-genome",ylab="Y-genome")
-  for(i in 2:length(HSPS[,1])){
-    if(sum(HSPS[i,]) > 0){
-      lines(c(HSPS[i,1]*sampling, HSPS[i,3]*sampling), c(HSPS[i,2]*sampling, HSPS[i,4]*sampling))
-    }
-  }
-}
-
-supersample <- function(mat, upscale){
-  l <- min(length(mat[1,]), length(mat[,1]))
-  size <- round(l*upscale)
-  m <- matrix(0, nrow=size, ncol=size)
-  
-  for(i in 1:size){
-    for(j in 1:size){
-      mleft <- max(1, i-1)
-      mright <- min(l, i+1)
-      mup <- max(1, j-1)
-      mdown <- min(l, j+1)
-      
-      ri <- max(1, i/upscale)
-      rj <- max(1, j/upscale)
-      
-      if(mat[ri, rj] > 0){
-        m[i, j] <- 1
-      }
-    }
-  }
-  return (m)
-}
-
-
-downsample <- function(mat, downscale){
-  l <- min(length(mat[1,]), length(mat[,1]))
-  size <- round(l/downscale)
-  m <- matrix(0, nrow=size, ncol=size)
-  
-  for(i in 1:l){
-    for(j in 1:l){
-      mup <- max(1, i-1)
-      mdown <- min(l, i+1)
-      mleft <- max(1, j-1)
-      mright <- min(l, j+1)
-      
-      #print(paste("Matrix at ", i, j))
-      #print(mat[mup:mdown, mleft:mright])
-      
-      if(sum(mat[mup:mdown, mleft:mright]) > 0){
-        #print(paste("goes to ", round(i/downscale), round(j/downscale)))
-        m[round(i/downscale), round(j/downscale)] <- 1
-      }
-    }
-  }
-  
-  return (m)
-}
-
-
-
-
-
-
-
-path_mat = args[1]
-
-
-fancy_name <- strsplit(path_mat, "/")
-fancy_name <- (fancy_name[[1]])[length(fancy_name[[1]])]
-
-# Read sequence lenghts
-con <- file(path_mat,"r")
-seq_lengths <- readLines(con, n=2)
-seq_x_len <- as.numeric(seq_lengths[1])
-seq_y_len <- as.numeric(seq_lengths[2])
-close(con)
-
-
-data <- as.matrix(read.csv(path_mat, sep = " ", skip=2))
-
-# Get sequence names
-name_x <- strsplit(fancy_name, "-")[[1]][1]
-name_y <- strsplit(fancy_name, "-")[[1]][2]
-
-# Max of columns
-
-len_i <- as.numeric(args[2])
-len_j <- as.numeric(args[2])
-
-
-score_density <- data
-aux_density <- data
-
-
-pmax_pos <- which.max(aux_density[,1])
-for(i in 1:len_i){
-  
-  cmax_pos <- which.max(aux_density[i,]) # get max of row
-  
-  if((aux_density[i,cmax_pos]) > 0){ # if it has value
-    #row_from_col <- which.max(aux_density[,cmax_pos]) # get max of the column pointed by maximum of row
-    score_density[i,] <- 0 # put all others in row to 0 i.e. only use this max, EXCEPT for the maximum in the column
-    #score_density[row_from_col, cmax_pos] <- 1
-    score_density[i,cmax_pos] <- 1
-    pmax_pos <- cmax_pos
-  }
-}
-
-
-pmax_pos <- which.max(aux_density[1,])
-
-
-for(i in 1:len_i){
-
-  cmax_pos <- which.max(aux_density[,i]) # get max of column
-
-  if((aux_density[cmax_pos,i]) > 0){
-    score_density[,i] <- 0
-    score_density[cmax_pos,i] <- 1
-    pmax_pos <- cmax_pos
-  }
-}
-
-
-
-
-
-
-
-
-
-score_copy <- score_density
-diag_len <- 4
-
-for(i in 6:(len_i-5)){
-  for(j in 6:(len_j-5)){
-    
-    value <- 0
-    for(w in (-diag_len/2):(diag_len/2)){
-      if(score_density[i+w,j+w] > 0){
-        value <- value + 1
-      }
-    }
-    
-    if(value >= diag_len){
-      for(k in 1:5){
-        score_copy[i+k,j+k] <- 1
-      }
-      for(k in 1:5){
-        score_copy[i-k,j-k] <- 1
-      }
-    }else if(score_copy[i,j]==0){
-      score_copy[i,j] <- 0
-    }
-  }
-}
-
-for(i in 6:(len_i-5)){
-  for(j in 6:(len_j-5)){
-    
-    value <- 0
-    for(w in (-diag_len/2):(diag_len/2)){
-      if(score_density[i-w,j+w] > 0){
-        value <- value + 1
-      }
-    }
-    
-    if(value >= diag_len){
-      for(k in 1:5){
-        score_copy[i-k,j+k] <- 1
-      }
-      for(k in 1:5){
-        score_copy[i+k,j-k] <- 1
-      }
-    }else if(score_copy[i,j]==0){
-      score_copy[i,j] <- 0
-    }
-  }
-}
-
-
-# Kernel to remove single points
-
-for(i in 1:(length(score_copy[,1]))){
-  for(j in 1:(length(score_copy[1,]))){
-    
-    value <- 0
-    
-    min_i <- max(1, i-1)
-    max_i <- min(len_i, i+1)
-    min_j <- max(1, j-1)
-    max_j <- min(len_j, j+1)
-    
-    value <- sum(score_copy[min_i:max_i, min_j:max_j])
-    
-    if(value < 2) score_copy[i,j] <- 0
-  }
-}
-
-# To compute the score
-score <- 0
-pmax_pos <- which.max(score_copy[,1])
-dist_th <- 1.5
-besti <- 1
-bestj <- pmax_pos
-dvec1 <- abs(which.max(score_copy[,2]) - which.max(score_copy[,1]))
-dvec2 <- abs(which.max(score_copy[,3]) - which.max(score_copy[,2]))
-dvec3 <- abs(which.max(score_copy[,4]) - which.max(score_copy[,3]))
-for(i in 5:len_i){
-  
-  #print(paste(paste(paste(dvec1, dvec2), dvec3), mean(c(dvec1, dvec2, dvec3))))
-  distance <- mean(c(dvec1, dvec2, dvec3))
-
-  
-  
-  dvec1 <- dvec2
-  dvec2 <- dvec3
-  dvec3 <- abs(which.max(score_copy[,i]) - which.max(score_copy[,i-1]))
-  
-  # If there is a 0 or we are too far away just add max distance!
-  if(distance > dist_th || distance == 0){
-    score <- score + len_i
-  }
-  
-}
-
-
-score <- (score/(len_i^2))
-print(score)
-
-
-sampling_value <- 5
-submat <- downsample(score_copy, sampling_value)
-m <- growing_regions((submat), WSIZE = 7, TH = 5, penalty = 15)
-events <- detect_events(m, sampling_value)
-events <- rbind(events, c(0,0,0,0,0,"Null event"))
-write(as.character(c(seq_x_len, seq_y_len)), file=paste(path_mat,".events.txt", sep=""), append = TRUE, sep =",", ncolumns=2)
-write(as.character(c("x1", "y1", "x2", "y2", "len", "event")), file=paste(path_mat,".events.txt", sep=""), append = TRUE, sep =",", ncolumns=6)
-for(i in 1:length(events[,1])){
-  write(as.character(events[i,]), file=paste(path_mat,".events.txt", sep=""), append = TRUE, sep =",", ncolumns=6)  
-}
-
-
-
-coords1 <- round(seq(from=0, to=1, by=0.2)*seq_x_len)
-coords2 <- round(seq(from=0, to=1, by=0.2)*seq_y_len)
-
-final_image <- apply((t(score_copy)), 2, rev)
-
-png(paste(path_mat, ".filt.png", sep=""), width = length(data[,1]), height = length(data[,1]))
-image(t(final_image), col = grey(seq(1, 0, length = 256)), xaxt='n', yaxt='n', main = paste(fancy_name, paste("filt. score=", score)), xlab = name_x, ylab = name_y, axes = FALSE)
-axis(1, tick = TRUE, labels = (coords1), at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1))
-axis(2, tick = TRUE, labels = rev(coords2), at = c(0.0, 0.2, 0.4, 0.6, 0.8, 1))
-dev.off()
-
-
-# Output pixel coordinates of highly conserved signals
-
-# To clear it in case it existed
-write(c(paste("X", "Y")), file = paste("hits-XY-", paste(fancy_name, ".hits", sep=""), sep=""), append = FALSE, sep = "\n")
-
-
-for(i in 1:(length(score_copy[,1]))){
-  for(j in 1:(length(score_copy[1,]))){
-    if(score_copy[i,j] != 0){
-      write(c(paste(i, j)), file = paste("hits-XY-", paste(fancy_name, ".hits", sep=""), sep=""), append = TRUE, sep = "\n")
-    }
-  }
-}