Mercurial > repos > petr-novak > dante_ltr
diff R/ltr_utils.R @ 3:6ae4a341d1f3 draft
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
author | petr-novak |
---|---|
date | Tue, 03 May 2022 12:38:12 +0000 |
parents | f131886ea194 |
children | c33d6583e548 |
line wrap: on
line diff
--- a/R/ltr_utils.R Tue Apr 12 12:55:32 2022 +0000 +++ b/R/ltr_utils.R Tue May 03 12:38:12 2022 +0000 @@ -160,7 +160,6 @@ bl_table <- do.call(rbind, bl_list) unlink(qf) #unlink(outf) - print(outf) unlink(dbf) unlink(script) return(bl_table) @@ -185,7 +184,7 @@ analyze_TE <- function(seqs, ncpus = 10, word_size = 20){ - blt <- blast_all2all(seqs, ncpus = ncpus, word_size = word_size) + blt <- blast_all2all(seqs, ncpus = ncpus, word_size = word_size, perc_identity = 90) te_conflict_info <- identify_conflicts(blt) blt_te_ok <- blast_table_subset(blt, te_conflict_info$ok) te_ok_lineages <- split(blt_te_ok, @@ -284,7 +283,9 @@ return(bl[bl$qaccver %in% id & bl$saccver %in% id,, drop = FALSE]) } -get_representative_ranges <- function(bl, min_length = 60){ +get_representative_ranges <- function(bl, min_length = 200, min_identity = 98){ + bl <- bl[bl$pident>=min_identity, , drop=FALSE] + bl <- bl[bl$pident>=min_identity & bl$length >= min_length, , drop=FALSE] score <- sort(unlist(by(bl$bitscore, bl$qaccver, sum, simplify = FALSE)), decreasing = TRUE) L <- bl$qlen[!duplicated(bl$qaccver)] @@ -320,7 +321,6 @@ L <- nchar(seqs) R <- matrix(ncol = niter, nrow = length(seqs)) for (i in 1:niter){ - print(i) seqs_rnd <- DNAStringSet(sapply(L, function(n) paste(sample(c("A", "C", "T", "G"), n, replace=TRUE), collapse=""))) R[,i] <- seq_diversity(seqs_rnd, km = km)$richness } @@ -623,9 +623,13 @@ return(out) } -getSeqNamed <- function(s, gr) { +getSeqNamed <- function(s, gr, name = NULL) { spart <- getSeq(s, gr) - id1 <- paste0(seqnames(gr), '_', start(gr), "_", end(gr)) + if (is.null(name)){ + id1 <- paste0(seqnames(gr), '_', start(gr), "_", end(gr)) + }else{ + id1 <- mcols(gr)[,name] + } id2 <- gr$Final_Classification names(spart) <- paste0(id1, "#", id2) spart