Mercurial > repos > petr-novak > dante_ltr
comparison R/ltr_utils.R @ 3:6ae4a341d1f3 draft
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
author | petr-novak |
---|---|
date | Tue, 03 May 2022 12:38:12 +0000 |
parents | f131886ea194 |
children | c33d6583e548 |
comparison
equal
deleted
inserted
replaced
2:f131886ea194 | 3:6ae4a341d1f3 |
---|---|
158 | 158 |
159 bl_list <- lapply(outf, read.table, stringsAsFactors = FALSE, col.names = unlist(strsplit(cols, " ")), sep="\t", comment.char = "") | 159 bl_list <- lapply(outf, read.table, stringsAsFactors = FALSE, col.names = unlist(strsplit(cols, " ")), sep="\t", comment.char = "") |
160 bl_table <- do.call(rbind, bl_list) | 160 bl_table <- do.call(rbind, bl_list) |
161 unlink(qf) | 161 unlink(qf) |
162 #unlink(outf) | 162 #unlink(outf) |
163 print(outf) | |
164 unlink(dbf) | 163 unlink(dbf) |
165 unlink(script) | 164 unlink(script) |
166 return(bl_table) | 165 return(bl_table) |
167 } | 166 } |
168 | 167 |
183 ) | 182 ) |
184 } | 183 } |
185 | 184 |
186 | 185 |
187 analyze_TE <- function(seqs, ncpus = 10, word_size = 20){ | 186 analyze_TE <- function(seqs, ncpus = 10, word_size = 20){ |
188 blt <- blast_all2all(seqs, ncpus = ncpus, word_size = word_size) | 187 blt <- blast_all2all(seqs, ncpus = ncpus, word_size = word_size, perc_identity = 90) |
189 te_conflict_info <- identify_conflicts(blt) | 188 te_conflict_info <- identify_conflicts(blt) |
190 blt_te_ok <- blast_table_subset(blt, te_conflict_info$ok) | 189 blt_te_ok <- blast_table_subset(blt, te_conflict_info$ok) |
191 te_ok_lineages <- split(blt_te_ok, | 190 te_ok_lineages <- split(blt_te_ok, |
192 gsub( | 191 gsub( |
193 ".+[|]", | 192 ".+[|]", |
282 | 281 |
283 blast_table_subset <- function(bl,id){ | 282 blast_table_subset <- function(bl,id){ |
284 return(bl[bl$qaccver %in% id & bl$saccver %in% id,, drop = FALSE]) | 283 return(bl[bl$qaccver %in% id & bl$saccver %in% id,, drop = FALSE]) |
285 } | 284 } |
286 | 285 |
287 get_representative_ranges <- function(bl, min_length = 60){ | 286 get_representative_ranges <- function(bl, min_length = 200, min_identity = 98){ |
287 bl <- bl[bl$pident>=min_identity, , drop=FALSE] | |
288 bl <- bl[bl$pident>=min_identity & bl$length >= min_length, , drop=FALSE] | |
288 score <- sort(unlist(by(bl$bitscore, bl$qaccver, sum, simplify = FALSE)), | 289 score <- sort(unlist(by(bl$bitscore, bl$qaccver, sum, simplify = FALSE)), |
289 decreasing = TRUE) | 290 decreasing = TRUE) |
290 L <- bl$qlen[!duplicated(bl$qaccver)] | 291 L <- bl$qlen[!duplicated(bl$qaccver)] |
291 names(L) <- bl$qaccver[!duplicated(bl$qaccver)] | 292 names(L) <- bl$qaccver[!duplicated(bl$qaccver)] |
292 gr <- GRanges(seqnames = bl$qaccver, | 293 gr <- GRanges(seqnames = bl$qaccver, |
318 | 319 |
319 expected_diversity <- function(seqs, niter=100, km = 6){ | 320 expected_diversity <- function(seqs, niter=100, km = 6){ |
320 L <- nchar(seqs) | 321 L <- nchar(seqs) |
321 R <- matrix(ncol = niter, nrow = length(seqs)) | 322 R <- matrix(ncol = niter, nrow = length(seqs)) |
322 for (i in 1:niter){ | 323 for (i in 1:niter){ |
323 print(i) | |
324 seqs_rnd <- DNAStringSet(sapply(L, function(n) paste(sample(c("A", "C", "T", "G"), n, replace=TRUE), collapse=""))) | 324 seqs_rnd <- DNAStringSet(sapply(L, function(n) paste(sample(c("A", "C", "T", "G"), n, replace=TRUE), collapse=""))) |
325 R[,i] <- seq_diversity(seqs_rnd, km = km)$richness | 325 R[,i] <- seq_diversity(seqs_rnd, km = km)$richness |
326 } | 326 } |
327 R | 327 R |
328 | 328 |
621 total <- colSums(out) | 621 total <- colSums(out) |
622 out <- rbind(out, Total = total) | 622 out <- rbind(out, Total = total) |
623 return(out) | 623 return(out) |
624 } | 624 } |
625 | 625 |
626 getSeqNamed <- function(s, gr) { | 626 getSeqNamed <- function(s, gr, name = NULL) { |
627 spart <- getSeq(s, gr) | 627 spart <- getSeq(s, gr) |
628 id1 <- paste0(seqnames(gr), '_', start(gr), "_", end(gr)) | 628 if (is.null(name)){ |
629 id1 <- paste0(seqnames(gr), '_', start(gr), "_", end(gr)) | |
630 }else{ | |
631 id1 <- mcols(gr)[,name] | |
632 } | |
629 id2 <- gr$Final_Classification | 633 id2 <- gr$Final_Classification |
630 names(spart) <- paste0(id1, "#", id2) | 634 names(spart) <- paste0(id1, "#", id2) |
631 spart | 635 spart |
632 } | 636 } |
633 | 637 |