# HG changeset patch # User petr-novak # Date 1651581492 0 # Node ID 6ae4a341d1f3a6dcc8c084a0ebb71eee21e9ac11 # Parent f131886ea1940849243c57410078e35e8629f120 "planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a" diff -r f131886ea194 -r 6ae4a341d1f3 R/ltr_utils.R --- a/R/ltr_utils.R Tue Apr 12 12:55:32 2022 +0000 +++ b/R/ltr_utils.R Tue May 03 12:38:12 2022 +0000 @@ -160,7 +160,6 @@ bl_table <- do.call(rbind, bl_list) unlink(qf) #unlink(outf) - print(outf) unlink(dbf) unlink(script) return(bl_table) @@ -185,7 +184,7 @@ analyze_TE <- function(seqs, ncpus = 10, word_size = 20){ - blt <- blast_all2all(seqs, ncpus = ncpus, word_size = word_size) + blt <- blast_all2all(seqs, ncpus = ncpus, word_size = word_size, perc_identity = 90) te_conflict_info <- identify_conflicts(blt) blt_te_ok <- blast_table_subset(blt, te_conflict_info$ok) te_ok_lineages <- split(blt_te_ok, @@ -284,7 +283,9 @@ return(bl[bl$qaccver %in% id & bl$saccver %in% id,, drop = FALSE]) } -get_representative_ranges <- function(bl, min_length = 60){ +get_representative_ranges <- function(bl, min_length = 200, min_identity = 98){ + bl <- bl[bl$pident>=min_identity, , drop=FALSE] + bl <- bl[bl$pident>=min_identity & bl$length >= min_length, , drop=FALSE] score <- sort(unlist(by(bl$bitscore, bl$qaccver, sum, simplify = FALSE)), decreasing = TRUE) L <- bl$qlen[!duplicated(bl$qaccver)] @@ -320,7 +321,6 @@ L <- nchar(seqs) R <- matrix(ncol = niter, nrow = length(seqs)) for (i in 1:niter){ - print(i) seqs_rnd <- DNAStringSet(sapply(L, function(n) paste(sample(c("A", "C", "T", "G"), n, replace=TRUE), collapse=""))) R[,i] <- seq_diversity(seqs_rnd, km = km)$richness } @@ -623,9 +623,13 @@ return(out) } -getSeqNamed <- function(s, gr) { +getSeqNamed <- function(s, gr, name = NULL) { spart <- getSeq(s, gr) - id1 <- paste0(seqnames(gr), '_', start(gr), "_", end(gr)) + if (is.null(name)){ + id1 <- paste0(seqnames(gr), '_', start(gr), "_", end(gr)) + }else{ + id1 <- mcols(gr)[,name] + } id2 <- gr$Final_Classification names(spart) <- paste0(id1, "#", id2) spart diff -r f131886ea194 -r 6ae4a341d1f3 clean_dante_ltr.xml --- a/clean_dante_ltr.xml Tue Apr 12 12:55:32 2022 +0000 +++ b/clean_dante_ltr.xml Tue May 03 12:38:12 2022 +0000 @@ -1,4 +1,4 @@ - + r-optparse @@ -12,7 +12,16 @@ && mv output_clean.gff3 $dante_ltr_clean && - mv output_RM_lib.fasta $rm_lib + mv output_RM_lib_non_redundant.fasta $rm_lib + && + mv output_RM_lib_full_TE.fasta $te_full + && + mv output_RM_lib_5LTR.fasta $ltr5 + && + mv output_RM_lib_3LTR.fasta $ltr3 + && + mv output_summary.pdf $summary + ]]> @@ -23,6 +32,19 @@ elements based on annotation $dante_ltr.hid and reference $reference.hid"/> + + + + + + + + + + blast r-optparse