diff extract_putative_ltr.R @ 2:f131886ea194 draft

"planemo upload commit 891bfe9acf7349c2b887aff6d7e52a7f4ebf3b3a"
author petr-novak
date Tue, 12 Apr 2022 12:55:32 +0000
parents 7b0bbe7477c4
children c33d6583e548
line wrap: on
line diff
--- a/extract_putative_ltr.R	Wed Mar 09 09:31:31 2022 +0000
+++ b/extract_putative_ltr.R	Tue Apr 12 12:55:32 2022 +0000
@@ -72,6 +72,11 @@
   s <- readDNAStringSet("/mnt/ceph/454_data/Vicia_faba_assembly/assembly/ver_210910
   /fasta_parts/211010_Vfaba_chr5.fasta")
 
+  g <- rtracklayer::import("/mnt/raid/users/petr/workspace/dante_ltr/test_data/big_test_data//Cocoa_theobroma_DANTE_filtered.gff3")
+  s <- readDNAStringSet("/mnt/raid/users/petr/workspace/dante_ltr/test_data/big_test_data/Cocoa_theobroma_chr1.fasta.gz")
+
+  source("R/ltr_utils.R")
+
   g <- rtracklayer::import("./test_data/sample_DANTE.gff3")
   s <- readDNAStringSet("./test_data/sample_genome.fasta")
   outfile <- "/mnt/raid/users/petr/workspace/ltr_finder_test/te_with_domains_2.gff3"
@@ -159,6 +164,7 @@
                                                 grR[x]),
                mc.set.seed = TRUE, mc.cores = opt$cpu, mc.preschedule = FALSE
 )
+
 cat('done.\n')
 
 good_TE <- TE[!sapply(TE, is.null)]
@@ -177,14 +183,13 @@
 src <- as.character(gff3_out$source)
 src[is.na(src)] <- "dante_ltr"
 gff3_out$source <- src
-
-# TODO export all files to single directory
-# TODO export individual groups DL, DLT, DLP DLPT gff3
+gff3_out$Rank <- get_te_rank(gff3_out)
+# TODO add attribute specifying individual groups DL, DLT, DLP, DLPT gff3
 
 export(gff3_out, con = paste0(outfile, ".gff3"), format = 'gff3')
-# summary statistics
+
 all_tbl <- get_te_statistics(gff3_out, RT)
-write.table(all_tbl, file = paste0(outfile, "_statistics.csv"), sep = "\t", quote = FALSE, row.names = FALSE)
+write.table(all_tbl, file = paste0(outfile, "_statistics.csv"), sep = "\t", quote = FALSE, row.names = TRUE)
 # export fasta files:
 s_te <- get_te_sequences(gff3_out, s)