Mercurial > repos > petr-novak > dante
diff fasta2database.R @ 10:d0431a839606 draft
Uploaded
author | petr-novak |
---|---|
date | Wed, 14 Aug 2019 11:24:15 -0400 |
parents | |
children |
line wrap: on
line diff
## fasta2database.R
##
## Usage: Rscript fasta2database.R <input_fasta>
##
## Reads an amino-acid FASTA file and writes two files next to it:
##   <input_fasta>.classification  unique tab-separated lines made of header
##                                 field 1 followed by header field 3 with
##                                 every "|" replaced by a tab
##   <input_fasta>.db              the same sequences renamed to
##                                 "NA-<field 2>__<field 1>"
##
## Each FASTA header is split on single spaces and must provide at least
## three fields; only the first three are used.
## NOTE(review): presumably field 1 is a sequence id, field 2 a protein domain
## name and field 3 a "|"-separated classification path - confirm against the
## tool that produces the input.

library(Biostrings)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L || is.na(args[1]) || !nzchar(args[1])) {
  stop("Usage: Rscript fasta2database.R <input_fasta>", call. = FALSE)
}
input_fasta <- args[1]
## for testing input_fasta="/mnt/raid/454_data/RE2_benchmark/REPET_annotation/Prunus_persica/DANTE_proteins_filtered.fasta"

if (!file.exists(input_fasta)) {
  stop("Input FASTA file not found: ", input_fasta, call. = FALSE)
}

s <- readAAStringSet(input_fasta)
if (length(s) == 0L) {
  stop("No sequences found in ", input_fasta, call. = FALSE)
}

## Split every header on single spaces. Headers with fewer than three fields
## are rejected up front: binding ragged rows with rbind() would recycle the
## short ones and silently write wrong names and classifications.
header_fields <- strsplit(names(s), " ", fixed = TRUE)
malformed <- which(lengths(header_fields) < 3L)
if (length(malformed) > 0L) {
  stop(
    length(malformed), " FASTA header(s) have fewer than 3 space-separated ",
    "fields, e.g.: ",
    paste(head(names(s)[malformed], 5L), collapse = "; "),
    call. = FALSE
  )
}

## One row per sequence, one column per header field (first three only).
names_table <- t(vapply(
  header_fields,
  function(fields) fields[1:3],
  character(3),
  USE.NAMES = FALSE
))

## Preview of the parsed header fields (printed to stdout).
print(head(names_table))

## Field 1, then field 3 expanded into tab-separated columns.
classification_table <- paste(
  names_table[, 1],
  gsub("|", "\t", names_table[, 3], fixed = TRUE),
  sep = "\t"
)
cat(
  unique(classification_table),
  sep = "\n",
  file = paste0(input_fasta, ".classification")
)

## Rename the sequences and write them out as the database FASTA.
new_fasta_names <- paste0("NA-", names_table[, 2], "__", names_table[, 1])
names(s) <- new_fasta_names

writeXStringSet(s, filepath = paste0(input_fasta, ".db"))