diff fasta2database.R @ 10:d0431a839606 draft

Uploaded
author petr-novak
date Wed, 14 Aug 2019 11:24:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta2database.R	Wed Aug 14 11:24:15 2019 -0400
@@ -0,0 +1,14 @@
+library(Biostrings)
+## Usage: Rscript fasta2database.R <input.fasta>  -- writes <input.fasta>.classification and <input.fasta>.db
+input_fasta <- commandArgs(trailingOnly = TRUE)[1]
+if (is.na(input_fasta)) stop("usage: Rscript fasta2database.R <input.fasta>", call. = FALSE)
+s <- readAAStringSet(input_fasta)
+## Headers are split on single spaces; only fields 1-3 are used, so fail early on short headers instead of letting rbind() recycle ragged rows
+fields <- strsplit(names(s), " ")
+if (any(lengths(fields) < 3)) stop("every FASTA header needs at least 3 space-separated fields", call. = FALSE)
+names_table <- do.call("rbind", lapply(fields, `[`, 1:3))
+classification_table <- paste(names_table[, 1], gsub("|", "\t", names_table[, 3], fixed = TRUE), sep = "\t")
+cat(unique(classification_table), sep = "\n", file = paste0(input_fasta, ".classification"))
+## Sequences are renamed to "NA-<field 2>__<field 1>" before being written out
+names(s) <- paste0("NA-", names_table[, 2], "__", names_table[, 1])
+writeXStringSet(s, filepath = paste0(input_fasta, ".db"))