# view fasta2database.R @ 12:ddc6bab20889 draft
#
# Uploaded
# author petr-novak
# date Thu, 15 Aug 2019 07:20:12 -0400
# parents d0431a839606
# children
# line wrap: on
# line source

library(Biostrings)
## Convert a protein FASTA file into a renamed sequence database plus a
## classification table.
##
## Usage: Rscript fasta2database.R <input_fasta>
##
## Every FASTA header must contain at least three space-separated fields:
##   field 1 and field 2 are combined into the new sequence name,
##   field 3 is a "|"-separated string that is written tab-separated.
##
## Outputs (written next to the input file):
##   <input_fasta>.classification  unique lines "<field 1>\t<field 3 with | -> \t>"
##   <input_fasta>.db              the same sequences renamed "NA-<field 2>__<field 1>"
cli_args <- commandArgs(trailingOnly = TRUE)
if (length(cli_args) < 1L || is.na(cli_args[1]) || !nzchar(cli_args[1])) {
  stop("Usage: Rscript fasta2database.R <input_fasta>", call. = FALSE)
}
input_fasta <- cli_args[1]
## for testing input_fasta="/mnt/raid/454_data/RE2_benchmark/REPET_annotation/Prunus_persica/DANTE_proteins_filtered.fasta"
s <- readAAStringSet(input_fasta)
if (length(s) == 0L) {
  stop("No sequences found in '", input_fasta, "'", call. = FALSE)
}

header_fields <- strsplit(names(s), " ", fixed = TRUE)

## rbind() on ragged input recycles the shorter vectors, which would silently
## fill missing fields with values from the wrong position. Refuse such
## headers instead of writing a corrupted table.
malformed <- which(lengths(header_fields) < 3L)
if (length(malformed) > 0L) {
  stop(
    length(malformed), " FASTA header(s) have fewer than 3 space-separated ",
    "fields, e.g.: ",
    paste(sQuote(names(s)[head(malformed, 3L)]), collapse = ", "),
    call. = FALSE
  )
}

## Only the first three fields are used; any extra fields are ignored.
names_table <- do.call("rbind", lapply(header_fields, `[`, 1:3))
## Preview of the parsed headers on stdout (explicit print instead of relying
## on top-level auto-printing).
print(head(names_table))

classification_table <- paste(
  names_table[, 1],
  gsub("|", "\t", names_table[, 3], fixed = TRUE),
  sep = "\t"
)
cat(
  unique(classification_table),
  sep = "\n",
  file = paste0(input_fasta, ".classification")
)

new_fasta_names <- paste0("NA-", names_table[, 2], "__", names_table[, 1])

names(s) <- new_fasta_names

writeXStringSet(s, filepath = paste0(input_fasta, ".db"))