Mercurial > repos > petr-novak > dante
view fasta2database.R @ 14:a6c55d1bdb6c draft
Uploaded
author | petr-novak |
---|---|
date | Wed, 28 Aug 2019 08:08:47 -0400 |
parents | d0431a839606 |
children |
line wrap: on
line source
# Convert a protein FASTA file into a renamed database FASTA plus a
# classification table.
#
# Usage: Rscript fasta2database.R <input_fasta>
#
# Every sequence header must contain at least three space-separated fields.
# Two files are written next to the input:
#   <input_fasta>.classification  unique lines made of field 1, a tab, and
#                                 field 3 with every "|" turned into a tab
#   <input_fasta>.db              the same sequences, renamed to
#                                 "NA-<field 2>__<field 1>"
library(Biostrings)

input_fasta <- commandArgs(trailingOnly = TRUE)[1]
## for testing
## input_fasta="/mnt/raid/454_data/RE2_benchmark/REPET_annotation/Prunus_persica/DANTE_proteins_filtered.fasta"

# Indexing an empty argument vector yields NA; fail with a usage message
# instead of handing NA to the FASTA reader.
if (is.na(input_fasta)) {
  stop("usage: Rscript fasta2database.R <input_fasta>", call. = FALSE)
}

s <- readAAStringSet(input_fasta)
if (length(s) == 0L) {
  stop("no sequences found in ", input_fasta, call. = FALSE)
}

# Split each header on single spaces and keep exactly the first three fields.
# rbind() on ragged pieces would silently recycle the shorter ones and shift
# values into the wrong columns, so headers that are too short are rejected.
header_fields <- strsplit(names(s), " ", fixed = TRUE)
too_short <- lengths(header_fields) < 3L
if (any(too_short)) {
  stop(
    sum(too_short), " sequence header(s) have fewer than 3 space-separated ",
    "fields, e.g.: ", names(s)[which(too_short)[1]],
    call. = FALSE
  )
}
names_table <- do.call(rbind, lapply(header_fields, `[`, 1:3))

# Preview of the parsed header fields on stdout.
print(head(names_table))

# One tab-separated line per sequence: field 1, then field 3 with its
# "|"-separated parts expanded into tab-separated columns.
classification_table <- paste(
  names_table[, 1],
  gsub("|", "\t", names_table[, 3], fixed = TRUE),
  sep = "\t"
)
cat(
  unique(classification_table),
  sep = "\n",
  file = paste0(input_fasta, ".classification")
)

# Rename the sequences and write them out as the database FASTA.
new_fasta_names <- paste0("NA-", names_table[, 2], "__", names_table[, 1])
names(s) <- new_fasta_names
writeXStringSet(s, filepath = paste0(input_fasta, ".db"))