# HG changeset patch
# User petr-novak
# Date 1565796282 14400
# Node ID 1d5883a9ec3a562053a37fc4ac887ff5215de6fc
# Parent d0431a8396067d247aea4ae45cdf95c6d4a52bfc
Deleted selected files

diff -r d0431a839606 -r 1d5883a9ec3a fasta2database.R
--- a/fasta2database.R	Wed Aug 14 11:24:15 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-library(Biostrings)
-input_fasta = commandArgs(T)[1]
-## for testing input_fasta="/mnt/raid/454_data/RE2_benchmark/REPET_annotation/Prunus_persica/DANTE_proteins_filtered.fasta"
-s = readAAStringSet(input_fasta)
-names_table = do.call("rbind", strsplit(names(s)," "))
-head(names_table)
-classification_table = paste(names_table[,1], gsub("|","\t",names_table[,3], fixed = TRUE), sep="\t")
-cat(unique(classification_table), sep="\n", file = paste(input_fasta, ".classification", sep = ""))
-
-new_fasta_names = paste("NA-", names_table[,2], "__", names_table[,1], sep="")
-
-names(s) = new_fasta_names
-
-writeXStringSet(s, filepath = paste(input_fasta, ".db",sep=''))