annotate fasta2database.R @ 22:1eabd42e00ef draft

Uploaded
author petr-novak
date Fri, 03 Apr 2020 07:27:59 -0400
parents d0431a839606
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
d0431a839606 Uploaded
petr-novak
parents:
diff changeset
# Convert a protein FASTA file into a renamed sequence database plus a
# tab-separated classification table.
#
# Usage: Rscript fasta2database.R <input_fasta>
#
# Every FASTA header is split on single spaces and its first three fields are
# used. NOTE(review): the headers presumably look like
# "<id> <domain> <level1|level2|...>" -- confirm against the tool that
# produces the input file.
#
# Outputs (written next to the input file):
#   <input_fasta>.classification  unique lines "<field1>\t<field3>", with every
#                                 "|" in field 3 replaced by a tab
#   <input_fasta>.db              the same sequences renamed to
#                                 "NA-<field2>__<field1>"
library(Biostrings)

# Fail early with a usage message instead of passing NA to readAAStringSet().
cli_args <- commandArgs(trailingOnly = TRUE)
if (length(cli_args) < 1L || is.na(cli_args[1]) || !nzchar(cli_args[1])) {
  stop("usage: Rscript fasta2database.R <input_fasta>", call. = FALSE)
}
input_fasta <- cli_args[1]
## for testing input_fasta="/mnt/raid/454_data/RE2_benchmark/REPET_annotation/Prunus_persica/DANTE_proteins_filtered.fasta"

s <- readAAStringSet(input_fasta)
if (length(s) == 0L) {
  # paste() would turn the empty name vector into the single string "NA-__",
  # which cannot be assigned as names of an empty set.
  stop("no sequences found in ", input_fasta, call. = FALSE)
}

# Split headers into fields and make sure each one has the three fields used
# below; rbind() on ragged input would silently recycle values into the wrong
# columns.
header_fields <- strsplit(names(s), " ")
malformed <- which(lengths(header_fields) < 3L)
if (length(malformed) > 0L) {
  stop(
    sprintf(
      "%d FASTA header(s) have fewer than 3 space-separated fields; first: '%s'",
      length(malformed), names(s)[malformed[1]]
    ),
    call. = FALSE
  )
}
names_table <- do.call("rbind", lapply(header_fields, `[`, 1:3))

# Preview of the parsed headers on stdout (kept from the original script).
print(head(names_table))

# One line per sequence: field 1, then field 3 with "|" expanded to tabs.
classification_table <- paste(
  names_table[, 1],
  gsub("|", "\t", names_table[, 3], fixed = TRUE),
  sep = "\t"
)
cat(
  unique(classification_table),
  sep = "\n",
  file = paste0(input_fasta, ".classification")
)

# Rename sequences to "NA-<field2>__<field1>" and write the database FASTA.
names(s) <- paste0("NA-", names_table[, 2], "__", names_table[, 1])

writeXStringSet(s, filepath = paste0(input_fasta, ".db"))