Mercurial > repos > petr-novak > dante
comparison summarize_gff.R @ 10:d0431a839606 draft
Uploaded
author | petr-novak |
---|---|
date | Wed, 14 Aug 2019 11:24:15 -0400 |
parents | |
children | ddc6bab20889 |
comparison
equal
deleted
inserted
replaced
9:ed4d9ede9cb4 | 10:d0431a839606 |
---|---|
## summarize hits
## Command-line arguments, in order:
##   1. input dante gff3
##   2. output table
##   3+ remaining arguments, kept as a character vector in `summarized_by`
##      (intended as the names of the columns to tabulate)
## trailingOnly = TRUE is spelled out: the shorthand `T` is an ordinary
## variable that can be reassigned, while TRUE is a reserved word.
cli_args <- commandArgs(trailingOnly = TRUE)
filepath <- cli_args[1]             ## input dante gff3
output <- cli_args[2]               ## output table
summarized_by <- cli_args[-(1:2)]   ## everything after the two paths
5 | |
## Read a DANTE GFF3 file into a data frame with one column per attribute.
##
## filepath: path to the input dante gff3 file.
##
## Returns a data.frame holding the eight fixed GFF3 columns (seqid, source,
## type, start, end, score, strand, phase) followed by twelve attribute
## columns taken from the ninth GFF3 column. The last five attributes
## (Identity, Similarity, Relat_Length, Relat_Interruptions,
## Hit_to_DB_Length) are converted to numeric; the others stay character.
##
## Stops with an error when the file has fewer than nine columns or when a
## record carries fewer than twelve attribute values.
readGFF3fromDante <- function(filepath) {
  ## GFF3 columns are tab-delimited. Splitting on tabs only and disabling
  ## quote handling keeps fields that contain spaces or apostrophes intact
  ## instead of breaking or shifting the columns.
  dfraw <- read.table(filepath, sep = "\t", quote = "", as.is = TRUE)
  if (ncol(dfraw) < 9) {
    stop("expected 9 tab-separated GFF3 columns in '", filepath,
         "', found ", ncol(dfraw), call. = FALSE)
  }

  gff_df <- dfraw[, 1:8]
  colnames(gff_df) <- c("seqid", "source", "type", "start", "end", "score",
                        "strand", "phase")

  text_attrs <- c("Name", "Final_Classification",
                  "Region_Hits_Classifications", "Best_Hit",
                  "Best_Hit_DB_Pos", "DB_Seq", "Query_Seq")
  numeric_attrs <- c("Identity", "Similarity", "Relat_Length",
                     "Relat_Interruptions", "Hit_to_DB_Length")
  attr_names <- c(text_attrs, numeric_attrs)

  ## assume same order, same attributes names: values are picked by
  ## position, so the key part of every key=value pair is discarded.
  pieces <- strsplit(as.character(dfraw[[9]]), split = "=|;")
  values <- lapply(pieces, function(x) x[c(FALSE, TRUE)])

  ## Without this check rbind() would silently recycle short records.
  too_short <- which(lengths(values) < length(attr_names))
  if (length(too_short) > 0) {
    stop("record ", too_short[1], " in '", filepath, "' has fewer than ",
         length(attr_names), " attributes", call. = FALSE)
  }

  gffattr <- do.call(
    rbind,
    lapply(values, function(v) v[seq_along(attr_names)])
  )

  for (i in seq_along(text_attrs)) {
    gff_df[[text_attrs[i]]] <- gffattr[, i]
  }
  for (i in seq_along(numeric_attrs)) {
    gff_df[[numeric_attrs[i]]] <-
      as.numeric(gffattr[, length(text_attrs) + i])
  }
  gff_df
}
32 | |
gff <- readGFF3fromDante(filepath)
## example: summarized_by = c("Final_Classification", "Name", "seqid")

## Fail early with a readable message rather than an opaque subsetting or
## tabulation error further down.
if (length(summarized_by) == 0) {
  stop("no columns to summarize by were given on the command line",
       call. = FALSE)
}
unknown_cols <- setdiff(summarized_by, colnames(gff))
if (length(unknown_cols) > 0) {
  stop("unknown column(s) to summarize by: ",
       paste(unknown_cols, collapse = ", "), call. = FALSE)
}

## The grouping columns come from `summarized_by`, the name the script
## defines from the command line. drop = FALSE keeps a data frame even for
## a single column, so the count table keeps that column's name.
tbl <- data.frame(table(gff[, summarized_by, drop = FALSE]))

## Write to the requested output path, not over the input GFF3 file.
write.table(tbl, file = output, row.names = FALSE, quote = FALSE)