annotate test-data/test.r @ 0:0a0bba8e1823 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
author rnateam
date Wed, 22 Feb 2017 07:32:08 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
1 library(NASTIseq)
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
2
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
3 ## generation of test set
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
4 # data(WholeRoot)
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
5 # WholeRoot$genepos$feature <- 'gene'
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
6 #
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
7 # set_attri <- function(attri){
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
8 # attri = paste('gene_id ', '"', attri, '"', ';', sep = '')
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
9 # return(attri)
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
10 # }
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
11 #
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
12 # WholeRoot$genepos$attributes = as.character(lapply(as.character(WholeRoot$genepos$attributes), set_attri))
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
13 #
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
14 # write.table(WholeRoot$genepos, file = "input_TAIR10_annotation.gtf", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
15 # write.table(WholeRoot$smat, file = "input_read_count_smt.tsv", col.names = FALSE, sep = "\t", quote = FALSE)
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
16 # write.table(WholeRoot$asmat, file = "input_read_count_asmt.tsv", col.names = FALSE, sep = "\t", quote = FALSE)
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
17 #
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
18 # write.table(WholeRoot$pospairs, file = "input_positive_pair.tsv", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
19
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
# Load the annotation table from the GTF file; lines starting with '#' are
# skipped and the file is read without a header row.
genepos <- read.delim(
  "input_TAIR10_annotation.gtf",
  header = FALSE,
  comment.char = "#"
)

# Label the nine columns read from the file.
names(genepos) <- c(
  "seqname", "source", "feature", "start", "end",
  "score", "strand", "frame", "attributes"
)

# Keep only the rows whose feature is "gene"; which() discards NA comparisons,
# so rows with a missing feature are dropped as well.
genepos <- genepos[which(genepos$feature == "gene"), , drop = FALSE]
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
23
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
# Extract the identifier from the first attribute of a GTF attribute string.
#
# attri: a single character string such as 'gene_id "AT1G01010"; ...'.
#
# Returns the second whitespace-separated token of the first ';'-delimited
# field with all double quotes removed (e.g. "AT1G01010"), or NA when the
# field has no second token (for instance an empty string).
get_id <- function(attri) {
  # First ';'-separated field, e.g. 'gene_id "AT1G01010"'.
  gene_info <- strsplit(attri, ";", fixed = TRUE)[[1]][1]

  # Trim the field and split on runs of whitespace so that a leading blank,
  # a doubled space or a tab does not shift the position of the value token.
  tokens <- strsplit(trimws(gene_info), "[[:space:]]+")[[1]]

  # The value is the token after the key; strip its surrounding quotes.
  gsub("\"", "", tokens[2], fixed = TRUE)
}
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
30
0a0bba8e1823 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/nastiseq commit 8b472e8680bb0ae5d11ee48b642ab305f9333a48
rnateam
parents:
diff changeset
# Replace each attribute string by the identifier that get_id() extracts from
# it (one string per row).
genepos$attributes <- vapply(
  as.character(genepos$attributes),
  get_id,
  character(1),
  USE.NAMES = FALSE
)

# Tab-separated inputs: the positive pairs are kept as plain strings, and the
# two read-count tables are turned into matrices with the first column used
# as row names.
pospairs <- read.table("input_positive_pair.tsv", sep = "\t", as.is = TRUE)

smat <- as.matrix(
  read.table("input_read_count_smt.tsv", sep = "\t", row.names = 1)
)

asmat <- as.matrix(
  read.table("input_read_count_asmt.tsv", sep = "\t", row.names = 1)
)

# NASTIseq steps, in the original order.
# NOTE(review): smat / asmat are presumably the sense and antisense read
# counts expected by NASTIseq -- confirm against the package manual.
WRscore <- getNASTIscore(smat, asmat)

negpairs <- getnegativepairs(genepos)

WRpred <- NASTIpredict(smat, asmat, pospairs, negpairs)

# NOTE(review): prediction() is not defined in this script; presumably it is
# ROCR::prediction made available by loading NASTIseq -- confirm.
WRpred_rocr <- prediction(WRpred$predictions, WRpred$labels)

# NOTE(review): 0.05 is presumably the false-discovery-rate cutoff -- confirm.
thr <- defineFDR(WRpred_rocr, 0.05)

WR_names <- FindNATs(WRscore, thr, pospairs, genepos)

# Write the two result tables as headerless, unquoted, tab-separated files.
write.table(
  WR_names$newpairs,
  file = "output_newpairs.tsv",
  row.names = FALSE,
  col.names = FALSE,
  sep = "\t",
  quote = FALSE
)

write.table(
  WR_names$neworphan,
  file = "output_neworphan.tsv",
  row.names = FALSE,
  col.names = FALSE,
  sep = "\t",
  quote = FALSE
)