comparison sequence_overview.r @ 7:ad9be244b104 draft

Uploaded
author davidvanzessen
date Mon, 07 Nov 2016 03:04:07 -0500
parents c33d93683a09
children 3968d04b5724
comparison
equal deleted inserted replaced
6:2ddb9a21f635 7:ad9be244b104
8 gene.classes = unlist(strsplit(args[4], ",")) 8 gene.classes = unlist(strsplit(args[4], ","))
9 hotspot.analysis.sum.file = args[5] 9 hotspot.analysis.sum.file = args[5]
10 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") 10 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
11 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") 11 NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
12 main.html = "index.html" 12 main.html = "index.html"
13 empty.region.filter = args[6]
14
13 15
14 setwd(outputdir) 16 setwd(outputdir)
15 17
16 before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") 18 before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
17 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") 19 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
18 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") 20 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="")
19 21
20 #before.unique = before.unique[!grepl("unmatched", before.unique$best_match),] 22 #before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]
21 23
22 before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) 24 if(empty.region.filter == "leader"){
25 before.unique$seq_conc = paste(before.unique$FR1.IMGT.seq, before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq)
26 } else if(empty.region.filter == "FR1"){
27 before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq)
28 } else if(empty.region.filter == "CDR1"){
29 before.unique$seq_conc = paste(before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq)
30 } else if(empty.region.filter == "FR2"){
31 before.unique$seq_conc = paste(before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq)
32 }
23 33
24 IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] 34 IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
25 IDs$best_match = as.character(IDs$best_match) 35 IDs$best_match = as.character(IDs$best_match)
26 36
27 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
28
29 dat = data.frame(table(before.unique$seq_conc)) 37 dat = data.frame(table(before.unique$seq_conc))
30 #dat = data.frame(table(merged$seq_conc, merged$Functionality)) 38
31
32 #dat = dat[dat$Freq > 1,]
33
34 #names(dat) = c("seq_conc", "Functionality", "Freq")
35 names(dat) = c("seq_conc", "Freq") 39 names(dat) = c("seq_conc", "Freq")
36 40
37 dat$seq_conc = factor(dat$seq_conc) 41 dat$seq_conc = factor(dat$seq_conc)
38 42
39 dat = dat[order(as.character(dat$seq_conc)),] 43 dat = dat[order(as.character(dat$seq_conc)),]
65 } 69 }
66 res = paste(res, "</table>") 70 res = paste(res, "</table>")
67 } 71 }
68 72
69 cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=F) 73 cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=F)
70 #cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) 74
75 if(empty.region.filter == "leader"){
76 cat("<caption>FR1+CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
77 } else if(empty.region.filter == "FR1"){
78 cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
79 } else if(empty.region.filter == "CDR1"){
80 cat("<caption>FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
81 } else if(empty.region.filter == "FR2"){
82 cat("<caption>CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
83 }
84
71 cat("<tr>", file=main.html, append=T) 85 cat("<tr>", file=main.html, append=T)
72 cat("<th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th><th>un</th>", file=main.html, append=T) 86 cat("<th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th><th>un</th>", file=main.html, append=T)
73 cat("<th>total CA</th><th>total CG</th><th>number of subclasses</th><th>present in both Ca and Cg</th><th>Ca1+Ca2</th>", file=main.html, append=T) 87 cat("<th>total CA</th><th>total CG</th><th>number of subclasses</th><th>present in both Ca and Cg</th><th>Ca1+Ca2</th>", file=main.html, append=T)
74 cat("<th>Cg1+Cg2</th><th>Cg1+Cg3</th><th>Cg1+Cg4</th><th>Cg2+Cg3</th><th>Cg2+Cg4</th><th>Cg3+Cg4</th>", file=main.html, append=T) 88 cat("<th>Cg1+Cg2</th><th>Cg1+Cg3</th><th>Cg1+Cg4</th><th>Cg2+Cg3</th><th>Cg2+Cg4</th><th>Cg3+Cg4</th>", file=main.html, append=T)
75 cat("<th>Cg1+Cg2+Cg3</th><th>Cg2+Cg3+Cg4</th><th>Cg1+Cg2+Cg4</th><th>Cg1+Cg3+Cg4</th><th>Cg1+Cg2+Cg3+Cg4</th>", file=main.html, append=T) 89 cat("<th>Cg1+Cg2+Cg3</th><th>Cg2+Cg3+Cg4</th><th>Cg1+Cg2+Cg4</th><th>Cg1+Cg3+Cg4</th><th>Cg1+Cg2+Cg3+Cg4</th>", file=main.html, append=T)
238 print(paste("Matched with unmatched:", some.unmatched)) 252 print(paste("Matched with unmatched:", some.unmatched))
239 print(paste("Count that should match 'matched' sequences:", matched)) 253 print(paste("Count that should match 'matched' sequences:", matched))
240 254
241 #ACGT overview 255 #ACGT overview
242 256
243 NToverview = merged[!grepl("^unmatched", merged$best_match),] 257 #NToverview = merged[!grepl("^unmatched", merged$best_match),]
244 258 NToverview = merged
245 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_") 259
260 if(empty.region.filter == "leader"){
261 NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
262 } else if(empty.region.filter == "FR1"){
263 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
264 } else if(empty.region.filter == "CDR1"){
265 NToverview$seq = paste(NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
266 } else if(empty.region.filter == "FR2"){
267 NToverview$seq = paste(NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
268 }
246 269
247 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) 270 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
248 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) 271 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq))
249 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) 272 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq))
250 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) 273 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq))