Mercurial > repos > davidvanzessen > shm_csr
comparison sequence_overview.r @ 7:ad9be244b104 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Mon, 07 Nov 2016 03:04:07 -0500 |
| parents | c33d93683a09 |
| children | 3968d04b5724 |
comparison
equal
deleted
inserted
replaced
| 6:2ddb9a21f635 | 7:ad9be244b104 |
|---|---|
| 8 gene.classes = unlist(strsplit(args[4], ",")) | 8 gene.classes = unlist(strsplit(args[4], ",")) |
| 9 hotspot.analysis.sum.file = args[5] | 9 hotspot.analysis.sum.file = args[5] |
| 10 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") | 10 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") |
| 11 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") | 11 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") |
| 12 main.html = "index.html" | 12 main.html = "index.html" |
| 13 empty.region.filter = args[6] | |
| 14 | |
| 13 | 15 |
| 14 setwd(outputdir) | 16 setwd(outputdir) |
| 15 | 17 |
| 16 before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") | 18 before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") |
| 17 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") | 19 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") |
| 18 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") | 20 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") |
| 19 | 21 |
| 20 #before.unique = before.unique[!grepl("unmatched", before.unique$best_match),] | 22 #before.unique = before.unique[!grepl("unmatched", before.unique$best_match),] |
| 21 | 23 |
| 22 before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) | 24 if(empty.region.filter == "leader"){ |
| 25 before.unique$seq_conc = paste(before.unique$FR1.IMGT.seq, before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq) | |
| 26 } else if(empty.region.filter == "FR1"){ | |
| 27 before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq) | |
| 28 } else if(empty.region.filter == "CDR1"){ | |
| 29 before.unique$seq_conc = paste(before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq) | |
| 30 } else if(empty.region.filter == "FR2"){ | |
| 31 before.unique$seq_conc = paste(before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq) | |
| 32 } | |
| 23 | 33 |
| 24 IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] | 34 IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] |
| 25 IDs$best_match = as.character(IDs$best_match) | 35 IDs$best_match = as.character(IDs$best_match) |
| 26 | 36 |
| 27 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) | |
| 28 | |
| 29 dat = data.frame(table(before.unique$seq_conc)) | 37 dat = data.frame(table(before.unique$seq_conc)) |
| 30 #dat = data.frame(table(merged$seq_conc, merged$Functionality)) | 38 |
| 31 | |
| 32 #dat = dat[dat$Freq > 1,] | |
| 33 | |
| 34 #names(dat) = c("seq_conc", "Functionality", "Freq") | |
| 35 names(dat) = c("seq_conc", "Freq") | 39 names(dat) = c("seq_conc", "Freq") |
| 36 | 40 |
| 37 dat$seq_conc = factor(dat$seq_conc) | 41 dat$seq_conc = factor(dat$seq_conc) |
| 38 | 42 |
| 39 dat = dat[order(as.character(dat$seq_conc)),] | 43 dat = dat[order(as.character(dat$seq_conc)),] |
| 65 } | 69 } |
| 66 res = paste(res, "</table>") | 70 res = paste(res, "</table>") |
| 67 } | 71 } |
| 68 | 72 |
| 69 cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=F) | 73 cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=F) |
| 70 #cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) | 74 |
| 75 if(empty.region.filter == "leader"){ | |
| 76 cat("<caption>FR1+CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) | |
| 77 } else if(empty.region.filter == "FR1"){ | |
| 78 cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) | |
| 79 } else if(empty.region.filter == "CDR1"){ | |
| 80 cat("<caption>FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) | |
| 81 } else if(empty.region.filter == "FR2"){ | |
| 82 cat("<caption>CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) | |
| 83 } | |
| 84 | |
| 71 cat("<tr>", file=main.html, append=T) | 85 cat("<tr>", file=main.html, append=T) |
| 72 cat("<th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th><th>un</th>", file=main.html, append=T) | 86 cat("<th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th><th>un</th>", file=main.html, append=T) |
| 73 cat("<th>total CA</th><th>total CG</th><th>number of subclasses</th><th>present in both Ca and Cg</th><th>Ca1+Ca2</th>", file=main.html, append=T) | 87 cat("<th>total CA</th><th>total CG</th><th>number of subclasses</th><th>present in both Ca and Cg</th><th>Ca1+Ca2</th>", file=main.html, append=T) |
| 74 cat("<th>Cg1+Cg2</th><th>Cg1+Cg3</th><th>Cg1+Cg4</th><th>Cg2+Cg3</th><th>Cg2+Cg4</th><th>Cg3+Cg4</th>", file=main.html, append=T) | 88 cat("<th>Cg1+Cg2</th><th>Cg1+Cg3</th><th>Cg1+Cg4</th><th>Cg2+Cg3</th><th>Cg2+Cg4</th><th>Cg3+Cg4</th>", file=main.html, append=T) |
| 75 cat("<th>Cg1+Cg2+Cg3</th><th>Cg2+Cg3+Cg4</th><th>Cg1+Cg2+Cg4</th><th>Cg1+Cg3+Cg4</th><th>Cg1+Cg2+Cg3+Cg4</th>", file=main.html, append=T) | 89 cat("<th>Cg1+Cg2+Cg3</th><th>Cg2+Cg3+Cg4</th><th>Cg1+Cg2+Cg4</th><th>Cg1+Cg3+Cg4</th><th>Cg1+Cg2+Cg3+Cg4</th>", file=main.html, append=T) |
| 238 print(paste("Matched with unmatched:", some.unmatched)) | 252 print(paste("Matched with unmatched:", some.unmatched)) |
| 239 print(paste("Count that should match 'matched' sequences:", matched)) | 253 print(paste("Count that should match 'matched' sequences:", matched)) |
| 240 | 254 |
| 241 #ACGT overview | 255 #ACGT overview |
| 242 | 256 |
| 243 NToverview = merged[!grepl("^unmatched", merged$best_match),] | 257 #NToverview = merged[!grepl("^unmatched", merged$best_match),] |
| 244 | 258 NToverview = merged |
| 245 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_") | 259 |
| 260 if(empty.region.filter == "leader"){ | |
| 261 NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) | |
| 262 } else if(empty.region.filter == "FR1"){ | |
| 263 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) | |
| 264 } else if(empty.region.filter == "CDR1"){ | |
| 265 NToverview$seq = paste(NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) | |
| 266 } else if(empty.region.filter == "FR2"){ | |
| 267 NToverview$seq = paste(NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) | |
| 268 } | |
| 246 | 269 |
| 247 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) | 270 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) |
| 248 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) | 271 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) |
| 249 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) | 272 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) |
| 250 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) | 273 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) |
