diff sequence_overview.r @ 7:ad9be244b104 draft

Uploaded
author davidvanzessen
date Mon, 07 Nov 2016 03:04:07 -0500
parents c33d93683a09
children 3968d04b5724
line wrap: on
line diff
--- a/sequence_overview.r	Tue Nov 01 10:48:38 2016 -0400
+++ b/sequence_overview.r	Mon Nov 07 03:04:07 2016 -0500
@@ -10,6 +10,8 @@
 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
 NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
 main.html = "index.html"
+empty.region.filter = args[6] # region selector; compared against "leader", "FR1", "CDR1" and "FR2" further down
+stopifnot(empty.region.filter %in% c("leader", "FR1", "CDR1", "FR2")) # fail fast: the if/else-if chains below have no branch for any other (or missing) value
 
 setwd(outputdir)
 
@@ -19,19 +21,21 @@
 
 #before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]
 
-before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq)
+if(empty.region.filter == "leader"){ # seq_conc: selected regions joined by spaces; CDR3 is always included, as the table caption states
+	before.unique$seq_conc = paste(before.unique$FR1.IMGT.seq, before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq)
+} else if(empty.region.filter == "FR1"){
+	before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq)
+} else if(empty.region.filter == "CDR1"){
+	before.unique$seq_conc = paste(before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq)
+} else if(empty.region.filter == "FR2"){
+	before.unique$seq_conc = paste(before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq)
+}
 
 IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
 IDs$best_match = as.character(IDs$best_match)
 
-#dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
+dat = data.frame(table(before.unique$seq_conc)) # one row per distinct seq_conc value with its occurrence count
 
-dat = data.frame(table(before.unique$seq_conc))
-#dat = data.frame(table(merged$seq_conc, merged$Functionality))
-
-#dat = dat[dat$Freq > 1,]
-
-#names(dat) = c("seq_conc", "Functionality", "Freq")
 names(dat) = c("seq_conc", "Freq")
 
 dat$seq_conc = factor(dat$seq_conc)
@@ -67,7 +71,17 @@
 }
 
 cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=F)
-#cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
+
+# Caption: one fixed region list per empty.region.filter value (no caption is written for any other value). NOTE(review): every caption ends in CDR3 -- confirm seq_conc is built from the same regions.
+if(empty.region.filter == "leader"){
+	cat("<caption>FR1+CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
+} else if(empty.region.filter == "FR1"){
+	cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
+} else if(empty.region.filter == "CDR1"){
+	cat("<caption>FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
+} else if(empty.region.filter == "FR2"){
+	cat("<caption>CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
+}
 cat("<tr>", file=main.html, append=T)
 cat("<th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th><th>un</th>", file=main.html, append=T)
 cat("<th>total CA</th><th>total CG</th><th>number of subclasses</th><th>present in both Ca and Cg</th><th>Ca1+Ca2</th>", file=main.html, append=T)
@@ -240,9 +254,18 @@
 
 #ACGT overview
 
-NToverview = merged[!grepl("^unmatched", merged$best_match),]
+# Rows whose best_match starts with "unmatched" are no longer dropped here; previous filter: merged[!grepl("^unmatched", merged$best_match),]
+NToverview = merged
 
-NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_")
+if(empty.region.filter == "leader"){ # NToverview$seq: regions selected by empty.region.filter, joined by paste's default space (CDR3 is not part of it); left unset for any other value
+	NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
+} else if(empty.region.filter == "FR1"){
+	NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
+} else if(empty.region.filter == "CDR1"){
+	NToverview$seq = paste(NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
+} else if(empty.region.filter == "FR2"){
+	NToverview$seq = paste(NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
+}
 
 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq))