annotate sequence_overview.r @ 78:aff3ba86ef7a draft

Uploaded
author davidvanzessen
date Mon, 31 Aug 2020 11:20:08 -0400
parents cfc9a442e59d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
1 library(reshape2)
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
2
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Command line arguments, in order:
#   1. before.unique.file         tab-separated table read into `before.unique`
#   2. merged.file                tab-separated table read into `merged`
#   3. outputdir                  directory the report files are written to
#   4. gene.classes               comma-separated list of class name prefixes
#   5. hotspot.analysis.sum.file  comma-separated table without a header row
#   6. empty.region.filter        one of "leader", "FR1", "CDR1", "FR2"
args <- commandArgs(trailingOnly = TRUE)

if(length(args) < 6){
  stop("Expected 6 arguments: <before.unique.file> <merged.file> <outputdir> <gene.classes> <hotspot.analysis.sum.file> <empty.region.filter>", call.=FALSE)
}

# Resolve every path to an absolute one *before* setwd() below: a relative
# path would otherwise be looked up inside outputdir once the working
# directory has changed.
before.unique.file = normalizePath(args[1], mustWork=TRUE)
merged.file = normalizePath(args[2], mustWork=TRUE)
outputdir = normalizePath(args[3], mustWork=TRUE)
gene.classes = unlist(strsplit(args[4], ","))
hotspot.analysis.sum.file = normalizePath(args[5], mustWork=TRUE)
NToverview.file = file.path(outputdir, "ntoverview.txt")
NTsum.file = file.path(outputdir, "ntsum.txt")
main.html = "index.html"
empty.region.filter = args[6]

# The HTML pages are written with relative file names, so they end up in
# outputdir.
setwd(outputdir)

before.unique = read.table(before.unique.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
merged = read.table(merged.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=FALSE, sep=",", fill=TRUE, stringsAsFactors=FALSE, quote="")
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
21
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
22 #before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
23
7
ad9be244b104 Uploaded
davidvanzessen
parents: 0
diff changeset
# Build the key used to decide whether two rows carry the same sequence:
# the space-separated concatenation of the "<region>.IMGT.seq" columns,
# starting at the region that follows the one named by empty.region.filter
# and running through CDR3.
seq.conc.filters = c("leader", "FR1", "CDR1", "FR2")
seq.conc.regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3")
seq.conc.first = match(empty.region.filter, seq.conc.filters)
if(is.na(seq.conc.first)){
  # Without this check seq_conc would simply be missing and the script would
  # fail later with an unrelated "undefined columns selected" error.
  stop("Unknown empty region filter '", empty.region.filter, "'; expected one of: ", paste(seq.conc.filters, collapse=", "), call.=FALSE)
}
seq.conc.cols = paste0(seq.conc.regions[seq.conc.first:length(seq.conc.regions)], ".IMGT.seq")
seq.conc.missing = setdiff(seq.conc.cols, names(before.unique))
if(length(seq.conc.missing) > 0){
  stop("Missing column(s) in the before-unique table: ", paste(seq.conc.missing, collapse=", "), call.=FALSE)
}
# unname() so the column names are not taken for paste() argument names.
before.unique$seq_conc = do.call(paste, unname(as.list(before.unique[, seq.conc.cols, drop=FALSE])))
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
33
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Per-row lookup table used when building the detail pages.
id.columns = c("Sequence.ID", "seq_conc", "best_match", "Functionality")
IDs = before.unique[, id.columns]
IDs$best_match = as.character(IDs$best_match)

# One row per distinct concatenated sequence with its number of occurrences,
# sorted by the sequence text. seq_conc is kept as a factor because its
# integer code is later used as the numeric id in the generated file names.
dat = setNames(data.frame(table(before.unique$seq_conc)), c("seq_conc", "Freq"))
dat$seq_conc = factor(dat$seq_conc)
dat = dat[order(as.character(dat$seq_conc)), ]
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
44
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
45 #writing html from R...
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Pick the background colour for one HTML table cell.
#
# val: a single cell value (logical, numeric or character).
#
# Returns "#eafaf1" / "#f9ebea" for a true / false logical value (or its
# character spelling "TRUE", "T", "FALSE", "F"), "#eaecee" for a number
# greater than zero, and "white" for everything else.
get.bg.color = function(val){
  if(val %in% c("TRUE", "FALSE", "T", "F")){
    if(as.logical(val)){
      return("#eafaf1")
    }
    return("#f9ebea")
  }
  # Most cells reach this function as character strings. Convert once and
  # compare the number: comparing the string itself against 0 is a text
  # comparison (so "0.0" would count as greater than "0"). Non-numeric
  # text is expected here, hence the suppressed coercion warning.
  num = suppressWarnings(as.numeric(val))
  if(!is.na(num) && num > 0){
    return("#eaecee")
  }
  "white"
}
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Render one value as an HTML table cell.
#
# val: a single cell value; it is inserted as-is (not HTML-escaped), so it
#      may itself contain markup such as a link.
#
# Returns a "<td>" element whose bgcolor comes from get.bg.color(val).
td = function(val) {
  paste0("<td bgcolor='", get.bg.color(val), "'>", val, "</td>")
}
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Render a set of values as one HTML table row.
#
# val: a vector, or a single data frame row, whose elements each become
#      one cell via td().
#
# Returns a single string "<tr>...</tr>".
tr = function(val) {
  # vapply (rather than sapply) guarantees a character vector, also for
  # empty input.
  cells = vapply(val, td, character(1), USE.NAMES=FALSE)
  paste(c("<tr>", cells, "</tr>"), collapse="")
}
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
61
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Build an HTML link to a detail page named "<clss>_<id>.html".
#
# id:   identifier used in the target file name.
# clss: prefix used in the target file name.
# val:  text shown for the link.
#
# Returns the "<a>" element as a string; vectorised over its arguments.
make.link = function(id, clss, val) {
  paste0("<a href='", clss, "_", id, ".html'>", val, "</a>")
}
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Render a data frame as an HTML table, one tr() row per data frame row.
# No header row is written.
#
# df: the data frame to render; a data frame without rows gives an empty
#     table.
#
# Returns the table markup as a single string.
tbl = function(df) {
  # seq_len() so that a zero-row data frame yields no rows (1:0 would
  # iterate over 1 and 0).
  rows = vapply(seq_len(nrow(df)), function(i) tr(df[i,]), character(1))
  opened = paste0("<table border='1'>", paste(rows, collapse=""))
  # The space before the closing tag is part of the existing output format.
  paste(opened, "</table>")
}
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
72
45
b66e8946ba75 Uploaded
davidvanzessen
parents: 39
diff changeset
# Start the main page (overwriting any previous file): an introductory note
# with an inline icon, followed by the opening tag of the overview table.
page.intro = "<center><img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAPCAYAAAA71pVKAAAAzElEQVQoka2TwQ2CQBBFpwTshw4ImW8ogJMlUIMmhNCDxgasAi50oSXA8XlAjCG7aqKTzGX/vsnM31mzR0gk7tTudO5MEizpzvQ4ryUSe408J3Xn+grE0p1rnpOamVmWsZG4rS+dzzAMsN8Hi9yyjI1JNGtxu4VxBJgLRLpoTKIPiW0LlwtUVRTubW2OBGUJu92cZRmdfbKQMAw8o+vi5v0fLorZ7Y9waGYJjsf38DJz0O1PsEQffOcv4Sa6YYfDDJ5Obzbsp93+5VfdATueO1fdLdI0AAAAAElFTkSuQmCC'> Please note that this tab is based on all sequences before filter unique sequences and the remove duplicates based on filters are applied. In this table only sequences occuring more than once are included. </center>"
table.open = "<table border='1' class='pure-table pure-table-striped'>"
cat(page.intro, table.open, file=main.html, sep="", append=FALSE)

# The caption names the regions that make up the compared sequence; an
# unrecognised filter value leaves the table without a caption.
table.caption = NULL
if(empty.region.filter == "leader"){
  table.caption = "<caption>FR1+CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>"
} else if(empty.region.filter == "FR1"){
  table.caption = "<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>"
} else if(empty.region.filter == "CDR1"){
  table.caption = "<caption>FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>"
} else if(empty.region.filter == "FR2"){
  table.caption = "<caption>CDR2+FR3+CDR3 sequences that show up more than once</caption>"
}

# Header row; the column order must match the row vector assembled for each
# sequence further down.
header.cells = c(
  "<th>Sequence</th><th>Functionality</th><th>IGA1</th><th>IGA2</th><th>IGG1</th><th>IGG2</th><th>IGG3</th><th>IGG4</th><th>IGM</th><th>IGE</th><th>UN</th>",
  "<th>total IGA</th><th>total IGG</th><th>total IGM</th><th>total IGE</th><th>number of subclasses</th><th>present in both IGA and IGG</th><th>present in IGA, IGG and IGM</th><th>present in IGA, IGG and IGE</th><th>present in IGA, IGG, IGM and IGE</th><th>IGA1+IGA2</th>",
  "<th>IGG1+IGG2</th><th>IGG1+IGG3</th><th>IGG1+IGG4</th><th>IGG2+IGG3</th><th>IGG2+IGG4</th><th>IGG3+IGG4</th>",
  "<th>IGG1+IGG2+IGG3</th><th>IGG2+IGG3+IGG4</th><th>IGG1+IGG2+IGG4</th><th>IGG1+IGG3+IGG4</th><th>IGG1+IGG2+IGG3+IGG4</th>"
)
cat(table.caption, "<tr>", header.cells, "</tr>", file=main.html, sep="", append=TRUE)
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
92
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
93
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
94
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Tallies over the distinct sequences, updated by the loop below.
single.sequences = 0 # sequence only found once, skipped
in.multiple = 0 # same sequence across multiple subclasses
multiple.in.one = 0 # same sequence multiple times in one subclass
unmatched = 0 # all of the sequences are unmatched
some.unmatched = 0 # one or more sequences in a clone are unmatched
matched = 0 # should be the same as matched sequences

# NOTE(review): this page is only ever appended to, never truncated, so a
# reused output directory accumulates links - confirm whether intended.
sequence.id.page = "by_id.html"

# Subclass labels in table-column order. Each label is also the file name
# prefix of the per-subclass detail pages. Rows are assigned to a subclass
# by matching the start of best_match against the pattern at the same
# position ("un" collects the rows whose best_match starts with "unmatched").
subclass.names = c("IGA1", "IGA2", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE", "un")
subclass.patterns = paste0("^", c("IGA1", "IGA2", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE", "unmatched"))

# Subclass combinations reported as TRUE/FALSE columns, in header order.
subclass.combos = list(
  c("IGA1", "IGA2"),
  c("IGG1", "IGG2"), c("IGG1", "IGG3"), c("IGG1", "IGG4"),
  c("IGG2", "IGG3"), c("IGG2", "IGG4"), c("IGG3", "IGG4"),
  c("IGG1", "IGG2", "IGG3"), c("IGG2", "IGG3", "IGG4"),
  c("IGG1", "IGG2", "IGG4"), c("IGG1", "IGG3", "IGG4"),
  c("IGG1", "IGG2", "IGG3", "IGG4")
)

# seq_len() so that an empty `dat` runs the loop zero times.
for(i in seq_len(nrow(dat))){
  seq.value = as.character(dat[i, "seq_conc"])

  # Select the rows carrying this sequence once, then split them by subclass.
  seq.rows = IDs[IDs$seq_conc == seq.value,]
  by.class = lapply(subclass.patterns, function(pattern) {
    seq.rows[grepl(pattern, seq.rows$best_match),]
  })
  names(by.class) = subclass.names

  # All classified rows, ordered subclass by subclass.
  allc = do.call(rbind, unname(by.class))

  classes = vapply(by.class, nrow, integer(1))
  classes.sum = sum(classes)
  un.n = classes[["un"]]

  if(classes.sum == 1){
    single.sequences = single.sequences + 1
    next
  }

  # Also taken when no row matched any subclass pattern (both sides are 0).
  if(un.n == classes.sum){
    unmatched = unmatched + 1
    next
  }

  present = classes > 0
  in.classes = sum(present[subclass.names != "un"])

  matched = matched + in.classes #count in how many subclasses the sequence occurs.

  if(any(classes == classes.sum)){
    multiple.in.one = multiple.in.one + 1
  } else if (un.n > 0) {
    some.unmatched = some.unmatched + 1
  } else {
    in.multiple = in.multiple + 1
  }

  # The factor code of the sequence serves as its id in file names and links.
  id = as.numeric(dat[i,"seq_conc"])

  functionality = paste(unique(allc[,"Functionality"]), collapse=",")

  # One detail page per subclass that contains this sequence.
  for(cls in subclass.names[present]){
    cat(tbl(by.class[[cls]]), file=paste0(cls, "_", id, ".html"))
  }

  # A link per subclass showing the row count (also written for a count of 0,
  # in which case no page exists behind the link).
  class.links = vapply(subclass.names, function(cls) {
    make.link(id, cls, classes[[cls]])
  }, character(1), USE.NAMES=FALSE)

  #extra columns
  ca.n = classes[["IGA1"]] + classes[["IGA2"]]
  cg.n = sum(classes[c("IGG1", "IGG2", "IGG3", "IGG4")])
  cm.n = classes[["IGM"]]
  ce.n = classes[["IGE"]]

  in.ca.cg = (ca.n > 0 && cg.n > 0)
  in.ca.cg.cm = (in.ca.cg && cm.n > 0)
  in.ca.cg.ce = (in.ca.cg && ce.n > 0)
  in.ca.cg.cm.ce = (in.ca.cg.cm && ce.n > 0)
  in.subclass.combos = vapply(subclass.combos, function(combo) all(present[combo]), logical(1))

  # Everything goes into a single c() call so each value is converted to
  # text from its own type (counts as digits, flags as TRUE/FALSE).
  rw = c(seq.value, functionality, class.links,
    ca.n, cg.n, cm.n, ce.n, in.classes,
    in.ca.cg, in.ca.cg.cm, in.ca.cg.ce, in.ca.cg.cm.ce,
    in.subclass.combos)

  cat(tr(rw), file=main.html, append=TRUE)

  #generate html by id
  # Link each sequence id to the detail page of the subclass it was assigned
  # to. The page prefix is the subclass label (not the raw best_match), so
  # the link matches the file written above - in particular "un_<id>.html"
  # for unmatched rows.
  link.classes = rep(subclass.names, times=classes)
  id.links = make.link(id, link.classes, allc[,"Sequence.ID"])
  cat(paste(id.links, "<br />"), file=sequence.id.page, sep="", append=TRUE)
}
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
265
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Close the overview table opened when the page header was written.
cat("</table>", file=main.html, append=TRUE)

# Report the tallies gathered while classifying the sequences, one line each.
run.summary = list(
  "Single sequences:" = single.sequences,
  "Sequences in multiple subclasses:" = in.multiple,
  "Multiple sequences in one subclass:" = multiple.in.one,
  "Matched with unmatched:" = some.unmatched,
  "Count that should match 'matched' sequences:" = matched
)
for(summary.label in names(run.summary)){
  print(paste(summary.label, run.summary[[summary.label]]))
}
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
273
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
274 #ACGT overview
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
275
7
ad9be244b104 Uploaded
davidvanzessen
parents: 0
diff changeset
276 #NToverview = merged[!grepl("^unmatched", merged$best_match),]
ad9be244b104 Uploaded
davidvanzessen
parents: 0
diff changeset
NToverview = merged

# Analysed sequence per row: the space-separated concatenation of the
# "<region>.IMGT.seq" columns, starting at the region that follows the one
# named by empty.region.filter and running through FR3 (CDR3 is not part of
# this overview).
nt.filters = c("leader", "FR1", "CDR1", "FR2")
nt.regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3")
nt.first = match(empty.region.filter, nt.filters)
if(is.na(nt.first)){
  # Without this check `seq` would be missing and the counts below would fail
  # with an unrelated replacement-length error.
  stop("Unknown empty region filter '", empty.region.filter, "'; expected one of: ", paste(nt.filters, collapse=", "), call.=FALSE)
}
nt.seq.cols = paste0(nt.regions[nt.first:length(nt.regions)], ".IMGT.seq")
nt.missing.cols = setdiff(nt.seq.cols, names(NToverview))
if(length(nt.missing.cols) > 0){
  stop("Missing column(s) in the merged table: ", paste(nt.missing.cols, collapse=", "), call.=FALSE)
}
# unname() so the column names are not taken for paste() argument names.
NToverview$seq = do.call(paste, unname(as.list(NToverview[, nt.seq.cols, drop=FALSE])))

# Per-row count of each nucleotide (upper or lower case): delete every other
# character and measure what is left.
for(nt.letter in c("A", "C", "G", "T")){
  nt.other = paste0("[^", nt.letter, tolower(nt.letter), "]")
  NToverview[[nt.letter]] = nchar(gsub(nt.other, "", NToverview$seq))
}
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
293
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
294 #Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T))
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
295
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
296 #NToverview = rbind(NToverview, NTsum)
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
297
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Nucleotide summary table: one row per nucleotide (note the A, C, T, G row
# order) and, per gene class, three columns named <class>x, <class>y and
# <class>z holding the count, the total over all four nucleotides, and the
# percentage of that total.
NTresult = data.frame(nt=c("A", "C", "T", "G"))

for(clazz in gene.classes){
  print(paste("class:", clazz))
  # A row belongs to the class when its best_match starts with the class name.
  class.rows = grepl(paste0("^", clazz), NToverview$best_match)
  NToverview.sub = NToverview[class.rows,]
  print(paste("nrow:", nrow(NToverview.sub)))

  new.col.x = vapply(c("A", "C", "T", "G"), function(letter) {
    sum(NToverview.sub[[letter]])
  }, integer(1), USE.NAMES=FALSE)
  new.col.y = sum(new.col.x)
  # NaN when the class has no nucleotides at all (0 / 0).
  new.col.z = round(new.col.x / new.col.y * 100, 2)

  class.cols = data.frame(new.col.x, new.col.y, new.col.z)
  names(class.cols) = paste0(clazz, c("x", "y", "z"))
  NTresult = cbind(NTresult, class.cols)
}
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
312
39
a24f8c93583a Uploaded
davidvanzessen
parents: 33
diff changeset
313 NToverview.tmp = NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")]
a24f8c93583a Uploaded
davidvanzessen
parents: 33
diff changeset
314
a24f8c93583a Uploaded
davidvanzessen
parents: 33
diff changeset
315 names(NToverview.tmp) = c("Sequence.ID", "best_match", "Sequence of the analysed region", "A", "C", "G", "T")
a24f8c93583a Uploaded
davidvanzessen
parents: 33
diff changeset
316
a24f8c93583a Uploaded
davidvanzessen
parents: 33
diff changeset
317 write.table(NToverview.tmp, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T)
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
318
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
319 NToverview = NToverview[!grepl("unmatched", NToverview$best_match),]
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
320
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
321 new.col.x = c(sum(NToverview$A), sum(NToverview$C), sum(NToverview$T), sum(NToverview$G))
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
322 new.col.y = sum(new.col.x)
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
323 new.col.z = round(new.col.x / new.col.y * 100, 2)
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
324
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
325 tmp = names(NTresult)
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
326 NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z))
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
327 names(NTresult) = c(tmp, paste("all", c("x", "y", "z"), sep=""))
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
328
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
329 names(hotspot.analysis.sum) = names(NTresult)
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
330
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
331 hotspot.analysis.sum = rbind(hotspot.analysis.sum, NTresult)
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
332
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
333 write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0")
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
334
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
335
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
336
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
337
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
338
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
339
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
340
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
341
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
342
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
343
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
344
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
345
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
346
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
347
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
348
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
349
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
350
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
351
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
352
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
353
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
354
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
355
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
356
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
357
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
358
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
359
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
360
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
361
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
362
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
363