# sequence_overview.r
# Recovered from a Mercurial annotate view @ 90:6809c63d9161 (draft).
# Commit message: "planemo upload commit fd64827ff6e63df008f6f50ddb8576ad2b1dbb26"
# Author: rhpvorderman; date: Tue, 25 Jan 2022 11:28:29 +0000; parent: b6f9a640e098
# Every line below was annotated with changeset 81:b6f9a640e098 ("Uploaded", davidvanzessen).
library(reshape2)

# Positional command line arguments:
#   1  before.unique.file         tab separated table (with header); must contain the
#                                 columns Sequence.ID, best_match, Functionality and the
#                                 *.IMGT.seq region columns used below
#   2  merged.file                tab separated table (with header)
#   3  outputdir                  existing directory; becomes the working directory
#   4  gene.classes               comma separated list of class names
#   5  hotspot.analysis.sum.file  comma separated table without header
#   6  empty.region.filter        one of "leader", "FR1", "CDR1", "FR2"
args = commandArgs(trailingOnly = TRUE)
if(length(args) < 6){
	stop("expected 6 arguments: before.unique.file merged.file outputdir gene.classes hotspot.analysis.sum.file empty.region.filter", call. = FALSE)
}

# Resolve every path before setwd() below; a relative path would otherwise be
# interpreted relative to outputdir once the working directory has changed.
before.unique.file = normalizePath(args[1], winslash = "/", mustWork = TRUE)
merged.file = normalizePath(args[2], winslash = "/", mustWork = TRUE)
outputdir = normalizePath(args[3], winslash = "/", mustWork = TRUE)
gene.classes = unlist(strsplit(args[4], ","))
hotspot.analysis.sum.file = normalizePath(args[5], winslash = "/", mustWork = TRUE)
NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
main.html = "index.html"
empty.region.filter = args[6]

# All HTML pages are written with relative names, i.e. into outputdir.
setwd(outputdir)

before.unique = read.table(before.unique.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
merged = read.table(merged.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=FALSE, sep=",", fill=TRUE, stringsAsFactors=FALSE, quote="")

#before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]

# The filter names the last region that is left out: the concatenated sequence
# starts at the region following it and always runs up to and including CDR3.
region.columns = c("FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
first.region = switch(empty.region.filter,
	leader = 1,
	FR1 = 2,
	CDR1 = 3,
	FR2 = 4,
	stop(paste0("unknown empty.region.filter '", empty.region.filter, "', expected one of: leader, FR1, CDR1, FR2"), call. = FALSE)
)
# paste() with its default separator: the regions are joined by single spaces.
before.unique$seq_conc = do.call(paste, lapply(region.columns[first.region:length(region.columns)], function(column) before.unique[[column]]))

IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
IDs$best_match = as.character(IDs$best_match)

# One row per distinct concatenated sequence with its number of occurrences.
dat = data.frame(table(before.unique$seq_conc))
names(dat) = c("seq_conc", "Freq")

# Kept as a factor: the integer factor code is used further down as the
# numeric id in the per-sequence HTML file names.
dat$seq_conc = factor(dat$seq_conc)

dat = dat[order(as.character(dat$seq_conc)),]

#writing html from R...

# Choose the background colour of one table cell.
#
# val: a single cell value (character, numeric or logical).
# Returns a colour string:
#   - logical values, also when given as text ("TRUE", "FALSE", "T", "F"),
#     get a green (true) or red (false) tint
#   - numerical values get a grey tint when they are > 0, otherwise "white"
#   - everything else, including NA, gets "white"
get.bg.color = function(val){
	text = as.character(val)
	if(text %in% c("TRUE", "FALSE", "T", "F")){
		if(as.logical(text)) "#eafaf1" else "#f9ebea"
	} else {
		# Compare the converted number, not the text: comparing a string with 0
		# is a string comparison ("0.0" > "0" is TRUE). The coercion warning for
		# ordinary non-numeric cells is expected and therefore silenced.
		number = suppressWarnings(as.numeric(text))
		if(!is.na(number) && number > 0) "#eaecee" else "white"
	}
}
# Wrap one value in an HTML table cell whose background colour is chosen by
# get.bg.color(). The value is inserted as-is (no HTML escaping), so callers
# can pass ready-made markup such as links.
td = function(val) {
	paste0("<td bgcolor='", get.bg.color(val), "'>", val, "</td>")
}
# Build one HTML table row.
#
# val: a vector, or a one-row data frame, whose elements each become one cell
#      via td().
# Returns a single string "<tr>...</tr>".
tr = function(val) {
	# vapply guarantees exactly one string per cell, whatever the input type.
	cells = vapply(val, td, character(1))
	paste(c("<tr>", cells, "</tr>"), collapse="")
}

# Build an HTML link to the page "<clss>_<id>.html" with link text val.
# Vectorised like paste0(): vectors for clss and/or val give one link per element.
make.link = function(id, clss, val) {
	paste0("<a href='", clss, "_", id, ".html'>", val, "</a>")
}
# Render a data frame as an HTML table with one tr() row per data frame row.
# No header row is written. A data frame without rows gives an empty table.
# Returns the table markup as a single string.
tbl = function(df) {
	# seq_len() instead of 1:nrow(df): the latter iterates over c(1, 0) when
	# the data frame is empty.
	rows = vapply(seq_len(nrow(df)), function(i) tr(df[i,]), character(1))
	res = paste0(c("<table border='1'>", rows), collapse="")
	# paste() with its default separator: a space precedes the closing tag.
	paste(res, "</table>")
}

# ---- index.html: overview of sequences that occur more than once ----

# append=FALSE: start index.html afresh, everything else is appended to it.
cat("<center><img src='data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA8AAAAPCAYAAAA71pVKAAAAzElEQVQoka2TwQ2CQBBFpwTshw4ImW8ogJMlUIMmhNCDxgasAi50oSXA8XlAjCG7aqKTzGX/vsnM31mzR0gk7tTudO5MEizpzvQ4ryUSe408J3Xn+grE0p1rnpOamVmWsZG4rS+dzzAMsN8Hi9yyjI1JNGtxu4VxBJgLRLpoTKIPiW0LlwtUVRTubW2OBGUJu92cZRmdfbKQMAw8o+vi5v0fLorZ7Y9waGYJjsf38DJz0O1PsEQffOcv4Sa6YYfDDJ5Obzbsp93+5VfdATueO1fdLdI0AAAAAElFTkSuQmCC'> Please note that this tab is based on all sequences before filter unique sequences and the remove duplicates based on filters are applied. In this table only sequences occuring more than once are included. </center>", file=main.html, append=FALSE)
cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=TRUE)

# The caption names the regions the concatenated sequence is built from.
# No caption is written for an unrecognised filter value.
caption.regions = switch(empty.region.filter,
	leader = "FR1+CDR1+FR2+CDR2+FR3+CDR3",
	FR1 = "CDR1+FR2+CDR2+FR3+CDR3",
	CDR1 = "FR2+CDR2+FR3+CDR3",
	FR2 = "CDR2+FR3+CDR3"
)
if(!is.null(caption.regions)){
	cat("<caption>", caption.regions, " sequences that show up more than once</caption>", sep="", file=main.html, append=TRUE)
}

column.headers = c(
	"Sequence", "Functionality", "IGA1", "IGA2", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE", "UN",
	"total IGA", "total IGG", "total IGM", "total IGE", "number of subclasses",
	"present in both IGA and IGG", "present in IGA, IGG and IGM", "present in IGA, IGG and IGE", "present in IGA, IGG, IGM and IGE",
	"IGA1+IGA2",
	"IGG1+IGG2", "IGG1+IGG3", "IGG1+IGG4", "IGG2+IGG3", "IGG2+IGG4", "IGG3+IGG4",
	"IGG1+IGG2+IGG3", "IGG2+IGG3+IGG4", "IGG1+IGG2+IGG4", "IGG1+IGG3+IGG4", "IGG1+IGG2+IGG3+IGG4"
)
cat("<tr>", paste0("<th>", column.headers, "</th>", collapse=""), "</tr>", sep="", file=main.html, append=TRUE)

single.sequences=0 #sequence only found once, skipped
in.multiple=0 #same sequence across multiple subclasses
multiple.in.one=0 #same sequence multiple times in one subclass
unmatched=0 #all of the sequences are unmatched
some.unmatched=0 #one or more sequences in a clone are unmatched
matched=0 #should be the same as matched sequences

sequence.id.page="by_id.html"

# Classes in table column order. class.labels are also the prefixes of the
# per-class detail pages ("IGA1_<id>.html", ..., "un_<id>.html");
# class.patterns are matched against the start of best_match.
class.labels = c("IGA1", "IGA2", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE", "un")
class.patterns = c("^IGA1", "^IGA2", "^IGG1", "^IGG2", "^IGG3", "^IGG4", "^IGM", "^IGE", "^unmatched")

# Combinations of subclasses reported as TRUE/FALSE columns, in header order.
subclass.combinations = list(
	c("IGA1", "IGA2"),
	c("IGG1", "IGG2"), c("IGG1", "IGG3"), c("IGG1", "IGG4"),
	c("IGG2", "IGG3"), c("IGG2", "IGG4"), c("IGG3", "IGG4"),
	c("IGG1", "IGG2", "IGG3"), c("IGG2", "IGG3", "IGG4"),
	c("IGG1", "IGG2", "IGG4"), c("IGG1", "IGG3", "IGG4"),
	c("IGG1", "IGG2", "IGG3", "IGG4")
)

# seq_len() so that an empty dat does not iterate over c(1, 0).
for(i in seq_len(nrow(dat))){

	# All rows with this concatenated sequence, selected once and then split by class.
	seq.text = as.character(dat[i,"seq_conc"])
	seq.rows = IDs[IDs$seq_conc == seq.text,]
	by.class = lapply(class.patterns, function(pattern) seq.rows[grepl(pattern, seq.rows$best_match),])
	names(by.class) = class.labels

	# Rows of all classes, in class order; rows whose best_match fits none of
	# the patterns are not part of it.
	allc = do.call(rbind, unname(by.class))

	classes = vapply(by.class, nrow, integer(1))
	classes.sum = sum(classes)
	un.n = classes[["un"]]

	if(classes.sum == 1){
		single.sequences = single.sequences + 1
		next
	}

	# Also true when classes.sum is 0, i.e. no row fitted any class pattern.
	if(un.n == classes.sum){
		unmatched = unmatched + 1
		next
	}

	in.classes = sum(classes[class.labels != "un"] > 0)

	matched = matched + in.classes #count in how many subclasses the sequence occurs.

	if(any(classes == classes.sum)){
		multiple.in.one = multiple.in.one + 1
	} else if (un.n > 0) {
		some.unmatched = some.unmatched + 1
	} else {
		in.multiple = in.multiple + 1
	}

	# The integer factor code of the sequence serves as id in the file names.
	id = as.numeric(dat[i,"seq_conc"])

	functionality = paste(unique(allc[,"Functionality"]), collapse=",")

	# One detail page per class in which the sequence occurs.
	for(cls in class.labels[classes > 0]){
		cat(tbl(by.class[[cls]]), file=paste0(cls, "_", id, ".html"))
	}

	# A link is written for every class, also when its count is 0.
	class.links = make.link(id, class.labels, classes)

	#extra columns
	present = classes > 0
	ca.n = classes[["IGA1"]] + classes[["IGA2"]]
	cg.n = sum(classes[c("IGG1", "IGG2", "IGG3", "IGG4")])
	cm.n = classes[["IGM"]]
	ce.n = classes[["IGE"]]

	in.ca.cg = (ca.n > 0 && cg.n > 0)
	in.ca.cg.cm = (in.ca.cg && cm.n > 0)
	in.ca.cg.ce = (in.ca.cg && ce.n > 0)
	in.ca.cg.cm.ce = (in.ca.cg.cm && ce.n > 0)

	in.subclass.combinations = vapply(subclass.combinations, function(combination) all(present[combination]), logical(1))

	rw = c(seq.text, functionality, class.links)
	rw = c(rw, ca.n, cg.n, cm.n, ce.n, in.classes, in.ca.cg, in.ca.cg.cm, in.ca.cg.ce, in.ca.cg.cm.ce, in.subclass.combinations)

	cat(tr(rw), file=main.html, append=TRUE)

	#generate html by id
	# Link every sequence id to the detail page of the class it was counted in.
	# The class label is used, not the raw best_match value: the pages are
	# named after the label (e.g. "un_<id>.html" for unmatched sequences).
	row.classes = rep(class.labels, times=classes)
	id.links = make.link(id, row.classes, allc[,"Sequence.ID"])
	cat(paste0(id.links, " <br />"), sep="", file=sequence.id.page, append=TRUE)
}

cat("</table>", file=main.html, append=TRUE)

268 print(paste("Single sequences:", single.sequences))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
269 print(paste("Sequences in multiple subclasses:", in.multiple))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
270 print(paste("Multiple sequences in one subclass:", multiple.in.one))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
271 print(paste("Matched with unmatched:", some.unmatched))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
272 print(paste("Count that should match 'matched' sequences:", matched))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
273
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
274 #ACGT overview
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
275
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
276 #NToverview = merged[!grepl("^unmatched", merged$best_match),]
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
277 NToverview = merged
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
278
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
279 if(empty.region.filter == "leader"){
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
280 NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
281 } else if(empty.region.filter == "FR1"){
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
282 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
283 } else if(empty.region.filter == "CDR1"){
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
284 NToverview$seq = paste(NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
285 } else if(empty.region.filter == "FR2"){
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
286 NToverview$seq = paste(NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
287 }
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
288
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
289 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
290 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
291 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
292 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
293
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
294 #Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
295
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
296 #NToverview = rbind(NToverview, NTsum)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
297
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
298 NTresult = data.frame(nt=c("A", "C", "T", "G"))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
299
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
300 for(clazz in gene.classes){
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
301 print(paste("class:", clazz))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
302 NToverview.sub = NToverview[grepl(paste("^", clazz, sep=""), NToverview$best_match),]
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
303 print(paste("nrow:", nrow(NToverview.sub)))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
304 new.col.x = c(sum(NToverview.sub$A), sum(NToverview.sub$C), sum(NToverview.sub$T), sum(NToverview.sub$G))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
305 new.col.y = sum(new.col.x)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
306 new.col.z = round(new.col.x / new.col.y * 100, 2)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
307
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
308 tmp = names(NTresult)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
309 NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
310 names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep=""))
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
311 }
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
312
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
# Export the per-sequence nucleotide counts to NToverview.file as a
# tab-separated table with a header row, without quoting and without row names.
# Only the listed columns are written, and "seq" gets a more descriptive
# heading in the output.
export.columns <- c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")
export.headers <- replace(
  export.columns,
  export.columns == "seq",
  "Sequence of the analysed region"
)

NToverview.tmp <- NToverview[, export.columns]
names(NToverview.tmp) <- export.headers

write.table(
  NToverview.tmp,
  file = NToverview.file,
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
318
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
# Overall composition: discard every row whose best_match contains
# "unmatched", then append the "allx" / "ally" / "allz" columns to NTresult
# (count per nucleotide, total of the four counts, percentage of the total
# rounded to two decimals).
is.unmatched <- grepl("unmatched", NToverview$best_match)
NToverview <- NToverview[!is.unmatched, ]

# Same A, C, T, G order as the rows of NTresult.
overall.count <- c(
  sum(NToverview$A),
  sum(NToverview$C),
  sum(NToverview$T),
  sum(NToverview$G)
)
overall.total <- sum(overall.count)
overall.percent <- round(overall.count / overall.total * 100, 2)

overall.columns <- data.frame(overall.count, overall.total, overall.percent)
names(overall.columns) <- paste0("all", c("x", "y", "z"))
NTresult <- cbind(NTresult, overall.columns)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
328
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
# Append the nucleotide composition rows to the hotspot summary and write the
# combined table to hotspot.analysis.sum.file as comma-separated values with
# no header, no row names and no quoting.
# The hotspot table takes over NTresult's column names first, so that rbind
# sees two data frames with matching columns.
hotspot.analysis.sum <- setNames(hotspot.analysis.sum, names(NTresult))
hotspot.analysis.sum <- rbind(hotspot.analysis.sum, NTresult)

write.table(
  hotspot.analysis.sum,
  file = hotspot.analysis.sum.file,
  quote = FALSE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE,
  na = "0"  # missing values (NA) are written as 0
)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
334
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
335
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
336
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
337
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
338
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
339
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
340
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
341
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
342
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
343
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
344
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
345
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
346
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
347
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
348
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
349
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
350
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
351
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
352
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
353
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
354
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
355
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
356
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
357
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
358
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
359
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
360
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
361
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
362
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
363