changeset 23:81453585dfc3 draft

Uploaded
author davidvanzessen
date Thu, 01 Dec 2016 09:32:06 -0500
parents 0bea8c187a90
children 9955b23db68c
files aa_histogram.r pattern_plots.r shm_csr.r wrapper.sh
diffstat 4 files changed, 141 insertions(+), 88 deletions(-) [+]
line wrap: on
line diff
--- a/aa_histogram.r	Mon Nov 28 10:27:22 2016 -0500
+++ b/aa_histogram.r	Thu Dec 01 09:32:06 2016 -0500
@@ -15,50 +15,49 @@
 absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="")
 
 for(gene in genes){
+	if(gene == ""){
+			mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),]
+			absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),]
+	} else {
+			mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),]
+			absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),]
+	}
+	print(paste("nrow", gene, nrow(absent.aa.by.id.gene)))
+	if(nrow(mutations.by.id.gene) == 0){
+			next
+	}
 
-        if(gene == ""){
-                mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),]
-                absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),]
-        } else {
-                mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),]
-                absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),]
-        }
-        print(paste("nrow", gene, nrow(absent.aa.by.id.gene)))
-        if(nrow(mutations.by.id.gene) == 0){
-                next
-        }
-
-        mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)])
-        aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)])
+	mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)])
+	aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)])
 
-        dat_freq = mutations.at.position / aa.at.position
-        dat_freq[is.na(dat_freq)] = 0
-        dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq)
+	dat_freq = mutations.at.position / aa.at.position
+	dat_freq[is.na(dat_freq)] = 0
+	dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq)
 
-        print("---------------- plot ----------------")
+	print("---------------- plot ----------------")
 
-        m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black"))
-        m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i)
-        m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1")
-        m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1")
-        m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2")
-        m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2")
-        m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3")
-        m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) 
-        m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
-        m = m + scale_colour_manual(values=c("black"))
+	m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black"))
+	m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i)
+	m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1")
+	m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1")
+	m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2")
+	m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2")
+	m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3")
+	m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) 
+	m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+	#m = m + scale_colour_manual(values=c("black"))
 
-        print("---------------- write/print ----------------")
+	print("---------------- write/print ----------------")
 
 
-        dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position)
+	dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position)
 
-        write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-        write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-        write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-        write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-        
-        png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720)
-        print(m)
-        dev.off()
+	write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+	write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+	write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+	write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+	
+	png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720)
+	print(m)
+	dev.off()
 }
--- a/pattern_plots.r	Mon Nov 28 10:27:22 2016 -0500
+++ b/pattern_plots.r	Thu Dec 01 09:32:06 2016 -0500
@@ -18,6 +18,8 @@
 plot3.png = paste(plot3.path, ".png", sep="")
 plot3.txt = paste(plot3.path, ".txt", sep="")
 
+clean.output = args[5]
+
 dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1)
 
 
@@ -28,6 +30,11 @@
 
 names(dat) = new.names
 
+clean.dat = dat
+clean.dat = clean.dat[,c(paste(rep(classes, each=3), xyz, sep="."), paste("all", xyz, sep="."), paste("un", xyz, sep="."))]
+
+write.table(clean.dat, clean.output, quote=F, sep="\t", na="", row.names=T, col.names=NA)
+
 dat["RGYW.WRCY",] = colSums(dat[c(13,14),], na.rm=T)
 dat["TW.WA",] = colSums(dat[c(15,16),], na.rm=T)
 
@@ -51,26 +58,24 @@
 print(p)
 dev.off()
 
-data2 = dat[5:8,]
-
-data2["sum",] = colSums(data2, na.rm=T)
+data2 = dat[c(1, 5:8),]
 
 data2 = data2[,names(data2)[grepl("\\.x", names(data2))]]
 names(data2) = gsub(".x", "", names(data2))
 
-data2["A/T",] = round(colSums(data2[3:4,]) / data2["sum",] * 100, 1)
-data2["A/T",is.nan(unlist(data2["A/T",]))] = 0
+data2["A/T",] = dat["Targeting of A T (%)",names(dat)[grepl("\\.z", names(dat))]]
 
-data2["G/C transversions",] = round(data2[2,] / data2["sum",] * 100, 1)
-data2["G/C transitions",] = round(data2[1,] / data2["sum",] * 100, 1)
+data2["G/C transitions",] = round(data2["Transitions at G C (%)",] / data2["Number of Mutations (%)",] * 100, 1)
 
+data2["mutation.at.gc",] = dat["Transitions at G C (%)",names(dat)[grepl("\\.y", names(dat))]]
+data2["G/C transversions",] = round((data2["mutation.at.gc",] - data2["Transitions at G C (%)",]) / data2["Number of Mutations (%)",] * 100, 1)
 
 data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0
 data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0
 data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0
 data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0
 
-data2 = melt(t(data2[6:8,]))
+data2 = melt(t(data2[c("A/T","G/C transitions","G/C transversions"),]))
 
 names(data2) = c("Class", "Type", "value")
 
@@ -92,11 +97,11 @@
 data3[is.na(data3)] = 0
 #data3[is.infinite(data3)] = 0
 
-data3["G/C transitions",] = round(data3[1,] / (data3[5,] + data3[7,]) * 100, 1)
+data3["G/C transitions",] = round(data3["Transitions at G C (%)",] / (data3["C",] + data3["G",]) * 100, 1)
 
-data3["G/C transversions",] = round(data3[2,] / (data3[5,] + data3[7,]) * 100, 1)
+data3["G/C transversions",] = round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / (data3["C",] + data3["G",]) * 100, 1)
 
-data3["A/T",] = round(data3[3,] / (data3[4,] + data3[6,]) * 100, 1)
+data3["A/T",] = round(data3["Targeting of A T (%)",] / (data3["A",] + data3["T",]) * 100, 1)
 
 data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0
 data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0
--- a/shm_csr.r	Mon Nov 28 10:27:22 2016 -0500
+++ b/shm_csr.r	Thu Dec 01 09:32:06 2016 -0500
@@ -15,7 +15,7 @@
 if(length(dat$Sequence.ID) == 0){
   setwd(outputdir)
   result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5))
-  row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)")
+  row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)")
   write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
   transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4))
   row.names(transitionTable) = c("A", "C", "G", "T")
@@ -296,9 +296,9 @@
 		transition2 = merge(transition2, base.order, by.x="variable", by.y="base")
 
 		transition2[is.na(transition2$value),]$value = 0
-
+		
 		if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad
-			print("Plotting stacked transition")
+			print("Plotting heatmap and transition")
 			png(filename=paste("transitions_stacked_", name, ".png", sep=""))
 			p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar
 			p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL))
@@ -306,16 +306,13 @@
 			#p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black"))
 			print(p)
 			dev.off()
-
-			print("Plotting heatmap transition")
-
 			png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
 			p = ggplot(transition2, aes(factor(reorder(id, order.x)), factor(reorder(variable, order.y)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap
 			p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base")  + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
 			print(p)
 			dev.off()
 		} else {
-			print("No data to plot")
+			#print("No data to plot")
 		}
 	}
 
@@ -338,22 +335,23 @@
 	func = funcs[[i]]
 	fname = fnames[[i]]
 	
+	print(paste("Creating table for", fname))
+	
 	rows = 9
 	if(fname == "sum"){
 		rows = 11
 	}
 	matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows)
 	for(i in 1:length(genes)){
-		print(paste("Creating table for", fname, genes[i]))
 		matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i])
 	}
 	matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all")
 
 	result = data.frame(matrx)
 	if(fname == "sum"){
-		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR")
+		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR")
 	} else {
-		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)")
+		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)")
 	}
 	write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F)
 }
@@ -368,7 +366,7 @@
 new.table[,1] = as.character(new.table[,1])
 new.table[2,1] = "Median of Number of Mutations (%)"
 
-#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),]
+#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),]
 
 write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F)
 
@@ -467,7 +465,9 @@
 
 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")])
 
-frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class")
+frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")])
+
+frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match")
 
 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
 
--- a/wrapper.sh	Mon Nov 28 10:27:22 2016 -0500
+++ b/wrapper.sh	Thu Dec 01 09:32:06 2016 -0500
@@ -230,7 +230,7 @@
 echo "---------------- main tables ----------------<br />" >> $log
 
 echo "<div class='tabber'>" >> $output
-echo "<div class='tabbertab' title='SHM Overview'>" >> $output
+echo "<div class='tabbertab' title='SHM Overview' style='width: 3000px;'>" >> $output
 
 for func in ${funcs[@]}
 do
@@ -243,7 +243,7 @@
 	echo "---------------- pattern_plots.r ----------------"
 	echo "---------------- pattern_plots.r ----------------<br />" >> $log
 
-	Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/plot1 $outdir/plot2 $outdir/plot3 2>&1
+	Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/plot1 $outdir/plot2 $outdir/plot3 $outdir/shm_overview.txt 2>&1
 	
 	echo "<table class='pure-table pure-table-striped'>" >> $output
 	echo "<thead><tr><th>info</th>" >> $output
@@ -301,7 +301,7 @@
 echo "---------------- images ----------------"
 echo "---------------- images ----------------<br />" >> $log
 
-echo "<div class='tabbertab' title='SHM Frequency'>" >> $output
+echo "<div class='tabbertab' title='SHM Frequency' style='width: 3000px;'>" >> $output
 
 if [ -a $outdir/scatter.png ]
 then
@@ -314,7 +314,7 @@
 
 echo "</div>" >> $output #SHM frequency tab end
 
-echo "<div class='tabbertab' title='Transition tables'>" >> $output
+echo "<div class='tabbertab' title='Transition tables' style='width: 3000px;'>" >> $output
 
 echo "<table border='0'>" >> $output
 
@@ -322,8 +322,21 @@
 do
 	echo "<tr>" >> $output
 	echo "<td><h1>${gene}</h1></td>" >> $output
-	echo "<td><img src='transitions_heatmap_${gene}.png' /></td>" >> $output
-	echo "<td><img src='transitions_stacked_${gene}.png' /></td>" >> $output
+	
+	if [ -e $outdir/transitions_heatmap_${gene}.png ]
+	then
+		echo "<td><img src='transitions_heatmap_${gene}.png' /></td>" >> $output
+	else
+		echo "<td></td>" >> $output
+	fi
+	
+	if [ -e $outdir/transitions_stacked_${gene}.png ]
+	then
+		echo "<td><img src='transitions_stacked_${gene}.png' /></td>" >> $output
+	else
+		echo "<td></td>" >> $output
+	fi
+	
 	echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output
 	echo "<tr><td></td><td colspan="5"><center>To</center></td></tr>" >> $output
 	first="true"
@@ -367,30 +380,66 @@
 
 echo "<div class='tabbertab' title='Antigen Selection'>" >> $output
 
-if [ -a $outdir/aa_histogram.png ]
+if [ -e $outdir/aa_histogram.png ]
 then
 	echo "<img src='aa_histogram.png'/><br />" >> $output
+fi
+
+if [ -e $outdir/aa_histogram_IGA.png ]
+then
 	echo "<img src='aa_histogram_IGA.png'/><br />" >> $output
+fi
+
+if [ -e $outdir/aa_histogram_IGG.png ]
+then
 	echo "<img src='aa_histogram_IGG.png'/><br />" >> $output
+fi
+
+if [ -e $outdir/aa_histogram_IGM.png ]
+then
 	echo "<img src='aa_histogram_IGM.png'/><br />" >> $output
+fi
+
+if [ -e $outdir/aa_histogram_IGE.png ]
+then
 	echo "<img src='aa_histogram_IGE.png'/><br />" >> $output
 fi
 
-echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output
-echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output
-echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output
-echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output
-echo "<embed src='baseline_IGE.pdf' width='700px' height='1000px'>" >> $output
+
+if [ -e $outdir/baseline.png ]
+then
+	echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output
+fi
+
+if [ -e $outdir/baseline_IGA.png ]
+then
+	echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output
+fi
+
+if [ -e $outdir/baseline_IGG.png ]
+then
+	echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output
+fi
+
+if [ -e $outdir/baseline_IGM.png ]
+then
+	echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output
+fi
+
+if [ -e $outdir/baseline_IGE.png ]
+then
+	echo "<embed src='baseline_IGE.pdf' width='700px' height='1000px'>" >> $output
+fi
 
 echo "</div>" >> $output #antigen selection tab end
 
 echo "<div class='tabbertab' title='CSR'>" >> $output #CSR tab
 
-if [ -a $outdir/IGA.png ] 
+if [ -e $outdir/IGA.png ] 
 then
 	echo "<img src='IGA.png'/><br />" >> $output
 fi
-if [ -a $outdir/IGG.png ]
+if [ -e $outdir/IGG.png ]
 then
 	echo "<img src='IGG.png'/><br />" >> $output
 fi
@@ -492,7 +541,7 @@
 	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output
 	echo "</div>" >> $output
 
-	echo "<div class='tabbertab' title='Overview'>" >> $output
+	echo "<div class='tabbertab' title='Overlap'>" >> $output
 	cat "$outdir/sequence_overview/index.html" >> $output
 	echo "</div>" >> $output
 
@@ -512,7 +561,7 @@
 echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt' download='unmatched.txt' >Download</a></td></tr>" >> $output
 
 echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Overview</td></tr>" >> $output
-echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='data_sum.txt' download='data_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='shm_overview.txt' download='shm_overview.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output
@@ -526,17 +575,17 @@
 echo "<tr><td>The data for frequency by subclass</td><td><a href='frequency_ranges_subclasses.txt' download='frequency_ranges_subclasses.txt' >Download</a></td></tr>" >> $output
 
 echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Transition Tables</td></tr>" >> $output
-echo "<tr><td>The data for the 'all' transition plot</td><td><a href='transition_all_sum.txt' download='transition_all_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transition_IGA_sum.txt' download='transition_all_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transition_IGA1_sum.txt' download='transition_IGA1_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transition_IGA1_sum.txt' download='transition_IGA1_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGG' transition plot</td><td><a href='transition_IGG_sum.txt' download='transition_IGG_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGG1' transition plot</td><td><a href='transition_IGG1_sum.txt' download='transition_IGG1_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGG2' transition plot</td><td><a href='transition_IGG2_sum.txt' download='transition_IGG2_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGG3' transition plot</td><td><a href='transition_IGG3_sum.txt' download='transition_IGG3_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGG4' transition plot</td><td><a href='transition_IGG4_sum.txt' download='transition_IGG4_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGM' transition plot</td><td><a href='transition_IGM_sum.txt' download='transition_IGM_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGE' transition plot</td><td><a href='transition_IGE_sum.txt' download='transition_IGE_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'all' transition plot</td><td><a href='transitions_all_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transitions_IGA_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA1_sum.txt' download='transitions_IGA1_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA1_sum.txt' download='transitions_IGA1_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG' transition plot</td><td><a href='transitions_IGG_sum.txt' download='transitions_IGG_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG1' transition plot</td><td><a href='transitions_IGG1_sum.txt' download='transitions_IGG1_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG2' transition plot</td><td><a href='transitions_IGG2_sum.txt' download='transitions_IGG2_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG3' transition plot</td><td><a href='transitions_IGG3_sum.txt' download='transitions_IGG3_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG4' transition plot</td><td><a href='transitions_IGG4_sum.txt' download='transitions_IGG4_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGM' transition plot</td><td><a href='transitions_IGM_sum.txt' download='transitions_IGM_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGE' transition plot</td><td><a href='transitions_IGE_sum.txt' download='transitions_IGE_sum.txt' >Download</a></td></tr>" >> $output
 
 echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Antigen Selection</td></tr>" >> $output
 echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt' download='aa_id_mutations.txt' >Download</a></td></tr>" >> $output