# HG changeset patch
# User davidvanzessen
# Date 1478009737 14400
# Node ID 012a738edf5acb20af56e92d7f5d99400ba664a1
# Parent  477e95b098fda88ee772b52c294a283705843c7f
Uploaded

diff -r 477e95b098fd -r 012a738edf5a gene_identification.py
--- a/gene_identification.py	Mon Oct 31 05:05:26 2016 -0400
+++ b/gene_identification.py	Tue Nov 01 10:15:37 2016 -0400
@@ -47,10 +47,12 @@
 #lambda/kappa reference sequence
 searchstrings = {"ca": "catccccgaccagccccaaggtcttcccgctgagcctctgcagcacccagccagatgggaacgtggtcatcgcctgcctgg",
                  "cg": "ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggcc",
+                 "ce": "gcctccacacagagcccatccgtcttccccttgacccgctgctgcaaaaacattccctcc",
                  "cm": "gggagtgcatccgccccaacc"} #new (shorter) cm sequence
 
 compiledregex = {"ca": [],
                  "cg": [],
+                 "ce": [],
                  "cm": []}
 
 #lambda/kappa reference sequence variable nucleotides
@@ -102,6 +104,8 @@
 for i in range(0, len(searchstrings["cm"]) - chunklength, chunklength / 2):
   compiledregex["cm"].append((re.compile(searchstrings["cm"][i:i+chunklength]), False))
 
+for i in range(0, len(searchstrings["ce"]) - chunklength, chunklength / 2):
+  compiledregex["ce"].append((re.compile(searchstrings["ce"][i:i+chunklength]), False))
 
 
 def removeAndReturnMaxIndex(x): #simplifies a list comprehension
@@ -114,11 +118,11 @@
 start_location = dict()
 hits = dict()
 alltotal = 0
-for key in compiledregex.keys(): #for ca/cg/cm
+for key in compiledregex.keys(): #for ca/cg/cm/ce
 	regularexpressions = compiledregex[key] #get the compiled regular expressions
 	for ID in dic.keys()[0:]: #for every ID
 		if ID not in hits.keys(): #ensure that the dictionairy that keeps track of the hits for every gene exists
-			hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0}
+			hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ce_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0}
 		currentIDHits = hits[ID]
 		seq = dic[ID]
 		lastindex = 0
@@ -130,7 +134,7 @@
 				break
 			regex, hasVar = regexp
 			matches = regex.finditer(seq[lastindex:])
-			for match in matches: #for every match with the current regex, only uses the first hit
+			for match in matches: #for every match with the current regex, only uses the first hit because of the break at the end of this loop
 				lastindex += match.start()
 				start[relativeStartLocation + start_zero] += 1
 				if hasVar: #if the regex has a variable nt in it
@@ -144,7 +148,7 @@
 						currentIDHits["cg2"] += len([1 for x in cg2 if chunkstart <= x < chunkend and cg2[x] == seq[lastindex + x - chunkstart]])
 						currentIDHits["cg3"] += len([1 for x in cg3 if chunkstart <= x < chunkend and cg3[x] == seq[lastindex + x - chunkstart]])
 						currentIDHits["cg4"] += len([1 for x in cg4 if chunkstart <= x < chunkend and cg4[x] == seq[lastindex + x - chunkstart]])
-					else: #key == "cm" #no variable regions in 'cm'
+					else: #key == "cm" #no variable regions in 'cm' or 'ce'
 						pass
 				break #this only breaks when there was a match with the regex, breaking means the 'else:' clause is skipped
 			else: #only runs if there were no hits
@@ -155,13 +159,10 @@
 		#start_location[ID + "_" + key] = str(start.index(max(start)))
 
 
-chunksInCA = len(compiledregex["ca"])
-chunksInCG = len(compiledregex["cg"])
-chunksInCM = len(compiledregex["cm"])
-requiredChunkPercentage = 0.7
 varsInCA = float(len(ca1.keys()) * 2)
 varsInCG = float(len(cg1.keys()) * 2) - 2 # -2 because the sliding window doesn't hit the first and last nt twice
 varsInCM = 0
+varsInCE = 0
 
 
 
@@ -183,17 +184,19 @@
 			possibleca = float(len(compiledregex["ca"]))
 			possiblecg = float(len(compiledregex["cg"]))
 			possiblecm = float(len(compiledregex["cm"]))
+			possiblece = float(len(compiledregex["ce"]))
 			cahits = currentIDHits["ca_hits"]
 			cghits = currentIDHits["cg_hits"]
 			cmhits = currentIDHits["cm_hits"]
-			if cahits >= cghits and cahits >= cmhits: #its a ca gene
+			cehits = currentIDHits["ce_hits"]
+			if cahits >= cghits and cahits >= cmhits and cahits >= cehits: #its a ca gene
 				ca1hits = currentIDHits["ca1"]
 				ca2hits = currentIDHits["ca2"]
 				if ca1hits >= ca2hits:
 					o.write(ID + "\tIGA1\t" + str(int(ca1hits / varsInCA * 100)) + "\t" + str(int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n")
 				else:
 					o.write(ID + "\tIGA2\t" + str(int(ca2hits / varsInCA * 100)) + "\t" + str(int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n")
-			elif cghits >= cahits and cghits >= cmhits: #its a cg gene
+			elif cghits >= cahits and cghits >= cmhits and cghits >= cehits: #its a cg gene
 				cg1hits = currentIDHits["cg1"]
 				cg2hits = currentIDHits["cg2"]
 				cg3hits = currentIDHits["cg3"]
@@ -206,8 +209,11 @@
 					o.write(ID + "\tIGG3\t" + str(int(cg3hits / varsInCG * 100)) + "\t" + str(int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
 				else: #cg4 gene
 					o.write(ID + "\tIGG4\t" + str(int(cg4hits / varsInCG * 100)) + "\t" + str(int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
-			else: #its a cm gene
-				o.write(ID + "\tIGM\t100\t" + str(int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
+			else: #its a cm or ce gene
+				if cmhits >= cehits:
+					o.write(ID + "\tIGM\t100\t" + str(int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cm"] + "\n")
+				else:
+					o.write(ID + "\tIGE\t100\t" + str(int(cehits / possiblece * 100)) + "\t" + start_location[ID + "_ce"] + "\n")
 			seq_write_count += 1
 
 print "Time: %i" % (int(time.time() * 1000) - starttime)
diff -r 477e95b098fd -r 012a738edf5a merge_and_filter.r
--- a/merge_and_filter.r	Mon Oct 31 05:05:26 2016 -0400
+++ b/merge_and_filter.r	Tue Nov 01 10:15:37 2016 -0400
@@ -141,44 +141,6 @@
   result[is.na(result[,col]),] = 0
 }
 
-write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T)
-
-if(filter.unique != "no"){
-	clmns = names(result)
-	
-	if(empty.region.filter == "leader"){
-		result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
-	} else if(empty.region.filter == "FR1"){
-		result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
-	} else if(empty.region.filter == "CDR1"){
-		rresult$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
-	} else if(empty.region.filter == "FR2"){
-		result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
-	}
-	
-	if(grepl("_c", filter.unique)){
-		result$unique.def = paste(result$unique.def, result$best_match)
-	}
-
-	#fltr = result$unique.def %in% result.filtered$unique.def
-
-	if(grepl("keep", filter.unique)){
-		result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes
-		result = result[!duplicated(result$unique.def),]
-	} else {
-		result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
-		result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes
-		result = result[!duplicated(result$unique.def),]
-	}
-	
-	#result = result[,clmns]
-	
-	#write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T)
-}
-
-filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result)))
-
-
 splt = strsplit(class.filter, "_")[[1]]
 chunk_hit_threshold = as.numeric(splt[1])
 nt_hit_threshold = as.numeric(splt[2])
@@ -198,10 +160,39 @@
 	result$best_match = "all"
 }
 
-if(any(higher_than, na.rm=T)){
-	#summ = summ[higher_than,]
+write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T)
+
+if(filter.unique != "no"){
+	clmns = names(result)
+	
+	if(empty.region.filter == "leader"){
+		result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
+	} else if(empty.region.filter == "FR1"){
+		result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
+	} else if(empty.region.filter == "CDR1"){
+		result$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
+	} else if(empty.region.filter == "FR2"){
+		result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
+	}
+
+	if(grepl("_c", filter.unique)){
+		result$unique.def = paste(result$unique.def, result$best_match)
+	}
+
+	if(grepl("keep", filter.unique)){
+		result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes
+		result = result[!duplicated(result$unique.def),]
+	} else {
+		result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
+		result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes
+		result = result[!duplicated(result$unique.def),]
+	}
 }
 
+write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T)
+
+filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result)))
+
 if(nrow(summ) == 0){
 	stop("No data remaining after filter")
 }
diff -r 477e95b098fd -r 012a738edf5a pattern_plots.r
--- a/pattern_plots.r	Mon Oct 31 05:05:26 2016 -0400
+++ b/pattern_plots.r	Tue Nov 01 10:15:37 2016 -0400
@@ -22,7 +22,7 @@
 
 
 
-classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM")
+classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE")
 xyz = c("x", "y", "z")
 new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep="."))
 
@@ -45,7 +45,7 @@
 write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
 
 p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL))
-p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))
+p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))
 #p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4"))
 png(filename=plot1.png, width=480, height=300)
 print(p)
@@ -79,7 +79,7 @@
 write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
 
 p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations")
-p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
+p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
 #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
 png(filename=plot2.png, width=480, height=300)
 print(p)
@@ -115,7 +115,7 @@
 write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
 
 p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL))
-p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
+p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
 #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
 png(filename=plot3.png, width=480, height=300)
 print(p)
diff -r 477e95b098fd -r 012a738edf5a shm_csr.r
--- a/shm_csr.r	Mon Oct 31 05:05:26 2016 -0400
+++ b/shm_csr.r	Tue Nov 01 10:15:37 2016 -0400
@@ -280,15 +280,13 @@
 		transition2 = merge(transition2, base.order, by.x="variable", by.y="base")
 
 		transition2[is.na(transition2$value),]$value = 0
-		
-		print(transition2)
 
 		if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad
 			print("Plotting stacked transition")
 			png(filename=paste("transitions_stacked_", name, ".png", sep=""))
 			p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar
 			p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL))
-			p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4"))
+			p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4"))
 			#p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black"))
 			print(p)
 			dev.off()
@@ -374,7 +372,7 @@
 	pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene))
 	pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGA1" = "lightblue1", "IGA2" = "blue4"))
 	pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
-	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
+	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
 	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclasses", "( n =", sum(genesForPlot$Freq), ")"))
 	write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
 
@@ -395,7 +393,7 @@
 	pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene))
 	pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred"))
 	pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
-	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
+	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
 	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclasses", "( n =", sum(genesForPlot$Freq), ")"))
 	write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
 
@@ -415,7 +413,7 @@
 
 p = ggplot(dat.clss, aes(best_match, percentage_mutations))
 p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA)
-p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
+p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
 p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4"))
 p = p + scale_colour_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4"))
 
@@ -442,7 +440,7 @@
 frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
 
 p = ggplot(frequency_bins_data, aes(frequency_bins, frequency))
-p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
+p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
 p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "black", "all" = "blue4"))
 
 png(filename="frequency_ranges.png")
diff -r 477e95b098fd -r 012a738edf5a shm_csr.xml
--- a/shm_csr.xml	Mon Oct 31 05:05:26 2016 -0400
+++ b/shm_csr.xml	Tue Nov 01 10:15:37 2016 -0400
@@ -1,7 +1,7 @@
 <tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.0">
 	<description></description>
 	<command interpreter="bash">
-		wrapper.sh $in_file custom $out_file $out_file.files_path ${in_file.name} "-" $functionality $unique $naive_output_ca $naive_output_cg $naive_output_cm $filter_uniques $class_filter $empty_region_filter
+		wrapper.sh $in_file custom $out_file $out_file.files_path ${in_file.name} "-" $functionality $unique $naive_output_ca $naive_output_cg $naive_output_cm $filter_uniques $class_filter $empty_region_filter $fast
 	</command>
 	<inputs>
 		<param name="in_file" type="data" label="IMGT zip file to be analysed" />
@@ -33,7 +33,7 @@
 			<option value="CDR3.IMGT.seq">CDR3 (nt)</option>
 			<option value="Sequence.ID">Don't remove duplicates</option>
 		</param>
-		<param name="class_filter" type="select" label="Class/Subclass filter" help="" >
+		<param name="class_filter" type="select" label="Human Class/Subclass filter" help="" >
 			<option value="70_70" selected="true">>70% class and >70% subclass</option>
 			<option value="60_55">>60% class and >55% subclass</option>
 			<option value="70_0">>70% class</option>
@@ -46,6 +46,14 @@
 				<option value="no" selected="true">No</option>
 			</param>
 		</conditional>
+		
+		
+		<param name="fast" type="select" label="Fast" help="Skips generating the new ZIP files and Change-O/Baseline" >
+			<option value="yes">Yes</option>
+			<option value="no" selected="true">No</option>
+		</param>
+		
+		
 	</inputs>
 	<outputs>
 		<data format="html" name="out_file" label = "SHM &amp; CSR on ${in_file.name}"/>
diff -r 477e95b098fd -r 012a738edf5a wrapper.sh
--- a/wrapper.sh	Mon Oct 31 05:05:26 2016 -0400
+++ b/wrapper.sh	Tue Nov 01 10:15:37 2016 -0400
@@ -16,6 +16,7 @@
 filter_unique=${12}
 class_filter=${13}
 empty_region_filter=${14}
+fast=${15}
 mkdir $outdir
 
 tar -xzf $dir/style.tar.gz -C $outdir
@@ -62,101 +63,112 @@
 
 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1
 
-echo "---------------- creating new IMGT zips ----------------"
-echo "---------------- creating new IMGT zips ----------------<br />" >> $log
+if [[ "$fast" == "no" ]] ; then
 
-mkdir $outdir/new_IMGT
+	echo "---------------- creating new IMGT zips ----------------"
+	echo "---------------- creating new IMGT zips ----------------<br />" >> $log
+
+	mkdir $outdir/new_IMGT
 
-cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt"
-cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt"
-cat `find $PWD/files/ -name "3_*"` > "$outdir/new_IMGT/3_Nt-sequences.txt"
-cat `find $PWD/files/ -name "4_*"` > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt"
-cat `find $PWD/files/ -name "5_*"` > "$outdir/new_IMGT/5_AA-sequences.txt"
-cat `find $PWD/files/ -name "6_*"` > "$outdir/new_IMGT/6_Junction.txt"
-cat `find $PWD/files/ -name "7_*"` > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt"
-cat `find $PWD/files/ -name "8_*"` > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt"
-cat `find $PWD/files/ -name "9_*"` > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt"
-cat `find $PWD/files/ -name "10_*"` > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt"
+	cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt"
+	cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt"
+	cat `find $PWD/files/ -name "3_*"` > "$outdir/new_IMGT/3_Nt-sequences.txt"
+	cat `find $PWD/files/ -name "4_*"` > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt"
+	cat `find $PWD/files/ -name "5_*"` > "$outdir/new_IMGT/5_AA-sequences.txt"
+	cat `find $PWD/files/ -name "6_*"` > "$outdir/new_IMGT/6_Junction.txt"
+	cat `find $PWD/files/ -name "7_*"` > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt"
+	cat `find $PWD/files/ -name "8_*"` > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt"
+	cat `find $PWD/files/ -name "9_*"` > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt"
+	cat `find $PWD/files/ -name "10_*"` > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt"
 
-mkdir $outdir/new_IMGT_IGA
-cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA
+	mkdir $outdir/new_IMGT_IGA
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA
 
-mkdir $outdir/new_IMGT_IGA1
-cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1
+	mkdir $outdir/new_IMGT_IGA1
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1
 
-mkdir $outdir/new_IMGT_IGA2
-cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2
+	mkdir $outdir/new_IMGT_IGA2
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2
 
-mkdir $outdir/new_IMGT_IGG
-cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG
+	mkdir $outdir/new_IMGT_IGG
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG
 
-mkdir $outdir/new_IMGT_IGG1
-cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1
+	mkdir $outdir/new_IMGT_IGG1
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1
 
-mkdir $outdir/new_IMGT_IGG2
-cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2
+	mkdir $outdir/new_IMGT_IGG2
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2
 
-mkdir $outdir/new_IMGT_IGG3
-cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3
+	mkdir $outdir/new_IMGT_IGG3
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3
 
-mkdir $outdir/new_IMGT_IGG4
-cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4
+	mkdir $outdir/new_IMGT_IGG4
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4
+
+	mkdir $outdir/new_IMGT_IGM
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM
 
-mkdir $outdir/new_IMGT_IGM
-cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM
+	mkdir $outdir/new_IMGT_IGE
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE
 
-Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1
+
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1
 
-Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1
-Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1
-Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1
 
-Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1
-Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1
-Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1
-Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1
-Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1
 
-Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE/ $outdir/merged.txt "IGE" 2>&1
 
 
-tmp="$PWD"
-cd $outdir/new_IMGT/ #tar weirdness...
-tar -cJf ../new_IMGT.txz *
+	tmp="$PWD"
+	cd $outdir/new_IMGT/ #tar weirdness...
+	tar -cJf ../new_IMGT.txz *
 
-cd $outdir/new_IMGT_IGA/
-tar -cJf ../new_IMGT_IGA.txz *
+	cd $outdir/new_IMGT_IGA/
+	tar -cJf ../new_IMGT_IGA.txz *
 
-cd $outdir/new_IMGT_IGA1/
-tar -cJf ../new_IMGT_IGA1.txz *
+	cd $outdir/new_IMGT_IGA1/
+	tar -cJf ../new_IMGT_IGA1.txz *
 
-cd $outdir/new_IMGT_IGA2/
-tar -cJf ../new_IMGT_IGA2.txz *
+	cd $outdir/new_IMGT_IGA2/
+	tar -cJf ../new_IMGT_IGA2.txz *
 
-cd $outdir/new_IMGT_IGG/
-tar -cJf ../new_IMGT_IGG.txz *
+	cd $outdir/new_IMGT_IGG/
+	tar -cJf ../new_IMGT_IGG.txz *
 
-cd $outdir/new_IMGT_IGG1/
-tar -cJf ../new_IMGT_IGG1.txz *
+	cd $outdir/new_IMGT_IGG1/
+	tar -cJf ../new_IMGT_IGG1.txz *
 
-cd $outdir/new_IMGT_IGG2/
-tar -cJf ../new_IMGT_IGG2.txz *
+	cd $outdir/new_IMGT_IGG2/
+	tar -cJf ../new_IMGT_IGG2.txz *
+
+	cd $outdir/new_IMGT_IGG3/
+	tar -cJf ../new_IMGT_IGG3.txz *
 
-cd $outdir/new_IMGT_IGG3/
-tar -cJf ../new_IMGT_IGG3.txz *
+	cd $outdir/new_IMGT_IGG4/
+	tar -cJf ../new_IMGT_IGG4.txz *
+
+	cd $outdir/new_IMGT_IGM/
+	tar -cJf ../new_IMGT_IGM.txz *
 
-cd $outdir/new_IMGT_IGG4/
-tar -cJf ../new_IMGT_IGG4.txz *
+	cd $outdir/new_IMGT_IGE/
+	tar -cJf ../new_IMGT_IGE.txz *
 
-cd $outdir/new_IMGT_IGM/
-tar -cJf ../new_IMGT_IGM.txz *
-
-cd $tmp
+	cd $tmp
+fi
 
 echo "---------------- shm_csr.r ----------------"
 echo "---------------- shm_csr.r ----------------<br />" >> $log
 
-classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,unmatched"
+classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched"
 echo "R mutation analysis"
 Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1
 
@@ -168,13 +180,13 @@
 echo "---------------- aa_histogram.r ----------------"
 echo "---------------- aa_histogram.r ----------------<br />" >> $log
 
-Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM" $outdir/ 2>&1
+Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" $outdir/ 2>&1
 if [ -e "$outdir/aa_histogram_.png" ]; then
         mv $outdir/aa_histogram_.png $outdir/aa_histogram.png
         mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt
 fi
 
-genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM)
+genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE)
 
 funcs=(sum mean median)
 funcs=(sum)
@@ -247,14 +259,14 @@
 		tmp=`cat $outdir/unmatched_${func}_n.txt`
 		echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th><tr></thead>" >> $output
 
-		while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
+		while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz 
 		do
 			if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh
-				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>" >> $output
+				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${cex}/${cey} (${cez})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>" >> $output
 			elif [ "$name" == "Median of Number of Mutations (%)" ] ; then
-				echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${allz}%</td><td>${unz}%</td></tr>" >> $output
+				echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${cez}%</td><td>${allz}%</td><td>${unz}%</td></tr>" >> $output
 			else
-				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output
+				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${cex}/${cey} (${cez}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output
 			fi
 		done < $outdir/data_${func}.txt
 		
@@ -262,7 +274,7 @@
 		tmp=`cat $outdir/all_${func}_n.txt`
 		echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output
 		
-		while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
+		while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz
 		do
 			if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh
 				echo "<tr><td>$name</td><td>${allx}/${ally}</td></tr>" >> $output
@@ -381,46 +393,48 @@
 
 echo "</div>" >> $output #CSR tab end
 
-echo "---------------- change-o MakeDB ----------------"
+if [[ "$fast" == "no" ]] ; then
+
+	echo "---------------- change-o MakeDB ----------------"
 
-mkdir $outdir/change_o
+	mkdir $outdir/change_o
 
-tmp="$PWD"
+	tmp="$PWD"
 
-cd $outdir/change_o
+	cd $outdir/change_o
 
-bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt
-bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt
+	bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt
+	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt
 
-Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1
+	Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1
 
-echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"
+	echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"
 
-if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then
-	bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt
-	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt
-else
-	echo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt"
-	echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt"
-fi
+	if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then
+		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt
+		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt
+	else
+		echo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt"
+		echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt"
+	fi
 
-if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then
-	bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt
-	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt
-else
-	echo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt"
-	echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt"
-fi
+	if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then
+		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt
+		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt
+	else
+		echo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt"
+		echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt"
+	fi
 
-if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then
-	bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt
-	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt
-else
-	echo "No IGM sequences" > "$outdir/change_o/change-o-db-defined_clones-IGM.txt"
-	echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt"
-fi
+	if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then
+		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt
+		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt
+	else
+		echo "No IGM sequences" > "$outdir/change_o/change-o-db-defined_clones-IGM.txt"
+		echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt"
+	fi
 
-PWD="$tmp"
+	PWD="$tmp"
 
 echo "<div class='tabbertab' title='Clonality'>" >> $output #clonality tab
 
@@ -471,6 +485,8 @@
 
 echo "</div>" >> $output #clonality tab end
 
+fi
+
 echo "<div class='tabbertab' title='Downloads'>" >> $output
 
 echo "<table class='pure-table pure-table-striped'>" >> $output
@@ -553,54 +569,59 @@
 
 echo "</html>" >> $output
 
-echo "---------------- baseline ----------------"
-echo "---------------- baseline ----------------<br />" >> $log
-tmp="$PWD"
+
+if [[ "$fast" == "no" ]] ; then
 
-mkdir $outdir/baseline
+	echo "---------------- baseline ----------------"
+	echo "---------------- baseline ----------------<br />" >> $log
+	tmp="$PWD"
+
+	mkdir $outdir/baseline
 
 
-mkdir $outdir/baseline/IGA_IGG_IGM
-if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
-	cd $outdir/baseline/IGA_IGG_IGM
-	bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "IGA_IGG_IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"	
-else
-	echo "No sequences" > "$outdir/baseline.txt"
-fi
+	mkdir $outdir/baseline/IGA_IGG_IGM
+	if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
+		cd $outdir/baseline/IGA_IGG_IGM
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "IGA_IGG_IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"	
+	else
+		echo "No sequences" > "$outdir/baseline.txt"
+	fi
 
-mkdir $outdir/baseline/IGA
-if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then
-	cd $outdir/baseline/IGA
-	bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt"
-else
-	echo "No IGA sequences" > "$outdir/baseline_IGA.txt"
-fi
+	mkdir $outdir/baseline/IGA
+	if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then
+		cd $outdir/baseline/IGA
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt"
+	else
+		echo "No IGA sequences" > "$outdir/baseline_IGA.txt"
+	fi
 
-mkdir $outdir/baseline/IGG
-if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then
-	cd $outdir/baseline/IGG
-	bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGG.txz "cg" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt"
-else
-	echo "No IGG sequences" > "$outdir/baseline_IGG.txt"
-fi
+	mkdir $outdir/baseline/IGG
+	if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then
+		cd $outdir/baseline/IGG
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGG.txz "cg" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt"
+	else
+		echo "No IGG sequences" > "$outdir/baseline_IGG.txt"
+	fi
 
-mkdir $outdir/baseline/IGM
-if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then
-	cd $outdir/baseline/IGM
-	bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt"
-else
-	echo "No IGM sequences" > "$outdir/baseline_IGM.txt"
+	mkdir $outdir/baseline/IGM
+	if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then
+		cd $outdir/baseline/IGM
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt"
+	else
+		echo "No IGM sequences" > "$outdir/baseline_IGM.txt"
+	fi
+
+	cd $tmp
+
+	echo "Cleaning up *.RData files"
+	find $outdir/baseline -name "*.RData" -type f -delete
+
 fi
 
-cd $tmp
-
-echo "Cleaning up *.RData files"
-find $outdir/baseline -name "*.RData" -type f -delete
-
 echo "---------------- naive_output.r ----------------"
 echo "---------------- naive_output.r ----------------<br />" >> $log
 
-if [[ "$naive_output" != "None" ]]
+if [[ "$naive_output" == "yes" ]]
 then
 	cp $outdir/new_IMGT_IGA.txz ${naive_output_ca}
 	cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}