changeset 80:a4617f1d1d89 draft

Uploaded
author davidvanzessen
date Fri, 19 Feb 2021 15:08:51 +0000
parents 98e3fecedd2b
children b6f9a640e098
files .gitattributes .gitignore LICENSE README.md aa_histogram.r baseline/Baseline_Functions.r baseline/Baseline_Main.r baseline/FiveS_Mutability.RData baseline/FiveS_Substitution.RData baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa baseline/IMGTVHreferencedataset20161215.fa baseline/IMGTVHreferencedataset20161215.fasta baseline/baseline_url.txt baseline/comparePDFs.r baseline/filter.r baseline/script_imgt.py baseline/script_xlsx.py baseline/wrapper.sh change_o/change_o_url.txt change_o/define_clones.r change_o/define_clones.sh change_o/makedb.sh change_o/select_first_in_clone.r check_unique_id.r datatypes_conf.xml gene_identification.py imgt_loader.r merge.r merge_and_filter.r mutation_column_checker.py naive_output.r new_imgt.r pattern_plots.r plot_pdf.r sequence_overview.r shm_clonality.htm shm_csr.htm shm_csr.py shm_csr.r shm_csr.xml shm_csr/.gitattributes shm_csr/.gitignore shm_csr/LICENSE shm_csr/README.md shm_csr/aa_histogram.r shm_csr/baseline/Baseline_Functions.r shm_csr/baseline/Baseline_Main.r shm_csr/baseline/FiveS_Mutability.RData shm_csr/baseline/FiveS_Substitution.RData shm_csr/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa shm_csr/baseline/IMGTVHreferencedataset20161215.fa shm_csr/baseline/IMGTVHreferencedataset20161215.fasta shm_csr/baseline/baseline_url.txt shm_csr/baseline/comparePDFs.r shm_csr/baseline/filter.r shm_csr/baseline/script_imgt.py shm_csr/baseline/script_xlsx.py shm_csr/baseline/wrapper.sh shm_csr/change_o/change_o_url.txt shm_csr/change_o/define_clones.r shm_csr/change_o/define_clones.sh shm_csr/change_o/makedb.sh shm_csr/change_o/select_first_in_clone.r shm_csr/check_unique_id.r shm_csr/datatypes_conf.xml shm_csr/gene_identification.py shm_csr/imgt_loader.r shm_csr/merge.r shm_csr/merge_and_filter.r shm_csr/mutation_column_checker.py shm_csr/naive_output.r shm_csr/new_imgt.r shm_csr/pattern_plots.r shm_csr/plot_pdf.r shm_csr/sequence_overview.r shm_csr/shm_clonality.htm shm_csr/shm_csr.htm shm_csr/shm_csr.py shm_csr/shm_csr.r 
shm_csr/shm_csr.xml shm_csr/shm_downloads.htm shm_csr/shm_first.htm shm_csr/shm_frequency.htm shm_csr/shm_overview.htm shm_csr/shm_selection.htm shm_csr/shm_transition.htm shm_csr/style.tar.gz shm_csr/subclass_definition.db.nhr shm_csr/subclass_definition.db.nin shm_csr/subclass_definition.db.nsq shm_csr/summary_to_fasta.py shm_csr/wrapper.sh shm_downloads.htm shm_first.htm shm_frequency.htm shm_overview.htm shm_selection.htm shm_transition.htm style.tar.gz subclass_definition.db.nhr subclass_definition.db.nin subclass_definition.db.nsq summary_to_fasta.py wrapper.sh
diffstat 104 files changed, 9304 insertions(+), 9297 deletions(-) [+]
line wrap: on
line diff
--- a/.gitattributes	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-# Auto detect text files and perform LF normalization
-* text=auto
--- a/.gitignore	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-
-shm_csr\.tar\.gz
-
-\.vscode/settings\.json
--- a/LICENSE	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2019 david
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
--- a/README.md	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-# SHM CSR
-
-Somatic hypermutation and class switch recombination pipeline.  
-The docker version can be found [here](https://github.com/ErasmusMC-Bioinformatics/ARGalaxy-docker).
-
-# Dependencies
---------------------
-[Python 2.7](https://www.python.org/)  
-[Change-O](https://changeo.readthedocs.io/en/version-0.4.4/)  
-[Baseline](http://selection.med.yale.edu/baseline/)  
-[R data.table](https://cran.r-project.org/web/packages/data.table/data.table.pdf)
-[R ggplot2](https://cran.r-project.org/web/packages/ggplot2/ggplot2.pdf)
-[R reshape2](https://cran.r-project.org/web/packages/reshape/reshape.pdf)
--- a/aa_histogram.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-library(ggplot2)
-
-args <- commandArgs(trailingOnly = TRUE)
-
-mutations.by.id.file = args[1]
-absent.aa.by.id.file = args[2]
-genes = strsplit(args[3], ",")[[1]]
-genes = c(genes, "")
-outdir = args[4]
-
-
-print("---------------- read input ----------------")
-
-mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="")
-absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="")
-
-for(gene in genes){
-	graph.title = paste(gene, "AA mutation frequency")
-	if(gene == ""){
-		mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),]
-		absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),]
-		
-		graph.title = "AA mutation frequency all"
-	} else {
-		mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),]
-		absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),]
-	}
-	print(paste("nrow", gene, nrow(absent.aa.by.id.gene)))
-	if(nrow(mutations.by.id.gene) == 0){
-		next
-	}
-
-	mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)])
-	aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)])
-
-	dat_freq = mutations.at.position / aa.at.position
-	dat_freq[is.na(dat_freq)] = 0
-	dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq)
-	
-
-	print("---------------- plot ----------------")
-
-	m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black"))
-	m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i)
-	m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1")
-	m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1")
-	m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2")
-	m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2")
-	m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3")
-	m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(graph.title) 
-	m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
-	#m = m + scale_colour_manual(values=c("black"))
-
-	print("---------------- write/print ----------------")
-
-
-	dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position)
-
-	write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-	write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-	write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-	write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-	
-	png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720)
-	print(m)
-	dev.off()
-	
-	ggsave(paste(outdir, "/aa_histogram_", gene, ".pdf", sep=""), m, width=14, height=7)
-}
--- a/baseline/Baseline_Functions.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2287 +0,0 @@
-#########################################################################################
-# License Agreement
-# 
-# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE 
-# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER 
-# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE 
-# OR COPYRIGHT LAW IS PROHIBITED.
-# 
-# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE 
-# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED 
-# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN 
-# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
-#
-# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
-# Coded by: Mohamed Uduman & Gur Yaari
-# Copyright 2012 Kleinstein Lab
-# Version: 1.3 (01/23/2014)
-#########################################################################################
-
-# Global variables  
-  
-  FILTER_BY_MUTATIONS = 1000
-
-  # Nucleotides
-  NUCLEOTIDES = c("A","C","G","T")
-  
-  # Amino Acids
-  AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G")
-  names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG")
-  names(AMINO_ACIDS) <- names(AMINO_ACIDS)
-
-  #Amino Acid Traits
-  #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"
-  #B = "Hydrophobic/Burried"  N = "Intermediate/Neutral"  S="Hydrophilic/Surface") 
-  TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N")
-  names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))
-  TRAITS_AMINO_ACIDS <- array(NA,21)
-  
-  # Codon Table
-  CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12))
-
-  # Substitution Model: Smith DS et al. 1996
-  substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
-  substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
-  substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
-  load("FiveS_Substitution.RData")
-
-  # Mutability Models: Shapiro GS et al. 2002
-  triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34, 0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1, NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14), nrow=64,byrow=T)
-  triMutability_Literature_Mouse <- matrix(c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34, 0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55, 0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33, 0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41), nrow=64,byrow=T)
-  triMutability_Names <- c("AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT")
-  load("FiveS_Mutability.RData")
-
-# Functions
-  
-  # Translate codon to amino acid
-  translateCodonToAminoAcid<-function(Codon){
-     return(AMINO_ACIDS[Codon])
-  }
-
-  # Translate amino acid to trait change
-  translateAminoAcidToTraitChange<-function(AminoAcid){
-     return(TRAITS_AMINO_ACIDS[AminoAcid])
-  }
-    
-  # Initialize Amino Acid Trait Changes
-  initializeTraitChange <- function(traitChangeModel=1,species=1,traitChangeFileName=NULL){
-    if(!is.null(traitChangeFileName)){
-      tryCatch(
-          traitChange <- read.delim(traitChangeFileName,sep="\t",header=T)
-          , error = function(ex){
-            cat("Error|Error reading trait changes. Please check file name/path and format.\n")
-            q()
-          }
-        )
-    }else{
-      traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98
-    }
-    TRAITS_AMINO_ACIDS <<- traitChange
- } 
-  
-  # Read in formatted nucleotide substitution matrix
-  initializeSubstitutionMatrix <- function(substitutionModel,species,subsMatFileName=NULL){
-    if(!is.null(subsMatFileName)){
-      tryCatch(
-          subsMat <- read.delim(subsMatFileName,sep="\t",header=T)
-          , error = function(ex){
-            cat("Error|Error reading substitution matrix. Please check file name/path and format.\n")
-            q()
-          }
-        )
-      if(sum(apply(subsMat,1,sum)==1)!=4) subsMat = t(apply(subsMat,1,function(x)x/sum(x)))
-    }else{
-      if(substitutionModel==1)subsMat <- substitution_Literature_Mouse
-      if(substitutionModel==2)subsMat <- substitution_Flu_Human      
-      if(substitutionModel==3)subsMat <- substitution_Flu25_Human      
-       
-    }
-
-    if(substitutionModel==0){
-      subsMat <- matrix(1,4,4)
-      subsMat[,] = 1/3
-      subsMat[1,1] = 0
-      subsMat[2,2] = 0
-      subsMat[3,3] = 0
-      subsMat[4,4] = 0
-    }
-    
-    
-    NUCLEOTIDESN = c(NUCLEOTIDES,"N", "-")
-    if(substitutionModel==5){
-      subsMat <- FiveS_Substitution
-      return(subsMat)
-    }else{
-      subsMat <- rbind(subsMat,rep(NA,4),rep(NA,4))
-      return( matrix(data.matrix(subsMat),6,4,dimnames=list(NUCLEOTIDESN,NUCLEOTIDES) ) )
-    }
-  }
-
-   
-  # Read in formatted Mutability file
-  initializeMutabilityMatrix <- function(mutabilityModel=1, species=1,mutabilityMatFileName=NULL){
-    if(!is.null(mutabilityMatFileName)){
-        tryCatch(
-            mutabilityMat <- read.delim(mutabilityMatFileName,sep="\t",header=T)
-            , error = function(ex){
-              cat("Error|Error reading mutability matrix. Please check file name/path and format.\n")
-              q()
-            }
-          )
-    }else{
-      mutabilityMat <- triMutability_Literature_Human
-      if(species==2) mutabilityMat <- triMutability_Literature_Mouse
-    }
-
-  if(mutabilityModel==0){ mutabilityMat <- matrix(1,64,3)}
-  
-    if(mutabilityModel==5){
-      mutabilityMat <- FiveS_Mutability
-      return(mutabilityMat)
-    }else{
-      return( matrix( data.matrix(mutabilityMat), 64, 3, dimnames=list(triMutability_Names,1:3)) )
-    }
-  }
-
-  # Read FASTA file formats
-  # Modified from read.fasta from the seqinR package
-  baseline.read.fasta <-
-  function (file = system.file("sequences/sample.fasta", package = "seqinr"), 
-      seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE, 
-      set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE, 
-      strip.desc = FALSE,  sizeof.longlong = .Machine$sizeof.longlong, 
-      endian = .Platform$endian, apply.mask = TRUE) 
-  {
-      seqtype <- match.arg(seqtype)
-  
-          lines <- readLines(file)
-          
-          if (legacy.mode) {
-              comments <- grep("^;", lines)
-              if (length(comments) > 0) 
-                  lines <- lines[-comments]
-          }
-          
-          
-          ind_groups<-which(substr(lines, 1L, 3L) == ">>>")
-          lines_mod<-lines
-  
-          if(!length(ind_groups)){
-              lines_mod<-c(">>>All sequences combined",lines)            
-          }
-          
-          ind_groups<-which(substr(lines_mod, 1L, 3L) == ">>>")
-  
-          lines <- array("BLA",dim=(length(ind_groups)+length(lines_mod)))
-          id<-sapply(1:length(ind_groups),function(i)ind_groups[i]+i-1)+1
-          lines[id] <- "THIS IS A FAKE SEQUENCE"
-          lines[-id] <- lines_mod
-          rm(lines_mod)
-  
-  		ind <- which(substr(lines, 1L, 1L) == ">")
-          nseq <- length(ind)
-          if (nseq == 0) {
-               stop("no line starting with a > character found")
-          }        
-          start <- ind + 1
-          end <- ind - 1
-  
-          while( any(which(ind%in%end)) ){
-            ind=ind[-which(ind%in%end)]
-            nseq <- length(ind)
-            if (nseq == 0) {
-                stop("no line starting with a > character found")
-            }        
-            start <- ind + 1
-            end <- ind - 1        
-          }
-          
-          end <- c(end[-1], length(lines))
-          sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = ""))
-          if (seqonly) 
-              return(sequences)
-          nomseq <- lapply(seq_len(nseq), function(i) {
-          
-              #firstword <- strsplit(lines[ind[i]], " ")[[1]][1]
-              substr(lines[ind[i]], 2, nchar(lines[ind[i]]))
-          
-          })
-          if (seqtype == "DNA") {
-              if (forceDNAtolower) {
-                  sequences <- as.list(tolower(chartr(".","-",sequences)))
-              }else{
-                  sequences <- as.list(toupper(chartr(".","-",sequences)))
-              }
-          }
-          if (as.string == FALSE) 
-              sequences <- lapply(sequences, s2c)
-          if (set.attributes) {
-              for (i in seq_len(nseq)) {
-                  Annot <- lines[ind[i]]
-                  if (strip.desc) 
-                    Annot <- substr(Annot, 2L, nchar(Annot))
-                  attributes(sequences[[i]]) <- list(name = nomseq[[i]], 
-                    Annot = Annot, class = switch(seqtype, AA = "SeqFastaAA", 
-                      DNA = "SeqFastadna"))
-              }
-          }
-          names(sequences) <- nomseq
-          return(sequences)
-  }
-
-  
-  # Replaces non FASTA characters in input files with N  
-  replaceNonFASTAChars <-function(inSeq="ACGTN-AApA"){
-    gsub('[^ACGTNacgt[:punct:]-[:punct:].]','N',inSeq,perl=TRUE)
-  }    
-  
-  # Find the germlines in the FASTA list
-  germlinesInFile <- function(seqIDs){
-    firstChar = sapply(seqIDs,function(x){substr(x,1,1)})
-    secondChar = sapply(seqIDs,function(x){substr(x,2,2)})
-    return(firstChar==">" & secondChar!=">")
-  }
-  
-  # Find the groups in the FASTA list
-  groupsInFile <- function(seqIDs){
-    sapply(seqIDs,function(x){substr(x,1,2)})==">>"
-  }
-
-  # In the process of finding germlines/groups, expand from the start to end of the group
-  expandTillNext <- function(vecPosToID){    
-    IDs = names(vecPosToID)
-    posOfInterests =  which(vecPosToID)
-  
-    expandedID = rep(NA,length(IDs))
-    expandedIDNames = gsub(">","",IDs[posOfInterests])
-    startIndexes = c(1,posOfInterests[-1])
-    stopIndexes = c(posOfInterests[-1]-1,length(IDs))
-    expandedID  = unlist(sapply(1:length(startIndexes),function(i){
-                                    rep(i,stopIndexes[i]-startIndexes[i]+1)
-                                  }))
-    names(expandedID) = unlist(sapply(1:length(startIndexes),function(i){
-                                    rep(expandedIDNames[i],stopIndexes[i]-startIndexes[i]+1)
-                                  }))  
-    return(expandedID)                                                                                                  
-  }
-    
-  # Process FASTA (list) to return a matrix[input, germline)
-  processInputAdvanced <- function(inputFASTA){
-  
-    seqIDs = names(inputFASTA)
-    numbSeqs = length(seqIDs)
-    posGermlines1 = germlinesInFile(seqIDs)
-    numbGermlines = sum(posGermlines1)
-    posGroups1 = groupsInFile(seqIDs)
-    numbGroups = sum(posGroups1)
-    consDef = NA
-    
-    if(numbGermlines==0){
-      posGermlines = 2
-      numbGermlines = 1  
-    }
-  
-      glPositionsSum = cumsum(posGermlines1)
-      glPositions = table(glPositionsSum)
-      #Find the position of the conservation row
-      consDefPos = as.numeric(names(glPositions[names(glPositions)!=0 & glPositions==1]))+1  
-    if( length(consDefPos)> 0 ){
-      consDefID =  match(consDefPos, glPositionsSum) 
-      #The coservation rows need to be pulled out and stores seperately 
-      consDef =  inputFASTA[consDefID]
-      inputFASTA =  inputFASTA[-consDefID]
-  
-      seqIDs = names(inputFASTA)
-      numbSeqs = length(seqIDs)
-      posGermlines1 = germlinesInFile(seqIDs)
-      numbGermlines = sum(posGermlines1)
-      posGroups1 = groupsInFile(seqIDs)
-      numbGroups = sum(posGroups1)
-      if(numbGermlines==0){
-        posGermlines = 2
-        numbGermlines = 1  
-      }    
-    }
-    
-    posGroups <- expandTillNext(posGroups1)
-    posGermlines <- expandTillNext(posGermlines1)
-    posGermlines[posGroups1] = 0
-    names(posGermlines)[posGroups1] = names(posGroups)[posGroups1]
-    posInput = rep(TRUE,numbSeqs)
-    posInput[posGroups1 | posGermlines1] = FALSE
-    
-    matInput = matrix(NA, nrow=sum(posInput), ncol=2)
-    rownames(matInput) = seqIDs[posInput]
-    colnames(matInput) = c("Input","Germline")
-    
-    vecInputFASTA = unlist(inputFASTA)  
-    matInput[,1] = vecInputFASTA[posInput]
-    matInput[,2] = vecInputFASTA[ which( names(inputFASTA)%in%paste(">",names(posGermlines)[posInput],sep="") )[ posGermlines[posInput]] ]
-    
-    germlines = posGermlines[posInput]
-    groups = posGroups[posInput]
-    
-    return( list("matInput"=matInput, "germlines"=germlines, "groups"=groups, "conservationDefinition"=consDef ))      
-  }
-
-
-  # Replace leading and trailing dashes in the sequence
-  replaceLeadingTrailingDashes <- function(x,readEnd){
-    iiGap = unlist(gregexpr("-",x[1]))
-    ggGap = unlist(gregexpr("-",x[2]))  
-    #posToChange = intersect(iiGap,ggGap)
-    
-    
-    seqIn = replaceLeadingTrailingDashesHelper(x[1])
-    seqGL = replaceLeadingTrailingDashesHelper(x[2])
-    seqTemplate = rep('N',readEnd)
-    seqIn <- c(seqIn,seqTemplate[(length(seqIn)+1):readEnd])
-    seqGL <- c(seqGL,seqTemplate[(length(seqGL)+1):readEnd])
-#    if(posToChange!=-1){
-#      seqIn[posToChange] = "-"
-#      seqGL[posToChange] = "-"
-#    }
-  
-    seqIn = c2s(seqIn[1:readEnd])
-    seqGL = c2s(seqGL[1:readEnd])
-  
-    lenGL = nchar(seqGL)
-    if(lenGL<readEnd){
-      seqGL = paste(seqGL,c2s(rep("N",readEnd-lenGL)),sep="")
-    }
-  
-    lenInput = nchar(seqIn)
-    if(lenInput<readEnd){
-      seqIn = paste(seqIn,c2s(rep("N",readEnd-lenInput)),sep="")
-    }    
-    return( c(seqIn,seqGL) )
-  }  
-
-  replaceLeadingTrailingDashesHelper <- function(x){
-    grepResults = gregexpr("-*",x)
-    grepResultsPos = unlist(grepResults)
-    grepResultsLen =  attr(grepResults[[1]],"match.length")   
-    #print(paste("x = '", x, "'", sep=""))
-    x = s2c(x)
-    if(x[1]=="-"){
-      x[1:grepResultsLen[1]] = "N"      
-    }
-    if(x[length(x)]=="-"){
-      x[(length(x)-grepResultsLen[length(grepResultsLen)]+1):length(x)] = "N"      
-    }
-    return(x)
-  }
-
-
-
-  
-  # Check sequences for indels
-  checkForInDels <- function(matInputP){
-    insPos <- checkInsertion(matInputP)
-    delPos <- checkDeletions(matInputP)
-    return(list("Insertions"=insPos, "Deletions"=delPos))
-  }
-
-  # Check sequences for insertions
-  checkInsertion <- function(matInputP){
-    insertionCheck = apply( matInputP,1, function(x){
-                                          inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
-                                          glGaps <- as.vector( gregexpr("-",x[2])[[1]] )                                          
-                                          return( is.finite( match(FALSE, glGaps%in%inputGaps ) ) )
-                                        })   
-    return(as.vector(insertionCheck))
-  }
-  # Fix inserstions
-  fixInsertions <- function(matInputP){
-    insPos <- checkInsertion(matInputP)
-    sapply((1:nrow(matInputP))[insPos],function(rowIndex){
-                                                x <- matInputP[rowIndex,]
-                                                inputGaps <- gregexpr("-",x[1])[[1]]
-                                                glGaps <- gregexpr("-",x[2])[[1]]
-                                                posInsertions <- glGaps[!(glGaps%in%inputGaps)]
-                                                inputInsertionToN <- s2c(x[2])
-                                                inputInsertionToN[posInsertions]!="-"
-                                                inputInsertionToN[posInsertions] <- "N"
-                                                inputInsertionToN <- c2s(inputInsertionToN)
-                                                matInput[rowIndex,2] <<- inputInsertionToN 
-                                              })                                                               
-    return(insPos)
-  } 
-    
-  # Check sequences for deletions
-  checkDeletions <-function(matInputP){
-    deletionCheck = apply( matInputP,1, function(x){
-                                          inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
-                                          glGaps <- as.vector( gregexpr("-",x[2])[[1]] )
-                                          return( is.finite( match(FALSE, inputGaps%in%glGaps ) ) )
-                                      })
-    return(as.vector(deletionCheck))                                      
-  }
-  # Fix sequences with deletions
-  fixDeletions <- function(matInputP){
-    delPos <- checkDeletions(matInputP)    
-    sapply((1:nrow(matInputP))[delPos],function(rowIndex){
-                                                x <- matInputP[rowIndex,]
-                                                inputGaps <- gregexpr("-",x[1])[[1]]
-                                                glGaps <- gregexpr("-",x[2])[[1]]
-                                                posDeletions <- inputGaps[!(inputGaps%in%glGaps)]
-                                                inputDeletionToN <- s2c(x[1])
-                                                inputDeletionToN[posDeletions] <- "N"
-                                                inputDeletionToN <- c2s(inputDeletionToN)
-                                                matInput[rowIndex,1] <<- inputDeletionToN 
-                                              })                                                                   
-    return(delPos)
-  }  
-    
-
-  # Trim DNA sequence to the last codon
-  trimToLastCodon <- function(seqToTrim){
-    seqLen = nchar(seqToTrim)  
-    trimmedSeq = s2c(seqToTrim)
-    poi = seqLen
-    tailLen = 0
-    
-    while(trimmedSeq[poi]=="-" || trimmedSeq[poi]=="."){
-      tailLen = tailLen + 1
-      poi = poi - 1   
-    }
-    
-    trimmedSeq = c2s(trimmedSeq[1:(seqLen-tailLen)])
-    seqLen = nchar(trimmedSeq)
-    # Trim sequence to last codon
-  	if( getCodonPos(seqLen)[3] > seqLen )
-  	  trimmedSeq = substr(seqToTrim,1, ( (getCodonPos(seqLen)[1])-1 ) )
-    
-    return(trimmedSeq)
-  }
-  
-  # Given a nuclotide position, returns the pos of the 3 nucs that made the codon
-  # e.g. nuc 86 is part of nucs 85,86,87
-  getCodonPos <- function(nucPos){
-    codonNum =  (ceiling(nucPos/3))*3
-    return( (codonNum-2):codonNum)
-  }
-  
-  # Given a nuclotide position, returns the codon number
-  # e.g. nuc 86  = codon 29
-  getCodonNumb <- function(nucPos){
-    return( ceiling(nucPos/3) )
-  }
-  
-  # Given a codon, returns all the nuc positions that make the codon
-  getCodonNucs <- function(codonNumb){
-    getCodonPos(codonNumb*3)
-  }  
-
-  # Fill the global CODON_TABLE with one 12-element column per codon.
-  #
-  # For every codon over NUCLEOTIDES the column holds the result of
-  # mutationTypeOptimized() for the twelve substitutions produced by
-  # permutateAllCodon(). Codons containing "N" or "-" get a column of NA.
-  # The finished table is written back to the global as a matrix.
-  # Nothing is done when testID is greater than 4.
-  computeCodonTable <- function(testID=1){
-    if(testID>4){
-      return(invisible(NULL))
-    }
-
-    # Every three-letter word over `alphabet`, last letter varying fastest
-    tripletsOver <- function(alphabet){
-      grid <- expand.grid(third=alphabet, second=alphabet, first=alphabet, stringsAsFactors=FALSE)
-      paste(grid$first, grid$second, grid$third, sep="")
-    }
-    # The words over `alphabet` that contain `symbol`, in the same order
-    wordsWith <- function(alphabet, symbol){
-      words <- tripletsOver(alphabet)
-      words[grepl(symbol, words, fixed=TRUE)]
-    }
-
-    # Pre-compute every codon over the real nucleotides
-    realCodons <- tripletsOver(NUCLEOTIDES)
-    for(k in seq_along(realCodons)){
-      codon <- realCodons[k]
-      colnames(CODON_TABLE)[k] <- codon
-      CODON_TABLE[,codon] <- mutationTypeOptimized(cbind(permutateAllCodon(codon),rep(codon,12)))
-    }
-
-    # Codons with an unknown nucleotide cannot be classified
-    for(codon in wordsWith(c("N","A","C","G","T", "-"), "N")){
-      CODON_TABLE[,codon] <- rep(NA,12)
-    }
-
-    # Neither can codons with a gap
-    for(codon in wordsWith(c("-","A","C","G","T"), "-")){
-      CODON_TABLE[,codon] <- rep(NA,12)
-    }
-
-    CODON_TABLE <<- as.matrix(CODON_TABLE)
-  }
-  
-  # Collapse the distinct sequences of one clone into a single sequence.
-  #
-  # vecInputSeqs: character vector of sequences; duplicates are dropped first.
-  # glSeq: germline sequence string.
-  # readEnd: number of leading positions that are collapsed.
-  # nonTerminalOnly: when 0, a position where the sequences disagree with the
-  #   germline takes a randomly sampled nucleotide that is neither "N" nor the
-  #   germline one; otherwise only nucleotides seen in more than one sequence
-  #   are sampled, and the germline nucleotide is used when there are none.
-  # Returns list(c(collapsedSeq, glSeq), flag). flag is TRUE when a position
-  # with several distinct nucleotides included an "N". A single distinct input
-  # sequence is returned unchanged with flag FALSE.
-  collapseClone <- function(vecInputSeqs,glSeq,readEnd,nonTerminalOnly=0){
-    uniqueSeqs <- unique(vecInputSeqs)
-    if(length(uniqueSeqs)==1){
-      return( list( c(uniqueSeqs,glSeq), FALSE) )
-    }
-
-    # One column per sequence, one row per position
-    seqChars <- sapply(uniqueSeqs, function(oneSeq){
-                                     s2c(oneSeq)[1:readEnd]
-                                   })
-    glChars <- s2c(glSeq)
-
-    # Returns c(nucleotide, flag) for position i (both coerced to character)
-    collapsePosition <- function(i){
-      column <- seqChars[i,]
-      observed <- unique(column)
-      glNuc <- glChars[i]
-
-      # Germline gap and nothing but gaps/Ns observed: keep the gap
-      if( glNuc=="-" & all(observed %in% c("-","N")) ){
-        return(c("-",FALSE))
-      }
-      # All sequences agree
-      if(length(observed)==1){
-        return(c(observed[1],FALSE))
-      }
-
-      sawN <- "N" %in% observed
-      # Apart from Ns, every sequence matches the germline
-      if( all(observed[observed!="N"] %in% glNuc) ){
-        return( c(glNuc,sawN) )
-      }
-
-      candidates <- column[column!="N" & column!=glNuc]
-      if(nonTerminalOnly==0){
-        return( c(sample(candidates,1),sawN) )
-      }
-      counts <- table(candidates)
-      if(!any(counts>1)){
-        return( c(glNuc,sawN) )
-      }
-      c(sample( candidates[candidates %in% names(counts)[counts>1]],1),sawN)
-    }
-
-    collapsed <- sapply(1:readEnd, collapsePosition)
-    list(c(c2s(collapsed[1,]),glSeq), "TRUE" %in% collapsed[2,])
-  }
-
-  # Compute the expected mutation frequencies for each sequence-germline pair.
-  #
-  # matInput: character matrix with one row per pair; column 1 holds the
-  #   observed sequence and column 2 the germline sequence.
-  # Returns a matrix with one row per pair and the four values produced by
-  #   computeExpected() as columns; a 0 x 4 matrix when matInput has no rows.
-  #
-  # mclapply is used when an entry of search() matches "multicore", lapply
-  # otherwise; both paths run exactly the same computation.
-  getExpectedIndividual <- function(matInput){
-    nPairs <- nrow(matInput)
-    if(nPairs==0){
-      return(matrix(numeric(0),ncol=4,nrow=0))
-    }
-    applyFun <- if( any(grep("multicore",search())) ) mclapply else lapply
-
-    facGL <- factor(matInput[,2])
-    facLevels <- levels(facGL)
-    facIndex <- match(facGL,facLevels)
-
-    # Quantities that depend on the germline only: once per distinct germline
-    LisGLs_MutabilityU <- applyFun(facLevels, computeMutabilities)
-    LisGLs_MutationTypesU <- applyFun(facLevels, computeMutationTypes)
-
-    LisGLs_Exp <- applyFun(seq_len(nPairs), function(x){
-      # Positions where the observed sequence is "N" are masked with NA
-      cInput <- rep(NA,nchar(matInput[x,1]))
-      cInput[s2c(matInput[x,1])!="N"] <- 1
-      vecMutabilities <- LisGLs_MutabilityU[[facIndex[x]]] * cInput
-      matTargeting <- computeTargeting(matInput[x,2],vecMutabilities)
-      computeExpected(matTargeting,LisGLs_MutationTypesU[[facIndex[x]]])
-    })
-
-    ul_LisGLs_Exp <- unlist(LisGLs_Exp)
-    matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=TRUE)
-  }
-
-  # Compute the per-position mutability of a sequence.
-  #
-  # Gaps ("-") are removed before the lookup and get NA in the result. With
-  # mutabilityModel == 5 each position from the third onwards is looked up by
-  # the five-nucleotide window centred on it (positions 1 and 2 stay NA).
-  # Otherwise the tri-nucleotide table is used: each position gets the mean
-  # (NAs ignored) over the triplets that contain it.
-  #
-  # paramSeq: a single sequence string.
-  # Returns a vector with one entry per character of paramSeq.
-  computeMutabilities <- function(paramSeq){
-    ungapped <- gsub("-", "", paramSeq)
-    nUngapped <- nchar(ungapped)
-    ungappedMut <- rep(NA,nUngapped)
-
-    # Five-nucleotide window centred on each position from 3 onwards
-    inner <- 3:(nUngapped)
-    windows <- substr(rep(ungapped,nUngapped-2),(inner-2),(inner+2))
-
-    if(mutabilityModel==5){
-      ungappedMut[inner] <- sapply(windows,function(w){ getMutability5(w) }, simplify=TRUE)
-    }else{
-      # The position is 3rd, 2nd and 1st in the three triplets of its window
-      perTriplet <- c(
-                      getMutability( substr(windows,1,3), 3) ,
-                      getMutability( substr(windows,2,4), 2),
-                      getMutability( substr(windows,3,5), 1)
-                    )
-      ungappedMut[inner] <- tapply(perTriplet,rep(1:(nUngapped-2),3),mean,na.rm=TRUE)
-      # Position 1 is covered by the first triplet only
-      ungappedMut[1] <- getMutability(substr(ungapped,1,3) , 1)
-      # Position 2 is covered by the first two triplets
-      firstFour <- substr(ungapped,1,4)
-      ungappedMut[2] <- mean( c(
-                                getMutability( substr(firstFour,1,3), 2) ,
-                                getMutability( substr(firstFour,2,4), 1)
-                              ),na.rm=TRUE
-                            )
-    }
-
-    # Put the values back at the non-gap positions of the original sequence
-    seqMutabilites <- rep(NA,nchar(paramSeq))
-    seqMutabilites[which(s2c(paramSeq)!="-")] <- ungappedMut
-    return(seqMutabilites)
-  }
-
-  # Look up the mutability of one or more triplets at the given position(s)
-  # within the triplet. Reads the global `mutability` table, whose row names
-  # are the triplets and whose columns are indexed by `pos`.
-  getMutability <- function(codon, pos=1:3){
-    rowIndex <- match(codon, rownames(mutability))
-    mutability[rowIndex, pos]
-  }
-
-  # Look up the entry of the global `mutability` object named by a
-  # five-nucleotide string (used when mutabilityModel is 5).
-  getMutability5 <- function(fivemer){
-    mutability[fivemer]
-  }
-
-  # Return the row of the global `substitution` table for a nucleotide,
-  # i.e. its substitution probabilities.
-  getTransistionProb <- function(nuc){
-    probs <- substitution[nuc,]
-    probs
-  }
-
-  # Return the column of the global `substitution` table named by a
-  # five-nucleotide string, or four NAs when there is no such column.
-  getTransistionProb5 <- function(fivemer){
-    known <- any(which(fivemer==colnames(substitution)))
-    if(!known){
-      return(array(NA,4))
-    }
-    substitution[,fivemer]
-  }
-
-  # Given a nucleotide, return the members of NUCLEOTIDES it can mutate to,
-  # i.e. every member other than `nuc` itself.
-  #
-  # The selection is a logical mask rather than NUCLEOTIDES[-which(...)]:
-  # when `nuc` is not in NUCLEOTIDES, which() is empty and a negative empty
-  # index selects nothing, so the old form returned character(0) instead of
-  # the full set.
-  canMutateTo <- function(nuc){
-    NUCLEOTIDES[!(NUCLEOTIDES %in% nuc)]
-  }
-  
-  # Given a nucleotide, return the entries of its row of the global
-  # `substitution` table for the nucleotides it can mutate to.
-  canMutateToProb <- function(nuc){
-    targets <- canMutateTo(nuc)
-    substitution[nuc,targets]
-  }
-
-  # Compute the targeting matrix of a sequence: per-position mutability
-  # multiplied by the substitution probabilities at that position.
-  #
-  # param_strSeq: a single sequence string.
-  # param_vecMutabilities: one mutability value per position of the sequence.
-  # Returns a matrix with NUCLEOTIDES as row names and positions as columns.
-  #
-  # With substitutionModel == 5 the substitution column of each position from
-  # the third ungapped one onwards is looked up by its five-nucleotide window;
-  # gap positions and the first two ungapped positions stay NA. Otherwise the
-  # substitution row of the nucleotide at each position is used.
-  computeTargeting <- function(param_strSeq,param_vecMutabilities){
-
-    if(substitutionModel==5){
-      nTotal <- nchar(param_strSeq)
-      ungapped <- gsub("-", "", param_strSeq)
-      nUngapped <- nchar(ungapped)
-
-      inner <- 3:(nUngapped)
-      windows <- substr(rep(ungapped,nUngapped-2),(inner-2),(inner+2))
-      ungappedSubstitution <- matrix(NA,ncol=nUngapped,nrow=4)
-      ungappedSubstitution[,inner] <- sapply(windows,function(w){ getTransistionProb5(w) }, simplify=TRUE)
-
-      # Spread the ungapped columns back over the non-gap positions
-      seqSubstitution <- matrix(NA,ncol=nTotal,nrow=4)
-      seqSubstitution[,which(s2c(param_strSeq)!="-")] <- ungappedSubstitution
-
-      # Scale every column by the mutability of its position
-      matTargeting <- sweep(seqSubstitution,2,param_vecMutabilities,`*`)
-      dimnames( matTargeting ) <- list(NUCLEOTIDES,1:(nTotal))
-      return (matTargeting)
-    }
-
-    nucs <- s2c(param_strSeq)
-    matTargeting <- sapply( 1:length(nucs), function(i) { param_vecMutabilities[i] * getTransistionProb(nucs[i]) } )
-    dimnames( matTargeting ) <- list(NUCLEOTIDES,1:(length(nucs)))
-    return (matTargeting)
-  }
-
-  # Compute the mutation types of a sequence.
-  #
-  # The sequence is read codon by codon; for each codon the 12 entries of its
-  # CODON_TABLE column fill a 4 x 3 block (rows = NUCLEOTIDES, columns = the
-  # codon's three positions).
-  #
-  # param_strSeq: a single sequence string.
-  # Returns a 4 x nchar(param_strSeq) matrix. Positions of a trailing
-  # incomplete codon are NA (previously the codon data was recycled into
-  # those columns, and sequences shorter than one codon failed).
-  computeMutationTypes <- function(param_strSeq){
-    lenSeq <- nchar(param_strSeq)
-    nCodons <- lenSeq %/% 3
-    matMutationTypes <- matrix(NA, ncol=lenSeq, nrow=4)
-    if(nCodons>0){
-      codonStarts <- seq_len(nCodons)*3-2
-      vecCodons <- substring(param_strSeq, codonStarts, codonStarts+2)
-      # Column-major fill: 12 values per codon = 4 rows x 3 positions
-      matMutationTypes[, seq_len(nCodons*3)] <- unlist(CODON_TABLE[,vecCodons])
-    }
-    dimnames( matMutationTypes ) <- list(NUCLEOTIDES, seq_len(lenSeq))
-    return(matMutationTypes)
-  }
-  # Mutation types of a single codon: its CODON_TABLE column reshaped to a
-  # 4 x 3 matrix, filled column by column. No dimnames are set.
-  computeMutationTypesFast <- function(param_strSeq){
-    codonColumn <- CODON_TABLE[,param_strSeq]
-    matrix( codonColumn, ncol=3, nrow=4, byrow=FALSE)
-  }
-  # Classify every row of a two-column matrix of codons with mutationType():
-  # column 2 is passed as the codon mutated from, column 1 as the codon
-  # mutated to.
-  mutationTypeOptimized <- function( matOfCodons ){
-    classifyRow <- function(codonPair){ mutationType(codonPair[2],codonPair[1]) }
-    apply( matOfCodons,1,classifyRow )
-  }
-
-  # Return the 12 codons obtained by replacing each of the three positions of
-  # `codon` in turn with each element of NUCLEOTIDES: entries 1-4 vary
-  # position 1, entries 5-8 position 2 and entries 9-12 position 3. The
-  # unchanged codon is therefore part of the result.
-  # (assumes NUCLEOTIDES has four elements - the row blocks are four wide)
-  permutateAllCodon <- function(codon){
-    bases <- s2c(codon)
-    # 12 rows, each a copy of the codon's characters
-    variants <- t(array(bases,dim=c(3,12)))
-    for(position in 1:3){
-      rows <- (4*position-3):(4*position)
-      variants[rows,position] <- NUCLEOTIDES
-    }
-    apply(variants,1,c2s)
-  }
-
-  # Classify the mutation from one codon to another as "R", "S" or "Stop".
-  # Returns NA when either codon, or either translation, is NA.
-  #
-  # The rule depends on the global testID:
-  #   4     - "R" when translateAminoAcidToTraitChange() differs between the
-  #           two amino acids, "S" otherwise; "Stop" when either is "*".
-  #   5     - identical codons are "S"; otherwise the nucleotide of codonFrom
-  #           at the changed position decides: "C" gives "R", "G" gives "S",
-  #           anything else "Stop".
-  #   other - "R" when the amino acids differ, "S" otherwise; "Stop" when
-  #           either is "*".
-  mutationType <- function(codonFrom,codonTo){
-    scheme <- if(testID==4) "trait" else if(testID==5) "nucleotide" else "aminoAcid"
-
-    aaFrom <- translateCodonToAminoAcid(codonFrom)
-    aaTo <- translateCodonToAminoAcid(codonTo)
-    if( is.na(codonFrom) | is.na(codonTo) | is.na(aaFrom) | is.na(aaTo) ){
-      return(NA)
-    }
-
-    if(scheme=="nucleotide"){
-      if(codonFrom==codonTo){
-        return("S")
-      }
-      fromNucs <- s2c(codonFrom)
-      toNucs <- s2c(codonTo)
-      changedNuc <- fromNucs[which(toNucs!=fromNucs)]
-      if(changedNuc=="C"){
-        return("R")
-      }else if(changedNuc=="G"){
-        return("S")
-      }
-      return("Stop")
-    }
-
-    if(scheme=="trait"){
-      isReplacement <- translateAminoAcidToTraitChange(aaFrom) != translateAminoAcidToTraitChange(aaTo)
-    }else{
-      isReplacement <- aaFrom != aaTo
-    }
-    result <- "S"
-    if( isReplacement ){
-      result <- "R"
-    }
-    # A stop codon on either side overrides the R/S call
-    if(aaTo=="*" | aaFrom=="*"){
-      result <- "Stop"
-    }
-    return(result)
-  }
-
-  
-  # Given a targeting matrix and the matching matrix of mutation types,
-  # return the vector c(Exp_R_CDR, Exp_S_CDR, Exp_R_FWR, Exp_S_FWR): the
-  # targeting summed (NAs ignored) over the replacement ("R") or silent ("S")
-  # cells that the global CDR_Nuc_Mat / FWR_Nuc_Mat mark as TRUE.
-  computeExpected <- function(paramTargeting,paramMutationTypes){
-    # Sum of the targeting over those of `cells` that lie inside the region
-    sumInRegion <- function(cells, regionMask){
-      sum(paramTargeting[ cells[which(regionMask[cells]==TRUE)] ],na.rm=TRUE)
-    }
-
-    replacementCells <- which(paramMutationTypes=="R")
-    silentCells <- which(paramMutationTypes=="S")
-
-    c(sumInRegion(replacementCells, CDR_Nuc_Mat),
-      sumInRegion(silentCells, CDR_Nuc_Mat),
-      sumInRegion(replacementCells, FWR_Nuc_Mat),
-      sumInRegion(silentCells, FWR_Nuc_Mat))
-  }
-  
-  # Count the mutations in a sequence; each mutation is treated independently.
-  #
-  # Wrapper around analyzeMutations2NucUri() that swaps the two rows of its
-  # input first: row 2 of rev_in_matrix becomes row 1 and row 1 becomes row 2.
-  # Gap ("-") and "N" positions are passed through unchanged; filling them
-  # from the other row is not done here.
-  analyzeMutations2NucUri_website <- function( rev_in_matrix ){
-    firstRow <- rev_in_matrix[1,]
-    secondRow <- rev_in_matrix[2,]
-    swapped <- matrix(c( secondRow, firstRow ),2,length(secondRow),byrow=TRUE)
-    analyzeMutations2NucUri( swapped )
-  }
-
-  # Classify every mutated position between the two rows of a character
-  # matrix (one column per nucleotide position).
-  #
-  # Row 2 is the reference the mutations are called against and row 1 supplies
-  # the mutated nucleotides. Each mutation is judged on its own: the reference
-  # codon is compared with the same codon carrying only that one change.
-  # Positions whose codon contains an "N" in row 1 are skipped.
-  # NOTE(review): the earlier comment labelled row 1 as GL and row 2 as Seq,
-  # the opposite of how the rows are used here - confirm against callers.
-  #
-  # Returns a vector of mutationType() results named by position, with NAs
-  # dropped, or a single NA when nothing remains.
-  analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=T) ){
-    refNucs <- in_matrix[2,]
-    obsNucs <- in_matrix[1,]
-
-    differs <- refNucs != obsNucs
-    if(!any(differs)){
-      return (NA)
-    }
-
-    mutationPos <- seq_along(differs)[differs]
-    codonHasN <- sapply(mutationPos, function(p){ any(obsNucs[getCodonPos(p)]=="N") })
-    mutationPos <- mutationPos[!codonHasN]
-    nMutations <- length(mutationPos)
-    mutationInfo <- rep(NA,nMutations)
-
-    if(any(mutationPos)){
-      # Reference codon around each mutated position
-      refCodonNucs <- refNucs[array(sapply(mutationPos,getCodonPos))]
-      refCodons <- apply(matrix(refCodonNucs,nMutations,3,byrow=TRUE),1,c2s)
-
-      # The same codon with only the mutated position replaced
-      mutatedCodonNucs <- sapply(mutationPos,function(p){
-                                   codon <- refNucs[getCodonPos(p)]
-                                   codon[{p-1}%%3+1] <- obsNucs[p]
-                                   return(codon)
-                                 })
-      mutatedCodons <- apply(mutatedCodonNucs,2,c2s)
-
-      mutationInfo <- apply(rbind(refCodons , mutatedCodons),2,function(pair){mutationType(c2s(pair[1]),c2s(pair[2]))})
-      names(mutationInfo) <- mutationPos
-    }
-
-    if(any(!is.na(mutationInfo))){
-      return(mutationInfo[!is.na(mutationInfo)])
-    }
-    return(NA)
-  }
-  
-  # Count the "R" and "S" mutations that fall in the CDR and in the FWR.
-  #
-  # mu: vector of mutation types ("R", "S", ...) whose names are nucleotide
-  #   positions, or NA when there are no mutations.
-  # Returns c(R_CDR, S_CDR, R_FWR, S_FWR); all zeros when mu is empty or
-  #   entirely NA. Regions are read from the globals CDR_Nuc and FWR_Nuc,
-  #   indexed by position.
-  #
-  # The emptiness check looks at the whole vector: the previous
-  # if(!is.na(mu)) received a vector condition whenever a sequence carried
-  # more than one mutation, which recent R versions reject.
-  processNucMutations2 <- function(mu){
-    if(length(mu)==0 || all(is.na(mu))){
-      return(rep(0,4))
-    }
-
-    # c(CDR count, FWR count) for one mutation type
-    countByRegion <- function(type){
-      hits <- mu[!is.na(mu) & mu==type]
-      if(length(hits)==0){
-        return(c(0,0))
-      }
-      nucNumbs <- as.numeric(names(hits))
-      c(sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=TRUE),
-        sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=TRUE))
-    }
-
-    replacements <- countByRegion("R")
-    silents <- countByRegion("S")
-
-    retVec <- c(replacements[1],silents[1],replacements[2],silents[2])
-    retVec[is.na(retVec)] <- 0
-    return(retVec)
-  }
-  
-  
-  ## Z-score Test
-  # Compute binomial z-scores for the CDR (column 1 of the result) and the
-  # FWR (column 2) of every row of `mat`.
-  #
-  # mat: numeric matrix; columns 1-4 are the observed R_CDR, S_CDR, R_FWR and
-  #   S_FWR counts and columns 5-8 the matching expected frequencies.
-  # test: "Focused", "Local" or "Imbalanced"; any other value leaves the
-  #   result all NA.
-  # Returns an nrow(mat) x 2 matrix; NaN results are replaced by NA.
-  #
-  # For each region z = (X - N*P)/sqrt(N*P*(1-P)), where P is the expected
-  # share of the event, N the number of observed mutations considered and X
-  # the observed count of the event:
-  #   Focused    - event R in the region, among R in the region plus all S
-  #   Local      - event R in the region, among R and S in the region
-  #   Imbalanced - event any mutation in the region, among all mutations
-  # For "Imbalanced" X is the region's R+S count, so that it matches P and
-  # the definition used by computeBayesianScore(); previously only the R
-  # count was used. Columns are selected with drop=FALSE so that a one-row
-  # matrix works as well.
-  computeZScore <- function(mat, test="Focused"){
-    matRes <- matrix(NA,ncol=2,nrow=(nrow(mat)))
-
-    # Row-wise sum over a set of columns
-    total <- function(cols){ rowSums(mat[,cols,drop=FALSE]) }
-    # z-score of X observed events out of N trials with probability P
-    zScore <- function(X, N, P){
-      R_mean <- P*N
-      R_sd <- sqrt(R_mean*(1-P))
-      (X-R_mean)/R_sd
-    }
-
-    if(test=="Focused"){
-      matRes[,1] <- zScore(mat[,1], total(c(1,2,4)), mat[,5]/total(c(5,6,8)))
-      matRes[,2] <- zScore(mat[,3], total(c(3,2,4)), mat[,7]/total(c(7,6,8)))
-    }
-
-    if(test=="Local"){
-      matRes[,1] <- zScore(mat[,1], total(c(1,2)), mat[,5]/total(c(5,6)))
-      matRes[,2] <- zScore(mat[,3], total(c(3,4)), mat[,7]/total(c(7,8)))
-    }
-
-    if(test=="Imbalanced"){
-      matRes[,1] <- zScore(total(c(1,2)), total(1:4), total(c(5,6))/total(5:8))
-      matRes[,2] <- zScore(total(c(3,4)), total(1:4), total(c(7,8))/total(5:8))
-    }
-
-    matRes[is.nan(matRes)] <- NA
-    return(matRes)
-  }
-
-  # Convert a single z-score into a signed p-value.
-  #
-  # A positive z gives the upper tail 1 - pnorm(z); a negative z gives the
-  # lower tail with a minus sign, -pnorm(z); z == 0 gives 0.5; NA or NaN
-  # gives NA.
-  z2p <- function(z){
-    if( is.nan(z) || is.na(z) ){
-      return(NA)
-    }
-    if(z>0){
-      return(1 - pnorm(z,0,1))
-    }
-    if(z<0){
-      return(-1 * pnorm(z,0,1))
-    }
-    0.5
-  }
-  
-  
-  ## Bayesian  Test
-
-  # Fitted parameter for the bayesian framework
-  # calculate_bayes() picks the entry at index min(N, 60) and adds it to both
-  # shape parameters of the beta density it evaluates.
-BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986)
-  # Default grid of points for argument `i` of calculate_bayes(): the sorted
-  # union of 200 values 2^e/2 with e running upwards from -39, the values
-  # (k+0.5)/100 for k in 0..99 except 12, and the 201 values 1 - 2^e/2 with
-  # e running from -39 to 0.
-  CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2))
-  
-  # Given x, N & p, return a posterior pdf over the grid
-  # seq(-max_sigma, max_sigma, length.out=length_sigma).
-  #
-  # x, N, p: values recycled to a common length; N must not contain 0.
-  # i: points at which the beta density is evaluated before it is mapped
-  #   onto the sigma grid by linear interpolation.
-  # Returns a vector of length length_sigma scaled so that it sums to
-  #   1/(grid step), or a single NA when N contains 0 or the density has NAs.
-  calculate_bayes <- function ( x=3, N=10, p=0.33,
-                                i=CONST_i,
-                                max_sigma=20,length_sigma=4001
-                              ){
-    if(0 %in% N){
-      return(NA)
-    }
-
-    G <- max(length(x),length(N),length(p))
-    x <- array(x,dim=G)
-    N <- array(N,dim=G)
-    p <- array(p,dim=G)
-
-    sigma_s <- seq(-max_sigma,max_sigma,length.out=length_sigma)
-    # sigma value that corresponds to each point of i
-    sigma_1 <- log({i/{1-i}}/{p/{1-p}})
-    fitted <- BAYESIAN_FITTED[min(N,60)]
-    odds <- exp(sigma_1)
-    y <- dbeta(i,x+fitted,N+fitted-x)*(1-p)*p*odds/({1-p}^2+2*p*{1-p}*odds+{p^2}*exp(2*sigma_1))
-    if(sum(is.na(y))){
-      return(NA)
-    }
-
-    tmp <- approx(sigma_1,y,sigma_s)$y
-    return( tmp/sum(tmp)/{2*max_sigma/{length_sigma-1}} )
-  }
-  # Given a matrix of observed & expected values, return a list with the CDR
-  # and the FWR posterior pdfs of every row.
-  #
-  # mat: columns 1-4 are the observed R_CDR, S_CDR, R_FWR and S_FWR counts,
-  #   columns 5-8 the matching expected frequencies.
-  # test: "Focused", "Local", "Imbalanced" or "ImbalancedSilent".
-  # Returns list(CDR=..., FWR=...); each holds one calculate_bayes() result
-  #   per row of mat.
-  #
-  # A one-row matrix is duplicated for the computation and the first result
-  # is returned.
-  computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){
-    singleRow <- nrow(mat)==1
-    if(singleRow){
-      mat <- rbind(mat,mat)
-    }
-
-    # Apply `fun` to the selected columns of every row
-    perRow <- function(cols, fun){ apply(mat[,cols],1,fun) }
-    rowTotal <- function(x){ (sum(x)) }
-    firstShare <- function(x){ (x[1]/sum(x)) }
-
-    # One posterior per row. An extra row with N = 0 (whose result is NA) is
-    # appended before the row-wise call and its result dropped afterwards.
-    posterior <- function(X, N, P){
-      X <- c(X,0)
-      N <- c(N,0)
-      P <- c(P,0.5)
-      pdfs <- apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
-      pdfs[-length(pdfs)]
-    }
-
-    if(test=="Focused"){
-      bayesCDR <- posterior(X=mat[,1], N=perRow(c(1,2,4),rowTotal), P=perRow(c(5,6,8),firstShare))
-      bayesFWR <- posterior(X=mat[,3], N=perRow(c(3,2,4),rowTotal), P=perRow(c(7,6,8),firstShare))
-    }
-
-    if(test=="Local"){
-      bayesCDR <- posterior(X=mat[,1], N=perRow(c(1,2),rowTotal), P=perRow(c(5,6),firstShare))
-      bayesFWR <- posterior(X=mat[,3], N=perRow(c(3,4),rowTotal), P=perRow(c(7,8),firstShare))
-    }
-
-    if(test=="Imbalanced"){
-      bayesCDR <- posterior(X=perRow(c(1:2),rowTotal), N=perRow(c(1:4),rowTotal),
-                            P=perRow(c(5:8),function(x){((x[1]+x[2])/sum(x))}))
-      bayesFWR <- posterior(X=perRow(c(3:4),rowTotal), N=perRow(c(1:4),rowTotal),
-                            P=perRow(c(5:8),function(x){((x[3]+x[4])/sum(x))}))
-    }
-
-    if(test=="ImbalancedSilent"){
-      bayesCDR <- posterior(X=perRow(c(2,4),function(x){(x[1])}), N=perRow(c(2,4),rowTotal),
-                            P=perRow(c(6,8),function(x){((x[1])/sum(x))}))
-      bayesFWR <- posterior(X=perRow(c(2,4),function(x){(x[2])}), N=perRow(c(2,4),rowTotal),
-                            P=perRow(c(6,8),function(x){((x[2])/sum(x))}))
-    }
-
-    if(singleRow){
-      bayesCDR <- bayesCDR[1]
-      bayesFWR <- bayesFWR[1]
-    }
-    return( list("CDR"=bayesCDR, "FWR"=bayesFWR) )
-  }
-  
-  ## Convolution
-  # Split G items into chunk sizes: the chunk size is the power of two
-  # nearest (on a log2 scale) to sqrt(G); every chunk but the last has that
-  # size and the last one also takes the remainder.
-  break2chunks<-function(G=1000){
-    chunkSize <- 2^round(log(sqrt(G),2),0)
-    nChunks <- floor(G/chunkSize)
-    lastChunk <- chunkSize+G-(nChunks*chunkSize)
-    c(rep(chunkSize,nChunks-1),lastChunk)
-  }
-  
-  # Decompose G into a sum of powers of two and return the exponents, largest
-  # first (100 = 2^6 + 2^5 + 2^2 gives 6, 5, 2).
-  PowersOfTwo <- function(G=100){
-    exponents <- array()
-    remaining <- G
-    count <- 0
-    while(remaining > 0){
-      count <- count+1
-      exponents[count] <- floor( log2(remaining) )
-      remaining <- remaining-2^exponents[count]
-    }
-    return(exponents)
-  }
-  
-  # Convolve all columns of `cons` into one by repeated pairwise
-  # weighted_conv(): in each pass columns (1,2), (3,4), ... are combined and
-  # stored in columns 1, 2, ..., halving the number of live columns.
-  # The pass count is taken from log2 of the column count.
-  # Returns column 1 after the last pass.
-  convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){
-    G <- ncol(cons)
-    if(G>1){
-      for(gen in log(G,2):1){
-        for(l in seq(from=2,to=2^gen,by=2)){
-          cons[,l/2] <- weighted_conv(cons[,l],cons[,l-1],length_sigma=length_sigma)
-        }
-      }
-    }
-    return( cons[,1] )
-  }
-  
-  # Split the columns of `cons` into consecutive groups whose sizes are the
-  # powers of two returned by PowersOfTwo(), and convolve each group into a
-  # single column with convolutionPowersOfTwo().
-  #
-  # cons: matrix with one pdf per column, or a plain vector (one pdf), in
-  #   which case the G argument gives the column count.
-  # Returns list(matG, groups): matG has one column per group, groups holds
-  #   the exponents of the group sizes.
-  convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){
-    nCols <- ncol(cons)
-    if(length(nCols)) G <- nCols
-    groups <- PowersOfTwo(G)
-    matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G )
-
-    first <- 1
-    for( g in 1:length(groups) ){
-      last <- first + 2^groups[g] - 1
-      if(last==first){
-        # A group of one column is copied as is; `first` is not advanced here
-        if(G>1){
-          matG[,g] <- cons[,first:last]
-        }else{
-          matG[,g] <- cons
-        }
-      }else{
-        matG[,g] <- convolutionPowersOfTwo( cons[,first:last], length_sigma=length_sigma )
-        first <- last + 1
-      }
-    }
-    return( list( matG, groups ) )
-  }
-  
-  # Convolve two discretised densities after rescaling their lengths by the
-  # weight w, and return the result resampled to length_sigma points.
-  #
-  # x, y: numeric vectors.
-  # w: relative weight of y with respect to x.
-  # m: minimum working length; when x or y (also after scaling by w) is
-  #   shorter, both are resampled so that the shorter has m points.
-  # Returns a vector of length length_sigma with non-positive values set to 0,
-  #   normalised to sum to 1.
-  weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
-    # Linear interpolation of v onto n evenly spaced points of its index range
-    resample <- function(v, n){
-      approx(1:length(v),v,seq(1,length(v),length.out=n))$y
-    }
-
-    lx <- length(x)
-    ly <- length(y)
-    tooShort <- {lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}
-    if(tooShort){
-      if(w<1){
-        y1 <- resample(y, m)
-        x1 <- resample(x, m/w)
-      }else{
-        y1 <- resample(y, m*w)
-        x1 <- resample(x, m)
-      }
-      lx <- length(x1)
-    }else{
-      x1 <- x
-      y1 <- resample(y, floor(lx*w))
-    }
-    ly <- length(y1)
-
-    nOut <- lx+ly-1
-    tmp <- approx(x=1:nOut,y=convolve(x1,rev(y1),type="open"),xout=seq(1,nOut,length.out=length_sigma))$y
-    tmp[tmp<=0] <- 0
-    return(tmp/sum(tmp))
-  }
-  
-  # Combine the per-group columns produced by convolutionPowersOfTwoByTwos()
-  # into a single pdf.
-  #
-  # listMatG: list(matG, groups) - one column per group and the exponents of
-  #   the group sizes.
-  # The columns are folded in from left to right with weighted_conv(); each
-  # new column is weighted by its group size relative to the total size
-  # combined so far.
-  calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
-    matG <- listMatG[[1]]
-    groups <- listMatG[[2]]
-    resConv <- matG[,1]
-    denom <- 2^groups[1]
-    if(length(groups)>1){
-      for(i in 2:length(groups)){
-        resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma)
-        denom <- denom + 2^groups[i]
-      }
-    }
-    return(resConv)
-  }
-  
-  # Given a list of PDFs, return a single convoluted PDF.
-  #
-  # NA entries are dropped first. No PDF left gives NA and a single PDF is
-  # returned as is. Up to `Threshold` PDFs are combined directly with
-  # convolutionPowersOfTwoByTwos() and calculate_bayesGHelper(); more are
-  # handed to fastConv(). The combined result is rescaled to sum to
-  # 1/(grid step).
-  groupPosteriors <- function( listPosteriors, max_sigma=20, length_sigma=4001 ,Threshold=2 ){
-    posteriors <- listPosteriors[ !is.na(listPosteriors) ]
-    nPosteriors <- length(posteriors)
-    if(nPosteriors==0) return(NA)
-    if(nPosteriors==1) return(posteriors[[1]])
-
-    # One PDF per column
-    cons <- matrix(unlist(posteriors),length(posteriors[[1]]),nPosteriors)
-    if(nPosteriors<=Threshold){
-      listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
-      y <- calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
-    }else{
-      y <- fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma )
-    }
-    return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
-  }
-
-  # Convolve many PDFs (one per column of `cons`) in chunks.
-  #
-  # The columns are split into the chunks given by break2chunks() (2 and 1
-  # for exactly three columns). Each chunk is convolved on its own and the
-  # chunk results are then combined. When the last chunk has a different size
-  # from the others it is combined separately, weighted by its size.
-  #
-  # cons: matrix with one PDF per column.
-  # max_sigma: not used here; kept so that existing calls keep working.
-  # length_sigma: number of grid points; it is now passed on to every helper
-  #   (it used to be ignored, so the helpers always ran with their default).
-  # Returns the combined PDF as a plain vector.
-  fastConv<-function(cons, max_sigma=20, length_sigma=4001){
-    chunks <- break2chunks(G=ncol(cons))
-    if(ncol(cons)==3) chunks <- 2:1
-    nChunks <- length(chunks)
-    chunkEnd <- cumsum(chunks)
-    chunkStart <- c(1,chunkEnd[-nChunks]+1)
-
-    # Convolve the columns of chunk k; returns list(matG, groups)
-    convolveChunk <- function(k){
-      convolutionPowersOfTwoByTwos(cons[ ,chunkStart[k]:chunkEnd[k]], length_sigma=length_sigma)
-    }
-    # Combine a matrix of equally weighted PDFs into one
-    combineEqual <- function(pdfs){
-      calculate_bayesGHelper( convolutionPowersOfTwoByTwos(pdfs, length_sigma=length_sigma), length_sigma=length_sigma )
-    }
-
-    # Number of chunks whose size differs from the first one (0 or 1)
-    case <- sum(chunks!=chunks[1])
-    if(case==1) End <- max(1,nChunks-1)
-    else End <- max(1,nChunks)
-
-    firsts <- sapply(1:End,function(k){ convolveChunk(k)[[1]] })
-
-    if(case==0){
-      result <- combineEqual(firsts)
-    }else if(case==1){
-      last <- calculate_bayesGHelper( convolveChunk(nChunks), length_sigma=length_sigma )
-      result_first <- combineEqual(firsts)
-      # Weights: log2 of the number of PDFs behind each of the two parts
-      sizes <- c(log(chunkEnd[nChunks-1],2),log(chunkEnd[nChunks]-chunkStart[nChunks]+1,2))
-      result <- calculate_bayesGHelper( list( cbind(result_first,last), sizes ), length_sigma=length_sigma )
-    }else{
-      stop("fastConv: unexpected chunk layout")
-    }
-    return(as.vector(result))
-  }
-    
-  # Compute a credible interval (95% by default) for a pdf given on the grid
-  # seq(-max_sigma, max_sigma, length.out=length_sigma).
-  #
-  # Pdf: vector of length length_sigma; any other length gives NA.
-  # low, up: cumulative probabilities of the interval bounds.
-  # Returns c(lower, upper) in sigma units.
-  #
-  # Both grid indices are clamped to 1: when the cumulative distribution is
-  # already above `low` (or `up`) at the first grid points, the raw index is
-  # 0 or -1, which used to yield an empty value or nearly the whole grid.
-  calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
-    if(length(Pdf)!=length_sigma) return(NA)
-    sigma_s <- seq(-max_sigma,max_sigma,length.out=length_sigma)
-    cdf <- cumsum(Pdf)
-    cdf <- cdf/cdf[length(cdf)]
-    lowIndex <- max(findInterval(low,cdf)-1, 1)
-    upIndex <- max(findInterval(up,cdf), 1)
-    return( c(sigma_s[lowIndex] , sigma_s[upIndex]) )
-  }
-  
-  # Compute the mean of a pdf given on the grid
-  # seq(-max_sigma, max_sigma, length.out=length_sigma): the pdf-weighted sum
-  # of the grid values divided by the number of grid steps per unit of sigma.
-  # Returns a 1 x 1 matrix, or NA when Pdf does not have length_sigma entries.
-  calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){
-    if(length(Pdf)==length_sigma){
-      grid <- seq(-max_sigma,max_sigma,length.out=length_sigma)
-      stepsPerUnit <- {length_sigma-1}/2/max_sigma
-      return( (Pdf%*%grid/stepsPerUnit) )
-    }
-    return(NA)
-  }
-  
-  # Return the mean and the credible interval (95% by default) of a pdf as
-  # c(mean, lower, upper).
-  #
-  # Pdf: vector of length length_sigma, or NA when no pdf is available.
-  # Three NAs are returned when Pdf does not have length_sigma entries or
-  # starts with NA. The check looks at the length and the first entry only:
-  # the previous if(is.na(Pdf)) received a vector condition for every real
-  # pdf, which recent R versions reject.
-  calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
-    if(length(Pdf)!=length_sigma || is.na(Pdf[1])){
-      return(rep(NA,3))
-    }
-    bCI <- calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma)
-    bMean <- calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma)
-    return(c(bMean, bCI))
-  }
-
-  # P-value of a pdf tabulated on a grid of length_sigma points (presumably the
-  # seq(-max_sigma, max_sigma) grid used by the other helpers in this file).
-  # The mass of the first (length_sigma-1)/2 points plus half of the centre
-  # point is divided by the number of grid points per sigma unit. A result above
-  # 0.5 is reported as value - 1, i.e. as a negative number.
-  # Returns NA when Pdf has at most one element.
-  computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){
-    if(length(Pdf)<=1){
-      return(NA)
-    }
-    gridPointsPerUnit <- {length_sigma-1}/2/max_sigma
-    lastBelowCentre <- {length_sigma-1}/2
-    centre <- {length_sigma+1}/2
-    pVal <- {sum(Pdf[1:lastBelowCentre]) + Pdf[centre]/2}/gridPointsPerUnit
-    if(pVal>0.5){
-      pVal <- pVal-1
-    }
-    return(pVal)
-  }    
-  
-  # Compares two discretised densities given on the same grid.
-  # Both inputs are normalised to sum to one. For every bin of dens1 the mass of
-  # dens2 below that bin plus half of the matching dens2 bin is accumulated.
-  # Totals above 0.5 are reported as total - 1, so the result lies in (-0.5, 0.5].
-  # sigma_S and N are accepted but not used by the current implementation.
-  # Returns NA when either density has fewer than two entries.
-  compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
-    if(!(length(dens1)>1 & length(dens2)>1)){
-      return(NA)
-    }
-    dens1 <- dens1/sum(dens1)
-    dens2 <- dens2/sum(dens2)
-    # cumulative dens2 evaluated at the middle of each bin
-    midCdf2 <- cumsum(dens2)-dens2/2
-    overlap <- sum(dens1*midCdf2[seq_along(dens1)])
-    if(overlap>0.5){
-      overlap <- overlap-1
-    }
-    return( overlap )
-  }  
-  
-  # Counts the sequences (rows) with at least one mutation relevant to the CDR and
-  # to the FWR, i.e. the sequences that contribute to the sigma estimate.
-  #   matMutations : one row per sequence; columns 1-4 hold mutation counts
-  #                  (presumably CDR_R, CDR_S, FWR_R, FWR_S - confirm with caller)
-  #   test         : 1 = focused, 2 = local; 4 is handled as 2; any other value
-  #                  leaves both counts at 0
-  # Returns c("CDR"=..., "FWR"=...).
-  numberOfSeqsWithMutations <- function(matMutations,test=1){
-    if(test==4) test <- 2
-    # number of rows whose sum over the given columns is positive
-    countMutated <- function(cols){
-      perSequence <- apply(matMutations, 1, function(x){ sum(x[cols]) })
-      mutated <- perSequence>0
-      if( any(which(mutated)) ) sum(mutated) else 0
-    }
-    cdrSeqs <- 0
-    fwrSeqs <- 0
-    if(test==1){        # focused
-      cdrSeqs <- countMutated(c(1,2,4))
-      fwrSeqs <- countMutated(c(3,4,2))
-    }else if(test==2){  # local
-      cdrSeqs <- countMutated(c(1,2))
-      fwrSeqs <- countMutated(c(3,4))
-    }
-    return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs))
-  }
-
-
-
-# Maps a selection estimate and its p-value to an HTML background colour.
-#   sigmaVal : selection strength; only its sign is used (negative -> green
-#              shades, otherwise red shades). When NA, sign(pVal) is used.
-#   pVal     : p-value; only its magnitude selects the shade.
-# Returns NA when both inputs are NA, "#FFFFFF" when pVal is NA, 0 or 1, and
-# otherwise a shade that gets more saturated as |pVal| gets smaller.
-shadeColor <- function(sigmaVal=NA,pVal=NA){
-  if(is.na(sigmaVal) && is.na(pVal)) return(NA)
-  if(is.na(sigmaVal)) sigmaVal <- sign(pVal)
-  if(is.na(pVal) || pVal==1 || pVal==0) return("#FFFFFF")
-
-  colVal <- abs(pVal)
-  # Shade 1 is used for |p| > 0.1; each threshold reached moves one shade up.
-  # The red scale used to test "< 0.005" for its last step while the green
-  # scale tested "<= 0.005", so |p| == 0.005 was shaded differently depending
-  # on the sign of sigma. Both scales now share the same cut-offs.
-  shade <- 1 + sum(colVal <= c(0.1, 0.05, 0.01, 0.005))
-  greens <- c("#CCFFCC", "#99FF99", "#66FF66", "#33FF33", "#00FF00")
-  reds   <- c("#FFCCCC", "#FF9999", "#FF6666", "#FF3333", "#FF0000")
-
-  if(sigmaVal<0){
-    returnColor <- greens[shade]
-  }else{
-    returnColor <- reds[shade]
-  }
-  return(returnColor)
-}
-
-
-
-# Converts an offset, given as fractions of the plot region size, into user
-# coordinates relative to the top-left corner of the current plot.
-#   xfrac, yfrac : x is moved left of the left edge by xfrac of the width,
-#                  y above the top edge by yfrac of the height
-#   log          : FALSE for linear axes, 1 = log x axis, 2 = log y axis,
-#                  3 = both axes logarithmic
-# Returns c("x"=..., "y"=...). Reads par()$usr, so it needs a current plot.
-plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){
-  usr <- par()$usr
-  linearX <- usr[1]-(usr[2]-usr[1])*xfrac
-  linearY <- usr[4]+(usr[4]-usr[3])*yfrac
-  if(!log){
-    x = linearX
-    y = linearY
-  }else {
-    # the position on a logarithmic axis is converted back with 10^
-    if(log==2){
-      x = linearX
-      y = 10^linearY
-    }
-    if(log==1){
-      x = 10^linearX
-      y = linearY
-    }
-    if(log==3){
-      x = 10^linearX
-      y = 10^linearY
-    }
-  }
-  return(c("x"=x,"y"=y))
-}
-
-# SHMulation
-
-  # Introduces a single mutation into the global simulated sequence seqSim and
-  # then refreshes mutability/targeting around it.
-  # One of seqGermlineLen*4 cells is drawn with probability proportional to
-  # as.vector(seqTargeting) (assumes 4 cells per sequence position, one per
-  # entry of NUCLEOTIDES - confirm against where seqTargeting is built).
-  # Returns the mutated nucleotide position.
-  oneMutation <- function(){
-    # Draw the cell, then split it into a position and a nucleotide
-    targetCell = sample(1:(seqGermlineLen*4),1,replace=FALSE,prob=as.vector(seqTargeting))
-    mutatedPosition = ceiling(targetCell/4)             # nucleotide number in the sequence
-    newNucIndex = targetCell - (mutatedPosition-1)*4    # index into NUCLEOTIDES of the new nucleotide
-
-    # Write the new nucleotide into the simulated sequence (global)
-    simChars <- s2c(seqSim)
-    simChars[mutatedPosition] <- NUCLEOTIDES[newNucIndex]
-    seqSim <<- c2s(simChars)
-
-    # Update Mutability, Targeting & MutationsTypes near the change
-    updateMutabilityNTargeting(mutatedPosition)
-
-    return(mutatedPosition)
-  }  
-  
-  updateMutabilityNTargeting <- function(position){ # Refreshes the globals seqMutability, seqTargeting and seqMutationTypes around a mutated position of seqSim
-    min_i<-max((position-2),1) # window start: two nucleotides before position, not below 1
-    max_i<-min((position+2),nchar(seqSim)) # window end: two nucleotides after position, not past the sequence end
-    min_ii<-min(min_i,3)
-    
-    #mutability - update locally (recomputed from the substring position-4..position+4; NOTE(review): the index arithmetic near the sequence ends has not been verified)
-    seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)]
-    
-    
-    #targeting - compute locally (window columns only), then replace NA targeting values by 0
-    seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i])                 
-    seqTargeting[is.na(seqTargeting)] <<- 0
-    #mutCodonPos = getCodonPos(position) 
-    mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3]) # all positions of the codons touched by the window
-    #cat(mutCodonPos,"\n")                                                  
-    mutTypeCodon = getCodonPos(position) # positions of the codon containing the mutated nucleotide
-    seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) ) 
-    # Stop = 0: targeting of every cell whose mutation type is "Stop" is set to zero
-    if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){
-      seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0
-    }
-    
-  
-    #Selection: rescale targeting of replacement ("R") cells in the window; selectedPos are linear indices (assumes 4 rows per position - confirm)
-    selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R"))  
-    # CDR: multiply by exp(selCDR), then divide by baseLineCDR_K
-    selectedCDR = selectedPos[which(matCDR[selectedPos]==T)]
-    seqTargeting[selectedCDR] <<-  seqTargeting[selectedCDR] *  exp(selCDR)
-    seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K
-        
-    # FWR: multiply by exp(selFWR), then divide by baseLineFWR_K
-    selectedFWR = selectedPos[which(matFWR[selectedPos]==T)]
-    seqTargeting[selectedFWR] <<-  seqTargeting[selectedFWR] *  exp(selFWR)
-    seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K      
-    
-  }  
-  
-
-
-  # Book-keeping for the position just mutated (global mutatedPos):
-  #  - a position not yet listed in mutatedPositions is appended and counted;
-  #  - a listed position whose simulated nucleotide equals the germline one
-  #    again is removed from the list and the counter is decremented;
-  #  - a listed position that still differs from germline changes nothing.
-  # Works on the globals mutatedPos, mutatedPositions, uniqueMutationsIntroduced,
-  # seqSim and seqGermline.
-  validateMutation <- function(){  
-    alreadyMutated <- mutatedPos%in%mutatedPositions
-    if(alreadyMutated){
-      simNuc <- substr(seqSim,mutatedPos,mutatedPos)
-      germlineNuc <- substr(seqGermline,mutatedPos,mutatedPos)
-      if(simNuc==germlineNuc){ # mutated back to germline
-        mutatedPositions <<- mutatedPositions[-which(mutatedPositions==mutatedPos)]
-        uniqueMutationsIntroduced <<- uniqueMutationsIntroduced - 1
-      }
-    }else{ # a new mutation
-      uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1
-      mutatedPositions[uniqueMutationsIntroduced] <<- mutatedPos
-    }
-  }  
-  
-  
-  
-  # Draws the text w at a position given as fractions of the plot region size,
-  # measured from the top-left corner of the current plot.
-  #   xfrac, yfrac        : x lies xfrac of the width left of the left edge,
-  #                         y lies yfrac of the height above the top edge
-  #   log                 : FALSE for linear axes, 1 = log x, 2 = log y, 3 = both;
-  #                         any other non-zero value draws nothing
-  #   w, cex, adj, thecol : passed to text() as label, cex, adj and col
-  # Clipping is switched off for the drawing and par(xpd) is FALSE afterwards.
-  myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){
-    par(xpd=TRUE)
-    usr <- par()$usr
-    linearX <- usr[1]-(usr[2]-usr[1])*xfrac
-    linearY <- usr[4]+(usr[4]-usr[3])*yfrac
-    if(!log){
-      text(linearX,linearY,w,cex=cex,adj=adj,col=thecol)
-    }else{
-      # positions on a logarithmic axis are converted back with 10^
-      if(log==2){
-        text(linearX,10^linearY,w,cex=cex,adj=adj,col=thecol)
-      }
-      if(log==1){
-        text(10^linearX,linearY,w,cex=cex,adj=adj,col=thecol)
-      }
-      if(log==3){
-        text(10^linearX,10^linearY,w,cex=cex,adj=adj,col=thecol)
-      }
-    }
-    par(xpd=FALSE)
-  }
-  
-  
-  
-  # Classify the mutations of row inputMatrixIndex of the global matInput (column 1: observed sequence, column 2: germline); returns the mutation types named by nucleotide position, or NA when nothing remains after filtering
-  analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){
-
-    paramGL = s2c(matInput[inputMatrixIndex,2])
-    paramSeq = s2c(matInput[inputMatrixIndex,1])            
-    
-    #if( any(paramSeq=="N") ){
-    #  gapPos_Seq =  which(paramSeq=="N")
-    #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
-    #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
-    #}        
-    mutations_val = paramGL != paramSeq   # TRUE where the observed nucleotide differs from germline
-    
-    if(any(mutations_val)){
-      mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]  
-      length_mutations =length(mutationPos)
-      mutationInfo = rep(NA,length_mutations)
-                          
-      pos<- mutationPos
-      pos_array<-array(sapply(pos,getCodonPos))
-      codonGL =  paramGL[pos_array] # germline codon of every mutated position
-      codonSeqWhole =  paramSeq[pos_array] # observed codon, including any other mutations inside it
-      codonSeq = sapply(pos,function(x){ # germline codon with only the mutation at x applied
-                                seqP = paramGL[getCodonPos(x)]
-                                muCodonPos = {x-1}%%3+1 
-                                seqP[muCodonPos] = paramSeq[x]
-                                return(seqP)
-                              })
-      GLcodons =  apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
-      SeqcodonsWhole =  apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)      
-      Seqcodons =   apply(codonSeq,2,c2s)
-      
-      mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})     
-      names(mutationInfo) = mutationPos     
-      
-      mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})           
-      names(mutationInfoWhole) = mutationPos
-
-      mutationInfo <- mutationInfo[!is.na(mutationInfo)]
-      mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
-      
-      if(any(!is.na(mutationInfo))){       
-  
-        #Filter based on Stop (at the codon level): 1 = keep only positions whose whole observed codon is not "Stop", 2 = drop the sequence when it has no stop, 3 = drop it when it has a stop
-        if(seqWithStops==1){
-          nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"]) # despite the name, these are the positions NOT typed "Stop"
-          mutationInfo = mutationInfo[nucleotidesAtStopCodons]
-          mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons] # NOTE(review): indexes mutationInfo rather than mutationInfoWhole - confirm this is intended
-        }else{
-          countStops = sum(mutationInfoWhole=="Stop")
-          if(seqWithStops==2 & countStops==0) mutationInfo = NA
-          if(seqWithStops==3 & countStops>0) mutationInfo = NA
-        }         
-        
-        if(any(!is.na(mutationInfo))){
-          #Filter mutations based on multipleMutation (1 = drop mutations in codons that carry more than one mutation)
-          if(multipleMutation==1 & !is.na(mutationInfo)){ # NOTE(review): the condition has one element per mutation; if() rejects length > 1 conditions in R >= 4.2
-            mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
-            tableMutationCodons <- table(mutationCodons)
-            codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
-            if(any(codonsWithMultipleMutations)){
-              #remove the nucleotide mutations in the codons with multiple mutations
-              mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
-              #replace those codons with Ns in the input sequence (written back to the global matInput)
-              paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
-              matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
-            }
-          }
-
-          #Filter mutations based on the model (1 = keep only "S" mutations)
-          if(any(mutationInfo)==T | is.na(any(mutationInfo))){ # NOTE(review): any() is applied to mutation-type strings here; it appears to act as a "not empty" check - verify
-            
-            if(model==1 & !is.na(mutationInfo)){ # NOTE(review): same length > 1 condition issue as above
-              mutationInfo <- mutationInfo[mutationInfo=="S"]
-            }  
-            if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(mutationInfo)
-            else return(NA)
-          }else{
-            return(NA)
-          }
-        }else{
-          return(NA)
-        }
-        
-        
-      }else{
-        return(NA)
-      }
-    
-    
-    }else{
-      return (NA)
-    }    
-  }  
-
-   analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){ # Same classification as analyzeMutations, but on inputArray (1: observed sequence, 2: germline) without touching matInput; returns list(mutation types or NA, possibly N-masked observed sequence)
-
-    paramGL = s2c(inputArray[2])
-    paramSeq = s2c(inputArray[1])            
-    inputSeq <- inputArray[1] # returned as second list element; replaced below when codons are masked with N
-    #if( any(paramSeq=="N") ){
-    #  gapPos_Seq =  which(paramSeq=="N")
-    #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
-    #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
-    #}        
-    mutations_val = paramGL != paramSeq   # TRUE where the observed nucleotide differs from germline
-    
-    if(any(mutations_val)){
-      mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val]  
-      length_mutations =length(mutationPos)
-      mutationInfo = rep(NA,length_mutations)
-                          
-      pos<- mutationPos
-      pos_array<-array(sapply(pos,getCodonPos))
-      codonGL =  paramGL[pos_array] # germline codon of every mutated position
-      codonSeqWhole =  paramSeq[pos_array] # observed codon, including any other mutations inside it
-      codonSeq = sapply(pos,function(x){ # germline codon with only the mutation at x applied
-                                seqP = paramGL[getCodonPos(x)]
-                                muCodonPos = {x-1}%%3+1 
-                                seqP[muCodonPos] = paramSeq[x]
-                                return(seqP)
-                              })
-      GLcodons =  apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
-      SeqcodonsWhole =  apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)      
-      Seqcodons =   apply(codonSeq,2,c2s)
-      
-      mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})     
-      names(mutationInfo) = mutationPos     
-      
-      mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})           
-      names(mutationInfoWhole) = mutationPos
-
-      mutationInfo <- mutationInfo[!is.na(mutationInfo)]
-      mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
-      
-      if(any(!is.na(mutationInfo))){       
-  
-        #Filter based on Stop (at the codon level): 1 = keep only positions whose whole observed codon is not "Stop", 2 = drop the sequence when it has no stop, 3 = drop it when it has a stop
-        if(seqWithStops==1){
-          nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"]) # despite the name, these are the positions NOT typed "Stop"
-          mutationInfo = mutationInfo[nucleotidesAtStopCodons]
-          mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons] # NOTE(review): indexes mutationInfo rather than mutationInfoWhole - confirm this is intended
-        }else{
-          countStops = sum(mutationInfoWhole=="Stop")
-          if(seqWithStops==2 & countStops==0) mutationInfo = NA
-          if(seqWithStops==3 & countStops>0) mutationInfo = NA
-        }         
-        
-        if(any(!is.na(mutationInfo))){
-          #Filter mutations based on multipleMutation (1 = drop mutations in codons that carry more than one mutation)
-          if(multipleMutation==1 & !is.na(mutationInfo)){ # NOTE(review): the condition has one element per mutation; if() rejects length > 1 conditions in R >= 4.2
-            mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
-            tableMutationCodons <- table(mutationCodons)
-            codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
-            if(any(codonsWithMultipleMutations)){
-              #remove the nucleotide mutations in the codons with multiple mutations
-              mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
-              #replace those codons with Ns in the input sequence (kept local and returned instead of written to matInput)
-              paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
-              #matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
-              inputSeq <- c2s(paramSeq)
-            }
-          }
-          
-          #Filter mutations based on the model (1 = keep only "S" mutations)
-          if(any(mutationInfo)==T | is.na(any(mutationInfo))){ # NOTE(review): any() is applied to mutation-type strings here; it appears to act as a "not empty" check - verify
-            
-            if(model==1 & !is.na(mutationInfo)){ # NOTE(review): same length > 1 condition issue as above
-              mutationInfo <- mutationInfo[mutationInfo=="S"]
-            }  
-            if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq))
-            else return(list(NA,inputSeq))
-          }else{
-            return(list(NA,inputSeq))
-          }
-        }else{
-          return(list(NA,inputSeq))
-        }
-        
-        
-      }else{
-        return(list(NA,inputSeq))
-      }
-    
-    
-    }else{
-      return (list(NA,inputSeq))
-    }    
-  }  
- 
-  # triMutability Background Count: for row inputMatrixIndex of the global matInput, accumulates per-5-mer background and mutation counts and returns list(seqMutability, numbMutations, seqMutabilityCount, BackgroundMatrix); nothing is returned explicitly when no mutation qualifies
-  buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
-    
-    #rowOrigMatInput = matInput[inputMatrixIndex,]    
-    seqGL =  gsub("-", "", matInput[inputMatrixIndex,2])
-    seqInput = gsub("-", "", matInput[inputMatrixIndex,1])    
-    #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL)
-    tempInput <- cbind(seqInput,seqGL)
-    seqLength = nchar(seqGL)      
-    list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops)
-    mutationCount <- list_analyzeMutationsFixed[[1]] # mutation types named by position, or NA
-    seqInput <- list_analyzeMutationsFixed[[2]] # observed sequence, possibly with codons masked by N
-    BackgroundMatrix = mutabilityMatrix
-    MutationMatrix = mutabilityMatrix    
-    MutationCountMatrix = mutabilityMatrix    
-    if(!is.na(mutationCount)){ # NOTE(review): mutationCount can hold several entries; if() rejects length > 1 conditions in R >= 4.2
-      if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ 
-                  
-        fivermerStartPos = 1:(seqLength-4) # NOTE(review): assumes seqLength >= 5
-        fivemerLength <- length(fivermerStartPos)
-        fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
-        fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
-    
-        #Background: every germline 5-mer without N adds to BackgroundMatrix, weighted according to model / stopMutations
-        for(fivemerIndex in 1:fivemerLength){
-          fivemer = fivemerGL[fivemerIndex]
-          if(!any(grep("N",fivemer))){
-            fivemerCodonPos = fivemerCodon(fivemerIndex)
-            fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) 
-            fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3])          
-            
-            # All mutations model
-            #if(!any(grep("N",fivemerReadingFrameCodon))){
-              if(model==0){
-                if(stopMutations==0){
-                  if(!any(grep("N",fivemerReadingFrameCodonInputSeq)))
-                    BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1)              
-                }else{
-                  if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){
-                    positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1]
-                    BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon])
-                  }
-                }
-              }else{ # Only silent mutations
-                if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){
-                  positionWithinCodon = which(fivemerCodonPos==3)
-                  BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon])
-                }
-              }
-            #}
-          }
-        }
-        
-        #Mutations: count observed mutations per germline 5-mer
-        if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
-        if(model==1) mutationCount = mutationCount[mutationCount=="S"]  
-        mutationPositions = as.numeric(names(mutationCount))
-        mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)] # keep positions with two flanking nucleotides on each side
-        mutationPositions =  mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
-        countMutations = 0 
-        for(mutationPosition in mutationPositions){
-          fivemerIndex = mutationPosition-2 # index of the 5-mer that has the mutated position in its middle
-          fivemer = fivemerSeq[fivemerIndex]
-          GLfivemer = fivemerGL[fivemerIndex]
-          fivemerCodonPos = fivemerCodon(fivemerIndex)
-          fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) 
-          fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3])
-          if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){
-            if(model==0){
-                countMutations = countMutations + 1              
-                MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1)
-                MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)             
-            }else{
-              if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){
-                  countMutations = countMutations + 1
-                  positionWithinCodon = which(fivemerCodonPos==3)
-                  glNuc =  substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
-                  inputNuc =  substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
-                  MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc])
-                  MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)                                    
-              }                
-            }                  
-          }              
-        }
-        
-        seqMutability = MutationMatrix/BackgroundMatrix # mutations relative to background, then normalised to sum to 1 (NA ignored)
-        seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
-        #cat(inputMatrixIndex,"\t",countMutations,"\n")
-        return(list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))      
-        
-      }        
-    }
-  
-  }  
-  
-  # Returns the three positions (within a 5-mer) of the codon that contains the
-  # 5-mer's middle nucleotide. fivemerIndex is the start position of the 5-mer
-  # in the sequence; the answer depends only on fivemerIndex modulo 3:
-  #   remainder 0 -> 2:4, remainder 1 -> 1:3, remainder 2 -> 3:5
-  fivemerCodon <- function(fivemerIndex){
-    positionsByRemainder <- list(2:4,1:3,3:5)
-    remainder <- fivemerIndex%%3
-    return(positionsByRemainder[[remainder+1]])
-  }
-
-  # Builds a 125 x 3 table (rows: all codons over c(NUCLEOTIDES,"N"), columns:
-  # codon positions 1-3). Each cell holds the summed substitution rate of the
-  # single-nucleotide changes at that position whose mutation type equals
-  # typeOfMutation. Codons containing "N" keep 0 in all three columns.
-  probMutations <- function(typeOfMutation){    
-    codonNames <- words(alphabet = c(NUCLEOTIDES,"N"), length=3)
-    matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(codonNames,c(1:3)))
-    for(codon in rownames(matMutationProb)){
-      if( any(grep("N",codon)) ) next
-      codonChars <- s2c(codon)
-      for(muPos in 1:3){
-        # one row per alternative nucleotide placed at muPos
-        mutatedCodons <- matrix(rep(codonChars,3),nrow=3,ncol=3,byrow=TRUE)
-        originalNuc <- mutatedCodons[1,muPos]
-        mutatedCodons[,muPos] <- canMutateTo(originalNuc)
-        rates <- substitution[originalNuc,mutatedCodons[,muPos]]
-        codonPairs <- rbind(rep(codon,3),apply(mutatedCodons,1,c2s))
-        types <- apply(codonPairs,2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
-        matMutationProb[codon,muPos] <- sum(rates[types==typeOfMutation])
-      }
-    }
-    
-    return(matMutationProb) 
-  }
-  
-  
-  
-  
-#Mapping Trinucleotides to fivemers
-# For every 5-mer over NUCLEOTIDES, adds up three trinucleotide mutabilities of
-# its middle nucleotide: column 1 for nucleotides 3-5, column 2 for 2-4 and
-# column 3 for 1-3 of the 5-mer (missing values are skipped). The result is
-# normalised so that all 5-mer values sum to 1.
-#   triMutability : table with one row per trinucleotide (row names are set
-#                   from the global triMutability_Names) and three columns
-# Returns a numeric vector named by 5-mer.
-mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){
-  rownames(triMutability) <- triMutability_Names
-  sumOverTrinucleotides <- function(Word){
-    contributions <- c(triMutability[substring(Word,3,5),1],
-                       triMutability[substring(Word,2,4),2],
-                       triMutability[substring(Word,1,3),3])
-    return(sum(contributions,na.rm=TRUE))
-  }
-  Fivemer <- rep(NA,1024)
-  names(Fivemer) <- words(alphabet=NUCLEOTIDES,length=5)
-  Fivemer <- sapply(names(Fivemer),sumOverTrinucleotides)
-  return(Fivemer/sum(Fivemer))
-}
-
-# Collapses 5-mer values to trinucleotide values for the 5-mers whose middle
-# nucleotide is NUC.
-#   Fivemer  : vector named by 5-mer
-#   Weights  : weights aligned with Fivemer
-#   position : place of the middle nucleotide inside the trinucleotide
-#              (1 -> nucleotides 3-5, 2 -> 2-4, 3 -> 1-3 of the 5-mer)
-#   NUC      : middle nucleotide to select
-# Returns the weighted mean (NA removed) per trinucleotide.
-collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
-  middleIsNuc <- substring(names(Fivemer),3,3)==NUC
-  selected <- which(middleIsNuc)
-  trinucleotide <- substring(names(Fivemer)[middleIsNuc],(4-position),(6-position))
-  tapply(selected,trinucleotide,function(idx){
-    weighted.mean(Fivemer[idx],Weights[idx],na.rm=TRUE)
-  })
-}
-
-
-
-# Sums the weights of the 5-mers whose middle nucleotide is NUC, grouped by the
-# trinucleotide in which that nucleotide sits at the given position
-# (1 -> nucleotides 3-5, 2 -> 2-4, 3 -> 1-3 of the 5-mer).
-#   Fivemer : vector named by 5-mer (only the names are used)
-#   Weights : weights aligned with Fivemer
-# Returns the weight total (NA removed) per trinucleotide.
-CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
-  middleIsNuc <- substring(names(Fivemer),3,3)==NUC
-  selected <- which(middleIsNuc)
-  trinucleotide <- substring(names(Fivemer)[middleIsNuc],(4-position),(6-position))
-  tapply(selected,trinucleotide,function(idx){
-    sum(Weights[idx],na.rm=TRUE)
-  })
-}
-
-#Uses the real counts of the mutated fivemers
-# Sums Counts over the 5-mers whose middle nucleotide is NUC, grouped by the
-# trinucleotide in which that nucleotide sits at the given position
-# (1 -> nucleotides 3-5, 2 -> 2-4, 3 -> 1-3 of the 5-mer).
-#   Fivemer : vector named by 5-mer (only the names are used)
-#   Counts  : counts aligned with Fivemer
-# Returns the count total (NA removed) per trinucleotide.
-CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){
-  middleIsNuc <- substring(names(Fivemer),3,3)==NUC
-  selected <- which(middleIsNuc)
-  trinucleotide <- substring(names(Fivemer)[middleIsNuc],(4-position),(6-position))
-  tapply(selected,trinucleotide,function(idx){
-    sum(Counts[idx],na.rm=TRUE)
-  })
-}
-
-# Bootstrap limits for multinomial proportions.
-#   x     : observed counts per category
-#   M     : number of multinomial resamples of size sum(x)
-#   alpha : error level; alpha/2 is divided by length(x)-1 for each tail
-# Returns a matrix with two rows (lower and upper quantile of the resampled
-# proportions) and one column per category; all zeros when sum(x) is 0.
-bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){
-  N <- sum(x)
-  if(!N) return(matrix(0,2,length(x)))
-  k <- length(x)-1
-  resampled <- rmultinom(M, size = N, prob=x/N)
-  proportions <- apply(resampled,2,function(y)y/N)
-  tailProb <- alpha/2/k
-  apply(proportions,1,function(y)quantile(y,c(tailProb,1-tailProb)))
-}
-
-
-
-
-# Bootstrap limits for category proportions in samples of size n drawn without
-# replacement from a population with category counts x.
-#   x     : population counts per category
-#   n     : sample size (n >= 1)
-#   M     : number of simulated samples
-#   alpha : error level; alpha/2 is divided by length(x)-1 for each tail
-# Returns a matrix with two rows (lower and upper quantile of the simulated
-# proportions) and one column per category.
-bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){
-  k <- length(x)
-  population <- rep(1:k,x)
-  draws <- sapply(1:M,function(i)sample(population,n))
-  countPerCategory <- function(values) sapply(1:k,function(i)sum(values==i))
-  # draws is an n x M matrix for n > 1 and a plain vector of length M for n == 1
-  if(n>1) tmp_p <- sapply(1:M,function(j)countPerCategory(draws[,j]))/n
-  if(n==1) tmp_p <- sapply(1:M,function(j)countPerCategory(draws[j]))/n
-  tailProb <- alpha/2/(k-1)
-  (apply(tmp_p,1,function(z)quantile(z,c(tailProb,1-tailProb))))
-}
-
-
-
-# Monte-Carlo p-values for observed category counts, under sampling without
-# replacement from a population with category counts x.
-#   x     : population counts per category
-#   M     : number of simulated samples
-#   x_obs : observed counts per category (same length as x); the simulated
-#           sample size is sum(x_obs), which must be at least 1
-# Returns one value per category: the upper-tail probability
-# P(count >= observed) when it is smaller than the lower-tail probability,
-# otherwise the lower-tail probability P(count <= observed) with a minus sign.
-p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){
-  n <- sum(x_obs)
-  k <- length(x)
-  y <- rep(1:k,x)
-  tmp <- sapply(1:M,function(i)sample(y,n))
-  # tmp is an n x M matrix for n > 1 and a plain vector of length M for n == 1
-  if(n>1) tmp_p <- sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))
-  if(n==1) tmp_p <- sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))
-  # keep one row per category even when there is a single category
-  tmp_p <- matrix(tmp_p,nrow=k)
-  # The categories used to be hard-coded as 1:3, which truncated the result for
-  # more than three categories and failed for fewer; all k are used now.
-  categories <- seq_len(k)
-  upperTail <- sapply(categories,function(i)sum(tmp_p[i,]>=x_obs[i])/M)
-  lowerTail <- sapply(categories,function(i)sum(tmp_p[i,]<=x_obs[i])/M)
-  sapply(categories,function(i){if(upperTail[i]>=lowerTail[i])return(-lowerTail[i])else return(upperTail[i])})
-}
-
-# Example input: "D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA"
-# Remove SNPs from IMGT germline segment alleles.
-# Reads a FASTA file whose headers carry the allele name as the second
-# "|"-separated field (gene and allele separated by "*"). For every gene, the
-# positions (within the first 320) where any allele differs from the gene's
-# first allele - ignoring "." on either side - are replaced by "N" in all
-# alleles of that gene. The masked sequences are written to repertoireOutFile
-# under their original headers.
-generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){
-  repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=TRUE,set.attributes=FALSE,forceDNAtolower=FALSE)
-  alleleOfHeader <- function(header) strsplit(header,"|",fixed=TRUE)[[1]][2]
-  geneOfAllele <- function(allele) strsplit(allele,"*",fixed=TRUE)[[1]][1]
-
-  alleleNames <- sapply(names(repertoireIn),alleleOfHeader)
-  geneNames <- sapply(alleleNames,geneOfAllele)
-
-  # per gene: sorted positions at which its alleles disagree
-  SNPs <- tapply(repertoireIn,geneNames,function(alleles){
-    comparedRange <- 1:320
-    reference <- s2c(alleles[[1]])[comparedRange]
-    differing <- NULL
-    for(allele in alleles){
-      candidate <- s2c(allele)[comparedRange]
-      differing <- c(differing,which(reference!=candidate & reference!="." & candidate!="."))
-    }
-    return(sort(unique(differing)))
-  })
-
-  repertoireOut <- lapply(names(repertoireIn), function(repertoireName){
-    geneSegmentName <- geneOfAllele(alleleOfHeader(repertoireName))
-    alleleSeq <- s2c(repertoireIn[[repertoireName]])
-    alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N"
-    c2s(alleleSeq)
-  })
-  names(repertoireOut) <- names(repertoireIn)
-  write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile)
-
-}
-
-
-
-
-
-
-############
-# Combines the sequences selected by indexes into one posterior pdf for the CDR
-# and one for the FWR (focused test).
-#   indexes         : rows of param_resultMat that form the group
-#   param_resultMat : one row per sequence; columns 1-4 are used as observed
-#                     counts and columns 5-8 as the matching expected values
-#                     (presumably CDR_R, CDR_S, FWR_R, FWR_S - confirm with caller)
-# Returns list(BayesGDist_Focused_CDR, BayesGDist_Focused_FWR), each being the
-# result of calculate_bayesG.
-groupBayes2 = function(indexes, param_resultMat){
-  
-  # drop=FALSE keeps a one-row selection as a matrix, so apply() over rows also
-  # works for a group that consists of a single sequence
-  BayesGDist_Focused_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,4),drop=FALSE],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8,drop=FALSE],1,function(x){x[1]/(x[1]+x[2]+x[4])}))
-  BayesGDist_Focused_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,2,4),drop=FALSE],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8,drop=FALSE],1,function(x){x[3]/(x[3]+x[2]+x[4])}))
-  #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])}))
-  #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])}))
-  #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])}))
-  #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])}))
-  return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR,
-                "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) )
-                #"BayesGDist_Local_CDR"=BayesGDist_Local_CDR,
-                #"BayesGDist_Local_FWR" = BayesGDist_Local_FWR))
-#                "BayesGDist_Global_CDR" = BayesGDist_Global_CDR,
-#                "BayesGDist_Global_FWR" = BayesGDist_Global_FWR) )
-
-
-}
-
-
-# Posterior pdf for a group of sequences.
-#   x, N, p      : per-sequence values, recycled to a common length; entries
-#                  with N <= 0 or p <= 0 are dropped before anything else
-#   max_sigma    : the sigma grid runs from -max_sigma to +max_sigma
-#   length_sigma : number of grid points
-# Returns NA when no sequence is left, the calculate_bayes() result when exactly
-# one is left, and otherwise the combined per-sequence pdfs rescaled so that
-# they sum to one over the grid spacing.
-calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){
-  commonLength <- max(length(x),length(N),length(p))
-  x <- array(x,dim=commonLength)
-  N <- array(N,dim=commonLength)
-  p <- array(p,dim=commonLength)
-
-  informative <- N>0 & p>0
-  N <- N[informative]
-  x <- x[informative]
-  p <- p[informative]
-  G <- length(x)
-
-  if(!G){
-    return(NA)
-  }
-  if(G==1){
-    return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma))
-  }
-
-  # one column per sequence
-  cons <- array( dim=c(length_sigma,G) )
-  for(g in 1:G){
-    cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma)
-  }
-  listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
-  y <- calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
-  gridSpacing <- 2*max_sigma/(length_sigma-1)
-  return( y/sum(y)/gridSpacing )
-}
-
-
-# Folds the columns of a pdf matrix into a single pdf.
-#   listMatG     : list of (1) a matrix with one pdf per column and (2) a vector
-#                  with one exponent per column; 2^exponent is the weight of
-#                  that column (presumably the number of pdfs already combined
-#                  in it - confirm against convolutionPowersOfTwoByTwos)
-#   length_sigma : grid length passed on to weighted_conv
-# Columns are merged from left to right with weighted_conv; each new column is
-# weighted by its own weight divided by the total weight merged so far.
-calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
-  pdfColumns <- listMatG[[1]]
-  exponents <- listMatG[[2]]
-  combined <- pdfColumns[,1]
-  mergedWeight <- 2^exponents[1]
-  for(column in seq_along(exponents)[-1]){
-    columnWeight <- 2^exponents[column]
-    combined <- weighted_conv(combined, pdfColumns[,column], w= {columnWeight/mergedWeight} ,length_sigma=length_sigma)
-    mergedWeight <- mergedWeight + columnWeight
-  }
-  return(combined)
-}
-
-# Convolves two pdfs after stretching them so that the length of y relative to
-# the length of x is about w, then resamples the result to length_sigma points,
-# sets non-positive values to 0 and normalises the sum to 1.
-#   x, y         : pdfs as numeric vectors
-#   w            : relative weight of y versus x
-#   m            : minimum working length; when x, y or their w-scaled lengths
-#                  are shorter, both are resampled (to m and m/w, or m*w and m)
-#   length_sigma : length of the returned vector
-weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
-  # linear interpolation of v onto len evenly spaced points
-  resampleTo <- function(v,len) approx(1:length(v),v,seq(1,length(v),length.out=len))$y
-
-  lx <- length(x)
-  ly <- length(y)
-  tooShort <- {lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}
-  if(tooShort){
-    if(w<1){
-      y1 <- resampleTo(y,m)
-      x1 <- resampleTo(x,m/w)
-    }else{
-      y1 <- resampleTo(y,m*w)
-      x1 <- resampleTo(x,m)
-    }
-  }else{
-    x1 <- x
-    y1 <- resampleTo(y,floor(lx*w))
-  }
-  lx <- length(x1)
-  ly <- length(y1)
-
-  openConvolution <- convolve(x1,rev(y1),type="open")
-  tmp <- approx(x=1:(lx+ly-1),y=openConvolution,xout=seq(1,lx+ly-1,length.out=length_sigma))$y
-  tmp[tmp<=0] = 0 
-  return(tmp/sum(tmp))
-}
-
-########################
-
-
-
-
-# Template for the single-nucleotide mutability model: four zeros named by NUCLEOTIDES
-mutabilityMatrixONE <- setNames(rep(0,4), NUCLEOTIDES)
-
-  # triMutability Background Count
-  buildMutabilityModelONE <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
-    
-    #rowOrigMatInput = matInput[inputMatrixIndex,]    
-    seqGL =  gsub("-", "", matInput[inputMatrixIndex,2])
-    seqInput = gsub("-", "", matInput[inputMatrixIndex,1])    
-    matInput[inputMatrixIndex,] <<- c(seqInput,seqGL)
-    seqLength = nchar(seqGL)      
-    mutationCount <- analyzeMutations(inputMatrixIndex, model, multipleMutation, seqWithStops)
-    BackgroundMatrix = mutabilityMatrixONE
-    MutationMatrix = mutabilityMatrixONE    
-    MutationCountMatrix = mutabilityMatrixONE    
-    if(!is.na(mutationCount)){
-      if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ 
-                  
-#         ONEmerStartPos = 1:(seqLength)
-#         ONEmerLength <- length(ONEmerStartPos)
-        ONEmerGL <- s2c(seqGL)
-        ONEmerSeq <- s2c(seqInput)
-    
-        #Background
-        for(ONEmerIndex in 1:seqLength){
-          ONEmer = ONEmerGL[ONEmerIndex]
-          if(ONEmer!="N"){
-            ONEmerCodonPos = getCodonPos(ONEmerIndex)
-            ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos]) 
-            ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] )         
-            
-            # All mutations model
-            #if(!any(grep("N",ONEmerReadingFrameCodon))){
-              if(model==0){
-                if(stopMutations==0){
-                  if(!any(grep("N",ONEmerReadingFrameCodonInputSeq)))
-                    BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1)              
-                }else{
-                  if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){
-                    positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1]
-                    BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon])
-                  }
-                }
-              }else{ # Only silent mutations
-                if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){
-                  positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
-                  BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon])
-                }
-              }
-            }
-          }
-        }
-        
-        #Mutations
-        if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
-        if(model==1) mutationCount = mutationCount[mutationCount=="S"]  
-        mutationPositions = as.numeric(names(mutationCount))
-        mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
-        mutationPositions =  mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
-        countMutations = 0 
-        for(mutationPosition in mutationPositions){
-          ONEmerIndex = mutationPosition
-          ONEmer = ONEmerSeq[ONEmerIndex]
-          GLONEmer = ONEmerGL[ONEmerIndex]
-          ONEmerCodonPos = getCodonPos(ONEmerIndex)
-          ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos])  
-          ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos])  
-          if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){
-            if(model==0){
-                countMutations = countMutations + 1              
-                MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1)
-                MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)             
-            }else{
-              if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){
-                  countMutations = countMutations + 1
-                  positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
-                  glNuc =  substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
-                  inputNuc =  substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
-                  MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc])
-                  MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)                                    
-              }                
-            }                  
-          }              
-        }
-        
-        seqMutability = MutationMatrix/BackgroundMatrix
-        seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
-        #cat(inputMatrixIndex,"\t",countMutations,"\n")
-        return(list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))      
-#         tmp<-list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix)
-      }        
-    }
-  
-################
-# $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $
-
-trim <- function(s, recode.factor=TRUE, ...)
-  UseMethod("trim", s)
-
-trim.default <- function(s, recode.factor=TRUE, ...)
-  s
-
-trim.character <- function(s, recode.factor=TRUE, ...)
-{
-  s <- sub(pattern="^ +", replacement="", x=s)
-  s <- sub(pattern=" +$", replacement="", x=s)
-  s
-}
-
-trim.factor <- function(s, recode.factor=TRUE, ...)
-{
-  levels(s) <- trim(levels(s))
-  if(recode.factor) {
-    dots <- list(x=s, ...)
-    if(is.null(dots$sort)) dots$sort <- sort
-    s <- do.call(what=reorder.factor, args=dots)
-  }
-  s
-}
-
-trim.list <- function(s, recode.factor=TRUE, ...)
-  lapply(s, trim, recode.factor=recode.factor, ...)
-
-trim.data.frame <- function(s, recode.factor=TRUE, ...)
-{
-  s[] <- trim.list(s, recode.factor=recode.factor, ...)
-  s
-}
-#######################################
-# Compute the expected for each sequence-germline pair by codon 
-getExpectedIndividualByCodon <- function(matInput){    
-if( any(grep("multicore",search())) ){  
-  facGL <- factor(matInput[,2])
-  facLevels = levels(facGL)
-  LisGLs_MutabilityU = mclapply(1:length(facLevels),  function(x){
-    computeMutabilities(facLevels[x])
-  })
-  facIndex = match(facGL,facLevels)
-  
-  LisGLs_Mutability = mclapply(1:nrow(matInput),  function(x){
-    cInput = rep(NA,nchar(matInput[x,1]))
-    cInput[s2c(matInput[x,1])!="N"] = 1
-    LisGLs_MutabilityU[[facIndex[x]]] * cInput                                                   
-  })
-  
-  LisGLs_Targeting =  mclapply(1:dim(matInput)[1],  function(x){
-    computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
-  })
-  
-  LisGLs_MutationTypes  = mclapply(1:length(matInput[,2]),function(x){
-    #print(x)
-    computeMutationTypes(matInput[x,2])
-  })
-  
-  LisGLs_R_Exp = mclapply(1:nrow(matInput),  function(x){
-    Exp_R <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
-                        function(codonNucs){                                                      
-                          RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") 
-                          sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) 
-                        }
-    )                                                   
-  })
-  
-  LisGLs_S_Exp = mclapply(1:nrow(matInput),  function(x){
-    Exp_S <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
-                        function(codonNucs){                                                      
-                          SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")   
-                          sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
-                        }
-    )                                                 
-  })                                                
-  
-  Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  
-  Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  
-  return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )
-  }else{
-    facGL <- factor(matInput[,2])
-    facLevels = levels(facGL)
-    LisGLs_MutabilityU = lapply(1:length(facLevels),  function(x){
-      computeMutabilities(facLevels[x])
-    })
-    facIndex = match(facGL,facLevels)
-    
-    LisGLs_Mutability = lapply(1:nrow(matInput),  function(x){
-      cInput = rep(NA,nchar(matInput[x,1]))
-      cInput[s2c(matInput[x,1])!="N"] = 1
-      LisGLs_MutabilityU[[facIndex[x]]] * cInput                                                   
-    })
-    
-    LisGLs_Targeting =  lapply(1:dim(matInput)[1],  function(x){
-      computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
-    })
-    
-    LisGLs_MutationTypes  = lapply(1:length(matInput[,2]),function(x){
-      #print(x)
-      computeMutationTypes(matInput[x,2])
-    })
-    
-    LisGLs_R_Exp = lapply(1:nrow(matInput),  function(x){
-      Exp_R <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
-                          function(codonNucs){                                                      
-                            RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") 
-                            sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) 
-                          }
-      )                                                   
-    })
-    
-    LisGLs_S_Exp = lapply(1:nrow(matInput),  function(x){
-      Exp_S <-  rollapply(as.zoo(1:readEnd),width=3,by=3,
-                          function(codonNucs){                                                      
-                            SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S")   
-                            sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T )
-                          }
-      )                                                 
-    })                                                
-    
-    Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  
-    Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T)  
-    return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )    
-  }
-}
-
-# getObservedMutationsByCodon <- function(listMutations){
-#   numbSeqs <- length(listMutations) 
-#   obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
-#   obsMu_S <- obsMu_R
-#   temp <- mclapply(1:length(listMutations), function(i){
-#     arrMutations = listMutations[[i]]
-#     RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
-#     RPos <- sapply(RPos,getCodonNumb)                                                                    
-#     if(any(RPos)){
-#       tabR <- table(RPos)
-#       obsMu_R[i,as.numeric(names(tabR))] <<- tabR
-#     }                                    
-#     
-#     SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
-#     SPos <- sapply(SPos,getCodonNumb)
-#     if(any(SPos)){
-#       tabS <- table(SPos)
-#       obsMu_S[i,names(tabS)] <<- tabS
-#     }                                          
-#   }
-#   )
-#   return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) 
-# }
-
-getObservedMutationsByCodon <- function(listMutations){
-  numbSeqs <- length(listMutations) 
-  obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
-  obsMu_S <- obsMu_R
-  temp <- lapply(1:length(listMutations), function(i){
-    arrMutations = listMutations[[i]]
-    RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
-    RPos <- sapply(RPos,getCodonNumb)                                                                    
-    if(any(RPos)){
-      tabR <- table(RPos)
-      obsMu_R[i,as.numeric(names(tabR))] <<- tabR
-    }                                    
-    
-    SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
-    SPos <- sapply(SPos,getCodonNumb)
-    if(any(SPos)){
-      tabS <- table(SPos)
-      obsMu_S[i,names(tabS)] <<- tabS
-    }                                          
-  }
-  )
-  return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) 
-}
-
--- a/baseline/Baseline_Main.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,388 +0,0 @@
-#########################################################################################
-# License Agreement
-# 
-# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE 
-# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER 
-# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE 
-# OR COPYRIGHT LAW IS PROHIBITED.
-# 
-# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE 
-# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED 
-# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN 
-# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
-#
-# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
-# Coded by: Mohamed Uduman & Gur Yaari
-# Copyright 2012 Kleinstein Lab
-# Version: 1.3 (01/23/2014)
-#########################################################################################
-
-op <- options();
-options(showWarnCalls=FALSE, showErrorCalls=FALSE, warn=-1)
-library('seqinr')
-if( F & Sys.info()[1]=="Linux"){
-  library("multicore")
-}
-
-# Load functions and initialize global variables
-source("Baseline_Functions.r")
-
-# Initialize parameters with user provided arguments
-  arg <- commandArgs(TRUE)                       
-  #arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample")
-  #arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample")
-  #arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu")
-  testID <- as.numeric(arg[1])                    # 1 = Focused, 2 = Local
-  species <- as.numeric(arg[2])                   # 1 = Human. 2 = Mouse
-  substitutionModel <- as.numeric(arg[3])         # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS
-  mutabilityModel <- as.numeric(arg[4])           # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 2002)  , 5 = FiveS
-  clonal <- as.numeric(arg[5])                    # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations
-  fixIndels <- as.numeric(arg[6])                 # 0 = Do nothing, 1 = Try and fix Indels
-  region <- as.numeric(strsplit(arg[7],":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3
-  inputFilePath <- arg[8]                         # Full path to input file
-  outputPath <- arg[9]                            # Full path to location of output files
-  outputID <- arg[10]                             # ID for session output  
-  
-
-  if(testID==5){
-    traitChangeModel <- 1
-    if( !is.na(any(arg[11])) ) traitChangeModel <- as.numeric(arg[11])    # 1 <- Chothia 1998
-    initializeTraitChange(traitChangeModel)    
-  }
-  
-# Initialize other parameters/variables
-    
-  # Initialzie the codon table ( definitions of R/S )
-  computeCodonTable(testID) 
-
-  # Initialize   
-  # Test Name
-  testName<-"Focused"
-  if(testID==2) testName<-"Local"
-  if(testID==3) testName<-"Imbalanced"    
-  if(testID==4) testName<-"ImbalancedSilent"    
-    
-  # Indel placeholders initialization
-  indelPos <- NULL
-  delPos <- NULL
-  insPos <- NULL
-
-  # Initialize in Tranistion & Mutability matrixes
-  substitution <- initializeSubstitutionMatrix(substitutionModel,species)
-  mutability <- initializeMutabilityMatrix(mutabilityModel,species)
-  
-  # FWR/CDR boundaries
-  flagTrim <- F
-  if( is.na(region[7])){
-    flagTrim <- T
-    region[7]<-region[6]
-  }
-  readStart = min(region,na.rm=T)
-  readEnd = max(region,na.rm=T)
-  if(readStart>1){
-    region = region - (readStart - 1)
-  }
-  region_Nuc = c( (region[1]*3-2) , (region[2:7]*3) )
-  region_Cod = region
-  
-  readStart = (readStart*3)-2
-  readEnd = (readEnd*3)
-    
-    FWR_Nuc <- c( rep(TRUE,(region_Nuc[2])),
-                  rep(FALSE,(region_Nuc[3]-region_Nuc[2])),
-                  rep(TRUE,(region_Nuc[4]-region_Nuc[3])),
-                  rep(FALSE,(region_Nuc[5]-region_Nuc[4])),
-                  rep(TRUE,(region_Nuc[6]-region_Nuc[5])),
-                  rep(FALSE,(region_Nuc[7]-region_Nuc[6]))
-                )
-    CDR_Nuc <- (1-FWR_Nuc)
-    CDR_Nuc <- as.logical(CDR_Nuc)
-    FWR_Nuc_Mat <- matrix( rep(FWR_Nuc,4), ncol=length(FWR_Nuc), nrow=4, byrow=T)
-    CDR_Nuc_Mat <- matrix( rep(CDR_Nuc,4), ncol=length(CDR_Nuc), nrow=4, byrow=T)
-    
-    FWR_Codon <- c( rep(TRUE,(region[2])),
-                  rep(FALSE,(region[3]-region[2])),
-                  rep(TRUE,(region[4]-region[3])),
-                  rep(FALSE,(region[5]-region[4])),
-                  rep(TRUE,(region[6]-region[5])),
-                  rep(FALSE,(region[7]-region[6]))
-                )
-    CDR_Codon <- (1-FWR_Codon)
-    CDR_Codon <- as.logical(CDR_Codon)
-
-
-# Read input FASTA file
-  tryCatch(
-    inputFASTA <- baseline.read.fasta(inputFilePath, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F)
-    , error = function(ex){
-      cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
-      q()
-    }
-  )
-  
-  if (length(inputFASTA)==1) {
-    cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
-    q()
-  }
-
-  # Process sequence IDs/names
-  names(inputFASTA) <- sapply(names(inputFASTA),function(x){trim(x)})
-  
-  # Convert non nucleotide characters to N
-  inputFASTA[length(inputFASTA)] = gsub("\t","",inputFASTA[length(inputFASTA)])
-  inputFASTA <- lapply(inputFASTA,replaceNonFASTAChars)
-
-  # Process the FASTA file and conver to Matrix[inputSequence, germlineSequence]
-  processedInput <- processInputAdvanced(inputFASTA)
-  matInput <- processedInput[[1]]
-  germlines <- processedInput[[2]]
-  lenGermlines = length(unique(germlines))
-  groups <- processedInput[[3]]
-  lenGroups = length(unique(groups))
-  rm(processedInput)
-  rm(inputFASTA)
-
-#   # remove clones with less than 2 seqeunces
-#   tableGL <- table(germlines)
-#   singletons <- which(tableGL<8)
-#   rowsToRemove <- match(singletons,germlines)
-#   if(any(rowsToRemove)){    
-#     matInput <- matInput[-rowsToRemove,]
-#     germlines <- germlines[-rowsToRemove]    
-#     groups <- groups[-rowsToRemove]
-#   }
-# 
-#   # remove unproductive seqs
-#   nonFuctionalSeqs <- sapply(rownames(matInput),function(x){any(grep("unproductive",x))})
-#   if(any(nonFuctionalSeqs)){
-#     if(sum(nonFuctionalSeqs)==length(germlines)){
-#       write.table("Unproductive",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
-#       q()      
-#     }
-#     matInput <- matInput[-which(nonFuctionalSeqs),]
-#     germlines <- germlines[-which(nonFuctionalSeqs)]
-#     germlines[1:length(germlines)] <- 1:length(germlines)
-#     groups <- groups[-which(nonFuctionalSeqs)]
-#   }
-# 
-#   if(class(matInput)=="character"){
-#     write.table("All unproductive seqs",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
-#     q()    
-#   }
-#   
-#   if(nrow(matInput)<10 | is.null(nrow(matInput))){
-#     write.table(paste(nrow(matInput), "seqs only",sep=""),file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
-#     q()
-#   }
-
-# replace leading & trailing "-" with "N:
-  matInput <- t(apply(matInput,1,replaceLeadingTrailingDashes,readEnd))
-    
-  # Trim (nucleotide) input sequences to the last codon
-  #matInput[,1] <- apply(matrix(matInput[,1]),1,trimToLastCodon) 
-
-#   # Check for Indels
-#   if(fixIndels){
-#     delPos <- fixDeletions(matInput)
-#     insPos <- fixInsertions(matInput)
-#   }else{
-#     # Check for indels
-#     indelPos <- checkForInDels(matInput)
-#     indelPos <- apply(cbind(indelPos[[1]],indelPos[[2]]),1,function(x){(x[1]==T & x[2]==T)})
-#   }
-  
-  # If indels are present, remove mutations in the seqeunce & throw warning at end
-  #matInput[indelPos,] <- apply(matrix(matInput[indelPos,],nrow=sum(indelPos),ncol=2),1,function(x){x[1]=x[2]; return(x) })
-  
-  colnames(matInput)=c("Input","Germline")
-
-  # If seqeunces are clonal, create effective sequence for each clone & modify germline/group definitions
-  germlinesOriginal = NULL
-  if(clonal){
-    germlinesOriginal <- germlines
-    collapseCloneResults <- tapply(1:nrow(matInput),germlines,function(i){
-                                                                collapseClone(matInput[i,1],matInput[i[1],2],readEnd,nonTerminalOnly=(clonal-1))
-                                                              })
-    matInput = t(sapply(collapseCloneResults,function(x){return(x[[1]])}))
-    names_groups = tapply(groups,germlines,function(x){names(x[1])})  
-    groups = tapply(groups,germlines,function(x){array(x[1],dimnames=names(x[1]))})  
-    names(groups) = names_groups
-  
-    names_germlines =  tapply(germlines,germlines,function(x){names(x[1])})  
-    germlines = tapply(   germlines,germlines,function(x){array(x[1],dimnames=names(x[1]))}   )
-    names(germlines) = names_germlines
-    matInputErrors = sapply(collapseCloneResults,function(x){return(x[[2]])})  
-  }
-
-
-# Selection Analysis
-
-  
-#  if (length(germlines)>sequenceLimit) {
-#    # Code to parallelize processing goes here
-#    stop( paste("Error: Cannot process more than ", Upper_limit," sequences",sep="") )
-#  }
-
-#  if (length(germlines)<sequenceLimit) {}
-  
-    # Compute expected mutation frequencies
-    matExpected <- getExpectedIndividual(matInput)
-    
-    # Count observed number of mutations in the different regions
-    mutations <- lapply( 1:nrow(matInput),  function(i){
-                                              #cat(i,"\n")
-                                              seqI = s2c(matInput[i,1])
-                                              seqG = s2c(matInput[i,2])
-                                              matIGL = matrix(c(seqI,seqG),ncol=length(seqI),nrow=2,byrow=T)    
-                                              retVal <- NA
-                                              tryCatch(
-                                                retVal <- analyzeMutations2NucUri(matIGL)
-                                                , error = function(ex){
-                                                  retVal <- NA
-                                                }
-                                              )                                              
-                                              
-                                              
-                                              return( retVal )
-                                            })
-
-    matObserved <- t(sapply( mutations, processNucMutations2 ))
-    numberOfSeqsWithMutations <- numberOfSeqsWithMutations(matObserved, testID)
-
-    #if(sum(numberOfSeqsWithMutations)==0){
-    #  write.table("No mutated sequences",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
-    #  q()      
-    #}
-    
-    matMutationInfo <- cbind(matObserved,matExpected)
-    rm(matObserved,matExpected)
-    
-     
-    #Bayesian  PDFs
-    bayes_pdf = computeBayesianScore(matMutationInfo, test=testName, max_sigma=20,length_sigma=4001)
-    bayesPDF_cdr = bayes_pdf[[1]]
-    bayesPDF_fwr = bayes_pdf[[2]]    
-    rm(bayes_pdf)
-
-    bayesPDF_germlines_cdr = tapply(bayesPDF_cdr,germlines,function(x) groupPosteriors(x,length_sigma=4001))
-    bayesPDF_germlines_fwr = tapply(bayesPDF_fwr,germlines,function(x) groupPosteriors(x,length_sigma=4001))
-    
-    bayesPDF_groups_cdr = tapply(bayesPDF_cdr,groups,function(x) groupPosteriors(x,length_sigma=4001))
-    bayesPDF_groups_fwr = tapply(bayesPDF_fwr,groups,function(x) groupPosteriors(x,length_sigma=4001))
-    
-    if(lenGroups>1){
-      groups <- c(groups,lenGroups+1)
-      names(groups)[length(groups)] = "All sequences combined"
-      bayesPDF_groups_cdr[[lenGroups+1]] =   groupPosteriors(bayesPDF_groups_cdr,length_sigma=4001)
-      bayesPDF_groups_fwr[[lenGroups+1]] =   groupPosteriors(bayesPDF_groups_fwr,length_sigma=4001)
-    }
-    
-    #Bayesian  Outputs
-    bayes_cdr =  t(sapply(bayesPDF_cdr,calcBayesOutputInfo))
-    bayes_fwr =  t(sapply(bayesPDF_fwr,calcBayesOutputInfo))
-    bayes_germlines_cdr =  t(sapply(bayesPDF_germlines_cdr,calcBayesOutputInfo))
-    bayes_germlines_fwr =  t(sapply(bayesPDF_germlines_fwr,calcBayesOutputInfo))
-    bayes_groups_cdr =  t(sapply(bayesPDF_groups_cdr,calcBayesOutputInfo))
-    bayes_groups_fwr =  t(sapply(bayesPDF_groups_fwr,calcBayesOutputInfo))
-    
-    #P-values
-    simgaP_cdr = sapply(bayesPDF_cdr,computeSigmaP)
-    simgaP_fwr = sapply(bayesPDF_fwr,computeSigmaP)
-    
-    simgaP_germlines_cdr = sapply(bayesPDF_germlines_cdr,computeSigmaP)
-    simgaP_germlines_fwr = sapply(bayesPDF_germlines_fwr,computeSigmaP)
-    
-    simgaP_groups_cdr = sapply(bayesPDF_groups_cdr,computeSigmaP)
-    simgaP_groups_fwr = sapply(bayesPDF_groups_fwr,computeSigmaP)
-    
-    
-    #Format output
-    
-    # Round expected mutation frequencies to 3 decimal places
-    matMutationInfo[germlinesOriginal[indelPos],] = NA
-    if(nrow(matMutationInfo)==1){
-      matMutationInfo[5:8] = round(matMutationInfo[,5:8]/sum(matMutationInfo[,5:8],na.rm=T),3)
-    }else{
-      matMutationInfo[,5:8] = t(round(apply(matMutationInfo[,5:8],1,function(x){ return(x/sum(x,na.rm=T)) }),3))
-    }
-    
-    listPDFs = list()
-    nRows = length(unique(groups)) + length(unique(germlines)) + length(groups)
-    
-    matOutput = matrix(NA,ncol=18,nrow=nRows)
-    rowNumb = 1
-    for(G in unique(groups)){
-      #print(G)
-      matOutput[rowNumb,c(1,2,11:18)] = c("Group",names(groups)[groups==G][1],bayes_groups_cdr[G,],bayes_groups_fwr[G,],simgaP_groups_cdr[G],simgaP_groups_fwr[G])
-      listPDFs[[rowNumb]] = list("CDR"=bayesPDF_groups_cdr[[G]],"FWR"=bayesPDF_groups_fwr[[G]])
-      names(listPDFs)[rowNumb] = names(groups[groups==paste(G)])[1]
-      #if(names(groups)[which(groups==G)[1]]!="All sequences combined"){
-      gs = unique(germlines[groups==G])
-      rowNumb = rowNumb+1
-      if( !is.na(gs) ){
-        for( g in gs ){
-          matOutput[rowNumb,c(1,2,11:18)] = c("Germline",names(germlines)[germlines==g][1],bayes_germlines_cdr[g,],bayes_germlines_fwr[g,],simgaP_germlines_cdr[g],simgaP_germlines_fwr[g])
-          listPDFs[[rowNumb]] = list("CDR"=bayesPDF_germlines_cdr[[g]],"FWR"=bayesPDF_germlines_fwr[[g]])
-          names(listPDFs)[rowNumb] = names(germlines[germlines==paste(g)])[1]
-          rowNumb = rowNumb+1
-          indexesOfInterest = which(germlines==g)
-          numbSeqsOfInterest =  length(indexesOfInterest)
-          rowNumb = seq(rowNumb,rowNumb+(numbSeqsOfInterest-1))
-          matOutput[rowNumb,] = matrix(   c(  rep("Sequence",numbSeqsOfInterest),
-                                              rownames(matInput)[indexesOfInterest],
-                                              c(matMutationInfo[indexesOfInterest,1:4]),
-                                              c(matMutationInfo[indexesOfInterest,5:8]),
-                                              c(bayes_cdr[indexesOfInterest,]),
-                                              c(bayes_fwr[indexesOfInterest,]),
-                                              c(simgaP_cdr[indexesOfInterest]),
-                                              c(simgaP_fwr[indexesOfInterest])                                              
-          ), ncol=18, nrow=numbSeqsOfInterest,byrow=F)
-          increment=0
-          for( ioi in indexesOfInterest){
-            listPDFs[[min(rowNumb)+increment]] =  list("CDR"=bayesPDF_cdr[[ioi]] , "FWR"=bayesPDF_fwr[[ioi]])
-            names(listPDFs)[min(rowNumb)+increment] = rownames(matInput)[ioi]
-            increment = increment + 1
-          }
-          rowNumb=max(rowNumb)+1
-
-        }
-      }
-    }
-    colsToFormat = 11:18
-    matOutput[,colsToFormat] = formatC(  matrix(as.numeric(matOutput[,colsToFormat]), nrow=nrow(matOutput), ncol=length(colsToFormat)) ,  digits=3)
-    matOutput[matOutput== " NaN"] = NA
-    
-    
-    
-    colnames(matOutput) = c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S",
-                            "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S",
-                            paste( rep(testName,6), rep(c("Sigma","CIlower","CIupper"),2),rep(c("CDR","FWR"),each=3), sep="_"),
-                            paste( rep(testName,2), rep("P",2),c("CDR","FWR"), sep="_")
-    )
-    fileName = paste(outputPath,outputID,".txt",sep="")
-    write.table(matOutput,file=fileName,quote=F,sep="\t",row.names=T,col.names=NA)
-    fileName = paste(outputPath,outputID,".RData",sep="")
-    save(listPDFs,file=fileName)
-
-indelWarning = FALSE
-if(sum(indelPos)>0){
-  indelWarning = "<P>Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis.";
-  indelWarning = paste( indelWarning , "<UL>", sep="" )
-  for(indels in names(indelPos)[indelPos]){
-    indelWarning = paste( indelWarning , "<LI>", indels, "</LI>", sep="" )
-  }
-  indelWarning = paste( indelWarning , "</UL></P>", sep="" )
-}
-
-cloneWarning = FALSE
-if(clonal==1){
-  if(sum(matInputErrors)>0){
-    cloneWarning = "<P>Warning: The following clones have sequences of unequal length.";
-    cloneWarning = paste( cloneWarning , "<UL>", sep="" )
-    for(clone in names(matInputErrors)[matInputErrors]){
-      cloneWarning = paste( cloneWarning , "<LI>", names(germlines)[as.numeric(clone)], "</LI>", sep="" )
-    }
-    cloneWarning = paste( cloneWarning , "</UL></P>", sep="" )
-  }
-}
-cat(paste("Success",outputID,indelWarning,cloneWarning,sep="|"))
Binary file baseline/FiveS_Mutability.RData has changed
Binary file baseline/FiveS_Substitution.RData has changed
--- a/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,703 +0,0 @@
->IGHV1-18*01
-caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-18*02
-caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc
->IGHV1-18*03
-caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga
->IGHV1-18*04
-caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-2*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
->IGHV1-2*02
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-2*03
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-2*04
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
->IGHV1-2*05
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
->IGHV1-24*01
-caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
->IGHV1-3*01
-caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga
->IGHV1-3*02
-caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga
->IGHV1-38-4*01
-caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca
->IGHV1-45*01
-cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana
->IGHV1-45*02
-cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata
->IGHV1-45*03
-.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga
->IGHV1-46*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-46*02
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-46*03
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga
->IGHV1-58*01
-caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
->IGHV1-58*02
-caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
->IGHV1-68*01
-caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata
->IGHV1-69*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*02
-caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1-69*03
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc
->IGHV1-69*04
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*05
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1-69*06
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*07
-.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag
->IGHV1-69*08
-caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*09
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*10
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*11
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*12
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*13
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69*14
-caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-69-2*01
-gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
->IGHV1-69-2*02
-.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag
->IGHV1-69D*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1-8*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
->IGHV1-8*02
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
->IGHV1-NL1*01
-caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga
->IGHV1/OR15-1*01
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga
->IGHV1/OR15-1*02
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
->IGHV1/OR15-1*03
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga
->IGHV1/OR15-1*04
-caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
->IGHV1/OR15-2*01
-caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga
->IGHV1/OR15-2*02
-caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
->IGHV1/OR15-2*03
-caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
->IGHV1/OR15-3*01
-caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1/OR15-3*02
-caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
->IGHV1/OR15-3*03
-caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1/OR15-4*01
-caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
->IGHV1/OR15-5*01
-.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
->IGHV1/OR15-5*02
-caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
->IGHV1/OR15-9*01
-caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga
->IGHV1/OR21-1*01
-caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga
->IGHV2-10*01
-caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac
->IGHV2-26*01
-caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac
->IGHV2-5*01
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-5*02
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-5*03
-................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt
->IGHV2-5*04|
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac
->IGHV2-5*05
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-5*06
-cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga
->IGHV2-5*08
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-5*09
-caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-70*01
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2-70*02
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*03
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*04
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac
->IGHV2-70*05
-..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga
->IGHV2-70*06
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*07
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*08
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
->IGHV2-70*09
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg
->IGHV2-70*10
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2-70*11
-cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2-70*12
-cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
->IGHV2-70*13
-caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac
->IGHV2-70D*04
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2-70D*14
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
->IGHV2/OR16-5*01
-caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag
->IGHV3-11*01
-caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-11*03
-caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga
->IGHV3-11*04
-caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-11*05
-caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-11*06
-caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-13*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
->IGHV3-13*02
-gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
->IGHV3-13*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga
->IGHV3-13*04
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
->IGHV3-13*05
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
->IGHV3-15*01
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*02
-gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*03
-gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*04
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*05
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*06
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*07
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
->IGHV3-15*08
-gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
->IGHV3-16*01
-gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
->IGHV3-16*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
->IGHV3-19*01
-acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa
->IGHV3-20*01
-gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
->IGHV3-20*02
-gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
->IGHV3-21*01
-gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-21*02
-gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-21*03
-gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga
->IGHV3-21*04
-gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-22*01
-gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
->IGHV3-22*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
->IGHV3-23*01
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23*02
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23*03
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23*04
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23*05
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa
->IGHV3-23D*01
-gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-23D*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
->IGHV3-25*01
-gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
->IGHV3-25*02
-gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
->IGHV3-25*03
-gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga
->IGHV3-25*04
-gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga
->IGHV3-25*05
-gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
->IGHV3-29*01
-gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
->IGHV3-30*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*02
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30*03
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*04
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*05
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga
->IGHV3-30*06
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*07
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*08
-caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
->IGHV3-30*09
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*10
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*11
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*12
-caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*13
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*14
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*15
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*16
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*17
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30*18
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30*19
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30-2*01
-gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca
->IGHV3-30-22*01
-gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc
->IGHV3-30-3*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30-3*02
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30-3*03
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-30-33*01
-gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg
->IGHV3-30-42*01
-gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
->IGHV3-30-5*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30-5*02
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-30-52*01
-gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg
->IGHV3-32*01
-gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc
->AIGHV3-33*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-33*02
-caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-33*03
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-33*04
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-33*05
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-33*06
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
->IGHV3-33-2*01
-gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca
->IGHV3-35*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa
->IGHV3-38*01|
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata
->IGHV3-38*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
->IGHV3-38*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
->IGHV3-38-3*01
-gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga
->IGHV3-43*01
-gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
->IGHV3-43*02
-gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
->IGHV3-43D*01
-gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata
->IGHV3-47*01
-gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga
->IGHV3-47*02
-gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga
->IGHV3-48*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-48*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga
->IGHV3-48*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
->IGHV3-48*04
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-49*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-49*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-49*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-49*04
-gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-49*05
-gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
->IGHV3-52*01
-gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg
->IGHV3-52*02
-gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
->IGHV3-52*03
-gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
->IGHV3-53*01
-gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-53*02
-gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-53*03
-gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga
->IGHV3-53*04
-gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga
->IGHV3-54*01
-gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
->IGHV3-54*02
-gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg
->IGHV3-54*04
-gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
->IGHV3-62*01
-gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga
->IGHV3-63*01
-gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt
->IGHV3-63*02
-gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa
->IGHV3-64*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
->IGHV3-64*02
-gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
->IGHV3-64*03
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
->IGHV3-64*04
-caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-64*05
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
->IGHV3-64D*06
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
->IGHV3-66*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-66*02
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
->IGHV3-66*03
-gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
->IGHV3-66*04
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca
->IGHV3-69-1*01
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-69-1*02
-gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
->IGHV3-7*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-7*02
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga
->IGHV3-7*03
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-71*01
-gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
->IGHV3-71*02
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga
->IGHV3-71*03
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
->IGHV3-72*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga
->IGHV3-72*02
-....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat
->IGHV3-73*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
->IGHV3-73*02
-gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
->IGHV3-74*01
-gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
->IGHV3-74*02
-gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga
->IGHV3-74*03
-gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
->IGHV3-9*01
-gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
->IGHV3-9*02
-gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
->IGHV3-9*03
-gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata
->IGHV3-NL1*01
-caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
->IGHV3/OR15-7*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga
->IGHV3/OR15-7*02
-gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
->IGHV3/OR15-7*03
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
->IGHV3/OR15-7*05
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga
->IGHV3/OR16-10*01
-gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
->IGHV3/OR16-10*02
-gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
->IGHV3/OR16-10*03
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga
->IGHV3/OR16-12*01
-gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga
->IGHV3/OR16-13*01
-gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
->IGHV3/OR16-14*01
-gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
->IGHV3/OR16-15*01
-gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa
->IGHV3/OR16-15*02
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga
->IGHV3/OR16-16*01
-gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga
->IGHV3/OR16-6*02
-gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
->IGHV3/OR16-8*01
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa
->IGHV3/OR16-8*02
-gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca
->IGHV3/OR16-9*01
-gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa
->IGHV4-28*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-28*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-28*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga
->IGHV4-28*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga
->IGHV4-28*05
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-28*06
-caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa
->IGHV4-28*07
-caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-30-2*01
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
->IGHV4-30-2*02
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
->IGHV4-30-2*03
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca
->IGHV4-30-2*04
-...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
->IGHV4-30-2*05
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
->IGHV4-30-2*06
-cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
->IGHV4-30-4*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
->IGHV4-30-4*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga
->IGHV4-30-4*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
->XIGHV4-30-4*04
-caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg
->IGHV4-30-4*05
-..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
->IGHV4-30-4*06
-...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
->IGHV4-30-4*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
->IGHV4-31*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-31*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-31*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-31*04
-caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
->IGHV4-31*05
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg
->IGHV4-31*06
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
->IGHV4-31*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
->IGHV4-31*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
->IGHV4-31*09
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-31*10
-caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga
->IGHV4-34*01
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-34*02
-caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-34*03
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-34*04
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-34*05
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-34*06
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-34*07
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-34*08
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg
->IGHV4-34*09
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-34*10
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
->IGHV4-34*11
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
->IGHV4-34*12
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga
->IGHV4-34*13
-...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
->IGHV4-38-2*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga
->IGHV4-38-2*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
->IGHV4-39*01
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca
->IGHV4-39*02
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga
->IGHV4-39*03
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
->IGHV4-39*04
-..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac
->IGHV4-39*05
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
->IGHV4-39*06
-cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-39*07
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-4*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
->IGHV4-4*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-4*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-4*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-4*05
-caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-4*06
-............................................................
-...............tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-4*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-4*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
->IGHV4-55*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
->IGHV4-55*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
->IGHV4-55*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-55*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-55*05
-caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
->IGHV4-55*06
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg
->IGHV4-55*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
->IGHV4-55*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4-55*09
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
->IGHV4-59*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4-59*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4-59*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
->IGHV4-59*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
->IGHV4-59*05
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
->IGHV4-59*06
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
->IGHV4-59*07
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga
->IGHV4-59*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca
->IGHV4-59*09
-...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg
->IGHV4-59*10
-caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
->IGHV4-61*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4-61*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
->IGHV4-61*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4-61*04
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg
->IGHV4-61*05
-cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga
->IGHV4-61*06
-...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
->IGHV4-61*07
-...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca
->IGHV4-61*08
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
->IGHV4/OR15-8*01
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4/OR15-8*02
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV4/OR15-8*03
-caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
->IGHV5-10-1*01
-gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-10-1*02
-gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca
->IGHV5-10-1*03
-gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-10-1*04
-gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-51*01
-gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
->IGHV5-51*02
-gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
->IGHV5-51*03
-gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-51*04
-gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
->IGHV5-51*05
-.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg
->IGHV5-78*01
-gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga
->IGHV6-1*01
-caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
->IGHV6-1*02
-caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
->IGHV7-34-1*01
-...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
->IGHV7-34-1*02
-...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
->IGHV7-4-1*01
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga
->IGHV7-4-1*02
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
->IGHV7-4-1*03
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg
->IGHV7-4-1*04
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
->IGHV7-4-1*05
-caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga
->AIGHV7-40*03|
-ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga
->IGHV7-81*01
-caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- a/baseline/IMGTVHreferencedataset20161215.fa	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
->IGHV1-18*01
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-18*02
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc
>IGHV1-18*03
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1-18*04
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
>IGHV1-2*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*04
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*05
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
>IGHV1-24*01
caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
>IGHV1-3*01
caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga
>IGHV1-3*02
caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga
>IGHV1-38-4*01
caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca
>IGHV1-45*01
cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana
>IGHV1-45*02
cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata
>IGHV1-45*03
.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga
>IGHV1-46*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-46*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-46*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga
>IGHV1-58*01
caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
>IGHV1-58*02
caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
>IGHV1-68*01
caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata
>IGHV1-69*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*02
caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1-69*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc
>IGHV1-69*04
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*05
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1-69*06
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*07
.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag
>IGHV1-69*08
caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*09
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*10
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*11
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*12
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*13
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*14
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69-2*01
gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
>IGHV1-69-2*02
.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag
>IGHV1-69D*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-8*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
>IGHV1-8*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
>IGHV1-NL1*01
caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga
>IGHV1/OR15-1*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga
>IGHV1/OR15-1*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
>IGHV1/OR15-1*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga
>IGHV1/OR15-1*04
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
>IGHV1/OR15-2*01
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-2*02
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-2*03
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-3*01
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-3*02
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1/OR15-3*03
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-4*01
caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-5*01
.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
>IGHV1/OR15-5*02
caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
>IGHV1/OR15-9*01
caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga
>IGHV1/OR21-1*01
caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga
>IGHV2-10*01
caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac
>IGHV2-26*01
caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac
>IGHV2-5*01
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*02
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*03
................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt
>IGHV2-5*04
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac
>IGHV2-5*05
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*06
cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga
>IGHV2-5*08
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*09
caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-70*01
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*02
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*03
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*04
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac
>IGHV2-70*05
..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga
>IGHV2-70*06
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*07
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*08
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*09
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg
>IGHV2-70*10
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*11
cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*12
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-70*13
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac
>IGHV2-70D*04
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70D*14
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2/OR16-5*01
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag
>IGHV3-11*01
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-11*03
caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga
>IGHV3-11*04
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-11*05
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-11*06
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-13*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*02
gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga
>IGHV3-13*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*05
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-15*01
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*02
gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*03
gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*04
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*05
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*06
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*07
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*08
gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
>IGHV3-16*01
gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-16*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-19*01
acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-20*01
gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
>IGHV3-20*02
gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
>IGHV3-21*01
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-21*02
gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-21*03
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga
>IGHV3-21*04
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-22*01
gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
>IGHV3-22*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
>IGHV3-23*01
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*02
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*03
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*05
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa
>IGHV3-23D*01
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-25*01
gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-25*02
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-25*03
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga
>IGHV3-25*04
gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga
>IGHV3-25*05
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-29*01
gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
>IGHV3-30*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*04
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*05
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga
>IGHV3-30*06
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*07
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*08
caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
>IGHV3-30*09
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*10
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*11
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*12
caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*13
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*14
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*15
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*16
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*17
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*18
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30*19
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-2*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca
>IGHV3-30-22*01
gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc
>IGHV3-30-3*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-3*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-3*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-33*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg
>IGHV3-30-42*01
gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
>IGHV3-30-5*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-5*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-52*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg
>IGHV3-32*01
gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc
>IGHV3-33*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*02
caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-33*04
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*05
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*06
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-33-2*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca
>IGHV3-35*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa
>IGHV3-38*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata
>IGHV3-38*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
>IGHV3-38*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
>IGHV3-38-3*01
gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga
>IGHV3-43*01
gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-43*02
gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-43D*01
gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-47*01
gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga
>IGHV3-47*02
gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga
>IGHV3-48*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-48*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga
>IGHV3-48*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
>IGHV3-48*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-49*01
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*02
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*03
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*04
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*05
gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-52*01
gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg
>IGHV3-52*02
gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
>IGHV3-52*03
gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
>IGHV3-53*01
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-53*02
gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-53*03
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga
>IGHV3-53*04
gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga
>IGHV3-54*01
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
>IGHV3-54*02
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg
>IGHV3-54*04
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
>IGHV3-62*01
gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga
>IGHV3-63*01
gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt
>IGHV3-63*02
gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa
>IGHV3-64*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
>IGHV3-64*02
gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
>IGHV3-64*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-64*04
caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-64*05
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-64D*06
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-66*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-66*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
>IGHV3-66*03
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-66*04
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca
>IGHV3-69-1*01
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-69-1*02
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
>IGHV3-7*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-7*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga
>IGHV3-7*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-71*01
gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-71*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga
>IGHV3-71*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-72*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga
>IGHV3-72*02
....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat
>IGHV3-73*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
>IGHV3-73*02
gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
>IGHV3-74*01
gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
>IGHV3-74*02
gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga
>IGHV3-74*03
gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
>IGHV3-9*01
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
>IGHV3-9*02
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
>IGHV3-9*03
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata
>IGHV3-NL1*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3/OR15-7*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*02
gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*05
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga
>IGHV3/OR16-10*01
gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
>IGHV3/OR16-10*02
gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
>IGHV3/OR16-10*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga
>IGHV3/OR16-12*01
gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga
>IGHV3/OR16-13*01
gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
>IGHV3/OR16-14*01
gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
>IGHV3/OR16-15*01
gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa
>IGHV3/OR16-15*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga
>IGHV3/OR16-16*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga
>IGHV3/OR16-6*02
gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
>IGHV3/OR16-8*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa
>IGHV3/OR16-8*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca
>IGHV3/OR16-9*01
gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa
>IGHV4-28*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga
>IGHV4-28*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga
>IGHV4-28*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*06
caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*07
caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-30-2*01
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-30-2*02
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
>IGHV4-30-2*03
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca
>IGHV4-30-2*04
...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-30-2*05
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-2*06
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-30-4*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-30-4*04
caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg
>IGHV4-30-4*05
..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*06
...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-31*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*04
caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
>IGHV4-31*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg
>IGHV4-31*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-31*10
caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga
>IGHV4-34*01
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*02
caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*03
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*04
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*05
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*06
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*07
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*08
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg
>IGHV4-34*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-34*10
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-34*11
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
>IGHV4-34*12
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga
>IGHV4-34*13
...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-38-2*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga
>IGHV4-38-2*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-39*01
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca
>IGHV4-39*02
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga
>IGHV4-39*03
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
>IGHV4-39*04
..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac
>IGHV4-39*05
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-39*06
cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-39*07
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
>IGHV4-4*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*05
caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*06
...........................................................................tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-55*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-55*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-55*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg
>IGHV4-55*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
>IGHV4-55*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-55*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-59*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-59*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-59*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
>IGHV4-59*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-59*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-59*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
>IGHV4-59*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga
>IGHV4-59*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca
>IGHV4-59*09
...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg
>IGHV4-59*10
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-61*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-61*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-61*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-61*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg
>IGHV4-61*05
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga
>IGHV4-61*06
...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-61*07
...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca
>IGHV4-61*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV5-10-1*01
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-10-1*02
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca
>IGHV5-10-1*03
gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-10-1*04
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*01
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
>IGHV5-51*02
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
>IGHV5-51*03
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*04
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*05
.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg
>IGHV5-78*01
gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga
>IGHV6-1*01
caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
>IGHV6-1*02
caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
>IGHV7-34-1*01
...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
>IGHV7-34-1*02
...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
>IGHV7-4-1*01
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga
>IGHV7-4-1*02
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
>IGHV7-4-1*03
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg
>IGHV7-4-1*04
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
>IGHV7-4-1*05
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga
>IGHV7-40*03
ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga
>IGHV7-81*01
caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- a/baseline/IMGTVHreferencedataset20161215.fasta	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
->IGHV1-18*01
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-18*02
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc
>IGHV1-18*03
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1-18*04
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
>IGHV1-2*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*04
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*05
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
>IGHV1-24*01
caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
>IGHV1-3*01
caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga
>IGHV1-3*02
caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga
>IGHV1-38-4*01
caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca
>IGHV1-45*01
cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana
>IGHV1-45*02
cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata
>IGHV1-45*03
.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga
>IGHV1-46*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-46*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-46*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga
>IGHV1-58*01
caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
>IGHV1-58*02
caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
>IGHV1-68*01
caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata
>IGHV1-69*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*02
caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1-69*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc
>IGHV1-69*04
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*05
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1-69*06
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*07
.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag
>IGHV1-69*08
caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*09
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*10
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*11
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*12
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*13
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*14
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69-2*01
gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
>IGHV1-69-2*02
.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag
>IGHV1-69D*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-8*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
>IGHV1-8*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
>IGHV1-NL1*01
caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga
>IGHV1/OR15-1*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga
>IGHV1/OR15-1*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
>IGHV1/OR15-1*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga
>IGHV1/OR15-1*04
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
>IGHV1/OR15-2*01
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-2*02
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-2*03
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-3*01
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-3*02
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1/OR15-3*03
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-4*01
caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-5*01
.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
>IGHV1/OR15-5*02
caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
>IGHV1/OR15-9*01
caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga
>IGHV1/OR21-1*01
caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga
>IGHV2-10*01
caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac
>IGHV2-26*01
caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac
>IGHV2-5*01
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*02
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*03
................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt
>IGHV2-5*04
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac
>IGHV2-5*05
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*06
cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga
>IGHV2-5*08
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*09
caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-70*01
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*02
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*03
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*04
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac
>IGHV2-70*05
..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga
>IGHV2-70*06
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*07
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*08
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*09
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg
>IGHV2-70*10
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*11
cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*12
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-70*13
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac
>IGHV2-70D*04
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70D*14
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2/OR16-5*01
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag
>IGHV3-11*01
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-11*03
caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga
>IGHV3-11*04
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-11*05
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-11*06
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-13*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*02
gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga
>IGHV3-13*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*05
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-15*01
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*02
gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*03
gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*04
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*05
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*06
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*07
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*08
gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
>IGHV3-16*01
gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-16*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-19*01
acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-20*01
gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
>IGHV3-20*02
gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
>IGHV3-21*01
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-21*02
gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-21*03
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga
>IGHV3-21*04
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-22*01
gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
>IGHV3-22*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
>IGHV3-23*01
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*02
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*03
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*05
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa
>IGHV3-23D*01
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-25*01
gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-25*02
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-25*03
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga
>IGHV3-25*04
gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga
>IGHV3-25*05
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-29*01
gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
>IGHV3-30*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*04
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*05
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga
>IGHV3-30*06
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*07
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*08
caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
>IGHV3-30*09
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*10
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*11
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*12
caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*13
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*14
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*15
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*16
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*17
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*18
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30*19
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-2*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca
>IGHV3-30-22*01
gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc
>IGHV3-30-3*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-3*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-3*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-33*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg
>IGHV3-30-42*01
gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
>IGHV3-30-5*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-5*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-52*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg
>IGHV3-32*01
gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc
>IGHV3-33*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*02
caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-33*04
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*05
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*06
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-33-2*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca
>IGHV3-35*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa
>IGHV3-38*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata
>IGHV3-38*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
>IGHV3-38*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
>IGHV3-38-3*01
gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga
>IGHV3-43*01
gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-43*02
gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-43D*01
gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-47*01
gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga
>IGHV3-47*02
gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga
>IGHV3-48*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-48*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga
>IGHV3-48*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
>IGHV3-48*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-49*01
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*02
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*03
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*04
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*05
gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-52*01
gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg
>IGHV3-52*02
gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
>IGHV3-52*03
gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
>IGHV3-53*01
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-53*02
gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-53*03
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga
>IGHV3-53*04
gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga
>IGHV3-54*01
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
>IGHV3-54*02
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg
>IGHV3-54*04
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
>IGHV3-62*01
gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga
>IGHV3-63*01
gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt
>IGHV3-63*02
gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa
>IGHV3-64*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
>IGHV3-64*02
gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
>IGHV3-64*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-64*04
caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-64*05
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-64D*06
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-66*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-66*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
>IGHV3-66*03
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-66*04
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca
>IGHV3-69-1*01
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-69-1*02
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
>IGHV3-7*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-7*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga
>IGHV3-7*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-71*01
gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-71*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga
>IGHV3-71*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-72*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga
>IGHV3-72*02
....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat
>IGHV3-73*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
>IGHV3-73*02
gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
>IGHV3-74*01
gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
>IGHV3-74*02
gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga
>IGHV3-74*03
gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
>IGHV3-9*01
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
>IGHV3-9*02
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
>IGHV3-9*03
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata
>IGHV3-NL1*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3/OR15-7*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*02
gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*05
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga
>IGHV3/OR16-10*01
gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
>IGHV3/OR16-10*02
gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
>IGHV3/OR16-10*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga
>IGHV3/OR16-12*01
gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga
>IGHV3/OR16-13*01
gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
>IGHV3/OR16-14*01
gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
>IGHV3/OR16-15*01
gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa
>IGHV3/OR16-15*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga
>IGHV3/OR16-16*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga
>IGHV3/OR16-6*02
gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
>IGHV3/OR16-8*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa
>IGHV3/OR16-8*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca
>IGHV3/OR16-9*01
gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa
>IGHV4-28*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga
>IGHV4-28*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga
>IGHV4-28*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*06
caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*07
caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-30-2*01
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-30-2*02
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
>IGHV4-30-2*03
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca
>IGHV4-30-2*04
...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-30-2*05
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-2*06
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-30-4*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-30-4*04
caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg
>IGHV4-30-4*05
..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*06
...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-31*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*04
caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
>IGHV4-31*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg
>IGHV4-31*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-31*10
caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga
>IGHV4-34*01
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*02
caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*03
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*04
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*05
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*06
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*07
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*08
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg
>IGHV4-34*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-34*10
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-34*11
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
>IGHV4-34*12
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga
>IGHV4-34*13
...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-38-2*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga
>IGHV4-38-2*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-39*01
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca
>IGHV4-39*02
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga
>IGHV4-39*03
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
>IGHV4-39*04
..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac
>IGHV4-39*05
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-39*06
cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-39*07
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
>IGHV4-4*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*05
caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*06
...........................................................................tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-55*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-55*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-55*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg
>IGHV4-55*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
>IGHV4-55*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-55*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-59*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-59*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-59*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
>IGHV4-59*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-59*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-59*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
>IGHV4-59*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga
>IGHV4-59*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca
>IGHV4-59*09
...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg
>IGHV4-59*10
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-61*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-61*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-61*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-61*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg
>IGHV4-61*05
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga
>IGHV4-61*06
...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-61*07
...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca
>IGHV4-61*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV5-10-1*01
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-10-1*02
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca
>IGHV5-10-1*03
gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-10-1*04
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*01
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
>IGHV5-51*02
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
>IGHV5-51*03
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*04
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*05
.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg
>IGHV5-78*01
gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga
>IGHV6-1*01
caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
>IGHV6-1*02
caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
>IGHV7-34-1*01
...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
>IGHV7-34-1*02
...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
>IGHV7-4-1*01
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga
>IGHV7-4-1*02
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
>IGHV7-4-1*03
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg
>IGHV7-4-1*04
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
>IGHV7-4-1*05
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga
>IGHV7-40*03
ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga
>IGHV7-81*01
caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- a/baseline/baseline_url.txt	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-http://selection.med.yale.edu/baseline/
\ No newline at end of file
--- a/baseline/comparePDFs.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,225 +0,0 @@
-options("warn"=-1)
-
-#from http://selection.med.yale.edu/baseline/Archive/Baseline%20Version%201.3/Baseline_Functions_Version1.3.r
-# Compute p-value of two distributions
-compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
-#print(c(length(dens1),length(dens2)))
-if(length(dens1)>1 & length(dens2)>1 ){
-	dens1<-dens1/sum(dens1)
-	dens2<-dens2/sum(dens2)
-	cum2 <- cumsum(dens2)-dens2/2
-	tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i])))
-	#print(tmp)
-	if(tmp>0.5)tmp<-tmp-1
-	return( tmp )
-	}
-	else {
-	return(NA)
-	}
-	#return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N)
-}  
-
-
-require("grid")
-arg <- commandArgs(TRUE)
-#arg <- c("300143","4","5")
-arg[!arg=="clonal"]
-input <- arg[1]
-output <- arg[2]
-rowIDs <- as.numeric(  sapply(arg[3:(max(3,length(arg)))],function(x){ gsub("chkbx","",x) } )  )
-
-numbSeqs = length(rowIDs)
-
-if ( is.na(rowIDs[1]) | numbSeqs>10 ) {
-  stop( paste("Error: Please select between one and 10 seqeunces to compare.") )
-}
-
-#load( paste("output/",sessionID,".RData",sep="") )
-load( input )
-#input
-
-xMarks = seq(-20,20,length.out=4001)
-
-plot_grid_s<-function(pdf1,pdf2,Sample=100,cex=1,xlim=NULL,xMarks = seq(-20,20,length.out=4001)){
-  yMax = max(c(abs(as.numeric(unlist(listPDFs[pdf1]))),abs(as.numeric(unlist(listPDFs[pdf2]))),0),na.rm=T) * 1.1
-
-  if(length(xlim==2)){
-    xMin=xlim[1]
-    xMax=xlim[2]
-  } else {
-    xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1]
-    xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1]
-    xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])]
-    xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])]
-  
-    xMin_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][1]
-    xMin_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][1]
-    xMax_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001])]
-    xMax_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001])]
-  
-    xMin=min(c(xMin_CDR,xMin_FWR,xMin_CDR2,xMin_FWR2,0),na.rm=TRUE)
-    xMax=max(c(xMax_CDR,xMax_FWR,xMax_CDR2,xMax_FWR2,0),na.rm=TRUE)
-  }
-
-  sigma<-approx(xMarks,xout=seq(xMin,xMax,length.out=Sample))$x
-  grid.rect(gp = gpar(col=gray(0.6),fill="white",cex=cex))
-  x <- sigma
-  pushViewport(viewport(x=0.175,y=0.175,width=0.825,height=0.825,just=c("left","bottom"),default.units="npc"))
-  #pushViewport(plotViewport(c(1.8, 1.8, 0.25, 0.25)*cex))
-  pushViewport(dataViewport(x, c(yMax,-yMax),gp = gpar(cex=cex),extension=c(0.05)))
-  grid.polygon(c(0,0,1,1),c(0,0.5,0.5,0),gp=gpar(col=grey(0.95),fill=grey(0.95)),default.units="npc")
-  grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.9),fill=grey(0.9)),default.units="npc")
-  grid.rect()
-  grid.xaxis(gp = gpar(cex=cex/1.1))
-  yticks = pretty(c(-yMax,yMax),8)
-  yticks = yticks[yticks>(-yMax) & yticks<(yMax)]
-  grid.yaxis(at=yticks,label=abs(yticks),gp = gpar(cex=cex/1.1))
-  if(length(listPDFs[pdf1][[1]][["CDR"]])>1){
-    ycdr<-approx(xMarks,listPDFs[pdf1][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y
-    grid.lines(unit(x,"native"), unit(ycdr,"native"),gp=gpar(col=2,lwd=2))
-  }
-  if(length(listPDFs[pdf1][[1]][["FWR"]])>1){
-    yfwr<-approx(xMarks,listPDFs[pdf1][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y
-    grid.lines(unit(x,"native"), unit(-yfwr,"native"),gp=gpar(col=4,lwd=2))
-   }
-
-  if(length(listPDFs[pdf2][[1]][["CDR"]])>1){
-    ycdr2<-approx(xMarks,listPDFs[pdf2][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y
-    grid.lines(unit(x,"native"), unit(ycdr2,"native"),gp=gpar(col=2,lwd=2,lty=2))
-  }
-  if(length(listPDFs[pdf2][[1]][["FWR"]])>1){
-    yfwr2<-approx(xMarks,listPDFs[pdf2][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y
-    grid.lines(unit(x,"native"), unit(-yfwr2,"native"),gp=gpar(col=4,lwd=2,lty=2))
-   }
-
-  grid.lines(unit(c(0,1),"npc"), unit(c(0.5,0.5),"npc"),gp=gpar(col=1))
-  grid.lines(unit(c(0,0),"native"), unit(c(0,1),"npc"),gp=gpar(col=1,lwd=1,lty=3))
-
-  grid.text("All", x = unit(-2.5, "lines"), rot = 90,gp = gpar(cex=cex))
-  grid.text( expression(paste("Selection Strength (", Sigma, ")", sep="")) , y = unit(-2.5, "lines"),gp = gpar(cex=cex))
-  
-  if(pdf1==pdf2 & length(listPDFs[pdf2][[1]][["FWR"]])>1 & length(listPDFs[pdf2][[1]][["CDR"]])>1 ){
-    pCDRFWR = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf1]][["FWR"]])       
-    pval = formatC(as.numeric(pCDRFWR),digits=3)
-    grid.text( substitute(expression(paste(P[CDR/FWR], "=", x, sep="")),list(x=pval))[[2]] , x = unit(0.02, "npc"),y = unit(0.98, "npc"),just=c("left", "top"),gp = gpar(cex=cex*1.2))
-  }
-  grid.text(paste("CDR"), x = unit(0.98, "npc"),y = unit(0.98, "npc"),just=c("right", "top"),gp = gpar(cex=cex*1.5))
-  grid.text(paste("FWR"), x = unit(0.98, "npc"),y = unit(0.02, "npc"),just=c("right", "bottom"),gp = gpar(cex=cex*1.5))
-  popViewport(2)
-}
-#plot_grid_s(1)
-
-
-p2col<-function(p=0.01){
-  breaks=c(-.51,-0.1,-.05,-0.01,-0.005,0,0.005,0.01,0.05,0.1,0.51)
-  i<-findInterval(p,breaks)
-  cols = c( rgb(0.8,1,0.8), rgb(0.6,1,0.6), rgb(0.4,1,0.4), rgb(0.2,1,0.2) , rgb(0,1,0),
-            rgb(1,0,0), rgb(1,.2,.2), rgb(1,.4,.4), rgb(1,.6,.6) , rgb(1,.8,.8) )
-  return(cols[i])
-}
-
-
-plot_pvals<-function(pdf1,pdf2,cex=1,upper=TRUE){
-  if(upper){
-    pCDR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf2]][["FWR"]])       
-    pFWR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["FWR"]], dens2=listPDFs[[pdf2]][["FWR"]])
-    pFWR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["FWR"]])       
-    pCDR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["CDR"]])
-    grid.polygon(c(0.5,0.5,1,1),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1FWR2),fill=p2col(pFWR1FWR2)),default.units="npc")
-    grid.polygon(c(0.5,0.5,1,1),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1FWR2),fill=p2col(pCDR1FWR2)),default.units="npc")
-    grid.polygon(c(0.5,0.5,0,0),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1CDR2),fill=p2col(pCDR1CDR2)),default.units="npc")
-    grid.polygon(c(0.5,0.5,0,0),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1CDR2),fill=p2col(pFWR1CDR2)),default.units="npc")
-         
-    grid.lines(c(0,1),0.5,gp=gpar(lty=2,col=gray(0.925)))
-    grid.lines(0.5,c(0,1),gp=gpar(lty=2,col=gray(0.925)))
-
-    grid.text(formatC(as.numeric(pFWR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
-    grid.text(formatC(as.numeric(pCDR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
-    grid.text(formatC(as.numeric(pCDR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
-    grid.text(formatC(as.numeric(pFWR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
-    
-           
- #   grid.text(paste("P = ",formatC(pCDRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.98, "npc"),just=c("center", "top"),gp = gpar(cex=cex))
- #   grid.text(paste("P = ",formatC(pFWRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.02, "npc"),just=c("center", "bottom"),gp = gpar(cex=cex))
-  }
-  else{
-  }
-}
-
-
-##################################################################################
-################## The whole OCD's matrix ########################################
-##################################################################################
-
-#pdf(width=4*numbSeqs+1/3,height=4*numbSeqs+1/3)
-pdf( output ,width=4*numbSeqs+1/3,height=4*numbSeqs+1/3) 
-
-pushViewport(viewport(x=0.02,y=0.02,just = c("left", "bottom"),w =0.96,height=0.96,layout = grid.layout(numbSeqs+1,numbSeqs+1,widths=unit.c(unit(rep(1,numbSeqs),"null"),unit(4,"lines")),heights=unit.c(unit(4,"lines"),unit(rep(1,numbSeqs),"null")))))
-
-for( seqOne in 1:numbSeqs+1){
-  pushViewport(viewport(layout.pos.col = seqOne-1, layout.pos.row = 1))
-  if(seqOne>2){ 
-    grid.polygon(c(0,0,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")
-    grid.polygon(c(1,1,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")
-    grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.5)),default.units="npc")
-       
-    grid.text(y=.25,x=0.75,"FWR",gp = gpar(cex=1.5),just="center")
-    grid.text(y=.25,x=0.25,"CDR",gp = gpar(cex=1.5),just="center")
-  }
-  grid.rect(gp = gpar(col=grey(0.9)))
-  grid.text(y=.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),just="center")
-  popViewport(1)
-}
-
-for( seqOne in 1:numbSeqs+1){
-  pushViewport(viewport(layout.pos.row = seqOne, layout.pos.col = numbSeqs+1))
-  if(seqOne<=numbSeqs){   
-    grid.polygon(c(0,0.5,0.5,0),c(0,0,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")
-    grid.polygon(c(0,0.5,0.5,0),c(1,1,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")
-    grid.polygon(c(1,0.5,0.5,1),c(0,0,1,1),gp=gpar(col=grey(0.5)),default.units="npc")
-    grid.text(x=.25,y=0.75,"CDR",gp = gpar(cex=1.5),just="center",rot=270)
-    grid.text(x=.25,y=0.25,"FWR",gp = gpar(cex=1.5),just="center",rot=270)
-  }
-  grid.rect(gp = gpar(col=grey(0.9)))
-  grid.text(x=0.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),rot=270,just="center")
-  popViewport(1)
-}
-
-for( seqOne in 1:numbSeqs+1){
-  for(seqTwo in 1:numbSeqs+1){
-    pushViewport(viewport(layout.pos.col = seqTwo-1, layout.pos.row = seqOne))
-    if(seqTwo>seqOne){
-      plot_pvals(rowIDs[seqOne-1],rowIDs[seqTwo-1],cex=2)
-      grid.rect()
-    }    
-    popViewport(1)
-  }
-}
-   
-
-xMin=0
-xMax=0.01
-for(pdf1 in rowIDs){
-  xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1]
-  xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1]
-  xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])]
-  xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])]
-  xMin=min(c(xMin_CDR,xMin_FWR,xMin),na.rm=TRUE)
-  xMax=max(c(xMax_CDR,xMax_FWR,xMax),na.rm=TRUE)
-}
-
-
-
-for(i in 1:numbSeqs+1){
-  for(j in (i-1):numbSeqs){    
-    pushViewport(viewport(layout.pos.col = i-1, layout.pos.row = j+1))
-    grid.rect()
-    plot_grid_s(rowIDs[i-1],rowIDs[j],cex=1)
-    popViewport(1)
-  }
-}
-
-dev.off() 
-
-cat("Success", paste(rowIDs,collapse="_"),sep=":")
-
--- a/baseline/filter.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-# Add the summary AA.JUNCTION and the V/D/J gene names to the gapped table, drop duplicates
-# on the user-selected columns and drop sequences without a usable Functionality.
-# Arguments: summary file, gapped file, comma separated column names, output file.
-arg <- commandArgs(TRUE)
-summaryfile <- arg[1]
-gappedfile <- arg[2]
-selection <- arg[3]
-output <- arg[4]
-print(paste("selection = ", selection))
-
-summarydat <- read.table(summaryfile, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote = "")
-gappeddat <- read.table(gappedfile, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote = "")
-
-# Rename the "V.DOMAIN."-prefixed functionality columns to the plain names used below.
-fix_column_names <- function(df){
-    if("V.DOMAIN.Functionality" %in% names(df)){
-        names(df)[names(df) == "V.DOMAIN.Functionality"] <- "Functionality"
-        print("found V.DOMAIN.Functionality, changed")
-    }
-    if("V.DOMAIN.Functionality.comment" %in% names(df)){
-        names(df)[names(df) == "V.DOMAIN.Functionality.comment"] <- "Functionality.comment"
-        print("found V.DOMAIN.Functionality.comment, changed")
-    }
-    return(df)
-}
-
-gappeddat <- fix_column_names(gappeddat)
-
-# The two tables are combined by row position, so they have to describe the same rows.
-if(nrow(gappeddat) != nrow(summarydat)) stop("summary and gapped files have a different number of rows")
-dat <- cbind(gappeddat, summarydat$AA.JUNCTION)
-colnames(dat)[length(dat)] <- "AA.JUNCTION"
-
-# Gene name = allele string without the leading "Homsap " and without everything from "*" on.
-dat$VGene <- gsub("^Homsap ", "", dat$V.GENE.and.allele)
-dat$VGene <- gsub("[*].*", "", dat$VGene)
-dat$DGene <- gsub("^Homsap ", "", dat$D.GENE.and.allele)
-dat$DGene <- gsub("[*].*", "", dat$DGene)
-dat$JGene <- gsub("^Homsap ", "", dat$J.GENE.and.allele)
-dat$JGene <- gsub("[*].*", "", dat$JGene)
-
-str(dat) # str() prints by itself; wrapping it in print() only added a stray NULL
-
-selected <- unlist(strsplit(selection, ","))
-unknown <- setdiff(selected, names(dat))
-if(length(unknown) > 0) stop(paste("unknown column(s) in selection:", paste(unknown, collapse=", ")))
-dat$past <- do.call(paste, c(dat[selected], sep = ":"))
-dat <- dat[!duplicated(dat$past), ]
-print(paste("Sequences remaining after duplicate filter:", nrow(dat)))
-
-# %in% never returns NA, so a row with a missing Functionality is kept instead of turning into an all-NA row.
-dat <- dat[!(dat$Functionality %in% c("No results", "unproductive")),]
-print(paste("Sequences remaining after functionality filter:", nrow(dat)))
-print(paste("Sequences remaining:", nrow(dat)))
-write.table(x=dat, file=output, sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
--- a/baseline/script_imgt.py	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-#import xlrd #avoid dep
-import argparse
-import re
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
-parser.add_argument("--ref", help="Reference file")
-parser.add_argument("--output", help="Output file")
-parser.add_argument("--id", help="ID to be used at the '>>>' line in the output")
-
-args = parser.parse_args()
-
-print "script_imgt.py"
-print "input:", args.input
-print "ref:", args.ref
-print "output:", args.output
-print "id:", args.id
-
-refdic = dict()
-with open(args.ref, 'rU') as ref:
-	currentSeq = ""
-	currentId = ""
-	for line in ref:
-		if line.startswith(">"):
-			if currentSeq is not "" and currentId is not "":
-				refdic[currentId[1:]] = currentSeq
-			currentId = line.rstrip()
-			currentSeq = ""
-		else:
-			currentSeq += line.rstrip()
-	refdic[currentId[1:]] = currentSeq
-
-print "Have", str(len(refdic)), "reference sequences"
-
-vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
-#						r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
-#						r"(IGKV[0-3]D?-[0-9]{1,2})",
-#						r"(IGLV[0-9]-[0-9]{1,2})",
-#						r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)",
-#						r"(TRGV[234589])",
-#						r"(TRDV[1-3])"]
-
-#vPattern = re.compile(r"|".join(vPattern))
-vPattern = re.compile("|".join(vPattern))
-
-def filterGene(s, pattern):
-    if type(s) is not str:
-        return None
-    res = pattern.search(s)
-    if res:
-        return res.group(0)
-    return None
-
-
-
-currentSeq = ""
-currentId = ""
-first=True
-with open(args.input, 'r') as i:
-	with open(args.output, 'a') as o:
-		o.write(">>>" + args.id + "\n")
-		outputdic = dict()
-		for line in i:
-			if first:
-				first = False
-				continue
-			linesplt = line.split("\t")
-			ref = filterGene(linesplt[1], vPattern)
-			if not ref or not linesplt[2].rstrip():
-				continue
-			if ref in outputdic:
-				outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
-			else:
-				outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
-		#print outputdic
-		
-		for k in outputdic.keys():
-			if k in refdic:
-				o.write(">>" + k + "\n")
-				o.write(refdic[k] + "\n")
-				for seq in outputdic[k]:
-					#print seq
-					o.write(">" + seq[0] + "\n")
-					o.write(seq[1] + "\n")
-			else:
-				print k + " not in reference, skipping " + k
--- a/baseline/script_xlsx.py	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-import xlrd
-import argparse
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
-parser.add_argument("--ref", help="Reference file")
-parser.add_argument("--output", help="Output file")
-
-args = parser.parse_args()
-
-gene_column = 6
-id_column = 7
-seq_column = 8
-LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
-
-
-refdic = dict()
-with open(args.ref, 'r') as ref:
-	currentSeq = ""
-	currentId = ""
-	for line in ref.readlines():
-		if line[0] is ">":
-			if currentSeq is not "" and currentId is not "":
-				refdic[currentId[1:]] = currentSeq
-			currentId = line.rstrip()
-			currentSeq = ""
-		else:
-			currentSeq += line.rstrip()
-	refdic[currentId[1:]] = currentSeq
-	
-currentSeq = ""
-currentId = ""
-with xlrd.open_workbook(args.input, 'r') as wb:
-	with open(args.output, 'a') as o:
-		for sheet in wb.sheets():
-			if sheet.cell(1,gene_column).value.find("IGHV") < 0:
-				print "Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name
-				continue
-			o.write(">>>" + sheet.name + "\n")
-			outputdic = dict()
-			for rowindex in range(1, sheet.nrows):
-				ref = sheet.cell(rowindex, gene_column).value.replace(">", "")
-				if ref in outputdic:
-					outputdic[ref] += [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
-				else:
-					outputdic[ref] = [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
-			#print outputdic
-			
-			for k in outputdic.keys():
-				if k in refdic:
-					o.write(">>" + k + "\n")
-					o.write(refdic[k] + "\n")
-					for seq in outputdic[k]:
-						#print seq
-						o.write(">" + seq[0] + "\n")
-						o.write(seq[1] + "\n")
-				else:
-					print k + " not in reference, skipping " + k
--- a/baseline/wrapper.sh	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-#!/bin/bash
-dir="$(cd "$(dirname "$0")" && pwd)"
-
-testID=$1
-species=$2
-substitutionModel=$3
-mutabilityModel=$4
-clonal=$5
-fixIndels=$6
-region=$7
-inputs=$8
-inputs=($inputs)
-IDs=$9
-IDs=($IDs)
-ref=${10}
-output=${11}
-selection=${12}
-output_table=${13}
-outID="result"
-
-echo "$PWD"
-
-echo "testID = $testID"
-echo "species = $species"
-echo "substitutionModel = $substitutionModel"
-echo "mutabilityModel = $mutabilityModel"
-echo "clonal = $clonal"
-echo "fixIndels = $fixIndels"
-echo "region = $region"
-echo "inputs = ${inputs[@]}"
-echo "IDs = ${IDs[@]}"
-echo "ref = $ref"
-echo "output = $output"
-echo "outID = $outID"
-
-fasta="$PWD/baseline.fasta"
-
-
-count=0
-for current in ${inputs[@]}
-do
-	f=$(file $current)
-	zipType="Zip archive"
-	if [[ "$f" == *"Zip archive"* ]] || [[ "$f" == *"XZ compressed data"* ]]
-	then
-		id=${IDs[$count]}
-		echo "id=$id"
-		if [[ "$f" == *"Zip archive"* ]] ; then
-			echo "Zip archive"
-			echo "unzip $input -d $PWD/files/"
-			unzip $current -d "$PWD/$id/"
-		elif [[ "$f" == *"XZ compressed data"* ]] ; then
-			echo "ZX archive"
-			echo "tar -xJf $input -C $PWD/files/"
-			mkdir -p "$PWD/$id/files"
-			tar -xJf $current -C "$PWD/$id/files/"
-		fi
-		filtered="$PWD/filtered_${id}.txt"
-		imgt_1_file="`find $PWD/$id -name '1_*.txt'`"
-		imgt_2_file="`find $PWD/$id -name '2_*.txt'`"
-		echo "1_Summary file: ${imgt_1_file}"
-		echo "2_IMGT-gapped file: ${imgt_2_file}"
-		echo "filter.r for $id"
-		Rscript $dir/filter.r ${imgt_1_file} ${imgt_2_file} "$selection" $filtered 2>&1
-		
-		final="$PWD/final_${id}.txt"
-		cat $filtered | cut -f2,4,7 > $final
-		python $dir/script_imgt.py --input $final --ref $ref --output $fasta --id $id
-	else
-		python $dir/script_xlsx.py --input $current --ref $ref --output $fasta
-	fi
-	count=$((count+1))
-done
-workdir="$PWD"
-cd $dir
-echo "file: ${inputs[0]}"
-#Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region ${inputs[0]} $workdir/ $outID 2>&1
-Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region $fasta $workdir/ $outID 2>&1
-
-echo "$workdir/${outID}.txt"
-
-rows=`tail -n +2 $workdir/${outID}.txt | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '`
-rows=($rows)
-#unset rows[${#rows[@]}-1]
-
-cd $dir
-Rscript --verbose $dir/comparePDFs.r $workdir/${outID}.RData $output ${rows[@]} 2>&1
-cp $workdir/result.txt ${output_table}
-
-
-
-
--- a/change_o/change_o_url.txt	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-https://changeo.readthedocs.io/en/version-0.4.4/
\ No newline at end of file
--- a/change_o/define_clones.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-# Summarise clone sizes: per clone size, the number of clones and the number of sequences they hold.
-cli.args <- commandArgs(trailingOnly = TRUE)
-clone.file <- cli.args[1]
-summary.file <- cli.args[2]
-
-clone.table <- read.table(clone.file, header = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
-
-# First tabulation: sequences per clone. Second tabulation: clones per clone size.
-per.clone <- data.frame(table(clone.table$CLONE))
-per.size <- data.frame(table(per.clone$Freq))
-# Var1 is a factor holding the sizes, so it goes through as.character() to get the number back.
-per.size$final <- as.numeric(as.character(per.size$Var1)) * as.numeric(per.size$Freq)
-names(per.size) <- c("Clone size", "Nr of clones", "Nr of sequences")
-
-write.table(x = per.size, file = summary.file, sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
--- a/change_o/define_clones.sh	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-#!/bin/bash
-dir="$(cd "$(dirname "$0")" && pwd)"
-
-#define_clones.sh $input $noparse $scores $regions $out_file
-
-type=$1
-input=$2
-
-mkdir -p $PWD/outdir
-
-cp $input $PWD/input.tab #file has to have a ".tab" extension
-
-if [ "bygroup" == "$type" ] ; then	
-	mode=$3
-	act=$4
-	model=$5
-	norm=$6
-	sym=$7
-	link=$8
-	dist=$9
-	output=${10}
-	output2=${11}
-	
-	DefineClones.py -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link
-	
-	Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1
-else
-	method=$3
-	output=$4
-	output2=$5
-	
-	DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method
-	
-	Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1
-fi
-
-cp $PWD/outdir/output_clone-pass.tab $output
-
-rm -rf $PWD/outdir/
--- a/change_o/makedb.sh	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#!/bin/bash
-dir="$(cd "$(dirname "$0")" && pwd)"
-
-input=$1
-noparse=$2
-scores=$3
-regions=$4
-output=$5
-
-if [ "true" == "$noparse" ] ; then
-	noparse="--noparse"
-else
-	noparse=""
-fi
-
-if [ "true" == "$scores" ] ; then
-	scores="--scores"
-else
-	scores=""
-fi
-
-if [ "true" == "$regions" ] ; then
-	regions="--regions"
-else
-	regions=""
-fi
-
-mkdir $PWD/outdir
-
-echo "makedb: $PWD/outdir"
-
-MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions
-
-mv $PWD/outdir/output_db-pass.tab $output
-
-rm -rf $PWD/outdir/
--- a/change_o/select_first_in_clone.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-# Keep the first row of every clone: rows whose CLONE value was already seen are dropped.
-args <- commandArgs(trailingOnly = TRUE)
-input.file <- args[1]
-output.file <- args[2]
-
-print("select_first_in_clone.r")
-print(input.file)
-print(output.file)
-
-input <- read.table(input.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
-# Without a CLONE column duplicated(NULL) is empty and every row would silently be dropped.
-if(is.null(input$CLONE)) stop("input file doesn't contain the 'CLONE' column")
-input <- input[!duplicated(input$CLONE),]
-names(input)[1] <- "Sequence.ID" # the first column is written out under this name
-
-write.table(input, output.file, quote=FALSE, sep="\t", row.names=FALSE, col.names=TRUE, na="")
--- a/check_unique_id.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-# Make 'Sequence ID' unique across a set of files: when the first file contains duplicated IDs,
-# "_<Sequence number>" is appended to the IDs of every file given, rewriting the files in place.
-args <- commandArgs(trailingOnly = TRUE) #the first argument is the file that is checked for duplicates
-
-read_imgt <- function(path){
-	read.table(path, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="", check.names=FALSE)
-}
-
-current <- read_imgt(args[1])
-missing_columns <- setdiff(c("Sequence number", "Sequence ID"), names(current))
-if(length(missing_columns) > 0){
-	stop(paste0("First argument doesn't contain the '", paste(missing_columns, collapse="' and '"), "' column"))
-}
-
-# table() leaves NA out, so only real IDs that occur more than once count as duplicates
-check <- any(table(current[,"Sequence ID"]) > 1)
-if(check){
-	print("Sequence.ID is not unique for every sequence, adding sequence number to IDs")
-	for(current_file in args){
-		print(paste("Appending 'Sequence number' column to 'Sequence ID' column in", current_file))
-		current <- read_imgt(current_file)
-		current[,"Sequence ID"] <- paste(current[,"Sequence ID"], current[,"Sequence number"], sep="_")
-		write.table(x = current, file = current_file, quote = FALSE, sep = "\t", na = "", row.names = FALSE, col.names = TRUE)
-	}
-}
--- a/datatypes_conf.xml	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<datatypes>
-    <registration>
-        <datatype extension="imgt_archive" type="galaxy.datatypes.binary:CompressedArchive" display_in_upload="True" subclass="True"/>
-    </registration>
-</datatypes>
--- a/gene_identification.py	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,226 +0,0 @@
-import re
-import argparse
-import time
-starttime= int(time.time() * 1000)
-# Command line: --input is the table to read, --output the annotation table to write.
-parser = argparse.ArgumentParser()
-parser.add_argument("--input", help="The 1_Summary file from an IMGT zip file")
-parser.add_argument("--output", help="The annotated output file to be merged back with the summary file")
-
-args = parser.parse_args()
-
-infile = args.input
-#infile = "test_VH-Ca_Cg_25nt/1_Summary_test_VH-Ca_Cg_25nt_241013.txt"
-output = args.output
-#outfile = "identified.txt"
-
-dic = dict()
-total = 0
-
-# First pass over the input: the header line locates the "Sequence ID" and "Sequence" columns.
-first = True
-IDIndex = 0
-seqIndex = 0
-
-with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence
-	for line in f:
-		total += 1
-		linesplt = line.split("\t")
-		if first:
-			print "linesplt", linesplt
-			IDIndex = linesplt.index("Sequence ID")
-			seqIndex = linesplt.index("Sequence")
-			first = False
-			continue
-		
-		ID = linesplt[IDIndex]
-		if len(linesplt) < 28: #weird rows without a sequence
-			dic[ID] = ""
-		else:
-			dic[ID] = linesplt[seqIndex]
-			
-print "Number of input sequences:", len(dic)
-
-#old cm sequence: gggagtgcatccgccccaacccttttccccctcgtctcctgtgagaattccc
-#old cg sequence: ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggccctgggctgcctggtcaaggactacttccccgaaccggtgacggtgtcgtggaactcaggcgccctgaccag
-# NOTE(review): the two "lambda/kappa" comments below look stale - the keys ca/cg/ce/cm are reported as IGA/IGG/IGE/IGM when the output is written.
-#lambda/kappa reference sequence
-searchstrings = {"ca": "catccccgaccagccccaaggtcttcccgctgagcctctgcagcacccagccagatgggaacgtggtcatcgcctgcctgg",
-                 "cg": "ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggcc",
-                 "ce": "gcctccacacagagcccatccgtcttccccttgacccgctgctgcaaaaacattccctcc",
-                 "cm": "gggagtgcatccgccccaacc"} #new (shorter) cm sequence
-
-compiledregex = {"ca": [],
-                 "cg": [],
-                 "ce": [],
-                 "cm": []}
-# Each dict below maps a 0-based position in the matching search string to the nucleotide expected for that variant.
-#lambda/kappa reference sequence variable nucleotides
-ca1 = {38: 't', 39: 'g', 48: 'a', 49: 'g', 51: 'c', 68: 'a', 73: 'c'}
-ca2 = {38: 'g', 39: 'a', 48: 'c', 49: 'c', 51: 'a', 68: 'g', 73: 'a'}
-cg1 = {0: 'c', 33: 'a', 38: 'c', 44: 'a', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'}
-cg2 = {0: 'c', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'g', 132: 't'}
-cg3 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'}
-cg4 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'c', 132: 'c'}
-
-#remove last snp for shorter cg sequence --- note, also change varsInCG
-del cg1[132]
-del cg2[132]
-del cg3[132]
-del cg4[132]
-
-#reference sequences are cut into smaller parts of 'chunklength' length, and with 'chunklength' / 2 overlap
-chunklength = 8
-# NOTE: "chunklength / 2" below relies on Python 2 integer division; under Python 3 range() would receive a float.
-#create the chunks of the reference sequence with regular expressions for the variable nucleotides
-for i in range(0, len(searchstrings["ca"]) - chunklength, chunklength / 2):
-  pos = i
-  chunk = searchstrings["ca"][i:i+chunklength]
-  result = ""
-  varsInResult = 0
-  for c in chunk:
-    if pos in ca1.keys():
-      varsInResult += 1
-      result += "[" + ca1[pos] + ca2[pos] + "]"
-    else:
-      result += c
-    pos += 1
-  compiledregex["ca"].append((re.compile(result), varsInResult))
-# cg: same construction as ca, with the distinct nucleotides of cg1..cg4 in the character class.
-for i in range(0, len(searchstrings["cg"]) - chunklength, chunklength / 2):
-  pos = i
-  chunk = searchstrings["cg"][i:i+chunklength]
-  result = ""
-  varsInResult = 0
-  for c in chunk:
-    if pos in cg1.keys():
-      varsInResult += 1
-      result += "[" + "".join(set([cg1[pos], cg2[pos], cg3[pos], cg4[pos]])) + "]"
-    else:
-      result += c
-    pos += 1
-  compiledregex["cg"].append((re.compile(result), varsInResult))
-# cm and ce have no variable positions: the chunks are used verbatim and flagged with False.
-for i in range(0, len(searchstrings["cm"]) - chunklength, chunklength / 2):
-  compiledregex["cm"].append((re.compile(searchstrings["cm"][i:i+chunklength]), False))
-# NOTE(review): only this loop adds 1 to the range bound, which lets a last chunk start exactly at len - chunklength - confirm this is intended.
-for i in range(0, len(searchstrings["ce"]) - chunklength + 1, chunklength / 2):
-  compiledregex["ce"].append((re.compile(searchstrings["ce"][i:i+chunklength]), False))
-
-def removeAndReturnMaxIndex(x): #simplifies a list comprehension
-  m = max(x)
-  index = x.index(m)
-  x[index] = 0
-  return index
-# Side effect of the helper above: the maximum of x is overwritten with 0, so repeated calls return the next-highest index.
-# For every gene key and every sequence: search the reference chunks in order, count the hits and record likely start offsets.
-start_location = dict()
-hits = dict()
-alltotal = 0
-for key in compiledregex.keys(): #for ca/cg/cm/ce
-	regularexpressions = compiledregex[key] #get the compiled regular expressions
-	for ID in dic.keys()[0:]: #for every ID
-		if ID not in hits.keys(): #ensure that the dictionairy that keeps track of the hits for every gene exists
-			hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ce_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0}
-		currentIDHits = hits[ID]
-		seq = dic[ID]
-		lastindex = 0
-		start_zero = len(searchstrings[key]) #allows the reference sequence to start before search sequence (start_locations of < 0)
-		start = [0] * (len(seq) + start_zero) #one counter per candidate start offset, shifted by start_zero
-		for i, regexp in enumerate(regularexpressions): #for every regular expression
-			relativeStartLocation = lastindex - (chunklength / 2) * i #NOTE(review): uses lastindex from before the current match is found - confirm this is intended
-			if relativeStartLocation >= len(seq):
-				break
-			regex, hasVar = regexp
-			matches = regex.finditer(seq[lastindex:]) #the search resumes at the start of the previous hit
-			for match in matches: #for every match with the current regex, only uses the first hit because of the break at the end of this loop
-				lastindex += match.start() #match.start() is relative to the slice, lastindex stays absolute
-				start[relativeStartLocation + start_zero] += 1
-				if hasVar: #if the regex has a variable nt in it
-					chunkstart = chunklength / 2 * i #where in the reference does this chunk start
-					chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end
-					if key == "ca": #just calculate the variable nt score for 'ca', cheaper
-						currentIDHits["ca1"] += len([1 for x in ca1 if chunkstart <= x < chunkend and ca1[x] == seq[lastindex + x - chunkstart]])
-						currentIDHits["ca2"] += len([1 for x in ca2 if chunkstart <= x < chunkend and ca2[x] == seq[lastindex + x - chunkstart]])
-					elif key == "cg": #just calculate the variable nt score for 'cg', cheaper
-						currentIDHits["cg1"] += len([1 for x in cg1 if chunkstart <= x < chunkend and cg1[x] == seq[lastindex + x - chunkstart]])
-						currentIDHits["cg2"] += len([1 for x in cg2 if chunkstart <= x < chunkend and cg2[x] == seq[lastindex + x - chunkstart]])
-						currentIDHits["cg3"] += len([1 for x in cg3 if chunkstart <= x < chunkend and cg3[x] == seq[lastindex + x - chunkstart]])
-						currentIDHits["cg4"] += len([1 for x in cg4 if chunkstart <= x < chunkend and cg4[x] == seq[lastindex + x - chunkstart]])
-					else: #key == "cm" #no variable regions in 'cm' or 'ce'
-						pass
-				break #this only breaks when there was a match with the regex, breaking means the 'else:' clause is skipped
-			else: #only runs if there were no hits
-				continue
-			#print "found ", regex.pattern , "at", lastindex, "adding one to", (lastindex - chunklength / 2 * i), "to the start array of", ID, "gene", key, "it's now:", start[lastindex - chunklength / 2 * i]
-			currentIDHits[key + "_hits"] += 1
-		start_location[ID + "_" + key] = str([(removeAndReturnMaxIndex(start) + 1 - start_zero) for x in range(5) if len(start) > 0 and max(start) > 1]) #up to five offsets, best first, taken while the best remaining count is above 1
-		#start_location[ID + "_" + key] = str(start.index(max(start)))
-
-# Denominators for the nt_hit_percentage column; varsInCM and varsInCE are not read again in this file.
-varsInCA = float(len(ca1.keys()) * 2)
-varsInCG = float(len(cg1.keys()) * 2) - 2 # -2 because the sliding window doesn't hit the first and last nt twice
-varsInCM = 0
-varsInCE = 0
-
-def round_int(val):
-	return int(round(val))
-
-first = True
-seq_write_count=0
-with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence
-	with open(output, 'w') as o:
-		for line in f:
-			total += 1
-			if first:
-				o.write("Sequence ID\tbest_match\tnt_hit_percentage\tchunk_hit_percentage\tstart_locations\n")
-				first = False
-				continue
-			linesplt = line.split("\t")
-			if linesplt[2] == "No results":
-				pass
-			ID = linesplt[1]
-			currentIDHits = hits[ID]
-			possibleca = float(len(compiledregex["ca"]))
-			possiblecg = float(len(compiledregex["cg"]))
-			possiblecm = float(len(compiledregex["cm"]))
-			possiblece = float(len(compiledregex["ce"]))
-			cahits = currentIDHits["ca_hits"]
-			cghits = currentIDHits["cg_hits"]
-			cmhits = currentIDHits["cm_hits"]
-			cehits = currentIDHits["ce_hits"]
-			if cahits >= cghits and cahits >= cmhits and cahits >= cehits: #its a ca gene
-				ca1hits = currentIDHits["ca1"]
-				ca2hits = currentIDHits["ca2"]
-				if ca1hits >= ca2hits:
-					o.write(ID + "\tIGA1\t" + str(round_int(ca1hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n")
-				else:
-					o.write(ID + "\tIGA2\t" + str(round_int(ca2hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n")
-			elif cghits >= cahits and cghits >= cmhits and cghits >= cehits: #its a cg gene
-				cg1hits = currentIDHits["cg1"]
-				cg2hits = currentIDHits["cg2"]
-				cg3hits = currentIDHits["cg3"]
-				cg4hits = currentIDHits["cg4"]
-				if cg1hits >= cg2hits and cg1hits >= cg3hits and cg1hits >= cg4hits: #cg1 gene
-					o.write(ID + "\tIGG1\t" + str(round_int(cg1hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
-				elif cg2hits >= cg1hits and cg2hits >= cg3hits and cg2hits >= cg4hits: #cg2 gene
-					o.write(ID + "\tIGG2\t" + str(round_int(cg2hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
-				elif cg3hits >= cg1hits and cg3hits >= cg2hits and cg3hits >= cg4hits: #cg3 gene
-					o.write(ID + "\tIGG3\t" + str(round_int(cg3hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
-				else: #cg4 gene
-					o.write(ID + "\tIGG4\t" + str(round_int(cg4hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
-			else: #its a cm or ce gene
-				if cmhits >= cehits:
-					o.write(ID + "\tIGM\t100\t" + str(round_int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cm"] + "\n")
-				else:
-					o.write(ID + "\tIGE\t100\t" + str(round_int(cehits / possiblece * 100)) + "\t" + start_location[ID + "_ce"] + "\n")
-			seq_write_count += 1
-
-print "Time: %i" % (int(time.time() * 1000) - starttime)
-
-print "Number of sequences written to file:", seq_write_count
-
-
-
-
-
--- a/imgt_loader.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-# Combine the summary, sequence (aa) and junction tables of one sample into a single
-# tab-separated table with renamed columns and normalised "VDJ Frame" labels.
-# Usage: Rscript imgt_loader.r <summ.file> <aa.file> <junction.file> <out.file>
-args <- commandArgs(trailingOnly = TRUE)
-
-summ.file <- args[1]
-aa.file <- args[2]
-junction.file <- args[3]
-out.file <- args[4]
-
-summ <- read.table(summ.file, sep="\t", header=TRUE, quote="", fill=TRUE)
-aa <- read.table(aa.file, sep="\t", header=TRUE, quote="", fill=TRUE)
-junction <- read.table(junction.file, sep="\t", header=TRUE, quote="", fill=TRUE)
-
-# Rename the "V.DOMAIN."-prefixed functionality columns to the plain names used below.
-fix_column_names <- function(df){
-    if("V.DOMAIN.Functionality" %in% names(df)){
-        names(df)[names(df) == "V.DOMAIN.Functionality"] <- "Functionality"
-        print("found V.DOMAIN.Functionality, changed")
-    }
-    if("V.DOMAIN.Functionality.comment" %in% names(df)){
-        names(df)[names(df) == "V.DOMAIN.Functionality.comment"] <- "Functionality.comment"
-        print("found V.DOMAIN.Functionality.comment, changed")
-    }
-    return(df)
-}
-
-summ <- fix_column_names(summ)
-aa <- fix_column_names(aa)
-junction <- fix_column_names(junction)
-
-# Columns copied unchanged from summ and from aa.
-added_summary_columns <- c('Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence')
-added_sequence_columns <- c('FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT')
-
-# Columns copied from junction; the second group is exported under its R column name.
-base_junction_columns <- c('P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb',
-	'N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb',
-	'X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION')
-# NOTE(review): 'N2.REGION.nt.nb' and 'P3.D2.nt.nb' occur twice in the combined list, so they are exported
-# twice; the second 'P3.D2.nt.nb' (right after 'P5.D3.nt.nb') may have been meant as 'P3.D3.nt.nb' - confirm.
-extended_junction_columns <- c('P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb',
-	'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb',
-	'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb',
-	'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb',
-	'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb')
-added_junction_columns <- c(base_junction_columns, extended_junction_columns)
-
-out <- summ[,c("Sequence.ID","JUNCTION.frame","V.GENE.and.allele","D.GENE.and.allele","J.GENE.and.allele")]
-
-out[,"CDR1.Seq"] <- aa[,"CDR1.IMGT"]
-out[,"CDR1.Length"] <- summ[,"CDR1.IMGT.length"]
-out[,"CDR2.Seq"] <- aa[,"CDR2.IMGT"]
-out[,"CDR2.Length"] <- summ[,"CDR2.IMGT.length"]
-out[,"CDR3.Seq"] <- aa[,"CDR3.IMGT"]
-out[,"CDR3.Length"] <- summ[,"CDR3.IMGT.length"]
-
-out[,"CDR3.Seq.DNA"] <- junction[,"JUNCTION"]
-out[,"CDR3.Length.DNA"] <- nchar(as.character(junction[,"JUNCTION"]))
-out[,"Strand"] <- summ[,"Orientation"]
-out[,"CDR3.Found.How"] <- "a"
-
-out[,added_summary_columns] <- summ[,added_summary_columns]
-out[,added_sequence_columns] <- aa[,added_sequence_columns]
-out[,added_junction_columns] <- junction[,added_junction_columns]
-
-# Gene name: drop everything from "*" on, then everything up to the last space.
-out[,"Top V Gene"] <- gsub(".* ", "", gsub("\\*.*", "", summ[,"V.GENE.and.allele"]))
-out[,"Top D Gene"] <- gsub(".* ", "", gsub("\\*.*", "", summ[,"D.GENE.and.allele"]))
-out[,"Top J Gene"] <- gsub(".* ", "", gsub("\\*.*", "", summ[,"J.GENE.and.allele"]))
-
-# Final column order, built from the lists above instead of repeating every name.
-leading_columns <- c('Sequence.ID','JUNCTION.frame','Top V Gene','Top D Gene','Top J Gene','CDR1.Seq','CDR1.Length',
-	'CDR2.Seq','CDR2.Length','CDR3.Seq','CDR3.Length','CDR3.Seq.DNA','CDR3.Length.DNA','Strand','CDR3.Found.How')
-out <- out[,c(leading_columns, added_summary_columns, added_sequence_columns, added_junction_columns)]
-
-names(out) <- c('ID','VDJ Frame','Top V Gene','Top D Gene','Top J Gene','CDR1 Seq','CDR1 Length','CDR2 Seq','CDR2 Length',
-	'CDR3 Seq','CDR3 Length','CDR3 Seq DNA','CDR3 Length DNA','Strand','CDR3 Found How',
-	'Functionality','V-REGION identity %','V-REGION identity nt','D-REGION reading frame','AA JUNCTION','Functionality comment','Sequence',
-	'FR1-IMGT','FR2-IMGT','FR3-IMGT','CDR3-IMGT','JUNCTION','J-REGION','FR4-IMGT',
-	'P3V-nt nb','N-REGION-nt nb','N1-REGION-nt nb','P5D-nt nb','P3D-nt nb','N2-REGION-nt nb','P5J-nt nb',
-	'3V-REGION trimmed-nt nb','5D-REGION trimmed-nt nb','3D-REGION trimmed-nt nb','5J-REGION trimmed-nt nb',
-	'N-REGION','N1-REGION','N2-REGION', extended_junction_columns)
-
-# Normalise the frame labels; NA values stay untouched because %in% never returns NA.
-frame <- as.character(out[,"VDJ Frame"])
-frame[frame %in% "in-frame"] <- "In-frame"
-frame[frame %in% c("null", "out-of-frame", "")] <- "Out-of-frame"
-out[,"VDJ Frame"] <- frame
-
-# An empty gene name is written as the string "NA".
-for(col in c('Top V Gene','Top D Gene','Top J Gene')){
-	gene <- as.character(out[,col])
-	gene[gene %in% ""] <- "NA"
-	out[,col] <- gene
-}
-
-write.table(out, out.file, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
--- a/merge.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-# Inner-join two tab-separated tables on one key column each and write the merged table.
-# Usage: merge.r <input.1> <input.2> <fields.1> <fields.2> <field.1> <field.2> <output>
-# fields.N is "all" or a comma separated list of the columns to keep from input N.
-args <- commandArgs(trailingOnly = TRUE)
-
-input.1 <- args[1]
-input.2 <- args[2]
-fields.1 <- args[3]
-fields.2 <- args[4]
-field.1 <- args[5]
-field.2 <- args[6]
-output <- args[7]
-
-read_fields <- function(path, fields){
-	dat <- read.table(path, header=TRUE, sep="\t", quote="", stringsAsFactors=FALSE, fill=TRUE, row.names=NULL)
-	if(fields != "all"){
-		# drop=FALSE: a single selected column has to stay a data frame for merge()
-		dat <- dat[, unlist(strsplit(fields, ",")), drop=FALSE]
-	}
-	dat
-}
-
-dat1 <- read_fields(input.1, fields.1)
-dat2 <- read_fields(input.2, fields.2)
-dat3 <- merge(dat1, dat2, by.x=field.1, by.y=field.2)
-
-write.table(dat3, output, sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
--- a/merge_and_filter.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,304 +0,0 @@
-args <- commandArgs(trailingOnly = TRUE)
-
-# Input tables (tab-separated, with a header line).
-summaryfile              = args[1]
-sequencesfile            = args[2]
-mutationanalysisfile     = args[3]
-mutationstatsfile        = args[4]
-hotspotsfile             = args[5]
-aafile                   = args[6]
-gene_identification_file = args[7]
-
-# Output locations.
-output             = args[8]
-before.unique.file = args[9]
-unmatchedfile      = args[10]
-
-# Filter settings.
-method              = args[11]
-functionality       = args[12]
-unique.type         = args[13]
-filter.unique       = args[14]
-filter.unique.count = as.numeric(args[15])
-class.filter        = args[16]
-empty.region.filter = args[17]
-
-print(paste("filter.unique.count:", filter.unique.count))
-
-# Every input is read the same way: header line, tab-separated, short rows padded, no quote handling.
-read_tsv = function(path){
-	read.table(path, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
-}
-
-summ                = read_tsv(summaryfile)
-sequences           = read_tsv(sequencesfile)
-mutationanalysis    = read_tsv(mutationanalysisfile)
-mutationstats       = read_tsv(mutationstatsfile)
-hotspots            = read_tsv(hotspotsfile)
-AAs                 = read_tsv(aafile)
-gene_identification = read_tsv(gene_identification_file)
-
-# Rename the "V.DOMAIN.Functionality" and "V.DOMAIN.Functionality.comment"
-# columns of df to "Functionality" and "Functionality.comment".
-# A notice is printed for each column that is renamed; df is returned.
-fix_column_names = function(df){
-    cols = names(df)
-    hit = cols == "V.DOMAIN.Functionality"
-    if(any(hit)){
-        cols[hit] = "Functionality"
-        print("found V.DOMAIN.Functionality, changed")
-    }
-    hit = cols == "V.DOMAIN.Functionality.comment"
-    if(any(hit)){
-        cols[hit] = "Functionality.comment"
-        print("found V.DOMAIN.Functionality.comment, changed")
-    }
-    names(df) = cols
-    return(df)
-}
-
-# Make every Sequence.ID unique by appending the row number, separated by a space.
-# seq_len() keeps this working for a table without rows (1:nrow(df) would
-# count 1, 0 and produce two ids for zero rows).
-fix_non_unique_ids = function(df){
-	df$Sequence.ID = paste(df$Sequence.ID, seq_len(nrow(df)))
-	return(df)
-}
-
-# Give the functionality columns of every IMGT table the same names.
-summ             = fix_column_names(summ)
-sequences        = fix_column_names(sequences)
-mutationanalysis = fix_column_names(mutationanalysis)
-mutationstats    = fix_column_names(mutationstats)
-hotspots         = fix_column_names(hotspots)
-AAs              = fix_column_names(AAs)
-
-if(method == "blastn"){
-	# blastn tabular columns:
-	#"qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore"
-	# Only the first row of every query id is kept.
-	first.hit = !duplicated(gene_identification$qseqid)
-	gene_identification = gene_identification[first.hit,]
-
-	# Length of each reference chunk, used to express the hit length as a percentage.
-	ref_length = data.frame(
-		sseqid=c("ca1", "ca2", "cg1", "cg2", "cg3", "cg4", "cm"),
-		ref.length=c(81,81,141,141,141,141,52)
-	)
-	gene_identification = merge(gene_identification, ref_length, by="sseqid", all.x=T)
-	gene_identification$chunk_hit_percentage = (gene_identification$length / gene_identification$ref.length) * 100
-
-	# Reduce to the five columns used further on and give them the common names.
-	kept = c("qseqid", "chunk_hit_percentage", "pident", "qstart", "sseqid")
-	gene_identification = gene_identification[,kept]
-	colnames(gene_identification) = c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")
-}
-
-#print("Summary analysis files columns")
-#print(names(summ))
-
-
-
-# Row count of the summary table before any filtering.
-input.sequence.count = nrow(summ)
-print(paste("Number of sequences in summary file:", input.sequence.count))
-
-# filtering.steps collects one (label, count) row per filtering stage.
-filtering.steps = data.frame(character(0), numeric(0))
-
-filtering.steps = rbind(filtering.steps, c("Input", input.sequence.count))
-
-# Both columns are held as character while further rows are appended.
-filtering.steps[,1] = as.character(filtering.steps[,1])
-filtering.steps[,2] = as.character(filtering.steps[,2])
-#filtering.steps[,3] = as.numeric(filtering.steps[,3])
-
-#print("summary files columns")
-#print(names(summ))
-
-# merge() defaults to an inner join: only Sequence.IDs present in both tables remain.
-summ = merge(summ, gene_identification, by="Sequence.ID")
-
-print(paste("Number of sequences after merging with gene identification:", nrow(summ)))
-
-# Drop the rows whose Functionality is "No results".
-summ = summ[summ$Functionality != "No results",]
-
-print(paste("Number of sequences after 'No results' filter:", nrow(summ)))
-
-filtering.steps = rbind(filtering.steps, c("After 'No results' filter", nrow(summ)))
-
-# Functionality filter; any value other than the three handled below keeps every row.
-if(functionality == "productive"){
-	summ = summ[summ$Functionality == "productive (see comment)" | summ$Functionality == "productive",]
-} else if (functionality == "unproductive"){
-	summ = summ[summ$Functionality == "unproductive (see comment)" | summ$Functionality == "unproductive",]
-} else if (functionality == "remove_unknown"){
-	summ = summ[summ$Functionality != "No results" & summ$Functionality != "unknown (see comment)" & summ$Functionality != "unknown",]
-}
-
-print(paste("Number of sequences after functionality filter:", nrow(summ)))
-
-filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ)))
-
-# Disabled (if(F)): keeps a fixed-seed random 3% of the rows when switched on.
-if(F){ #to speed up debugging
-    set.seed(1)
-    summ = summ[sample(nrow(summ), floor(nrow(summ) * 0.03)),]
-    print(paste("Number of sequences after sampling 3%:", nrow(summ)))
-
-    filtering.steps = rbind(filtering.steps, c("Number of sequences after sampling 3%", nrow(summ)))
-}
-
-# The remaining tables are merged into the summary one at a time on Sequence.ID.
-# Before each merge, columns whose name already occurs in the accumulated table
-# (ignoring that table's first column) are dropped from the incoming table.
-print("mutation analysis files columns")
-print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])]))
-
-result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID")
-
-print(paste("Number of sequences after merging with mutation analysis file:", nrow(result)))
-
-#print("mutation stats files columns")
-#print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])]))
-
-result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID")
-
-print(paste("Number of sequences after merging with mutation stats file:", nrow(result)))
-
-print("hotspots files columns")
-print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])]))
-
-result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID")
-
-print(paste("Number of sequences after merging with hotspots file:", nrow(result)))
-
-print("sequences files columns")
-print(c("FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT"))
-
-# Region sequences get a ".seq" suffix; all.x=T keeps result rows that have no match.
-sequences = sequences[,c("Sequence.ID", "FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")]
-names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
-result = merge(result, sequences, by="Sequence.ID", all.x=T)
-
-# From the AA table only CDR3.IMGT is taken, stored as CDR3.IMGT.AA.
-AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")]
-names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA")
-result = merge(result, AAs, by="Sequence.ID", all.x=T)
-
-print(paste("Number of sequences in result after merging with sequences:", nrow(result)))
-
-# Gene names: strip a leading "Homsap " and everything from the first "*" onwards.
-result$VGene = gsub("^Homsap ", "", result$V.GENE.and.allele)
-result$VGene = gsub("[*].*", "", result$VGene)
-result$DGene = gsub("^Homsap ", "", result$D.GENE.and.allele)
-result$DGene = gsub("[*].*", "", result$DGene)
-result$JGene = gsub("^Homsap ", "", result$J.GENE.and.allele)
-result$JGene = gsub("[*].*", "", result$JGene)
-
-# class.filter is split on "_": first part is the chunk hit threshold, second the nt hit threshold.
-splt = strsplit(class.filter, "_")[[1]]
-chunk_hit_threshold = as.numeric(splt[1])
-nt_hit_threshold = as.numeric(splt[2])
-
-higher_than=(result$chunk_hit_percentage >= chunk_hit_threshold & result$nt_hit_percentage >= nt_hit_threshold)
-
-# Rows below either threshold get "unmatched, " put in front of their best_match.
-if(!all(higher_than, na.rm=T)){ #check for no unmatched
-	result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"])
-}
-
-# With the setting "101_101" every row is labelled "all".
-if(class.filter == "101_101"){
-	result$best_match = "all"
-}
-
-# Snapshot before the region/N/unique filters; the file name is derived from `output`.
-write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T)
-
-# Report how many rows have an empty sequence for each region.
-print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == "", na.rm=T)))
-print(paste("Number of empty FR2 sequences:", sum(result$FR2.IMGT.seq == "", na.rm=T)))
-print(paste("Number of empty CDR2 sequences:", sum(result$CDR2.IMGT.seq == "", na.rm=T)))
-print(paste("Number of empty FR3 sequences:", sum(result$FR3.IMGT.seq == "", na.rm=T)))
-
-# Keep only rows in which every required region is non-empty. The setting decides
-# where the required regions start: "leader" requires FR1 through FR3, "FR1"
-# requires CDR1 through FR3, "CDR1" requires FR2 through FR3 and "FR2" requires
-# CDR2 and FR3. Any other value leaves the table unchanged.
-if(empty.region.filter == "leader"){
-	result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-} else if(empty.region.filter == "FR1"){
-	result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-} else if(empty.region.filter == "CDR1"){
-	result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-} else if(empty.region.filter == "FR2"){
-	result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-}
-
-print(paste("After removal sequences that are missing a gene region:", nrow(result)))
-filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result)))
-
-# Remove rows with an "n" or "N" in any of the region sequences checked for the
-# chosen setting (CDR3 is checked for every handled setting).
-if(empty.region.filter == "leader"){
-	result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
-} else if(empty.region.filter == "FR1"){
-	result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
-} else if(empty.region.filter == "CDR1"){
-	result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
-} else if(empty.region.filter == "FR2"){
-	result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
-}
-
-print(paste("Number of sequences in result after n filtering:", nrow(result)))
-filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result)))
-
-# Mutation-count columns that are converted to plain numbers.
-cleanup_columns = c("FR1.IMGT.Nb.of.mutations",
-                    "CDR1.IMGT.Nb.of.mutations",
-                    "FR2.IMGT.Nb.of.mutations",
-                    "CDR2.IMGT.Nb.of.mutations",
-                    "FR3.IMGT.Nb.of.mutations")
-
-for(col in cleanup_columns){
-  # Remove any parenthesised part, then convert what is left to a number.
-  result[,col] = as.numeric(gsub("\\(.*\\)", "", result[,col]))
-  # Only the missing cells of this column become 0. Without the column index
-  # the assignment overwrote every column of those rows (Sequence.ID included).
-  result[is.na(result[,col]),col] = 0
-}
-
-write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T)
-
-
-# Optional "unique sequence" filter, skipped when filter.unique is "no".
-if(filter.unique != "no"){
-	# NOTE(review): clmns is not used anywhere in the visible part of this script.
-	clmns = names(result)
-	# unique.def is the key that defines "the same sequence": V gene + J gene + CDR3 AA
-	# for "remove_vjaa", otherwise the region sequences selected by empty.region.filter.
-	# NOTE(review): for any other combination of settings unique.def is not created here - confirm callers never pass one.
-	if(filter.unique == "remove_vjaa"){
-		result$unique.def = paste(result$VGene, result$JGene, result$CDR3.IMGT.AA)
-	} else if(empty.region.filter == "leader"){
-		result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
-	} else if(empty.region.filter == "FR1"){
-		result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
-	} else if(empty.region.filter == "CDR1"){
-		result$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
-	} else if(empty.region.filter == "FR2"){
-		result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
-	}
-	
-	# "remove*" settings: keep only keys that occur more than once, and at least filter.unique.count times.
-	if(grepl("remove", filter.unique)){
-		result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
-		unique.defs = data.frame(table(result$unique.def))
-		unique.defs = unique.defs[unique.defs$Freq >= filter.unique.count,]
-		result = result[result$unique.def %in% unique.defs$Var1,]
-	}
-
-	if(filter.unique != "remove_vjaa"){
-		result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it
-	}
-
-	# One row per key: the first occurrence is kept.
-	result = result[!duplicated(result$unique.def),]
-}
-
-# The file name is derived from before.unique.file.
-write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T)
-
-filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result)))
-
-print(paste("Number of sequences in result after unique filtering:", nrow(result)))
-
-# NOTE(review): this tests summ, which has not changed since the functionality filter;
-# the message suggests the filtered `result` may have been meant - confirm before changing.
-if(nrow(summ) == 0){
-	stop("No data remaining after filter")
-}
-
-result$best_match_class = gsub(",.*", "", result$best_match) #gsub so the unmatched don't have a class after it
-
-#result$past = ""
-#cls = unlist(strsplit(unique.type, ","))
-#for (i in 1:nrow(result)){
-#	result[i,"past"] = paste(result[i,cls], collapse=":")
-#}
-
-
-
-# past = the columns listed in unique.type (comma-separated), joined with ":"; used as the duplicate key below.
-result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":"))
-
-# Matched rows are placed before unmatched ones, so that duplicated() below
-# keeps a matched row when both share the same key.
-result.matched = result[!grepl("unmatched", result$best_match),]
-result.unmatched = result[grepl("unmatched", result$best_match),]
-
-result = rbind(result.matched, result.unmatched)
-
-result = result[!(duplicated(result$past)), ]
-
-# Drop the two helper columns again.
-result = result[,!(names(result) %in% c("past", "best_match_class"))]
-
-print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result)))
-
-filtering.steps = rbind(filtering.steps, c("After remove duplicates based on filter", nrow(result)))
-
-unmatched = result[grepl("^unmatched", result$best_match),c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
-
-print(paste("Number of rows in result:", nrow(result)))
-print(paste("Number of rows in unmatched:", nrow(unmatched)))
-
-matched.sequences = result[!grepl("^unmatched", result$best_match),]
-
-write.table(x=matched.sequences, file=gsub("merged.txt$", "filtered.txt", output), sep="\t",quote=F,row.names=F,col.names=T)
-
-matched.sequences.count = nrow(matched.sequences)
-unmatched.sequences.count = sum(grepl("^unmatched", result$best_match))
-# NOTE(review): the warning is printed whenever matched <= unmatched, not only when nothing matched.
-if(matched.sequences.count <= unmatched.sequences.count){
-	print("WARNING NO MATCHED (SUB)CLASS SEQUENCES!!")
-}
-
-filtering.steps = rbind(filtering.steps, c("Number of matched sequences", matched.sequences.count))
-filtering.steps = rbind(filtering.steps, c("Number of unmatched sequences", unmatched.sequences.count))
-# Counts back to numbers; perc is each count as a percentage of the input row count, rounded to 2 decimals.
-filtering.steps[,2] = as.numeric(filtering.steps[,2])
-filtering.steps$perc = round(filtering.steps[,2] / input.sequence.count * 100, 2)
-
-# The filtering-steps file name is derived from unmatchedfile; it is written without a header line.
-write.table(x=filtering.steps, file=gsub("unmatched", "filtering_steps", unmatchedfile), sep="\t",quote=F,row.names=F,col.names=F)
-
-write.table(x=result, file=output, sep="\t",quote=F,row.names=F,col.names=T)
-write.table(x=unmatched, file=unmatchedfile, sep="\t",quote=F,row.names=F,col.names=T)
--- a/mutation_column_checker.py	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-import re
-
-# Pattern every "|"-separated entry of the FR3-IMGT column is expected to match.
-# Raw string, so that "\d" reaches the regex engine as written.
-mutationMatcher = re.compile(r"^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")
-
-# Scan the table and report the rows whose FR3-IMGT column holds an entry
-# that does not match the pattern above.
-with open("7_V-REGION-mutation-and-AA-change-table.txt", 'r') as file_handle:
-    fr3_index = -1
-    for i, line in enumerate(file_handle):
-        line_split = line.split("\t")
-        if i == 0:
-            # header line: locate the FR3-IMGT column
-            fr3_index = line_split.index("FR3-IMGT")
-            continue
-
-        # Skip rows too short to contain the column ("<=": index fr3_index needs fr3_index + 1 fields).
-        if len(line_split) <= fr3_index:
-            continue
-
-        fr3_data = line_split[fr3_index]
-        if len(fr3_data) > 5:
-            mutations = [x for x in fr3_data.split("|") if x]
-            # An explicit check replaces the former bare "except" around .groups() on a failed match.
-            if any(mutationMatcher.match(x) is None for x in mutations):
-                print(line_split[1])
-                print("Something went wrong at line {line} with:".format(line=line_split[0]))
-                #print([x for x in fr3_data.split("|") if not mutationMatcher.match(x)])
-        # progress indicator
-        if i % 100000 == 0:
-            print(i)
--- a/naive_output.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-# Join the naive table with the best_match column of the SHM table and write
-# one file per class prefix ("ca", "cg", "cm").
-# Usage: Rscript naive_output.r <naive.file> <shm.file> <out.ca> <out.cg> <out.cm>
-args <- commandArgs(trailingOnly = TRUE)
-
-naive.file = args[1]
-shm.file = args[2]
-# output file per class prefix, in the order in which the files are written
-output.files = c(ca=args[3], cg=args[4], cm=args[5])
-
-naive = read.table(naive.file, sep="\t", header=TRUE, quote="", fill=TRUE)
-shm.merge = read.table(shm.file, sep="\t", header=TRUE, quote="", fill=TRUE)
-
-# naive$ID is matched against shm.merge$Sequence.ID; best_match becomes "Sample".
-final = merge(naive, shm.merge[,c("Sequence.ID", "best_match")], by.x="ID", by.y="Sequence.ID")
-print(paste("nrow final:", nrow(final)))
-names(final)[names(final) == "best_match"] = "Sample"
-
-# Missing values in the numeric columns are replaced by 0.
-is.num = sapply(final, is.numeric)
-final.numeric = final[,is.num]
-final.numeric[is.na(final.numeric)] = 0
-final[,is.num] = final.numeric
-
-for(prefix in names(output.files)){
-	per.class = final[grepl(paste("^", prefix, sep=""), final$Sample),]
-	# the Replicate column is only added when the class has rows
-	if(nrow(per.class) > 0){
-		per.class$Replicate = 1
-	}
-	write.table(per.class, output.files[[prefix]], quote=F, sep="\t", row.names=F, col.names=T)
-}
-
-
--- a/new_imgt.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-# Restrict every .txt table in a directory to the sequences kept in the merged file.
-# The tables are overwritten in place.
-# Usage: Rscript new_imgt.r <imgt.dir> <merged.file> <gene>
-#   gene: prefix that best_match has to start with, or "-" for no gene filter
-args <- commandArgs(trailingOnly = TRUE)
-
-imgt.dir = args[1]
-merged.file = args[2]
-gene = args[3]
-
-merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, comment.char="", quote="")
-
-if(!("Sequence.ID" %in% names(merged))){ #change-o db
-	print("Change-O DB changing 'SEQUENCE_ID' to 'Sequence.ID'")
-	# names(merged), not names[merged]: subsetting the function `names` itself is an error
-	names(merged)[names(merged) == "SEQUENCE_ID"] = "Sequence.ID"
-}
-
-if(gene != "-"){
-	merged = merged[grepl(paste("^", gene, sep=""), merged$best_match),]
-}
-
-if("best_match" %in% names(merged)){
-	merged = merged[!grepl("unmatched", merged$best_match),]
-}
-
-# Row count of the most recently processed table; only used in the final message.
-nrow_dat = 0
-
-for(f in list.files(imgt.dir, pattern="*.txt$")){
-	#print(paste("filtering", f))
-	path = file.path(imgt.dir, f)
-	# check.names=FALSE keeps the header "Sequence ID" with its space
-	dat = read.table(path, header=T, sep="\t", fill=T, quote="", stringsAsFactors=F, check.names=FALSE, comment.char="")
-	
-	dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,]
-	
-	nrow_dat = nrow(dat)
-	
-	# scalar condition, so the short-circuiting && is used
-	if(nrow(dat) > 0 && grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." file
-		dat[,grepl("^FR1", names(dat))] = 0
-	}
-	
-	write.table(dat, path, quote=F, sep="\t", row.names=F, col.names=T, na="")
-}
-
-print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences"))
--- a/pattern_plots.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,178 +0,0 @@
-library(ggplot2)
-library(reshape2)
-library(scales)
-
-args <- commandArgs(trailingOnly = TRUE)
-
-# the data that gets turned into the "SHM overview" table in the html report ("data_sum.txt")
-input.file = args[1]
-
-# Each plot argument is a path without extension; the png, txt and pdf names are derived from it.
-plot1.path = args[2]
-plot1.png = paste0(plot1.path, ".png")
-plot1.txt = paste0(plot1.path, ".txt")
-plot1.pdf = paste0(plot1.path, ".pdf")
-
-plot2.path = args[3]
-plot2.png = paste0(plot2.path, ".png")
-plot2.txt = paste0(plot2.path, ".txt")
-plot2.pdf = paste0(plot2.path, ".pdf")
-
-plot3.path = args[4]
-plot3.png = paste0(plot3.path, ".png")
-plot3.txt = paste0(plot3.path, ".txt")
-plot3.pdf = paste0(plot3.path, ".pdf")
-
-clean.output = args[5]
-
-# Comma-separated input without header; the first field of each line becomes the row name.
-dat = read.table(input.file, header=FALSE, sep=",", quote="", stringsAsFactors=FALSE, fill=TRUE, row.names=1)
-
-# Every class contributes three columns, suffixed .x, .y and .z.
-classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE")
-xyz = c("x", "y", "z")
-class.cols = paste(rep(classes, each=3), xyz, sep=".")
-un.cols = paste("un", xyz, sep=".")
-all.cols = paste("all", xyz, sep=".")
-
-# In the input the "un" columns come before the "all" columns ...
-new.names = c(class.cols, un.cols, all.cols)
-names(dat) = new.names
-
-# ... in the cleaned-up copy that is written out, "all" comes before "un".
-clean.dat = dat[,c(class.cols, all.cols, un.cols)]
-
-write.table(clean.dat, clean.output, quote=F, sep="\t", na="", row.names=T, col.names=NA)
-
-# Plot 1: two motif rows per class, each the column-wise sum of two input rows.
-# NOTE(review): rows 13-16 are addressed by position - confirm they are the
-# RGYW/WRCY and TW/WA rows of the input.
-dat["RGYW.WRCY",] = colSums(dat[c(13,14),], na.rm=T)
-dat["TW.WA",] = colSums(dat[c(15,16),], na.rm=T)
-
-data1 = dat[c("RGYW.WRCY", "TW.WA"),]
-
-# Keep the ".z" column of every class. The dot is escaped so that it matches a
-# literal "." only, as in the other column selections of this script.
-data1 = data1[,names(data1)[grepl("\\.z", names(data1))]]
-names(data1) = gsub("\\..*", "", names(data1))
-
-# Long format: one row per (class, motif) pair.
-data1 = melt(t(data1))
-
-names(data1) = c("Class", "Type", "value")
-
-# Missing values are plotted as 0.
-chk = is.na(data1$value)
-if(any(chk)){
-	data1[chk, "value"] = 0
-}
-
-data1 = data1[order(data1$Type),]
-
-write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
-
-p = ggplot(data1, aes(Class, value)) +
-	geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") +
-	ylab("% of mutations") +
-	guides(fill=guide_legend(title=NULL)) +
-	ggtitle("Percentage of mutations in AID and pol eta motives")
-p = p +
-	theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) +
-	scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))
-#p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4"))
-png(filename=plot1.png, width=510, height=300)
-print(p)
-dev.off()
-
-ggsave(plot1.pdf, p)
-
-# Plot 2: relative mutation patterns (A/T, G/C transitions, G/C transversions) per class.
-# NOTE(review): rows 1 and 5-8 are addressed by position; the code below relies on them
-# including "Number of Mutations (%)" and "Transitions at G C (%)" - confirm.
-data2 = dat[c(1, 5:8),]
-
-# Keep the ".x" column of every class and strip the suffix. The dot is escaped
-# so that only a literal ".x" is matched and removed.
-data2 = data2[,names(data2)[grepl("\\.x", names(data2))]]
-names(data2) = gsub("\\.x", "", names(data2))
-
-# The A/T row is taken from the ".z" columns of the full table.
-data2["A/T",] = dat["Targeting of A T (%)",names(dat)[grepl("\\.z", names(dat))]]
-
-data2["G/C transitions",] = round(data2["Transitions at G C (%)",] / data2["Number of Mutations (%)",] * 100, 1)
-
-# mutation.at.gc comes from the ".y" columns; transversions = that value minus the transitions.
-data2["mutation.at.gc",] = dat["Transitions at G C (%)",names(dat)[grepl("\\.y", names(dat))]]
-data2["G/C transversions",] = round((data2["mutation.at.gc",] - data2["Transitions at G C (%)",]) / data2["Number of Mutations (%)",] * 100, 1)
-
-# Results of dividing by zero (NaN, Inf) are set to 0.
-data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0
-data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0
-data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0
-data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0
-
-data2 = melt(t(data2[c("A/T","G/C transitions","G/C transversions"),]))
-
-names(data2) = c("Class", "Type", "value")
-
-# Missing values are plotted as 0.
-chk = is.na(data2$value)
-if(any(chk)){
-	data2[chk, "value"] = 0
-}
-
-data2 = data2[order(data2$Type),]
-
-write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
-
-# position="fill" stacks the three types to 100% per class.
-p = ggplot(data2, aes(x=Class, y=value, fill=Type)) +
-	geom_bar(position="fill", stat="identity", colour = "black") +
-	scale_y_continuous(labels=percent_format()) +
-	guides(fill=guide_legend(title=NULL)) +
-	ylab("% of mutations") +
-	ggtitle("Relative mutation patterns")
-p = p +
-	theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) +
-	scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
-#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
-png(filename=plot2.png, width=480, height=300)
-print(p)
-dev.off()
-
-ggsave(plot2.pdf, p)
-
-# Plot 3: absolute mutation patterns, i.e. mutations relative to the nucleotide counts.
-# NOTE(review): the rows are addressed by position; the code below relies on them being the
-# targeting/transition rows and the "A", "C", "G", "T" rows - confirm.
-data3 = dat[c(5, 6, 8, 17:20),]
-# Keep the ".x" column of every class and strip the suffix. The dot is escaped
-# so that only a literal ".x" is matched and removed.
-data3 = data3[,names(data3)[grepl("\\.x", names(data3))]]
-names(data3) = gsub("\\.x", "", names(data3))
-
-data3["G/C transitions",] = round(data3["Transitions at G C (%)",] / (data3["C",] + data3["G",]) * 100, 1)
-
-data3["G/C transversions",] = round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / (data3["C",] + data3["G",]) * 100, 1)
-
-data3["A/T",] = round(data3["Targeting of A T (%)",] / (data3["A",] + data3["T",]) * 100, 1)
-
-# Results of dividing by zero (NaN, Inf) are set to 0.
-data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0
-data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0
-
-data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0
-data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0
-
-data3["A/T",is.nan(unlist(data3["A/T",]))] = 0
-data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0
-
-# Rows 8-10 are the three rows added above (seven rows were selected to start with).
-data3 = melt(t(data3[8:10,]))
-names(data3) = c("Class", "Type", "value")
-
-# Missing values are plotted as 0.
-chk = is.na(data3$value)
-if(any(chk)){
-	data3[chk, "value"] = 0
-}
-
-data3 = data3[order(data3$Type),]
-
-write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
-
-p = ggplot(data3, aes(Class, value)) +
-	geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") +
-	ylab("% of nucleotides") +
-	guides(fill=guide_legend(title=NULL)) +
-	ggtitle("Absolute mutation patterns")
-p = p +
-	theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) +
-	scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
-#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black"))
-png(filename=plot3.png, width=480, height=300)
-print(p)
-dev.off()
-
-ggsave(plot3.pdf, p)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
--- a/plot_pdf.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-library(ggplot2)
-
-# Save every plot of a stored plot list to a file named after its list entry.
-# Usage: Rscript plot_pdf.r <input> <outputdir>
-#   input    : R data file; load() is expected to define the named list `pdfplots`
-#   outputdir: directory the files are written to (it becomes the working directory)
-args <- commandArgs(trailingOnly = TRUE)
-print(args)
-
-input = args[1]
-outputdir = args[2]
-setwd(outputdir)
-
-load(input)
-
-print(names(pdfplots))
-
-for(n in names(pdfplots)){
-    print(paste("n:", n))
-    # Explicit argument names: the file name is the list name, the plot is the list element.
-    # The previous call only worked because `file=` partially matched `filename`.
-    ggsave(filename=n, plot=pdfplots[[n]])
-}
--- a/sequence_overview.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,363 +0,0 @@
-library(reshape2)
-
-args <- commandArgs(trailingOnly = TRUE)
-
-# Positional arguments.
-before.unique.file = args[1]
-merged.file = args[2]
-outputdir = args[3]
-gene.classes = unlist(strsplit(args[4], ","))
-hotspot.analysis.sum.file = args[5]
-empty.region.filter = args[6]
-
-# Output files; main.html is a relative name and ends up in outputdir through the setwd() below.
-NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
-NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
-main.html = "index.html"
-
-setwd(outputdir)
-
-before.unique = read.table(before.unique.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
-merged = read.table(merged.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
-hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=FALSE, sep=",", fill=TRUE, stringsAsFactors=FALSE, quote="")
-
-#before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]
-
-# Region sequences that are joined (separated by a space) into the comparison key seq_conc.
-# For a setting other than the four below no key column is created.
-if(empty.region.filter == "leader"){
-	key.regions = c("FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
-} else if(empty.region.filter == "FR1"){
-	key.regions = c("CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
-} else if(empty.region.filter == "CDR1"){
-	key.regions = c("FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
-} else if(empty.region.filter == "FR2"){
-	key.regions = c("CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
-} else {
-	key.regions = NULL
-}
-if(!is.null(key.regions)){
-	before.unique$seq_conc = do.call(paste, lapply(key.regions, function(region) before.unique[[region]]))
-}
-
-# Per-sequence lookup table used when building the overview.
-IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
-IDs$best_match = as.character(IDs$best_match)
-
-# One row per distinct key with its frequency, ordered by the key text.
-dat = data.frame(table(before.unique$seq_conc))
-names(dat) = c("seq_conc", "Freq")
-dat$seq_conc = factor(dat$seq_conc)
-dat = dat[order(as.character(dat$seq_conc)),]
-
-#writing html from R...
-# Background colour for one table cell:
-#   logical-looking value ("TRUE", "FALSE", "T", "F"): green tint when true, red tint when false
-#   value that converts to a number: grey tint when it is > 0, otherwise white
-#   anything else: white
-get.bg.color = function(val){
-	if(val %in% c("TRUE", "FALSE", "T", "F")){
-		if(as.logical(val)){
-			return("#eafaf1")
-		}
-		return("#f9ebea")
-	}
-	if(is.na(as.numeric(val))){
-		return("white")
-	}
-	if(val > 0){
-		return("#eaecee")
-	}
-	return("white")
-}
-# One html table cell; the background colour is chosen by get.bg.color().
-td = function(val) {
-	bg = get.bg.color(val)
-	paste0("<td bgcolor='", bg, "'>", val, "</td>")
-}
-# One html table row with a cell for every element of val.
-tr = function(val) {
-	cells = sapply(val, td)
-	paste(c("<tr>", cells, "</tr>"), collapse="")
-}
-
-# Html link with the text `val` that points to the page "<clss>_<id>.html".
-make.link = function(id, clss, val) {
-	target = paste0(clss, "_", id, ".html")
-	paste0("<a href='", target, "'>", val, "</a>")
-}
-# Html table with one row (built by tr()) for every row of df.
-# seq_len() gives no rows for an empty df; 1:nrow(df) would visit rows 1 and 0.
-tbl = function(df) {
-	rows = vapply(seq_len(nrow(df)), function(i) tr(df[i,]), character(1))
-	res = paste(c("<table border='1'>", rows), collapse="")
-	paste(res, "</table>")
-}
-
-# Append a piece of html to the overview page.
-add.html = function(txt) {
-	cat(txt, file=main.html, append=T)
-}
-
-# The first write (append=F) starts the page from scratch.
-cat("<center><img src=''> Please note that this tab is based on all sequences before filter unique sequences and the remove duplicates based on filters are applied. In this table only sequences occuring more than once are included. </center>", file=main.html, append=F)
-add.html("<table border='1' class='pure-table pure-table-striped'>")
-
-# The caption names the regions that make up the compared sequence.
-if(empty.region.filter == "leader"){
-	add.html("<caption>FR1+CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>")
-} else if(empty.region.filter == "FR1"){
-	add.html("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>")
-} else if(empty.region.filter == "CDR1"){
-	add.html("<caption>FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>")
-} else if(empty.region.filter == "FR2"){
-	add.html("<caption>CDR2+FR3+CDR3 sequences that show up more than once</caption>")
-}
-
-# Header row of the overview table.
-add.html("<tr>")
-add.html("<th>Sequence</th><th>Functionality</th><th>IGA1</th><th>IGA2</th><th>IGG1</th><th>IGG2</th><th>IGG3</th><th>IGG4</th><th>IGM</th><th>IGE</th><th>UN</th>")
-add.html("<th>total IGA</th><th>total IGG</th><th>total IGM</th><th>total IGE</th><th>number of subclasses</th><th>present in both IGA and IGG</th><th>present in IGA, IGG and IGM</th><th>present in IGA, IGG and IGE</th><th>present in IGA, IGG, IGM and IGE</th><th>IGA1+IGA2</th>")
-add.html("<th>IGG1+IGG2</th><th>IGG1+IGG3</th><th>IGG1+IGG4</th><th>IGG2+IGG3</th><th>IGG2+IGG4</th><th>IGG3+IGG4</th>")
-add.html("<th>IGG1+IGG2+IGG3</th><th>IGG2+IGG3+IGG4</th><th>IGG1+IGG2+IGG4</th><th>IGG1+IGG3+IGG4</th><th>IGG1+IGG2+IGG3+IGG4</th>")
-add.html("</tr>")
-
-# Counters that are updated while the sequences are processed.
-single.sequences = 0 # sequence only found once, skipped
-in.multiple = 0      # same sequence across multiple subclasses
-multiple.in.one = 0  # same sequence multiple times in one subclass
-unmatched = 0        # all of the sequences are unmatched
-some.unmatched = 0   # one or more sequences in a clone are unmatched
-matched = 0          # should be the same as matched sequences
-
-sequence.id.page = "by_id.html"
-
-for(i in 1:nrow(dat)){
-	
-	ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGA1", IDs$best_match),]
-	ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGA2", IDs$best_match),]
-	
-	cg1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG1", IDs$best_match),]
-	cg2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG2", IDs$best_match),]
-	cg3 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG3", IDs$best_match),]
-	cg4 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG4", IDs$best_match),]
-	
-	cm = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGM", IDs$best_match),]
-	
-	ce = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGE", IDs$best_match),]
-	
-	un = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^unmatched", IDs$best_match),]
-	
-	allc = rbind(ca1, ca2, cg1, cg2, cg3, cg4, cm, ce, un)
-	
-	ca1.n = nrow(ca1)
-	ca2.n = nrow(ca2)
-	
-	cg1.n = nrow(cg1)
-	cg2.n = nrow(cg2)
-	cg3.n = nrow(cg3)
-	cg4.n = nrow(cg4)
-	
-	cm.n = nrow(cm)
-	
-	ce.n = nrow(ce)
-	
-	un.n = nrow(un)
-	
-	classes = c(ca1.n, ca2.n, cg1.n, cg2.n, cg3.n, cg4.n, cm.n, ce.n, un.n)
-	
-	classes.sum = sum(classes)
-	
-	if(classes.sum == 1){
-		single.sequences = single.sequences + 1
-		next
-	}
-	
-	if(un.n == classes.sum){
-		unmatched = unmatched + 1
-		next
-	}
-	
-	classes.no.un = classes[-length(classes)]
-	
-	in.classes = sum(classes.no.un > 0)
-	
-	matched = matched + in.classes #count in how many subclasses the sequence occurs.
-	
-	if(any(classes == classes.sum)){
-		multiple.in.one = multiple.in.one + 1
-	} else if (un.n > 0) {
-		some.unmatched = some.unmatched + 1
-	} else {
-		in.multiple = in.multiple + 1
-	}
-	
-	id = as.numeric(dat[i,"seq_conc"])
-	
-	functionality = paste(unique(allc[,"Functionality"]), collapse=",")
-	
-	by.id.row = c()
-	
-	if(ca1.n > 0){
-		cat(tbl(ca1), file=paste("IGA1_", id, ".html", sep=""))
-	}
-
-	if(ca2.n > 0){
-		cat(tbl(ca2), file=paste("IGA2_", id, ".html", sep=""))
-	}
-
-	if(cg1.n > 0){
-		cat(tbl(cg1), file=paste("IGG1_", id, ".html", sep=""))
-	}
-
-	if(cg2.n > 0){
-		cat(tbl(cg2), file=paste("IGG2_", id, ".html", sep=""))
-	}
-
-	if(cg3.n > 0){
-		cat(tbl(cg3), file=paste("IGG3_", id, ".html", sep=""))
-	}
-
-	if(cg4.n > 0){
-		cat(tbl(cg4), file=paste("IGG4_", id, ".html", sep=""))
-	}
-
-	if(cm.n > 0){
-		cat(tbl(cm), file=paste("IGM_", id, ".html", sep=""))
-	}
-
-	if(ce.n > 0){
-		cat(tbl(ce), file=paste("IGE_", id, ".html", sep=""))
-	}
-
-	if(un.n > 0){
-		cat(tbl(un), file=paste("un_", id, ".html", sep=""))
-	}
-	
-	ca1.html = make.link(id, "IGA1", ca1.n)
-	ca2.html = make.link(id, "IGA2", ca2.n)
-	
-	cg1.html = make.link(id, "IGG1", cg1.n)
-	cg2.html = make.link(id, "IGG2", cg2.n)
-	cg3.html = make.link(id, "IGG3", cg3.n)
-	cg4.html = make.link(id, "IGG4", cg4.n)
-	
-	cm.html = make.link(id, "IGM", cm.n)
-	
-	ce.html = make.link(id, "IGE", ce.n)
-	
-	un.html = make.link(id, "un", un.n)
-	
-	#extra columns
-	ca.n = ca1.n + ca2.n
-	
-	cg.n = cg1.n + cg2.n + cg3.n + cg4.n
-	
-	#in.classes
-	
-	in.ca.cg = (ca.n > 0 & cg.n > 0)
-	
-	in.ca.cg.cm = (ca.n > 0 & cg.n > 0 & cm.n > 0)
-	
-	in.ca.cg.ce = (ca.n > 0 & cg.n > 0 & ce.n > 0)
-	
-	in.ca.cg.cm.ce = (ca.n > 0 & cg.n > 0 & cm.n > 0 & ce.n > 0)
-	
-	in.ca1.ca2 = (ca1.n > 0 & ca2.n > 0)
-	
-	in.cg1.cg2 = (cg1.n > 0 & cg2.n > 0)
-	in.cg1.cg3 = (cg1.n > 0 & cg3.n > 0)
-	in.cg1.cg4 = (cg1.n > 0 & cg4.n > 0)
-	in.cg2.cg3 = (cg2.n > 0 & cg3.n > 0)
-	in.cg2.cg4 = (cg2.n > 0 & cg4.n > 0)
-	in.cg3.cg4 = (cg3.n > 0 & cg4.n > 0)
-	
-	in.cg1.cg2.cg3 = (cg1.n > 0 & cg2.n > 0 & cg3.n > 0)
-	in.cg2.cg3.cg4 = (cg2.n > 0 & cg3.n > 0 & cg4.n > 0)
-	in.cg1.cg2.cg4 = (cg1.n > 0 & cg2.n > 0 & cg4.n > 0)
-	in.cg1.cg3.cg4 = (cg1.n > 0 & cg3.n > 0 & cg4.n > 0)
-	
-	in.cg.all = (cg1.n > 0 & cg2.n > 0 & cg3.n > 0 & cg4.n > 0)
-	
-	#rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, un.html)
-	rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, ce.html, un.html)
-	rw = c(rw, ca.n, cg.n, cm.n, ce.n, in.classes, in.ca.cg, in.ca.cg.cm, in.ca.cg.ce, in.ca.cg.cm.ce, in.ca1.ca2, in.cg1.cg2, in.cg1.cg3, in.cg1.cg4, in.cg2.cg3, in.cg2.cg4, in.cg3.cg4, in.cg1.cg2.cg3, in.cg2.cg3.cg4, in.cg1.cg2.cg4, in.cg1.cg3.cg4, in.cg.all)
-	
-	
-
-	cat(tr(rw), file=main.html, append=T)
-	
-	
-	for(i in 1:nrow(allc)){ #generate html by id
-		html = make.link(id, allc[i,"best_match"], allc[i,"Sequence.ID"])
-		cat(paste(html, "<br />"), file=sequence.id.page, append=T)
-	}
-}
-
-cat("</table>", file=main.html, append=T)
-
-print(paste("Single sequences:", single.sequences))
-print(paste("Sequences in multiple subclasses:", in.multiple))
-print(paste("Multiple sequences in one subclass:", multiple.in.one))
-print(paste("Matched with unmatched:", some.unmatched))
-print(paste("Count that should match 'matched' sequences:", matched))
-
-#ACGT overview
-
-#NToverview = merged[!grepl("^unmatched", merged$best_match),]
-NToverview = merged
-
-if(empty.region.filter == "leader"){
-	NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
-} else if(empty.region.filter == "FR1"){
-	NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
-} else if(empty.region.filter == "CDR1"){
-	NToverview$seq = paste(NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
-} else if(empty.region.filter == "FR2"){
-	NToverview$seq = paste(NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
-}
-
-NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
-NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq))
-NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq))
-NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq))
-
-#Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T))
-
-#NToverview = rbind(NToverview, NTsum)
-
-NTresult = data.frame(nt=c("A", "C", "T", "G"))
-
-for(clazz in gene.classes){
-	print(paste("class:", clazz))
-	NToverview.sub = NToverview[grepl(paste("^", clazz, sep=""), NToverview$best_match),]
-	print(paste("nrow:", nrow(NToverview.sub)))
-	new.col.x = c(sum(NToverview.sub$A), sum(NToverview.sub$C), sum(NToverview.sub$T), sum(NToverview.sub$G))
-	new.col.y = sum(new.col.x)
-	new.col.z = round(new.col.x / new.col.y * 100, 2)
-	
-	tmp = names(NTresult)
-	NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z))
-	names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep=""))
-}
-
-NToverview.tmp = NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")]
-
-names(NToverview.tmp) = c("Sequence.ID", "best_match", "Sequence of the analysed region", "A", "C", "G", "T")
-
-write.table(NToverview.tmp, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T)
-
-NToverview = NToverview[!grepl("unmatched", NToverview$best_match),]
-
-new.col.x = c(sum(NToverview$A), sum(NToverview$C), sum(NToverview$T), sum(NToverview$G))
-new.col.y = sum(new.col.x)
-new.col.z = round(new.col.x / new.col.y * 100, 2)
-
-tmp = names(NTresult)
-NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z))
-names(NTresult) = c(tmp, paste("all", c("x", "y", "z"), sep=""))
-
-names(hotspot.analysis.sum) = names(NTresult)
-
-hotspot.analysis.sum = rbind(hotspot.analysis.sum, NTresult)
-
-write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
--- a/shm_clonality.htm	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,144 +0,0 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Font Definitions */
- @font-face
-	{font-family:Calibri;
-	panose-1:2 15 5 2 2 2 4 3 2 4;}
-@font-face
-	{font-family:Tahoma;
-	panose-1:2 11 6 4 3 5 4 4 2 4;}
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
-	{margin-top:0in;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:11.0pt;
-	font-family:"Calibri","sans-serif";}
-a:link, span.MsoHyperlink
-	{color:blue;
-	text-decoration:underline;}
-a:visited, span.MsoHyperlinkFollowed
-	{color:purple;
-	text-decoration:underline;}
-p
-	{margin-right:0in;
-	margin-left:0in;
-	font-size:12.0pt;
-	font-family:"Times New Roman","serif";}
-p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
-	{mso-style-link:"Balloon Text Char";
-	margin:0in;
-	margin-bottom:.0001pt;
-	font-size:8.0pt;
-	font-family:"Tahoma","sans-serif";}
-p.msochpdefault, li.msochpdefault, div.msochpdefault
-	{mso-style-name:msochpdefault;
-	margin-right:0in;
-	margin-left:0in;
-	font-size:12.0pt;
-	font-family:"Calibri","sans-serif";}
-p.msopapdefault, li.msopapdefault, div.msopapdefault
-	{mso-style-name:msopapdefault;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:12.0pt;
-	font-family:"Times New Roman","serif";}
-span.apple-converted-space
-	{mso-style-name:apple-converted-space;}
-span.BalloonTextChar
-	{mso-style-name:"Balloon Text Char";
-	mso-style-link:"Balloon Text";
-	font-family:"Tahoma","sans-serif";}
-.MsoChpDefault
-	{font-size:10.0pt;
-	font-family:"Calibri","sans-serif";}
-.MsoPapDefault
-	{margin-bottom:10.0pt;
-	line-height:115%;}
-@page WordSection1
-	{size:8.5in 11.0in;
-	margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
-	{page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US link=blue vlink=purple>
-
-<div class=WordSection1>
-
-<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
-text-align:justify;background:white'><b><span lang=EN-GB style='color:black'>References</span></b></p>
-
-<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
-text-align:justify;background:white'><span lang=EN-GB style='color:black'>Gupta,
-Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria,
-Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). <a name="OLE_LINK106"></a><a
-name="OLE_LINK107"></a>Change-O: a toolkit for analyzing large-scale B cell
-immunoglobulin repertoire sequencing data: Table 1. In<span
-class=apple-converted-space>&nbsp;</span><em>Bioinformatics, 31 (20), pp.
-3356–3358.</em><span class=apple-converted-space><i>&nbsp;</i></span>[</span><a
-href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span
-lang=EN-GB style='color:#303030'>doi:10.1093/bioinformatics/btv359</span></a><span
-lang=EN-GB style='color:black'>][</span><a
-href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span
-lang=EN-GB style='color:#303030'>Link</span></a><span lang=EN-GB
-style='color:black'>]</span></p>
-
-<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
-text-align:justify;background:white'><span lang=EN-GB style='color:black'>&nbsp;</span></p>
-
-<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
-text-align:justify;background:white'><a name="OLE_LINK110"><u><span lang=EN-GB
-style='color:black'>All, IGA, IGG, IGM and IGE tabs</span></u></a></p>
-
-<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
-text-align:justify;background:white'><span lang=EN-GB style='color:black'>In
-these tabs information on the clonal relation of transcripts can be found. To
-calculate clonal relation Change-O is used (Gupta et al, PMID: 26069265).
-Transcripts are considered clonally related if they have maximal three nucleotides
-difference in their CDR3 sequence and the same first V segment (as assigned by
-IMGT). Results are represented in a table format showing the clone size and the
-number of clones or sequences with this clone size. Change-O settings used are
-the </span><span lang=EN-GB>nucleotide hamming distance substitution model with
-a complete distance of maximal three. For clonal assignment the first gene
-segments were used, and the distances were not normalized. In case of
-asymmetric distances, the minimal distance was used.<span style='color:black'> </span></span></p>
-
-<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
-text-align:justify;background:white'><span lang=EN-GB style='color:black'>&nbsp;</span></p>
-
-<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
-text-align:justify;background:white'><u><span lang=EN-GB style='color:black'>Overlap
-tab</span></u><span lang=EN-GB style='color:black'> </span></p>
-
-<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
-text-align:justify;background:white'><span lang=EN-GB style='color:black'>This
-tab gives information on with which (sub)classe(s) each unique analyzed region
-(based on the exact nucleotide sequence of the analyzes region and the CDR3
-nucleotide sequence) is found with. This gives information if the combination
-of the exact same nucleotide sequence of the analyzed region and the CDR3
-sequence can be found in multiple (sub)classes.</span></p>
-
-<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
-text-align:justify;background:white'><span style='color:black'><img src=""> Please note that this tab is based on all
-sequences before filter unique sequences and the remove duplicates based on
-filters are applied. In this table only sequences occuring more than once are
-included. </span></p>
-
-</div>
-
-</body>
-
-</html>
--- a/shm_csr.htm	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Font Definitions */
- @font-face
-	{font-family:Calibri;
-	panose-1:2 15 5 2 2 2 4 3 2 4;}
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
-	{margin-top:0in;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:11.0pt;
-	font-family:"Calibri","sans-serif";}
-a:link, span.MsoHyperlink
-	{color:blue;
-	text-decoration:underline;}
-a:visited, span.MsoHyperlinkFollowed
-	{color:purple;
-	text-decoration:underline;}
-span.apple-converted-space
-	{mso-style-name:apple-converted-space;}
-.MsoChpDefault
-	{font-family:"Calibri","sans-serif";}
-.MsoPapDefault
-	{margin-bottom:10.0pt;
-	line-height:115%;}
-@page WordSection1
-	{size:8.5in 11.0in;
-	margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
-	{page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US link=blue vlink=purple>
-
-<div class=WordSection1>
-
-<p class=MsoNormalCxSpFirst style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
-graphs in this tab give insight into the subclass distribution of IGG and IGA
-transcripts. </span><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Human Cµ, C&#945;, C&#947; and C&#949;
-constant genes are assigned using a </span><span lang=EN-GB style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>custom script
-specifically designed for human (sub)class assignment in repertoire data as
-described in van Schouwenburg and IJspeert et al, submitted for publication. In
-this script the reference sequences for the subclasses are divided in 8
-nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are
-then individually aligned in the right order to each input sequence. The
-percentage of the chunks identified in each rearrangement is calculated in the
-‘chunk hit percentage’. </span><span lang=EN-GB style='font-size:12.0pt;
-line-height:115%;font-family:"Times New Roman","serif"'>C&#945; and C&#947;
-subclasses are very homologous and only differ in a few nucleotides. To assign
-subclasses the </span><span lang=EN-GB style='font-size:12.0pt;line-height:
-115%;font-family:"Times New Roman","serif"'>‘nt hit percentage’ is calculated.
-This percentage indicates how well the chunks covering the subclass specific
-nucleotide match with the different subclasses. </span><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Information
-on normal distribution of subclasses in healthy individuals of different ages
-can be found in IJspeert and van Schouwenburg et al, PMID: 27799928.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK100"></a><a
-name="OLE_LINK99"></a><a name="OLE_LINK25"><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGA
-subclass distribution</span></u></a></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
-chart showing the relative distribution of IGA1 and IGA2 transcripts in the
-sample.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGG
-subclass distribution</span></u></p>
-
-<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
-chart showing the relative distribution of IGG1, IGG2, IGG3 and IGG4
-transcripts in the sample.</span></p>
-
-</div>
-
-</body>
-
-</html>
--- a/shm_csr.py	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,501 +0,0 @@
-import argparse
-import logging
-import sys
-import os
-import re
-
-from collections import defaultdict
-
-def main():
-	parser = argparse.ArgumentParser()
-	parser.add_argument("--input", help="The '7_V-REGION-mutation-and-AA-change-table' and '10_V-REGION-mutation-hotspots' merged together, with an added 'best_match' annotation")
-	parser.add_argument("--genes", help="The genes available in the 'best_match' column")
-	parser.add_argument("--empty_region_filter", help="Where does the sequence start?", choices=['leader', 'FR1', 'CDR1', 'FR2'])
-	parser.add_argument("--output", help="Output file")
-
-	args = parser.parse_args()
-
-	infile = args.input
-	genes = str(args.genes).split(",")
-	empty_region_filter = args.empty_region_filter
-	outfile = args.output
-
-	genedic = dict()
-
-	mutationdic = dict()
-	mutationMatcher = re.compile("^(.)(\d+).(.),?[ ]?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?")
-	mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?.?([A-Z])?(.*)?")
-	mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")
-	mutationMatcher = re.compile("^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")
-	NAMatchResult = (None, None, None, None, None, None, '')
-	geneMatchers = {gene: re.compile("^" + gene + ".*") for gene in genes}
-	linecount = 0
-
-	IDIndex = 0
-	best_matchIndex = 0
-	fr1Index = 0
-	cdr1Index = 0
-	fr2Index = 0
-	cdr2Index = 0
-	fr3Index = 0
-	first = True
-	IDlist = []
-	mutationList = []
-	mutationListByID = {}
-	cdr1LengthDic = {}
-	cdr2LengthDic = {}
-
-	fr1LengthDict = {}
-	fr2LengthDict = {}
-	fr3LengthDict = {}
-
-	cdr1LengthIndex = 0
-	cdr2LengthIndex = 0
-
-	fr1SeqIndex = 0
-	fr2SeqIndex = 0
-	fr3SeqIndex = 0
-
-	tandem_sum_by_class = defaultdict(int)
-	expected_tandem_sum_by_class = defaultdict(float)
-
-	with open(infile, 'ru') as i:
-		for line in i:
-			if first:
-				linesplt = line.split("\t")
-				IDIndex = linesplt.index("Sequence.ID")
-				best_matchIndex = linesplt.index("best_match")
-				fr1Index = linesplt.index("FR1.IMGT")
-				cdr1Index = linesplt.index("CDR1.IMGT")
-				fr2Index = linesplt.index("FR2.IMGT")
-				cdr2Index = linesplt.index("CDR2.IMGT")
-				fr3Index = linesplt.index("FR3.IMGT")
-				cdr1LengthIndex = linesplt.index("CDR1.IMGT.seq")
-				cdr2LengthIndex = linesplt.index("CDR2.IMGT.seq")
-				fr1SeqIndex = linesplt.index("FR1.IMGT.seq")
-				fr2SeqIndex = linesplt.index("FR2.IMGT.seq")
-				fr3SeqIndex = linesplt.index("FR3.IMGT.seq")
-				first = False
-				continue
-			linecount += 1
-			linesplt = line.split("\t")
-			ID = linesplt[IDIndex]
-			genedic[ID] = linesplt[best_matchIndex]
-			
-			mutationdic[ID + "_FR1"] = []
-			if len(linesplt[fr1Index]) > 5 and empty_region_filter == "leader":
-				mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x]
-
-			mutationdic[ID + "_CDR1"] = []
-			if len(linesplt[cdr1Index]) > 5 and empty_region_filter in ["leader", "FR1"]:
-				mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
-
-			mutationdic[ID + "_FR2"] = []
-			if len(linesplt[fr2Index]) > 5 and empty_region_filter in ["leader", "FR1", "CDR1"]:
-				mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
-
-			mutationdic[ID + "_CDR2"] = []
-			if len(linesplt[cdr2Index]) > 5:
-				mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
-			
-			mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
-
-			mutationdic[ID + "_FR3"] = []
-			if len(linesplt[fr3Index]) > 5:
-				mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
-				
-			mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
-			mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
-
-			cdr1Length = len(linesplt[cdr1LengthIndex])
-			cdr2Length = len(linesplt[cdr2LengthIndex])
-
-			#print linesplt[fr2SeqIndex]
-			fr1Length = len(linesplt[fr1SeqIndex]) if empty_region_filter == "leader" else 0
-			fr2Length = len(linesplt[fr2SeqIndex]) if empty_region_filter in ["leader", "FR1", "CDR1"] else 0
-			fr3Length = len(linesplt[fr3SeqIndex])
-
-			cdr1LengthDic[ID] = cdr1Length
-			cdr2LengthDic[ID] = cdr2Length
-
-			fr1LengthDict[ID] = fr1Length
-			fr2LengthDict[ID] = fr2Length
-			fr3LengthDict[ID] = fr3Length
-
-			IDlist += [ID]
-	print "len(mutationdic) =", len(mutationdic)
-
-	with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle:
-		for ID, lst in mutationdic.iteritems():
-			for mut in lst:
-				out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in mut])))
-
-	#tandem mutation stuff
-	tandem_frequency = defaultdict(int)
-	mutation_frequency = defaultdict(int)
-	
-	mutations_by_id_dic = {}
-	first = True
-	mutation_by_id_file = os.path.join(os.path.dirname(outfile), "mutation_by_id.txt")
-	with open(mutation_by_id_file, 'r') as mutation_by_id:
-		for l in mutation_by_id:
-			if first:
-				first = False
-				continue
-			splt = l.split("\t")
-			mutations_by_id_dic[splt[0]] = int(splt[1])
-    
-	tandem_file = os.path.join(os.path.dirname(outfile), "tandems_by_id.txt")
-	with open(tandem_file, 'w') as o:
-		highest_tandem_length = 0
-
-		o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n")
-		for ID in IDlist:
-			mutations = mutationListByID[ID]
-			if len(mutations) == 0:
-				continue
-			last_mut = max(mutations, key=lambda x: int(x[1]))
-
-			last_mut_pos = int(last_mut[1])
-
-			mut_positions = [False] * (last_mut_pos + 1)
-
-			for mutation in mutations:
-				frm, where, to, frmAA, whereAA, toAA, thing = mutation
-				where = int(where)
-				mut_positions[where] = True
-
-			tandem_muts = []
-			tandem_start = -1
-			tandem_length = 0
-			for i in range(len(mut_positions)):
-				if mut_positions[i]:
-					if tandem_start == -1:
-						tandem_start = i
-					tandem_length += 1
-					#print "".join(["1" if x else "0" for x in mut_positions[:i+1]])
-				else:
-					if tandem_length > 1:
-						tandem_muts.append((tandem_start, tandem_length))
-						#print "{0}{1} {2}:{3}".format(" " * (i - tandem_length), "^" * tandem_length, tandem_start, tandem_length)
-					tandem_start = -1
-					tandem_length = 0
-			if tandem_length > 1:  # if the sequence ends with a tandem mutation
-				tandem_muts.append((tandem_start, tandem_length))
-
-			if len(tandem_muts) > 0:
-				if highest_tandem_length < len(tandem_muts):
-					highest_tandem_length = len(tandem_muts)
-
-			region_length = fr1LengthDict[ID] + cdr1LengthDic[ID] + fr2LengthDict[ID] + cdr2LengthDic[ID] + fr3LengthDict[ID]
-			longest_tandem = max(tandem_muts, key=lambda x: x[1]) if len(tandem_muts) else (0, 0)
-			num_mutations = mutations_by_id_dic[ID] # len(mutations)
-			f_num_mutations = float(num_mutations)
-			num_tandem_muts = len(tandem_muts)
-			expected_tandem_muts = f_num_mutations * (f_num_mutations - 1.0) / float(region_length)
-			o.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n".format(ID,
-																str(num_mutations),
-																str(num_tandem_muts),
-																str(region_length),
-																str(round(expected_tandem_muts, 2)),
-																str(longest_tandem[1]),
-																str(tandem_muts)))
-			gene = genedic[ID]
-			if gene.find("unmatched") == -1:
-				tandem_sum_by_class[gene] += num_tandem_muts
-				expected_tandem_sum_by_class[gene] += expected_tandem_muts
-
-				tandem_sum_by_class["all"] += num_tandem_muts
-				expected_tandem_sum_by_class["all"] += expected_tandem_muts
-
-				gene = gene[:3]
-				if gene in ["IGA", "IGG"]:
-					tandem_sum_by_class[gene] += num_tandem_muts
-					expected_tandem_sum_by_class[gene] += expected_tandem_muts
-			else:
-				tandem_sum_by_class["unmatched"] += num_tandem_muts
-				expected_tandem_sum_by_class["unmatched"] += expected_tandem_muts
-
-
-			for tandem_mut in tandem_muts:
-				tandem_frequency[str(tandem_mut[1])] += 1
-			#print "\t".join([ID, str(len(tandem_muts)), str(longest_tandem[1]) , str(tandem_muts)])
-
-	tandem_freq_file = os.path.join(os.path.dirname(outfile), "tandem_frequency.txt")
-	with open(tandem_freq_file, 'w') as o:
-		for frq in sorted([int(x) for x in tandem_frequency.keys()]):
-			o.write("{0}\t{1}\n".format(frq, tandem_frequency[str(frq)]))
-
-	tandem_row = []
-	genes_extra = list(genes)
-	genes_extra.append("all")
-	for x, y, in zip([tandem_sum_by_class[x] for x in genes_extra], [expected_tandem_sum_by_class[x] for x in genes_extra]):
-		if y != 0:
-			tandem_row += [x, round(y, 2), round(x / y, 2)]
-		else:
-			tandem_row += [x, round(y, 2), 0]
-
-	tandem_freq_file = os.path.join(os.path.dirname(outfile), "shm_overview_tandem_row.txt")
-	with open(tandem_freq_file, 'w') as o:
-		o.write("Tandems/Expected (ratio),{0}\n".format(",".join([str(x) for x in tandem_row])))
-
-	#print mutationList, linecount
-
-	AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] and i[5] != ";" else 0)[4]) + 1)  # [4] is the position of the AA mutation, None if silent
-	if AALength < 60:
-		AALength = 64
-
-	AA_mutation = [0] * AALength
-	AA_mutation_dic = {"IGA": AA_mutation[:], "IGG": AA_mutation[:], "IGM": AA_mutation[:], "IGE": AA_mutation[:], "unm": AA_mutation[:], "all": AA_mutation[:]}
-	AA_mutation_empty = AA_mutation[:]
-
-	print "AALength:", AALength
-	aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt"
-	with open(aa_mutations_by_id_file, 'w') as o:
-		o.write("ID\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n")
-		for ID in mutationListByID.keys():
-			AA_mutation_for_ID = AA_mutation_empty[:]
-			for mutation in mutationListByID[ID]:
-				if mutation[4] and mutation[5] != ";":
-					AA_mutation_position = int(mutation[4])
-					try:
-						AA_mutation[AA_mutation_position] += 1
-						AA_mutation_for_ID[AA_mutation_position] += 1
-					except Exception as e:
-						print e
-						print mutation
-						sys.exit()
-					clss = genedic[ID][:3]
-					AA_mutation_dic[clss][AA_mutation_position] += 1
-			o.write(ID + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in AA_mutation_for_ID[1:]]) + "\n")
-
-
-
-	#absent AA stuff
-	absentAACDR1Dic = defaultdict(list)
-	absentAACDR1Dic[5] = range(29,36)
-	absentAACDR1Dic[6] = range(29,35)
-	absentAACDR1Dic[7] = range(30,35)
-	absentAACDR1Dic[8] = range(30,34)
-	absentAACDR1Dic[9] = range(31,34)
-	absentAACDR1Dic[10] = range(31,33)
-	absentAACDR1Dic[11] = [32]
-
-	absentAACDR2Dic = defaultdict(list)
-	absentAACDR2Dic[0] = range(55,65)
-	absentAACDR2Dic[1] = range(56,65)
-	absentAACDR2Dic[2] = range(56,64)
-	absentAACDR2Dic[3] = range(57,64)
-	absentAACDR2Dic[4] = range(57,63)
-	absentAACDR2Dic[5] = range(58,63)
-	absentAACDR2Dic[6] = range(58,62)
-	absentAACDR2Dic[7] = range(59,62)
-	absentAACDR2Dic[8] = range(59,61)
-	absentAACDR2Dic[9] = [60]
-
-	absentAA = [len(IDlist)] * (AALength-1)
-	for k, cdr1Length in cdr1LengthDic.iteritems():
-		for c in absentAACDR1Dic[cdr1Length]:
-			absentAA[c] -= 1
-
-	for k, cdr2Length in cdr2LengthDic.iteritems():
-		for c in absentAACDR2Dic[cdr2Length]:
-			absentAA[c] -= 1
-
-
-	aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/absent_aa_id.txt"
-	with open(aa_mutations_by_id_file, 'w') as o:
-		o.write("ID\tcdr1length\tcdr2length\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n")
-		for ID in IDlist:
-			absentAAbyID = [1] * (AALength-1)
-			cdr1Length = cdr1LengthDic[ID]
-			for c in absentAACDR1Dic[cdr1Length]:
-				absentAAbyID[c] -= 1
-
-			cdr2Length = cdr2LengthDic[ID]
-			for c in absentAACDR2Dic[cdr2Length]:
-				absentAAbyID[c] -= 1
-			o.write(ID + "\t" + str(cdr1Length) + "\t" + str(cdr2Length) + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in absentAAbyID]) + "\n")
-
-	if linecount == 0:
-		print "No data, exiting"
-		with open(outfile, 'w') as o:
-			o.write("RGYW (%)," + ("0,0,0\n" * len(genes)))
-			o.write("WRCY (%)," + ("0,0,0\n" * len(genes)))
-			o.write("WA (%)," + ("0,0,0\n" * len(genes)))
-			o.write("TW (%)," + ("0,0,0\n" * len(genes)))
-		import sys
-
-		sys.exit()
-
-	hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)")
-	RGYWCount = {}
-	WRCYCount = {}
-	WACount = {}
-	TWCount = {}
-
-	#IDIndex = 0
-	ataIndex = 0
-	tatIndex = 0
-	aggctatIndex = 0
-	atagcctIndex = 0
-	first = True
-	with open(infile, 'ru') as i:
-		for line in i:
-			if first:
-				linesplt = line.split("\t")
-				ataIndex = linesplt.index("X.a.t.a")
-				tatIndex = linesplt.index("t.a.t.")
-				aggctatIndex = linesplt.index("X.a.g.g.c.t..a.t.")
-				atagcctIndex = linesplt.index("X.a.t..a.g.c.c.t.")
-				first = False
-				continue
-			linesplt = line.split("\t")
-			gene = linesplt[best_matchIndex]
-			ID = linesplt[IDIndex]
-			RGYW = [(int(x), int(y), z) for (x, y, z) in
-					[hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]]
-			WRCY = [(int(x), int(y), z) for (x, y, z) in
-					[hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]]
-			WA = [(int(x), int(y), z) for (x, y, z) in
-				[hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]]
-			TW = [(int(x), int(y), z) for (x, y, z) in
-				[hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]]
-			RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0, 0, 0, 0
-
-			with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "RGYW.txt"), 'a') as out_handle:
-				for hotspot in RGYW:
-					out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in hotspot])))
-
-			mutationList = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
-			for mutation in mutationList:
-				frm, where, to, AAfrm, AAwhere, AAto, junk = mutation
-				mutation_in_RGYW = any(((start <= int(where) <= end) for (start, end, region) in RGYW))
-				mutation_in_WRCY = any(((start <= int(where) <= end) for (start, end, region) in WRCY))
-				mutation_in_WA = any(((start <= int(where) <= end) for (start, end, region) in WA))
-				mutation_in_TW = any(((start <= int(where) <= end) for (start, end, region) in TW))
-
-				in_how_many_motifs = sum([mutation_in_RGYW, mutation_in_WRCY, mutation_in_WA, mutation_in_TW])
-
-				if in_how_many_motifs > 0:
-					RGYWCount[ID] += (1.0 * int(mutation_in_RGYW)) / in_how_many_motifs
-					WRCYCount[ID] += (1.0 * int(mutation_in_WRCY)) / in_how_many_motifs
-					WACount[ID] += (1.0 * int(mutation_in_WA)) / in_how_many_motifs
-					TWCount[ID] += (1.0 * int(mutation_in_TW)) / in_how_many_motifs
-			
-			mutations_in_motifs_file = os.path.join(os.path.dirname(os.path.abspath(infile)), "mutation_in_motifs.txt")
-			if not os.path.exists(mutation_by_id_file):
-				with open(mutations_in_motifs_file, 'w') as out_handle:
-					out_handle.write("{0}\n".format("\t".join([
-						"Sequence.ID",
-						"mutation_position",
-						"region",
-						"from_nt",
-						"to_nt",
-						"mutation_position_AA",
-						"from_AA",
-						"to_AA",
-						"motif",
-						"motif_start_nt",
-						"motif_end_nt",
-						"rest"
-					])))
-
-			with open(mutations_in_motifs_file, 'a') as out_handle:
-				motif_dic = {"RGYW": RGYW, "WRCY": WRCY, "WA": WA, "TW": TW}
-				for mutation in mutationList:
-					frm, where, to, AAfrm, AAwhere, AAto, junk = mutation
-					for motif in motif_dic.keys():
-							
-						for start, end, region in motif_dic[motif]:
-							if start <= int(where) <= end:
-								out_handle.write("{0}\n".format(
-									"\t".join([
-										ID,
-										where,
-										region,
-										frm,
-										to,
-										str(AAwhere),
-										str(AAfrm),
-										str(AAto),
-										motif,
-										str(start),
-										str(end),
-										str(junk)
-									])
-								))
-
-
-
-	def mean(lst):
-		return (float(sum(lst)) / len(lst)) if len(lst) > 0 else 0.0
-
-
-	def median(lst):
-		lst = sorted(lst)
-		l = len(lst)
-		if l == 0:
-			return 0
-		if l == 1:
-			return lst[0]
-			
-		l = int(l / 2)
-		
-		if len(lst) % 2 == 0:
-			return float(lst[l] + lst[(l - 1)]) / 2.0
-		else:
-			return lst[l]
-
-	funcs = {"mean": mean, "median": median, "sum": sum}
-
-	directory = outfile[:outfile.rfind("/") + 1]
-	value = 0
-	valuedic = dict()
-
-	for fname in funcs.keys():
-		for gene in genes:
-			with open(directory + gene + "_" + fname + "_value.txt", 'r') as v:
-				valuedic[gene + "_" + fname] = float(v.readlines()[0].rstrip())
-		with open(directory + "all_" + fname + "_value.txt", 'r') as v:
-			valuedic["total_" + fname] = float(v.readlines()[0].rstrip())
-		
-
-	def get_xyz(lst, gene, f, fname):
-		x = round(round(f(lst), 1))
-		y = valuedic[gene + "_" + fname]
-		z = str(round(x / float(y) * 100, 1)) if y != 0 else "0"
-		return (str(x), str(y), z)
-
-	dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount}
-	arr = ["RGYW", "WRCY", "WA", "TW"]
-
-	for fname in funcs.keys():
-		func = funcs[fname]
-		foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt"
-		with open(foutfile, 'w') as o:
-			for typ in arr:
-				o.write(typ + " (%)")
-				curr = dic[typ]
-				for gene in genes:
-					geneMatcher = geneMatchers[gene]
-					if valuedic[gene + "_" + fname] is 0:
-						o.write(",0,0,0")
-					else:
-						x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname)
-						o.write("," + x + "," + y + "," + z)
-				x, y, z = get_xyz([y for x, y in curr.iteritems() if not genedic[x].startswith("unmatched")], "total", func, fname)
-				#x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname)
-				o.write("," + x + "," + y + "," + z + "\n")
-
-
-	# for testing
-	seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt"
-	with open(seq_motif_file, 'w') as o:
-		o.write("ID\tRGYW\tWRCY\tWA\tTW\n")
-		for ID in IDlist:
-			#o.write(ID + "\t" + str(round(RGYWCount[ID], 2)) + "\t" + str(round(WRCYCount[ID], 2)) + "\t" + str(round(WACount[ID], 2)) + "\t" + str(round(TWCount[ID], 2)) + "\n")
-			o.write(ID + "\t" + str(RGYWCount[ID]) + "\t" + str(WRCYCount[ID]) + "\t" + str(WACount[ID]) + "\t" + str(TWCount[ID]) + "\n")
-
-if __name__ == "__main__":
-	main()
--- a/shm_csr.r	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,561 +0,0 @@
-library(data.table)
-library(ggplot2)
-library(reshape2)
-
-args <- commandArgs(trailingOnly = TRUE)
-
-input = args[1]
-genes = unlist(strsplit(args[2], ","))
-outputdir = args[3]
-empty.region.filter = args[4]
-setwd(outputdir)
-
-#dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F)
-
-dat = data.frame(fread(input, sep="\t", header=T, stringsAsFactors=F)) #fread because read.table suddenly skips certain rows...
-
-if(length(dat$Sequence.ID) == 0){
-  setwd(outputdir)
-  result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5))
-  row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)")
-  write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
-  transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4))
-  row.names(transitionTable) = c("A", "C", "G", "T")
-  transitionTable["A","A"] = NA
-  transitionTable["C","C"] = NA
-  transitionTable["G","G"] = NA
-  transitionTable["T","T"] = NA
-
-  write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA)
-  cat("0", file="n.txt")
-  stop("No data")
-}
-
-cleanup_columns = c("FR1.IMGT.c.a",
-					"FR2.IMGT.g.t",
-					"CDR1.IMGT.Nb.of.nucleotides",
-					"CDR2.IMGT.t.a",
-					"FR1.IMGT.c.g",
-					"CDR1.IMGT.c.t",
-					"FR2.IMGT.a.c",
-					"FR2.IMGT.Nb.of.mutations",
-					"FR2.IMGT.g.c",
-					"FR2.IMGT.a.g",
-					"FR3.IMGT.t.a",
-					"FR3.IMGT.t.c",
-					"FR2.IMGT.g.a",
-					"FR3.IMGT.c.g",
-					"FR1.IMGT.Nb.of.mutations",
-					"CDR1.IMGT.g.a",
-					"CDR1.IMGT.t.g",
-					"CDR1.IMGT.g.c",
-					"CDR2.IMGT.Nb.of.nucleotides",
-					"FR2.IMGT.a.t",
-					"CDR1.IMGT.Nb.of.mutations",
-					"CDR3.IMGT.Nb.of.nucleotides",
-					"CDR1.IMGT.a.g",
-					"FR3.IMGT.a.c",
-					"FR1.IMGT.g.a",
-					"FR3.IMGT.a.g",
-					"FR1.IMGT.a.t",
-					"CDR2.IMGT.a.g",
-					"CDR2.IMGT.Nb.of.mutations",
-					"CDR2.IMGT.g.t",
-					"CDR2.IMGT.a.c",
-					"CDR1.IMGT.t.c",
-					"FR3.IMGT.g.c",
-					"FR1.IMGT.g.t",
-					"FR3.IMGT.g.t",
-					"CDR1.IMGT.a.t",
-					"FR1.IMGT.a.g",
-					"FR3.IMGT.a.t",
-					"FR3.IMGT.Nb.of.nucleotides",
-					"FR2.IMGT.t.c",
-					"CDR2.IMGT.g.a",
-					"FR2.IMGT.t.a",
-					"CDR1.IMGT.t.a",
-					"FR2.IMGT.t.g",
-					"FR3.IMGT.t.g",
-					"FR2.IMGT.Nb.of.nucleotides",
-					"FR1.IMGT.t.a",
-					"FR1.IMGT.t.g",
-					"FR3.IMGT.c.t",
-					"FR1.IMGT.t.c",
-					"CDR2.IMGT.a.t",
-					"FR2.IMGT.c.t",
-					"CDR1.IMGT.g.t",
-					"CDR2.IMGT.t.g",
-					"FR1.IMGT.Nb.of.nucleotides",
-					"CDR1.IMGT.c.g",
-					"CDR2.IMGT.t.c",
-					"FR3.IMGT.g.a",
-					"CDR1.IMGT.a.c",
-					"FR2.IMGT.c.a",
-					"FR3.IMGT.Nb.of.mutations",
-					"FR2.IMGT.c.g",
-					"CDR2.IMGT.g.c",
-					"FR1.IMGT.g.c",
-					"CDR2.IMGT.c.t",
-					"FR3.IMGT.c.a",
-					"CDR1.IMGT.c.a",
-					"CDR2.IMGT.c.g",
-					"CDR2.IMGT.c.a",
-					"FR1.IMGT.c.t",
-					"FR1.IMGT.Nb.of.silent.mutations",
-					"FR2.IMGT.Nb.of.silent.mutations",
-					"FR3.IMGT.Nb.of.silent.mutations",
-					"FR1.IMGT.Nb.of.nonsilent.mutations",
-					"FR2.IMGT.Nb.of.nonsilent.mutations",
-					"FR3.IMGT.Nb.of.nonsilent.mutations")
-
-print("Cleaning up columns")
-
-for(col in cleanup_columns){
-  dat[,col] = gsub("\\(.*\\)", "", dat[,col])
-  #dat[dat[,col] == "",] = "0"
-  dat[,col] = as.numeric(dat[,col])
-  dat[is.na(dat[,col]),col] = 0
-}
-
-regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3")
-if(empty.region.filter == "FR1") {
-	regions = c("CDR1", "FR2", "CDR2", "FR3")
-} else if (empty.region.filter == "CDR1") {
-	regions = c("FR2", "CDR2", "FR3")
-} else if (empty.region.filter == "FR2") {
-	regions = c("CDR2", "FR3")
-}
-
-pdfplots = list() #save() this later to create the pdf plots in another script (maybe avoids the "address (nil), cause memory not mapped")
-
-sum_by_row = function(x, columns) { sum(as.numeric(x[columns]), na.rm=T) }
-
-print("aggregating data into new columns")
-
-VRegionMutations_columns = paste(regions, ".IMGT.Nb.of.mutations", sep="")
-dat$VRegionMutations =  apply(dat, FUN=sum_by_row, 1, columns=VRegionMutations_columns)
-
-VRegionNucleotides_columns = paste(regions, ".IMGT.Nb.of.nucleotides", sep="")
-dat$FR3.IMGT.Nb.of.nucleotides = nchar(dat$FR3.IMGT.seq)
-dat$VRegionNucleotides =  apply(dat, FUN=sum_by_row, 1, columns=VRegionNucleotides_columns)
-
-transitionMutations_columns = paste(rep(regions, each=4), c(".IMGT.a.g", ".IMGT.g.a", ".IMGT.c.t", ".IMGT.t.c"), sep="")
-dat$transitionMutations = apply(dat, FUN=sum_by_row, 1, columns=transitionMutations_columns)
-
-transversionMutations_columns = paste(rep(regions, each=8), c(".IMGT.a.c",".IMGT.c.a",".IMGT.a.t",".IMGT.t.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t",".IMGT.t.g"), sep="")
-dat$transversionMutations = apply(dat, FUN=sum_by_row, 1, columns=transversionMutations_columns)
-
-transitionMutationsAtGC_columns = paste(rep(regions, each=2), c(".IMGT.g.a",".IMGT.c.t"), sep="")
-dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns)
-
-totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.c.g",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.g.a",".IMGT.g.t"), sep="")
-#totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.c.g",".IMGT.g.t"), sep="")
-dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns)
-
-transitionMutationsAtAT_columns = paste(rep(regions, each=2), c(".IMGT.a.g",".IMGT.t.c"), sep="")
-dat$transitionMutationsAtAT = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtAT_columns)
-
-totalMutationsAtAT_columns = paste(rep(regions, each=6), c(".IMGT.a.g",".IMGT.a.c",".IMGT.a.t",".IMGT.t.g",".IMGT.t.c",".IMGT.t.a"), sep="")
-#totalMutationsAtAT_columns = paste(rep(regions, each=5), c(".IMGT.a.g",".IMGT.t.c",".IMGT.a.c",".IMGT.g.c",".IMGT.t.g"), sep="")
-dat$totalMutationsAtAT = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtAT_columns)
-
-FRRegions = regions[grepl("FR", regions)]
-CDRRegions = regions[grepl("CDR", regions)]
-
-FR_silentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.silent.mutations", sep="")
-dat$silentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_silentMutations_columns)
-
-CDR_silentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.silent.mutations", sep="")
-dat$silentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_silentMutations_columns)
-
-FR_nonSilentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="")
-dat$nonSilentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_nonSilentMutations_columns)
-
-CDR_nonSilentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="")
-dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns)
-
-mutation.sum.columns = c("Sequence.ID", "VRegionMutations", "VRegionNucleotides", "transitionMutations", "transversionMutations", "transitionMutationsAtGC", "transitionMutationsAtAT", "silentMutationsFR", "nonSilentMutationsFR", "silentMutationsCDR", "nonSilentMutationsCDR")
-write.table(dat[,mutation.sum.columns], "mutation_by_id.txt", sep="\t",quote=F,row.names=F,col.names=T)
-
-setwd(outputdir)
-
-write.table(dat, input, sep="\t",quote=F,row.names=F,col.names=T)
-
-base.order.x = data.frame(base=c("A", "C", "G", "T"), order.x=1:4)
-base.order.y = data.frame(base=c("T", "G", "C", "A"), order.y=1:4)
-
-calculate_result = function(i, gene, dat, matrx, f, fname, name){
-	tmp = dat[grepl(paste("^", gene, ".*", sep=""), dat$best_match),]
-
-	j = i - 1
-	x = (j * 3) + 1
-	y = (j * 3) + 2
-	z = (j * 3) + 3
-
-	if(nrow(tmp) > 0){
-		if(fname == "sum"){
-			matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
-			matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
-			matrx[1,z] = round(f(matrx[1,x] / matrx[1,y]) * 100, digits=1)
-		} else {
-			matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
-			matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
-			matrx[1,z] = round(f(tmp$VRegionMutations / tmp$VRegionNucleotides) * 100, digits=1)
-		}
-
-		matrx[2,x] = round(f(tmp$transitionMutations, na.rm=T), digits=1)
-		matrx[2,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
-		matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1)
-
-		matrx[3,x] = round(f(tmp$transversionMutations, na.rm=T), digits=1)
-		matrx[3,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
-		matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1)
-
-		matrx[4,x] = round(f(tmp$transitionMutationsAtGC, na.rm=T), digits=1)
-		matrx[4,y] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1)
-		matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
-
-		matrx[5,x] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1)
-		matrx[5,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
-		matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
-
-		matrx[6,x] = round(f(tmp$transitionMutationsAtAT, na.rm=T), digits=1)
-		matrx[6,y] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1)
-		matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1)
-
-		matrx[7,x] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1)
-		matrx[7,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
-		matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1)
-
-		matrx[8,x] = round(f(tmp$nonSilentMutationsFR, na.rm=T), digits=1)
-		matrx[8,y] = round(f(tmp$silentMutationsFR, na.rm=T), digits=1)
-		matrx[8,z] = round(matrx[8,x] / matrx[8,y], digits=1)
-
-		matrx[9,x] = round(f(tmp$nonSilentMutationsCDR, na.rm=T), digits=1)
-		matrx[9,y] = round(f(tmp$silentMutationsCDR, na.rm=T), digits=1)
-		matrx[9,z] = round(matrx[9,x] / matrx[9,y], digits=1)
-
-		if(fname == "sum"){
-			
-			regions.fr = regions[grepl("FR", regions)]
-			regions.fr = paste(regions.fr, ".IMGT.Nb.of.nucleotides", sep="")
-			regions.cdr = regions[grepl("CDR", regions)]
-			regions.cdr = paste(regions.cdr, ".IMGT.Nb.of.nucleotides", sep="")
-			
-			if(length(regions.fr) > 1){ #in case there is only on FR region (rowSums needs >1 column)
-				matrx[10,x] = round(f(rowSums(tmp[,regions.fr], na.rm=T)), digits=1)
-			} else {
-				matrx[10,x] = round(f(tmp[,regions.fr], na.rm=T), digits=1)
-			}
-			matrx[10,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
-			matrx[10,z] = round(matrx[10,x] / matrx[10,y] * 100, digits=1)
-
-			if(length(regions.cdr) > 1){ #in case there is only on CDR region
-				matrx[11,x] = round(f(rowSums(tmp[,regions.cdr], na.rm=T)), digits=1)
-			} else {
-				matrx[11,x] = round(f(tmp[,regions.cdr], na.rm=T), digits=1)
-			}
-			matrx[11,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
-			matrx[11,z] = round(matrx[11,x] / matrx[11,y] * 100, digits=1)
-		}
-	}
-  
-	transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros)
-	row.names(transitionTable) = c("A", "C", "G", "T")
-	transitionTable["A","A"] = NA
-	transitionTable["C","C"] = NA
-	transitionTable["G","G"] = NA
-	transitionTable["T","T"] = NA
-
-	if(nrow(tmp) > 0){
-		for(nt1 in nts){
-			for(nt2 in nts){
-				if(nt1 == nt2){
-					next
-				}
-				NT1 = LETTERS[letters == nt1]
-				NT2 = LETTERS[letters == nt2]
-				FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="")
-				CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="")
-				FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="")
-				CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="")
-				FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="")
-				if (empty.region.filter == "leader"){
-					transitionTable[NT1,NT2] = sum(tmp[,c(FR1, CDR1, FR2, CDR2, FR3)])
-				} else if (empty.region.filter == "FR1") {
-					transitionTable[NT1,NT2] = sum(tmp[,c(CDR1, FR2, CDR2, FR3)])
-				} else if (empty.region.filter == "CDR1") {
-					transitionTable[NT1,NT2] = sum(tmp[,c(FR2, CDR2, FR3)])
-				} else if (empty.region.filter == "FR2") {
-					transitionTable[NT1,NT2] = sum(tmp[,c(CDR2, FR3)])
-				}
-			}
-		}
-		transition = transitionTable
-		transition$id = names(transition)
-		
-		transition2 = melt(transition, id.vars="id")
-
-		transition2 = merge(transition2, base.order.x, by.x="id", by.y="base")
-
-		transition2 = merge(transition2, base.order.y, by.x="variable", by.y="base")
-
-		transition2[is.na(transition2$value),]$value = 0
-		
-		if(any(transition2$value != 0)){ #having a transition table filled with 0 is bad
-			print("Plotting heatmap and transition")
-			png(filename=paste("transitions_stacked_", name, ".png", sep=""))
-			p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar
-			p = p + xlab("From base") + ylab("") + ggtitle("Bargraph transition information") + guides(fill=guide_legend(title=NULL))
-			p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4"))
-			#p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black"))
-			print(p)
-			dev.off()
-			
-			pdfplots[[paste("transitions_stacked_", name, ".pdf", sep="")]] <<- p
-			
-			png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
-			p = ggplot(transition2, aes(factor(reorder(variable, -order.y)), factor(reorder(id, -order.x)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap
-			p = p + xlab("To base") + ylab("From Base") + ggtitle("Heatmap transition information")  + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
-			print(p)
-			dev.off()
-			
-			pdfplots[[paste("transitions_heatmap_", name, ".pdf", sep="")]] <<- p
-		} else {
-			#print("No data to plot")
-		}
-	}
-
-	#print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep=""))
-	write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
-	write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", name , "_", fname, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
-	cat(matrx[1,x], file=paste(name, "_", fname, "_value.txt" ,sep=""))
-	cat(nrow(tmp), file=paste(name, "_", fname, "_n.txt" ,sep=""))
-	#print(paste(fname, name, nrow(tmp)))
-	matrx
-}
-nts = c("a", "c", "g", "t")
-zeros=rep(0, 4)
-funcs = c(median, sum, mean)
-fnames = c("median", "sum", "mean")
-
-print("Creating result tables")
-
-for(i in 1:length(funcs)){
-	func = funcs[[i]]
-	fname = fnames[[i]]
-	
-	print(paste("Creating table for", fname))
-	
-	rows = 9
-	if(fname == "sum"){
-		rows = 11
-	}
-	matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows)
-	for(i in 1:length(genes)){
-		matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i])
-	}
-	matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all")
-
-	result = data.frame(matrx)
-	if(fname == "sum"){
-		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR")
-	} else {
-		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)")
-	}
-	write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F)
-}
-
-print("Adding median number of mutations to sum table")
-sum.table = read.table("mutations_sum.txt", sep=",", header=F)
-median.table = read.table("mutations_median.txt", sep=",", header=F)
-
-new.table = sum.table[1,]
-new.table[2,] = median.table[1,]
-new.table[3:12,] = sum.table[2:11,]
-new.table[,1] = as.character(new.table[,1])
-new.table[2,1] = "Median of Number of Mutations (%)"
-
-#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),]
-
-write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F)
-
-print("Plotting IGA piechart")
-
-dat = dat[!grepl("^unmatched", dat$best_match),]
-
-#blegh
-
-genesForPlot = dat[grepl("IGA", dat$best_match),]$best_match
-
-if(length(genesForPlot) > 0){
-	genesForPlot = data.frame(table(genesForPlot))
-	colnames(genesForPlot) = c("Gene","Freq")
-	genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
-
-	pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene))
-	pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGA1" = "lightblue1", "IGA2" = "blue4"))
-	pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
-	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
-	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclass distribution", "( n =", sum(genesForPlot$Freq), ")"))
-	write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
-
-	png(filename="IGA.png")
-	print(pc)
-	dev.off()
-	
-	pdfplots[["IGA.pdf"]] <- pc	
-}
-
-print("Plotting IGG piechart")
-
-genesForPlot = dat[grepl("IGG", dat$best_match),]$best_match
-
-if(length(genesForPlot) > 0){
-	genesForPlot = data.frame(table(genesForPlot))
-	colnames(genesForPlot) = c("Gene","Freq")
-	genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
-
-	pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene))
-	pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred"))
-	pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
-	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
-	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclass distribution", "( n =", sum(genesForPlot$Freq), ")"))
-	write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
-
-	png(filename="IGG.png")
-	print(pc)
-	dev.off()
-	
-	pdfplots[["IGG.pdf"]] <- pc	
-}
-
-print("Plotting scatterplot")
-
-dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2)
-dat.clss = dat
-
-dat.clss$best_match = substr(dat.clss$best_match, 0, 3)
-
-dat.clss = rbind(dat, dat.clss)
-
-p = ggplot(dat.clss, aes(best_match, percentage_mutations))
-p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA)
-p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
-p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
-p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
-
-png(filename="scatter.png")
-print(p)
-dev.off()
-
-pdfplots[["scatter.pdf"]] <- p
-
-write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
-
-print("Plotting frequency ranges plot")
-
-dat$best_match_class = substr(dat$best_match, 0, 3)
-freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20")
-dat$frequency_bins = cut(dat$percentage_mutations, breaks=c(-Inf, 0, 2,5,10,15,20, Inf), labels=freq_labels)
-
-frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match_class")])
-
-frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match_class", "frequency_bins")])
-
-frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class")
-
-frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
-
-p = ggplot(frequency_bins_data, aes(frequency_bins, frequency))
-p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
-p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
-
-png(filename="frequency_ranges.png")
-print(p)
-dev.off()
-
-pdfplots[["frequency_ranges.pdf"]] <- p
-
-save(pdfplots, file="pdfplots.RData")
-
-frequency_bins_data_by_class = frequency_bins_data
-
-frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),]
-
-frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins)
-frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
-frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
-
-write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T)
-
-frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")])
-
-frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")])
-
-frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match")
-
-frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
-
-frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),]
-frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins)
-frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher"
-frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower"
-
-write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
--- a/shm_csr.xml	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,240 +0,0 @@
-<tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.0">
-	<description></description>
-	<requirements>
-		<requirement type="package" version="2.7">python</requirement>
-		<requirement type="package" version="1.16.0">numpy</requirement>
-		<requirement type="package" version="1.2.0">xlrd</requirement>
-		<requirement type="package" version="3.0.0">r-ggplot2</requirement>
-		<requirement type="package" version="1.4.3">r-reshape2</requirement>
-		<requirement type="package" version="0.5.0">r-scales</requirement>
-		<requirement type="package" version="3.4_5">r-seqinr</requirement>
-		<requirement type="package" version="1.11.4">r-data.table</requirement>
-	</requirements>
-	<command interpreter="bash">
-		#if str ( $filter_unique.filter_unique_select ) == "remove":
-			wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select $filter_unique.filter_unique_clone_count $class_filter_cond.class_filter $empty_region_filter $fast
-		#else:
-			wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select 2 $class_filter_cond.class_filter $empty_region_filter $fast
-		#end if
-	</command>
-	<inputs>
-		<param name="in_file" type="data" format="data" label="IMGT zip file to be analysed" />
-		<param name="empty_region_filter" type="select" label="Sequence starts at" help="" >
-			<option value="leader" selected="true">Leader: include FR1, CDR1, FR2, CDR2, FR3 in filters</option>
-			<option value="FR1" selected="true">FR1: include CDR1,FR2,CDR2,FR3 in filters</option>
-			<option value="CDR1">CDR1: include FR2,CDR2,FR3 in filters</option>
-			<option value="FR2">FR2: include CDR2,FR3 in filters</option>
-		</param>
-		<param name="functionality" type="select" label="Functionality filter" help="" >
-			<option value="productive" selected="true">Productive (Productive and Productive see comment)</option>
-			<option value="unproductive">Unproductive (Unproductive and Unproductive see comment)</option>
-			<option value="remove_unknown">Productive and Unproductive (Productive, Productive see comment, Unproductive, Unproductive and Unproductive see comment)</option>
-		</param>
-		<conditional name="filter_unique">
-			<param name="filter_unique_select" type="select" label="Filter unique sequences" help="See below for an example.">
-				<option value="remove" selected="true">Remove uniques (Based on nucleotide sequence + C)</option>
-				<option value="remove_vjaa">Remove uniques (Based on V+J+CDR3 (AA))</option>
-				<option value="keep">Keep uniques (Based on nucleotide sequence + C)</option>
-				<option value="no">No</option>
-			</param>
-			<when value="remove">
-				<param name="filter_unique_clone_count" size="4" type="integer" label="How many sequences should be in a group to keep 1 of them" value="2" min="2"/>
-			</when>
-			<when value="keep"></when>
-			<when value="no"></when>
-		</conditional>
-		<param name="unique" type="select" label="Remove duplicates based on" help="" >
-			<option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
-			<option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option>
-			<option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option>
-			<option value="CDR3.IMGT.AA">CDR3 (AA)</option>
-			
-			<option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3 (nt), C region</option>
-			<option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option>
-			<option value="CDR3.IMGT.seq,best_match_class">CDR3 (nt), C region</option>
-			<option value="CDR3.IMGT.seq">CDR3 (nt)</option>
-			<option value="Sequence.ID" selected="true">Don't remove duplicates</option>
-		</param>
-		<conditional name="class_filter_cond">
-			<param name="class_filter" type="select" label="Human Class/Subclass filter" help="" >
-				<option value="70_70" selected="true">>70% class and >70% subclass</option>
-				<option value="60_55">>60% class and >55% subclass</option>
-				<option value="70_0">>70% class</option>
-				<option value="60_0">>60% class</option>
-				<option value="19_0">>19% class</option>
-				<option value="101_101">Do not assign (sub)class</option>
-			</param>
-			<when value="70_70"></when>
-			<when value="60_55"></when>
-			<when value="70_0"></when>
-			<when value="60_0"></when>
-			<when value="19_0"></when>
-			<when value="101_101"></when>
-		</conditional>
-		<conditional name="naive_output_cond">
-			<param name="naive_output" type="select" label="Output new IMGT archives per class into your history?">
-				<option value="yes">Yes</option>
-				<option value="no" selected="true">No</option>
-			</param>
-			<when value="yes"></when>
-			<when value="no"></when>
-		</conditional>
-		<param name="fast" type="select" label="Fast" help="Skips generating the new ZIP files and Change-O/Baseline" >
-			<option value="yes">Yes</option>
-			<option value="no" selected="true">No</option>
-		</param>
-	</inputs>
-	<outputs>
-		<data format="html" name="out_file" label = "SHM &amp; CSR on ${in_file.name}"/>
-		<data format="imgt_archive" name="naive_output_ca" label = "Filtered IMGT IGA: ${in_file.name}" >
-		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
-		    <filter>class_filter_cond['class_filter'] != "101_101"</filter>
-		</data>
-		<data format="imgt_archive" name="naive_output_cg" label = "Filtered IMGT IGG: ${in_file.name}" >
-		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
-		    <filter>class_filter_cond['class_filter'] != "101_101"</filter>
-		</data>
-		<data format="imgt_archive" name="naive_output_cm" label = "Filtered IMGT IGM: ${in_file.name}" >
-		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
-		    <filter>class_filter_cond['class_filter'] != "101_101"</filter>
-		</data>
-		<data format="imgt_archive" name="naive_output_ce" label = "Filtered IMGT IGE: ${in_file.name}" >
-		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
-		    <filter>class_filter_cond['class_filter'] != "101_101"</filter>
-		</data>
-		<data format="imgt_archive" name="naive_output_all" label = "Filtered IMGT all: ${in_file.name}" >
-		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
-		    <filter>class_filter_cond['class_filter'] == "101_101"</filter>
-		</data>
-	</outputs>
-	<tests>
-		<test>
-			<param name="fast" value="yes"/>
-			<output name="out_file" file="test1.html"/>
-		</test>
-	</tests>
-	<help>
-<![CDATA[
-**References**
-
-Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying selection in high-throughput Immunoglobulin sequencing data sets. In *Nucleic Acids Research, 40 (17), pp. e134–e134.* [`doi:10.1093/nar/gks457`_]
-
-.. _doi:10.1093/nar/gks457: http://dx.doi.org/10.1093/nar/gks457
-
-Gupta, Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria, Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. *In Bioinformatics, 31 (20), pp. 3356–3358.* [`doi:10.1093/bioinformatics/btv359`_]
-
-.. _doi:10.1093/bioinformatics/btv359: http://dx.doi.org/10.1093/bioinformatics/btv359
-
------
-
-**Input files**
-
-IMGT/HighV-QUEST .zip and .txz are accepted as input files. The file to be analysed can be selected using the dropdown menu.
-
-.. class:: infomark
-
-Note: Files can be uploaded by using “get data” and “upload file” and selecting “IMGT archive” as a file type. Special characters should be avoided in the file names of the uploaded samples, as these can cause errors when running the immune repertoire pipeline. Underscores are allowed in the file names.
-
------
-
-**Sequence starts at**
-
-Identifies the region which will be included in the analysis (analysed region)
-
-- Sequences which are missing a gene region (FR1/CDR1 etc) in the analysed region are excluded. 
-- Sequences containing an ambiguous base in the analysed region or the CDR3 are excluded. 
-- All other filtering/analysis is based on the analysed region.
-
------
-
-**Functionality filter**
-
-Allows filtering on productive rearrangements, unproductive rearrangements or both based on the assignment provided by IMGT. 
-
-**Filter unique sequences**
-
-*Remove unique:*
-
-
-This filter consists of two different steps.
-
-Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” and the CDR3 (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
-
-Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region, the CDR3 and the same (sub)class).
-
-.. class:: infomark
-
-This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
-
-*Keep unique:*
-
-Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
-
-Example of the sequences that are included using either the “remove unique filter” or the “keep unique filter”:
-
-+--------------------------+
-|       unique filter      |
-+--------+--------+--------+
-| values | remove | keep   |
-+--------+--------+--------+
-|   A    |   A    |   A    |
-+--------+--------+--------+
-|   A    |   B    |   B    |
-+--------+--------+--------+
-|   B    |   D    |   C    |
-+--------+--------+--------+
-|   B    |        |   D    |
-+--------+--------+--------+
-|   C    |        |        |
-+--------+--------+--------+
-|   D    |        |        |
-+--------+--------+--------+
-|   D    |        |        |
-+--------+--------+--------+
-
------
- 
-**Remove duplicates based on**
-
-Allows the selection of a single sequence per clone. Different definitions of a clone can be chosen. 
-
-.. class:: infomark
-
-Note: The first sequence (in the data set) of each clone is always included in the analysis. When the first sequence of a clone is unmatched (no subclass assigned), the first matched sequence will be included instead. This means that altering the data order (for instance by sorting) can change the sequence which is included in the analysis and can therefore slightly influence the results. 
-
------
-
-**Human Class/Subclass filter**
-
-.. class:: warningmark
-
-Note: This filter should only be applied when analysing human IGH data in which a (sub)class specific sequence is present. Otherwise please select the do not assign (sub)class option to prevent errors when running the pipeline. 
-
-The class percentage is based on the ‘chunk hit percentage’ (see below). The subclass percentage is based on the ‘nt hit percentage’ (see below).
-
-The SHM & CSR pipeline identifies human Cµ, Cα, Cγ and Cε constant genes by dividing the reference sequences for the subclasses (NG_001019) in 8 nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are then individually aligned in the right order to each input sequence. This alignment is used to calculate the chunk hit percentage and the nt hit percentage. 
-
-*Chunk hit percentage*: The percentage of the chunks that is aligned 
-
-*Nt hit percentage*: The percentage of chunks covering the subclass specific nucleotide match with the different subclasses. The most stringent filter for the subclass is 70% ‘nt hit percentage’ which means that 5 out of 7 subclass specific nucleotides for Cα or 6 out of 8 subclass specific nucleotides of Cγ should match with the specific subclass. 
-The option “>25% class” can be chosen when you are only interested in the class (Cα/Cγ/Cµ/Cɛ) of your sequences and the length of your sequence is not long enough to assign the subclasses.
-
------
-
-**Output new IMGT archives per class into your history?**
-
-If yes is selected, additional output files (one for each class) will be added to the history which contain information of the sequences that passed the selected filtering criteria. These files are in the same format as the IMGT/HighV-QUEST output files and therefore are also compatible with many other analysis programs, such as the Immune repertoire pipeline.  
-
------
-
-**Execute**
-
-Upon pressing execute a new analysis is added to your history (right side of the page). Initially this analysis will be grey; after initiating the analysis, the colour of the analysis in the history will change to yellow. When the analysis is finished it will turn green in the history. Now the analysis can be opened by clicking on the eye icon on the analysis of interest. When an analysis turns red an error has occurred when running the analysis. If you click on the analysis title additional information can be found on the analysis. In addition a bug icon appears. Here more information on the error can be found.
-
-]]>
-	</help>
-	<citations>
-		<citation type="doi">10.1093/nar/gks457</citation>
-		<citation type="doi">10.1093/bioinformatics/btv359</citation>
-	</citations>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/.gitattributes	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text=auto
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/.gitignore	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,4 @@
+
+shm_csr\.tar\.gz
+
+\.vscode/settings\.json
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/LICENSE	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 david
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/README.md	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,13 @@
+# SHM CSR
+
+Somatic hypermutation and class switch recombination pipeline.  
+The docker version can be found [here](https://github.com/ErasmusMC-Bioinformatics/ARGalaxy-docker).
+
+# Dependencies
+--------------------
+[Python 2.7](https://www.python.org/)  
+[Change-O](https://changeo.readthedocs.io/en/version-0.4.4/)  
+[Baseline](http://selection.med.yale.edu/baseline/)  
+[R data.table](https://cran.r-project.org/web/packages/data.table/data.table.pdf)  
+[R ggplot2](https://cran.r-project.org/web/packages/ggplot2/ggplot2.pdf)  
+[R reshape2](https://cran.r-project.org/web/packages/reshape2/reshape2.pdf)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/aa_histogram.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,69 @@
+library(ggplot2)
+
+# Plots, per gene and for all matched sequences together, the amino-acid
+# mutation frequency at every position and writes the underlying tables.
+#
+# Command line arguments (positional):
+#   1. tab separated file with the mutation counts per sequence id
+#   2. tab separated file with the absent/present amino acids per sequence id
+#   3. comma separated list of gene prefixes matched against `best_match`
+#   4. output directory (must already exist)
+args <- commandArgs(trailingOnly = TRUE)
+
+# Fail early with a readable message instead of a cryptic read.table error
+# on a missing (NA) file name.
+if(length(args) < 4){
+	stop("usage: aa_histogram.r <mutations.by.id file> <absent.aa.by.id file> <comma separated genes> <output dir>", call.=FALSE)
+}
+
+mutations.by.id.file = args[1]
+absent.aa.by.id.file = args[2]
+genes = strsplit(args[3], ",")[[1]]
+# The empty gene name is the "all matched sequences" pass of the loop below.
+genes = c(genes, "")
+outdir = args[4]
+
+
+print("---------------- read input ----------------")
+
+mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=TRUE, header=TRUE, quote="")
+absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=TRUE, header=TRUE, quote="")
+
+for(gene in genes){
+	graph.title = paste(gene, "AA mutation frequency")
+	if(gene == ""){
+		# "all": every sequence whose best_match does not contain "unmatched"
+		mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),]
+		absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),]
+		
+		graph.title = "AA mutation frequency all"
+	} else {
+		# sequences whose best_match starts with the gene name
+		gene.pattern = paste0("^", gene)
+		mutations.by.id.gene = mutations.by.id[grepl(gene.pattern, mutations.by.id$best_match),]
+		absent.aa.by.id.gene = absent.aa.by.id[grepl(gene.pattern, absent.aa.by.id$best_match),]
+	}
+	print(paste("nrow", gene, nrow(absent.aa.by.id.gene)))
+	if(nrow(mutations.by.id.gene) == 0){
+		next
+	}
+
+	# Column sums per AA position. The first 2 (mutations) / 4 (absent) columns
+	# are skipped -- presumably id/best_match style metadata; verify against
+	# the producer of these files.
+	mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)])
+	aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)])
+
+	dat_freq = mutations.at.position / aa.at.position
+	# 0/0 yields NaN; report such positions as frequency 0
+	dat_freq[is.na(dat_freq)] = 0
+	dat_dt = data.frame(i=seq_along(dat_freq), freq=dat_freq)
+	
+
+	print("---------------- plot ----------------")
+
+	m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black"))
+	m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i)
+	# Region bars below the x axis; the AA position boundaries are hard-coded.
+	m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1")
+	m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1")
+	m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2")
+	m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2")
+	m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3")
+	m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(graph.title) 
+	m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
+	#m = m + scale_colour_manual(values=c("black"))
+
+	print("---------------- write/print ----------------")
+
+
+	dat.sums = data.frame(index=seq_along(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position)
+
+	# For the "all" pass gene is "", so the file names end in "_.txt"/"_.png"/"_.pdf".
+	write.table(dat.sums, paste0(outdir, "/aa_histogram_sum_", gene, ".txt"), sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
+	write.table(mutations.by.id.gene, paste0(outdir, "/aa_histogram_count_", gene, ".txt"), sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
+	write.table(absent.aa.by.id.gene, paste0(outdir, "/aa_histogram_absent_", gene, ".txt"), sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
+	write.table(dat_dt, paste0(outdir, "/aa_histogram_", gene, ".txt"), sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
+	
+	png(filename=paste0(outdir, "/aa_histogram_", gene, ".png"), width=1280, height=720)
+	print(m)
+	dev.off()
+	
+	ggsave(paste0(outdir, "/aa_histogram_", gene, ".pdf"), m, width=14, height=7)
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/Baseline_Functions.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,2287 @@
+#########################################################################################
+# License Agreement
+# 
+# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE 
+# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER 
+# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE 
+# OR COPYRIGHT LAW IS PROHIBITED.
+# 
+# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE 
+# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED 
+# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN 
+# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
+#
+# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
+# Coded by: Mohamed Uduman & Gur Yaari
+# Copyright 2012 Kleinstein Lab
+# Version: 1.3 (01/23/2014)
+#########################################################################################
+
+# Global variables  
+  
+  # Numeric threshold; not referenced in this part of the file.
+  # NOTE(review): presumably an upper limit on mutations per sequence -- confirm at the use site.
+  FILTER_BY_MUTATIONS = 1000
+
+  # Nucleotides
+  NUCLEOTIDES = c("A","C","G","T")
+  
+  # Amino Acids
+  # Codon -> one letter amino acid lookup ("*" marks a stop codon); the codon
+  # strings are attached as names on the next line.
+  AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G")
+  names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG")
+  # NOTE(review): the next statement assigns the names to themselves and has no effect.
+  names(AMINO_ACIDS) <- names(AMINO_ACIDS)
+
+  #Amino Acid Traits
+  #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"
+  #B = "Hydrophobic/Buried"  N = "Intermediate/Neutral"  S="Hydrophilic/Surface") 
+  # The trait vector below is positional: it is named with sort(unique(AMINO_ACIDS)),
+  # so it relies on that sort producing the order listed above ("*" first).
+  TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N")
+  names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))
+  # Placeholder (21 NAs); replaced through `<<-` by initializeTraitChange().
+  TRAITS_AMINO_ACIDS <- array(NA,21)
+  
+  # Codon Table
+  # 12 x 64 placeholder of NAs; computeCodonTable() sets the column names to the
+  # codons and fills each column with 12 values.
+  CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12))
+
+  # Substitution Model: Smith DS et al. 1996
+  # 4 x 4 matrices filled row by row, with NUCLEOTIDES as both row and column
+  # names; the diagonal entries are 0. initializeSubstitutionMatrix() selects
+  # one of them for substitutionModel 1, 2 or 3 respectively.
+  substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
+  substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
+  substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES))
+  # The path is relative, so the file is looked up in the current working directory.
+  # NOTE(review): presumably defines `FiveS_Substitution`, which
+  # initializeSubstitutionMatrix() returns for substitutionModel 5 -- confirm.
+  load("FiveS_Substitution.RData")
+
+  # Mutability Models: Shapiro GS et al. 2002
+  triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34, 0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1, NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14), nrow=64,byrow=T)
+  triMutability_Literature_Mouse <- matrix(c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34, 0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55, 0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33, 0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41), nrow=64,byrow=T)
+  # The 64 trinucleotides in alphabetical order; used as the row names of the
+  # 64 x 3 matrix returned by initializeMutabilityMatrix().
+  triMutability_Names <- c("AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT")
+  # The path is relative, so the file is looked up in the current working directory.
+  # NOTE(review): presumably defines `FiveS_Mutability`, which
+  # initializeMutabilityMatrix() returns for mutabilityModel 5 -- confirm.
+  load("FiveS_Mutability.RData")
+
+# Functions
+  
+  # Look up the amino acid for one or more codons.
+  #   Codon: character vector of codon strings (the names of AMINO_ACIDS).
+  # Returns the matching elements of the global AMINO_ACIDS vector; a codon
+  # that is not a name of AMINO_ACIDS yields NA.
+  translateCodonToAminoAcid<-function(Codon){
+     aminoAcid = AMINO_ACIDS[Codon]
+     return(aminoAcid)
+  }
+
+  # Look up the trait class for one or more amino acids.
+  #   AminoAcid: index (normally the one letter amino acid code) into the
+  #              global TRAITS_AMINO_ACIDS, which initializeTraitChange() fills.
+  # Returns the selected element(s) of TRAITS_AMINO_ACIDS.
+  translateAminoAcidToTraitChange<-function(AminoAcid){
+     traitClass = TRAITS_AMINO_ACIDS[AminoAcid]
+     return(traitClass)
+  }
+    
+  # Initialize Amino Acid Trait Changes
+  #   traitChangeModel, species: accepted but not used by this function.
+  #   traitChangeFileName: optional path to a tab separated file with a header
+  #                        row; when NULL the built-in
+  #                        TRAITS_AMINO_ACIDS_CHOTHIA98 table is used.
+  # Side effect: stores the chosen table in TRAITS_AMINO_ACIDS via `<<-`.
+  # If the file cannot be read, a message is printed and the R session quits.
+  initializeTraitChange <- function(traitChangeModel=1,species=1,traitChangeFileName=NULL){
+    if(is.null(traitChangeFileName)){
+      traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98
+    }else{
+      traitChange <- tryCatch(
+          read.delim(traitChangeFileName,sep="\t",header=TRUE)
+          , error = function(ex){
+            cat("Error|Error reading trait changes. Please check file name/path and format.\n")
+            q()
+          }
+        )
+    }
+    TRAITS_AMINO_ACIDS <<- traitChange
+ } 
+  
+  # Build the nucleotide substitution matrix used by the analysis.
+  #   substitutionModel: 0 = uniform (1/3 off the diagonal, 0 on it),
+  #                      1/2/3 = built-in literature matrices,
+  #                      5 = FiveS_Substitution, returned unchanged.
+  #   species:           accepted but not used by this function.
+  #   subsMatFileName:   optional tab separated file (with header) holding a
+  #                      matrix; its rows are rescaled to sum to 1 unless all
+  #                      four already do.
+  # Returns FiveS_Substitution for model 5; otherwise a 6 x 4 matrix with rows
+  # A, C, G, T, N, "-" (the last two rows are NA) and columns A, C, G, T.
+  # If the file cannot be read, a message is printed and the R session quits.
+  initializeSubstitutionMatrix <- function(substitutionModel,species,subsMatFileName=NULL){
+    if(is.null(subsMatFileName)){
+      if(substitutionModel==1)subsMat <- substitution_Literature_Mouse
+      if(substitutionModel==2)subsMat <- substitution_Flu_Human      
+      if(substitutionModel==3)subsMat <- substitution_Flu25_Human      
+    }else{
+      subsMat <- tryCatch(
+          read.delim(subsMatFileName,sep="\t",header=TRUE)
+          , error = function(ex){
+            cat("Error|Error reading substitution matrix. Please check file name/path and format.\n")
+            q()
+          }
+        )
+      rowTotals = apply(subsMat,1,sum)
+      if(sum(rowTotals==1)!=4) subsMat = t(apply(subsMat,1,function(x)x/sum(x)))
+    }
+
+    # The uniform model overrides whatever was selected or read above.
+    if(substitutionModel==0){
+      subsMat <- matrix(1/3,4,4)
+      diag(subsMat) <- 0
+    }
+    
+    NUCLEOTIDESN = c(NUCLEOTIDES,"N", "-")
+    if(substitutionModel==5){
+      return(FiveS_Substitution)
+    }
+    # Append the all-NA rows for "N" and "-".
+    subsMat <- rbind(subsMat,rep(NA,4),rep(NA,4))
+    return( matrix(data.matrix(subsMat),6,4,dimnames=list(NUCLEOTIDESN,NUCLEOTIDES) ) )
+  }
+
+   
+  # Build the trinucleotide mutability matrix used by the analysis.
+  #   mutabilityModel: 0 = uniform (all ones), 5 = FiveS_Mutability returned
+  #                    unchanged; any other value keeps the table selected below.
+  #   species:         2 selects the built-in mouse table, anything else the
+  #                    human one (only consulted when no file is given).
+  #   mutabilityMatFileName: optional tab separated file (with header) that
+  #                    replaces the built-in tables.
+  # Returns FiveS_Mutability for model 5; otherwise a 64 x 3 matrix with
+  # triMutability_Names as row names and "1".."3" as column names.
+  # If the file cannot be read, a message is printed and the R session quits.
+  initializeMutabilityMatrix <- function(mutabilityModel=1, species=1,mutabilityMatFileName=NULL){
+    if(is.null(mutabilityMatFileName)){
+      mutabilityMat <- triMutability_Literature_Human
+      if(species==2) mutabilityMat <- triMutability_Literature_Mouse
+    }else{
+      mutabilityMat <- tryCatch(
+          read.delim(mutabilityMatFileName,sep="\t",header=TRUE)
+          , error = function(ex){
+            cat("Error|Error reading mutability matrix. Please check file name/path and format.\n")
+            q()
+          }
+        )
+    }
+
+    if(mutabilityModel==5){
+      return(FiveS_Mutability)
+    }
+    # The uniform model overrides whatever was selected or read above.
+    if(mutabilityModel==0){
+      mutabilityMat <- matrix(1,64,3)
+    }
+    return( matrix( data.matrix(mutabilityMat), 64, 3, dimnames=list(triMutability_Names,1:3)) )
+  }
+
+  # Read FASTA file formats
+  # Modified from read.fasta from the seqinR package
+  #
+  # Reads a (BASELINe style) FASTA file and returns a list with one element
+  # per record, named with the header line minus its first character.
+  # Lines starting with ">>>" are group headers; a placeholder sequence is
+  # inserted after each of them so that they become records of their own.
+  #
+  #   file:            path of the FASTA file, read with readLines().
+  #   seqtype:         "DNA" (default) or "AA".
+  #   as.string:       FALSE splits every sequence into single characters with
+  #                    s2c(); TRUE keeps one string per record.
+  #   forceDNAtolower: for DNA, lower case the sequences (otherwise upper case).
+  #                    In both cases "." is replaced by "-".
+  #   set.attributes:  attach name/Annot/class attributes to every record.
+  #   legacy.mode:     drop lines starting with ";".
+  #   seqonly:         return only the list of sequence strings.
+  #   strip.desc:      drop the leading ">" from the Annot attribute.
+  #   sizeof.longlong, endian, apply.mask: not referenced in the body.
+  #
+  # Stops with an error when no line starts with ">".
+  baseline.read.fasta <-
+  function (file = system.file("sequences/sample.fasta", package = "seqinr"), 
+      seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE, 
+      set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE, 
+      strip.desc = FALSE,  sizeof.longlong = .Machine$sizeof.longlong, 
+      endian = .Platform$endian, apply.mask = TRUE) 
+  {
+      seqtype <- match.arg(seqtype)
+  
+          lines <- readLines(file)
+          
+          if (legacy.mode) {
+              comments <- grep("^;", lines)
+              if (length(comments) > 0) 
+                  lines <- lines[-comments]
+          }
+          
+          
+          ind_groups<-which(substr(lines, 1L, 3L) == ">>>")
+          lines_mod<-lines
+  
+          # Without any group header, wrap the whole file in one default group.
+          if(!length(ind_groups)){
+              lines_mod<-c(">>>All sequences combined",lines)            
+          }
+          
+          ind_groups<-which(substr(lines_mod, 1L, 3L) == ">>>")
+  
+          # Rebuild `lines` with one extra slot directly after every group
+          # header: header i sits i-1 slots further down because of the slots
+          # inserted before it, and its placeholder goes one line below it.
+          lines <- array("BLA",dim=(length(ind_groups)+length(lines_mod)))
+          id<-sapply(1:length(ind_groups),function(i)ind_groups[i]+i-1)+1
+          lines[id] <- "THIS IS A FAKE SEQUENCE"
+          lines[-id] <- lines_mod
+          rm(lines_mod)
+  
+  		ind <- which(substr(lines, 1L, 1L) == ">")
+          nseq <- length(ind)
+          if (nseq == 0) {
+               stop("no line starting with a > character found")
+          }        
+          start <- ind + 1
+          end <- ind - 1
+  
+          # Drop every header line that is immediately followed by another
+          # header line (it has no sequence lines); repeat until none is left.
+          while( any(which(ind%in%end)) ){
+            ind=ind[-which(ind%in%end)]
+            nseq <- length(ind)
+            if (nseq == 0) {
+                stop("no line starting with a > character found")
+            }        
+            start <- ind + 1
+            end <- ind - 1        
+          }
+          
+          # A record ends on the line before the next header; the last record
+          # ends on the last line.
+          end <- c(end[-1], length(lines))
+          sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = ""))
+          if (seqonly) 
+              return(sequences)
+          # Record name = full header line without its first character.
+          nomseq <- lapply(seq_len(nseq), function(i) {
+          
+              #firstword <- strsplit(lines[ind[i]], " ")[[1]][1]
+              substr(lines[ind[i]], 2, nchar(lines[ind[i]]))
+          
+          })
+          if (seqtype == "DNA") {
+              if (forceDNAtolower) {
+                  sequences <- as.list(tolower(chartr(".","-",sequences)))
+              }else{
+                  sequences <- as.list(toupper(chartr(".","-",sequences)))
+              }
+          }
+          if (as.string == FALSE) 
+              sequences <- lapply(sequences, s2c)
+          if (set.attributes) {
+              for (i in seq_len(nseq)) {
+                  Annot <- lines[ind[i]]
+                  if (strip.desc) 
+                    Annot <- substr(Annot, 2L, nchar(Annot))
+                  attributes(sequences[[i]]) <- list(name = nomseq[[i]], 
+                    Annot = Annot, class = switch(seqtype, AA = "SeqFastaAA", 
+                      DNA = "SeqFastadna"))
+              }
+          }
+          names(sequences) <- nomseq
+          return(sequences)
+  }
+
+  
+  # Replace every character that is not A, C, G, T, N, a, c, g, t or a
+  # punctuation character (so "-" and "." are kept) with "N".
+  #   inSeq: character vector of sequences.
+  # Returns the cleaned character vector.
+  replaceNonFASTAChars <-function(inSeq="ACGTN-AApA"){
+    nonFastaPattern = '[^ACGTNacgt[:punct:]-[:punct:].]'
+    cleanedSeq = gsub(pattern=nonFastaPattern, replacement='N', x=inSeq, perl=TRUE)
+    return(cleanedSeq)
+  }    
+  
+  # Flag the germline entries of a FASTA name vector.
+  #   seqIDs: character vector of record names.
+  # Returns a logical vector named by seqIDs: TRUE where the name starts with
+  # exactly one ">" (first character ">", second character not ">").
+  germlinesInFile <- function(seqIDs){
+    charAt = function(pos){
+      sapply(seqIDs,function(id){substr(id,pos,pos)})
+    }
+    startsWithMarker = charAt(1)==">"
+    secondIsMarker = charAt(2)==">"
+    return(startsWithMarker & !secondIsMarker)
+  }
+  
+  # Flag the group entries of a FASTA name vector.
+  #   seqIDs: character vector of record names.
+  # Returns a logical vector named by seqIDs: TRUE where the name starts
+  # with ">>".
+  groupsInFile <- function(seqIDs){
+    firstTwoChars = sapply(seqIDs,function(id){substr(id,1,2)})
+    return(firstTwoChars==">>")
+  }
+
+  # Assign every entry to the marker (group or germline) block it belongs to.
+  #   vecPosToID: named logical vector, TRUE at the marker entries.
+  # Each block runs from a marker up to the entry before the next marker; the
+  # first block always starts at entry 1 and the last one ends at the last
+  # entry. Returns an integer vector with the block number of every entry,
+  # named with the marker name of that block (all ">" characters removed).
+  expandTillNext <- function(vecPosToID){    
+    IDs = names(vecPosToID)
+    markerPos = which(vecPosToID)
+  
+    markerNames = gsub(">","",IDs[markerPos])
+    blockStarts = c(1,markerPos[-1])
+    blockStops = c(markerPos[-1]-1,length(IDs))
+    blockNumbers = 1:length(blockStarts)
+    blockSize = function(i){
+      blockStops[i]-blockStarts[i]+1
+    }
+    expandedID = unlist(lapply(blockNumbers,function(i){
+                                    rep(i,blockSize(i))
+                                  }))
+    names(expandedID) = unlist(lapply(blockNumbers,function(i){
+                                    rep(markerNames[i],blockSize(i))
+                                  }))
+    return(expandedID)
+  }
+    
+  # Process a FASTA list into a matrix of (input, germline) sequence pairs.
+  #
+  #   inputFASTA: named list of sequences. Names starting with ">>" are group
+  #               entries, names starting with a single ">" are germline
+  #               entries, all other entries are input sequences.
+  #               NOTE(review): the unlist() based indexing below assumes one
+  #               string per entry (as read with as.string=TRUE) -- confirm.
+  #
+  # Returns a list with
+  #   matInput:  character matrix, one row per input sequence, with the
+  #              columns "Input" and "Germline"
+  #   germlines: germline block number of every input sequence
+  #   groups:    group block number of every input sequence
+  #   conservationDefinition: the entries pulled out as conservation rows,
+  #              or NA when there are none
+  processInputAdvanced <- function(inputFASTA){
+  
+    seqIDs = names(inputFASTA)
+    numbSeqs = length(seqIDs)
+    posGermlines1 = germlinesInFile(seqIDs)
+    numbGermlines = sum(posGermlines1)
+    posGroups1 = groupsInFile(seqIDs)
+    numbGroups = sum(posGroups1)
+    consDef = NA
+    
+    # NOTE(review): posGermlines set here is overwritten further down before
+    # it is read, and numbGermlines is not used again in this function.
+    if(numbGermlines==0){
+      posGermlines = 2
+      numbGermlines = 1  
+    }
+  
+      # Running germline count per entry, and the number of entries per count.
+      glPositionsSum = cumsum(posGermlines1)
+      glPositions = table(glPositionsSum)
+      #Find the position of the conservation row
+      # A count that occurs exactly once belongs to a germline entry with
+      # nothing after it before the next germline; count+1 identifies that
+      # next germline entry, which is taken to be the conservation row.
+      consDefPos = as.numeric(names(glPositions[names(glPositions)!=0 & glPositions==1]))+1  
+    if( length(consDefPos)> 0 ){
+      consDefID =  match(consDefPos, glPositionsSum) 
+      #The conservation rows need to be pulled out and stored separately 
+      consDef =  inputFASTA[consDefID]
+      inputFASTA =  inputFASTA[-consDefID]
+  
+      # Recompute the bookkeeping on the reduced list.
+      seqIDs = names(inputFASTA)
+      numbSeqs = length(seqIDs)
+      posGermlines1 = germlinesInFile(seqIDs)
+      numbGermlines = sum(posGermlines1)
+      posGroups1 = groupsInFile(seqIDs)
+      numbGroups = sum(posGroups1)
+      if(numbGermlines==0){
+        posGermlines = 2
+        numbGermlines = 1  
+      }    
+    }
+    
+    # Block number (and block name) of every entry, per group and per germline.
+    posGroups <- expandTillNext(posGroups1)
+    posGermlines <- expandTillNext(posGermlines1)
+    # Group entries themselves do not belong to a germline block.
+    posGermlines[posGroups1] = 0
+    names(posGermlines)[posGroups1] = names(posGroups)[posGroups1]
+    # Input sequences are the entries that are neither group nor germline.
+    posInput = rep(TRUE,numbSeqs)
+    posInput[posGroups1 | posGermlines1] = FALSE
+    
+    matInput = matrix(NA, nrow=sum(posInput), ncol=2)
+    rownames(matInput) = seqIDs[posInput]
+    colnames(matInput) = c("Input","Germline")
+    
+    vecInputFASTA = unlist(inputFASTA)  
+    matInput[,1] = vecInputFASTA[posInput]
+    # Germline of an input sequence: the positions of the germline entries
+    # named like the germline blocks of the inputs, indexed by the germline
+    # block number of every input sequence.
+    matInput[,2] = vecInputFASTA[ which( names(inputFASTA)%in%paste(">",names(posGermlines)[posInput],sep="") )[ posGermlines[posInput]] ]
+    
+    germlines = posGermlines[posInput]
+    groups = posGroups[posInput]
+    
+    return( list("matInput"=matInput, "germlines"=germlines, "groups"=groups, "conservationDefinition"=consDef ))      
+  }
+
+
+  # Normalise one (input, germline) sequence pair to exactly readEnd characters.
+  #
+  #   x:       character vector; x[1] is the input sequence, x[2] the germline.
+  #   readEnd: number of characters both returned sequences must have.
+  #
+  # Leading and trailing runs of "-" are turned into "N" by
+  # replaceLeadingTrailingDashesHelper(); a sequence shorter than readEnd is
+  # padded on the right with "N" and a longer one is cut off at readEnd.
+  # Returns c(input, germline) as two strings.
+  replaceLeadingTrailingDashes <- function(x,readEnd){
+    # Pad with exactly the number of missing "N"s (never a descending index
+    # range), then cut to readEnd and collapse to one string.
+    fitToReadEnd <- function(seqChars){
+      nMissing = readEnd - length(seqChars)
+      if(nMissing > 0){
+        seqChars = c(seqChars, rep("N", nMissing))
+      }
+      c2s(seqChars[seq_len(readEnd)])
+    }
+
+    seqIn = fitToReadEnd(replaceLeadingTrailingDashesHelper(x[1]))
+    seqGL = fitToReadEnd(replaceLeadingTrailingDashesHelper(x[2]))
+    return( c(seqIn,seqGL) )
+  }  
+
+  # Turn the leading and the trailing run of "-" of a sequence into "N".
+  #   x: a single sequence string.
+  # Returns the sequence as a vector of single characters. Gaps inside the
+  # sequence are kept, a sequence made only of "-" becomes all "N", and an
+  # empty string yields character(0) instead of an error.
+  replaceLeadingTrailingDashesHelper <- function(x){
+    seqChars = strsplit(x, "", fixed=TRUE)[[1]]
+    nonGapPos = which(seqChars != "-")
+    if(length(nonGapPos) == 0){
+      # empty sequence, or nothing but gaps
+      seqChars[] = "N"
+      return(seqChars)
+    }
+    firstNonGap = nonGapPos[1]
+    lastNonGap = nonGapPos[length(nonGapPos)]
+    if(firstNonGap > 1){
+      seqChars[seq_len(firstNonGap-1)] = "N"
+    }
+    if(lastNonGap < length(seqChars)){
+      seqChars[(lastNonGap+1):length(seqChars)] = "N"
+    }
+    return(seqChars)
+  }
+
+
+
+  
+  # Flag the rows of an (input, germline) matrix that contain indels.
+  #   matInputP: matrix with the input sequence in column 1 and the germline
+  #              in column 2.
+  # Returns list(Insertions=..., Deletions=...) holding the results of
+  # checkInsertion() and checkDeletions() for matInputP.
+  checkForInDels <- function(matInputP){
+    indelFlags = list("Insertions"=checkInsertion(matInputP),
+                      "Deletions"=checkDeletions(matInputP))
+    return(indelFlags)
+  }
+
+  # Check sequences for insertions
+  #   matInputP: matrix with the input sequence in column 1 and the germline
+  #              in column 2.
+  # Returns one logical per row: TRUE when the germline has a gap ("-") at a
+  # position where the input sequence has none.
+  checkInsertion <- function(matInputP){
+    # gregexpr() reports "no match" as -1; drop it so that a sequence without
+    # gaps is not treated as having a gap at position -1.
+    gapPositions <- function(seqString){
+      pos = as.vector( gregexpr("-",seqString)[[1]] )
+      pos[pos > 0]
+    }
+    insertionCheck = vapply(seq_len(nrow(matInputP)), function(rowIndex){
+                                          inputGaps <- gapPositions(matInputP[rowIndex,1])
+                                          glGaps <- gapPositions(matInputP[rowIndex,2])
+                                          any( !(glGaps%in%inputGaps) )
+                                        }, logical(1))
+    return(insertionCheck)
+  }
+  # Fix insertions
+  #   matInputP: matrix with the input sequence in column 1 and the germline
+  #              in column 2.
+  # For every row flagged by checkInsertion() the germline gaps that have no
+  # matching gap in the input sequence are replaced by "N".
+  # Side effect: the repaired germline is written with `<<-` into column 2 of
+  # the variable `matInput` of an enclosing (normally the global) environment,
+  # not into matInputP, so matInputP is expected to hold the same rows as
+  # that variable.
+  # Returns the logical vector from checkInsertion().
+  fixInsertions <- function(matInputP){
+    insPos <- checkInsertion(matInputP)
+    for(rowIndex in which(insPos)){
+      x <- matInputP[rowIndex,]
+      inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
+      glGaps <- as.vector( gregexpr("-",x[2])[[1]] )
+      # glGaps > 0 drops the -1 "no match" value of gregexpr(); used as an
+      # index it would select (and overwrite) everything but the first base.
+      posInsertions <- glGaps[ glGaps > 0 & !(glGaps%in%inputGaps) ]
+      germlineChars <- s2c(x[2])
+      germlineChars[posInsertions] <- "N"
+      matInput[rowIndex,2] <<- c2s(germlineChars)
+    }
+    return(insPos)
+  } 
+    
+  # Check sequences for deletions
+  #   matInputP: matrix with the input sequence in column 1 and the germline
+  #              in column 2.
+  # Returns one logical per row: TRUE when the input sequence has a gap ("-")
+  # at a position where the germline has none.
+  checkDeletions <-function(matInputP){
+    # gregexpr() reports "no match" as -1; drop it so that a sequence without
+    # gaps is not treated as having a gap at position -1.
+    gapPositions <- function(seqString){
+      pos = as.vector( gregexpr("-",seqString)[[1]] )
+      pos[pos > 0]
+    }
+    deletionCheck = vapply(seq_len(nrow(matInputP)), function(rowIndex){
+                                          inputGaps <- gapPositions(matInputP[rowIndex,1])
+                                          glGaps <- gapPositions(matInputP[rowIndex,2])
+                                          any( !(inputGaps%in%glGaps) )
+                                      }, logical(1))
+    return(deletionCheck)
+  }
+  # Fix sequences with deletions
+  #   matInputP: matrix with the input sequence in column 1 and the germline
+  #              in column 2.
+  # For every row flagged by checkDeletions() the gaps of the input sequence
+  # that have no matching gap in the germline are replaced by "N".
+  # Side effect: the repaired input sequence is written with `<<-` into
+  # column 1 of the variable `matInput` of an enclosing (normally the global)
+  # environment, not into matInputP, so matInputP is expected to hold the
+  # same rows as that variable.
+  # Returns the logical vector from checkDeletions().
+  fixDeletions <- function(matInputP){
+    delPos <- checkDeletions(matInputP)    
+    for(rowIndex in which(delPos)){
+      x <- matInputP[rowIndex,]
+      inputGaps <- as.vector( gregexpr("-",x[1])[[1]] )
+      glGaps <- as.vector( gregexpr("-",x[2])[[1]] )
+      # inputGaps > 0 drops the -1 "no match" value of gregexpr(); used as an
+      # index it would select (and overwrite) everything but the first base.
+      posDeletions <- inputGaps[ inputGaps > 0 & !(inputGaps%in%glGaps) ]
+      inputChars <- s2c(x[1])
+      inputChars[posDeletions] <- "N"
+      matInput[rowIndex,1] <<- c2s(inputChars)
+    }
+    return(delPos)
+  }  
+    
+
+  # Trim DNA sequence to the last codon.
+  #
+  # Strips the trailing run of "-" and "." characters from seqToTrim, then
+  # drops any nucleotides left over after the last complete codon, so the
+  # returned string has a length that is a multiple of 3.
+  # An empty string, or one made only of "-"/".", yields "" (the earlier
+  # while-loop walked past the start of the sequence and raised an error).
+  trimToLastCodon <- function(seqToTrim){
+    # Remove trailing gap characters ("-" and "." are literal inside [])
+    trimmedSeq <- sub("[-.]+$", "", seqToTrim)
+    seqLen <- nchar(trimmedSeq)
+    # Trim sequence to last codon
+    trimmedSeq <- substr(trimmedSeq, 1, (seqLen %/% 3) * 3)
+    return(unname(trimmedSeq))
+  }
+  
+  # Given a nucleotide position, returns the positions of the 3 nucleotides
+  # that make up the codon containing it.
+  # e.g. nuc 86 is part of nucs 85,86,87
+  getCodonPos <- function(nucPos){
+    lastNuc <- 3 * ceiling(nucPos/3)
+    firstNuc <- lastNuc - 2
+    firstNuc:lastNuc
+  }
+  
+  # Given a nucleotide position, returns the number of the codon it belongs to
+  # e.g. nuc 86  = codon 29
+  getCodonNumb <- function(nucPos){
+    codonIndex <- ceiling(nucPos/3)
+    codonIndex
+  }
+  
+  # Given a codon number, returns the positions of the 3 nucleotides that
+  # make up that codon (delegates to getCodonPos on the codon's last position)
+  getCodonNucs <- function(codonNumb){
+    lastNucOfCodon <- codonNumb*3
+    getCodonPos(lastNucOfCodon)
+  }  
+
+  # Fills CODON_TABLE with, for every codon, the mutation type of each of the
+  # 12 codons produced by permutateAllCodon() (one column per codon, 12 rows),
+  # then stores the result as a matrix in the enclosing-scope CODON_TABLE.
+  #
+  # testID: the table is only (re)built when testID <= 4; otherwise the
+  #         function does nothing.
+  # Reads NUCLEOTIDES and CODON_TABLE from an enclosing scope.
+  # NOTE(review): columns are added by name, so CODON_TABLE presumably starts
+  # out as a 12-row data frame - confirm against where it is initialised.
+  computeCodonTable <- function(testID=1){
+                  
+    if(testID<=4){    
+      # Pre-compute every codon built from NUCLEOTIDES
+      intCounter = 1
+      for(pOne in NUCLEOTIDES){
+        for(pTwo in NUCLEOTIDES){
+          for(pThree in NUCLEOTIDES){
+            codon = paste(pOne,pTwo,pThree,sep="")
+            colnames(CODON_TABLE)[intCounter] =  codon
+            intCounter = intCounter + 1
+            # Column 1 = codon one substitution away, column 2 = this codon;
+            # mutationTypeOptimized() classifies each row.
+            CODON_TABLE[,codon] = mutationTypeOptimized(cbind(permutateAllCodon(codon),rep(codon,12)))
+          }  
+        }
+      }
+      # Any codon containing an "N" gets an all-NA column
+      chars = c("N","A","C","G","T", "-")
+      for(a in chars){
+        for(b in chars){
+          for(c in chars){
+            if(a=="N" | b=="N" | c=="N"){ 
+              #cat(paste(a,b,c),sep="","\n") 
+              CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
+            }
+          }  
+        }
+      }
+      
+      # Any codon containing a gap "-" gets an all-NA column
+      chars = c("-","A","C","G","T")
+      for(a in chars){
+        for(b in chars){
+          for(c in chars){
+            if(a=="-" | b=="-" | c=="-"){ 
+              #cat(paste(a,b,c),sep="","\n") 
+              CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12)
+            }
+          }  
+        }
+      }
+      # Publish the locally modified copy to the enclosing scope
+      CODON_TABLE <<- as.matrix(CODON_TABLE)
+    }
+  }
+  
+  # Collapses the sequences of a clone into a single sequence.
+  #
+  # vecInputSeqs:    character vector of sequences (duplicates are removed)
+  # glSeq:           germline sequence, returned unchanged as second element
+  # readEnd:         only positions 1..readEnd are considered
+  # nonTerminalOnly: if 0, a differing nucleotide seen in any sequence may be
+  #                  picked; otherwise only nucleotides seen in more than one
+  #                  sequence are eligible
+  # Returns list( c(collapsedSeq, glSeq), flag ) where flag is TRUE when an "N"
+  # was found at a position whose sequences disagree. With a single unique
+  # input sequence it is returned as is, with flag F.
+  # Positions with several candidate nucleotides are resolved with sample(),
+  # so the result depends on the random number generator state.
+  collapseClone <- function(vecInputSeqs,glSeq,readEnd,nonTerminalOnly=0){
+  #print(length(vecInputSeqs))
+    vecInputSeqs = unique(vecInputSeqs) 
+    if(length(vecInputSeqs)==1){
+      return( list( c(vecInputSeqs,glSeq), F) )
+    }else{
+      # One column per sequence, one row per position
+      charInputSeqs <- sapply(vecInputSeqs, function(x){
+                                              s2c(x)[1:readEnd]
+                                            })
+      charGLSeq <- s2c(glSeq)
+      # For every position returns c(nucleotide, error); because both are put
+      # in one character vector the error flag is stored as "TRUE"/"FALSE"
+      matClone <- sapply(1:readEnd, function(i){
+                                            posNucs = unique(charInputSeqs[i,])
+                                            posGL = charGLSeq[i]
+                                            error = FALSE                                            
+                                            # Germline gap and nothing but "-"/"N" in the sequences: keep the gap
+                                            if(posGL=="-" & sum(!(posNucs%in%c("-","N")))==0 ){
+                                              return(c("-",error))
+                                            }
+                                            # All sequences agree
+                                            if(length(posNucs)==1)
+                                              return(c(posNucs[1],error))
+                                            else{
+                                              if("N"%in%posNucs){
+                                                error=TRUE
+                                              }
+                                              # Ignoring "N", every sequence matches the germline
+                                              if(sum(!posNucs[posNucs!="N"]%in%posGL)==0){
+                                                return( c(posGL,error) )  
+                                              }else{
+                                                #return( c(sample(posNucs[posNucs!="N"],1),error) )  
+                                                if(nonTerminalOnly==0){
+                                                  # Draw from all non-"N", non-germline nucleotides (one entry per sequence)
+                                                  return( c(sample(charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL],1),error) )  
+                                                }else{
+                                                  posNucs = charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL]
+                                                  posNucsTable = table(posNucs)
+                                                  # No differing nucleotide occurs more than once: fall back to germline
+                                                  if(sum(posNucsTable>1)==0){
+                                                    return( c(posGL,error) )
+                                                  }else{
+                                                    # Draw only among nucleotides seen more than once
+                                                    return( c(sample( posNucs[posNucs%in%names(posNucsTable)[posNucsTable>1]],1),error) )
+                                                  }
+                                                }
+                                                
+                                              }
+                                            } 
+                                          })
+      
+                                          
+      #print(length(vecInputSeqs))                                        
+      return(list(c(c2s(matClone[1,]),glSeq),"TRUE"%in%matClone[2,]))
+    }
+  }
+
+  # Compute the expected for each sequence-germline pair
+  #
+  # matInput: character matrix with one row per pair; column 1 is the input
+  #           sequence and column 2 its germline.
+  # Returns a 4-column matrix filled row by row with the vectors returned by
+  # computeExpected() (Exp_R_CDR, Exp_S_CDR, Exp_R_FWR, Exp_S_FWR).
+  # When a package whose search-path entry contains "multicore" is attached,
+  # the per-row work is dispatched through mclapply(); otherwise lapply().
+  getExpectedIndividual <- function(matInput){
+    # The parallel and serial code paths are identical apart from the apply
+    # function, so pick it once instead of duplicating the whole body.
+    applyFun <- if( any(grepl("multicore",search())) ) mclapply else lapply
+    rowIndex <- seq_len(nrow(matInput))
+
+    # Mutabilities depend only on the germline: compute them once per
+    # distinct germline and look them up per row through facIndex.
+    facGL <- factor(matInput[,2])
+    facLevels <- levels(facGL)
+    LisGLs_MutabilityU <- applyFun(seq_along(facLevels), function(x){
+      computeMutabilities(facLevels[x])
+    })
+    facIndex <- match(facGL,facLevels)
+
+    # Blank out (NA) the mutability at positions where the input has an "N"
+    LisGLs_Mutability <- applyFun(rowIndex, function(x){
+      cInput <- rep(NA,nchar(matInput[x,1]))
+      cInput[s2c(matInput[x,1])!="N"] <- 1
+      LisGLs_MutabilityU[[facIndex[x]]] * cInput
+    })
+
+    LisGLs_Targeting <- applyFun(rowIndex, function(x){
+      computeTargeting(matInput[x,2],LisGLs_Mutability[[x]])
+    })
+
+    LisGLs_MutationTypes <- applyFun(rowIndex, function(x){
+      computeMutationTypes(matInput[x,2])
+    })
+
+    LisGLs_Exp <- applyFun(rowIndex, function(x){
+      computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]])
+    })
+
+    ul_LisGLs_Exp <- unlist(LisGLs_Exp)
+    return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=TRUE))
+  }
+
+  # Compute mutabilities of sequence based on the tri-nucleotide model
+  # (or on 5-mers when the enclosing-scope variable mutabilityModel is 5).
+  #
+  # paramSeq: sequence as a single string; "-" marks gaps.
+  # Returns a vector with one entry per character of paramSeq. Gap positions
+  # are NA; the other positions receive the values computed on the sequence
+  # with all "-" removed.
+  computeMutabilities <- function(paramSeq){
+    seqLen = nchar(paramSeq)
+    seqMutabilites = rep(NA,seqLen)
+  
+    # Work on the gapless sequence, results are mapped back at the end
+    gaplessSeq = gsub("-", "", paramSeq)
+    gaplessSeqLen = nchar(gaplessSeq)
+    gaplessSeqMutabilites = rep(NA,gaplessSeqLen)
+    
+    if(mutabilityModel!=5){
+      # Positions 3..end: take the window pos-2..pos+2 and average (ignoring
+      # NA) the mutability of the position as 3rd, 2nd and 1st member of the
+      # three triplets that contain it
+      pos<- 3:(gaplessSeqLen)
+      subSeq =  substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))    
+      gaplessSeqMutabilites[pos] =      
+        tapply( c(
+                                        getMutability( substr(subSeq,1,3), 3) , 
+                                        getMutability( substr(subSeq,2,4), 2), 
+                                        getMutability( substr(subSeq,3,5), 1) 
+                                        ),rep(1:(gaplessSeqLen-2),3),mean,na.rm=TRUE
+                                      )
+      #Pos 1: only one triplet contains it
+      subSeq =  substr(gaplessSeq,1,3)
+      gaplessSeqMutabilites[1] =  getMutability(subSeq , 1)
+      #Pos 2: two triplets contain it
+      subSeq =  substr(gaplessSeq,1,4)
+      gaplessSeqMutabilites[2] =  mean( c(
+                                            getMutability( substr(subSeq,1,3), 2) , 
+                                            getMutability( substr(subSeq,2,4), 1) 
+                                          ),na.rm=T
+                                      ) 
+      # Map the gapless results back onto the non-gap positions
+      seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
+      return(seqMutabilites)
+    }else{
+      
+      # 5-mer model: look up the window pos-2..pos+2 for positions 3..end;
+      # positions 1 and 2 are left NA
+      pos<- 3:(gaplessSeqLen)
+      subSeq =  substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2))    
+      gaplessSeqMutabilites[pos] = sapply(subSeq,function(x){ getMutability5(x) }, simplify=T)
+      seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites
+      return(seqMutabilites)
+    }
+
+  }
+
+  # Looks up triplet(s) in the row names of the enclosing-scope `mutability`
+  # table and returns the entries of the matching row(s) for column(s) `pos`
+  getMutability <- function(codon, pos=1:3){
+    rowIdx <- match(codon, rownames(mutability))
+    mutability[rowIdx, pos]
+  }
+
+  # Returns the entry of the enclosing-scope `mutability` object indexed by
+  # the given 5-mer
+  getMutability5 <- function(fivemer){
+    fivemerMutability <- mutability[fivemer]
+    return(fivemerMutability)
+  }
+
+  # Returns the substitution probabilities of a nucleotide: the row `nuc` of
+  # the enclosing-scope `substitution` table
+  getTransistionProb <- function(nuc){
+    nucRow <- substitution[nuc,]
+    nucRow
+  }
+
+  # Returns the column of the enclosing-scope `substitution` table named after
+  # the given 5-mer, or 4 NAs when no column has that name
+  getTransistionProb5 <- function(fivemer){    
+    matchingCols <- which(fivemer==colnames(substitution))
+    if(length(matchingCols)==0){
+      return(array(NA,4))
+    }
+    return(substitution[,fivemer])
+  }
+
+  # Given a nuc, returns the other nucs of NUCLEOTIDES it can mutate to.
+  # NOTE(review): when `nuc` is not in NUCLEOTIDES the negative index is empty
+  # and an empty vector is returned, not the full set - confirm callers never
+  # pass such a value.
+  canMutateTo <- function(nuc){
+    ownIndex <- which(NUCLEOTIDES==nuc)
+    NUCLEOTIDES[-ownIndex]
+  }
+  
+  # Given a nucleotide, returns the substitution probabilities towards the
+  # nucleotides it can mutate to (row `nuc` of `substitution`, restricted to
+  # the columns returned by canMutateTo)
+  canMutateToProb <- function(nuc){
+    targets <- canMutateTo(nuc)
+    substitution[nuc,targets]
+  }
+
+  # Compute targeting, based on precomputed mutability & substitution
+  #
+  # param_strSeq:          sequence as a single string ("-" marks gaps)
+  # param_vecMutabilities: one mutability per character of param_strSeq
+  # Returns a matrix with one row per entry of NUCLEOTIDES and one column per
+  # sequence position: substitution probabilities scaled by the mutability of
+  # the position. The enclosing-scope variable substitutionModel selects the
+  # per-nucleotide lookup or (value 5) the 5-mer lookup.
+  computeTargeting <- function(param_strSeq,param_vecMutabilities){
+
+    if(substitutionModel==5){
+      # 5-mer model: probabilities are looked up on the gapless sequence for
+      # positions 3..end and mapped back; gaps and positions 1-2 stay NA
+      fullLen = nchar(param_strSeq)
+      fullSubstitution = matrix(NA,ncol=fullLen,nrow=4)
+      ungapped = gsub("-", "", param_strSeq)
+      ungappedLen = nchar(ungapped)
+      ungappedSubstitution  = matrix(NA,ncol=ungappedLen,nrow=4) 
+
+      centers<- 3:(ungappedLen)
+      fivemers =  substr(rep(ungapped,ungappedLen-2),(centers-2),(centers+2))    
+      ungappedSubstitution[,centers] = sapply(fivemers,function(x){ getTransistionProb5(x) }, simplify=T)
+      fullSubstitution[,which(s2c(param_strSeq)!="-")]<- ungappedSubstitution
+      # Scale every column by the mutability of its position
+      matTargeting <- sweep(fullSubstitution,2,param_vecMutabilities,`*`)
+      dimnames( matTargeting ) =  list(NUCLEOTIDES,1:(fullLen)) 
+      return (matTargeting)      
+    }
+
+    # Per-nucleotide model: one column per position
+    vecSeq = s2c(param_strSeq)
+    nPositions = length(vecSeq)
+    matTargeting = sapply( 1:nPositions, function(x) { param_vecMutabilities[x] * getTransistionProb(vecSeq[x]) } )  
+    dimnames( matTargeting ) =  list(NUCLEOTIDES,1:(nPositions)) 
+    return (matTargeting)
+  }  
+
+  # Compute the mutation types
+  #
+  # Splits param_strSeq into consecutive codons, looks each codon up as a
+  # column of CODON_TABLE and lays the values out as a matrix with 4 rows
+  # (named after NUCLEOTIDES) and one column per sequence position.
+  computeMutationTypes <- function(param_strSeq){
+    lenSeq = nchar(param_strSeq)
+    # First position of every codon: 1, 4, 7, ...
+    codonStarts = {1:(lenSeq/3)}*3-2
+    vecCodons = sapply(codonStarts,function(firstPos){substr(param_strSeq,firstPos,firstPos+2)})
+    codonValues = unlist(CODON_TABLE[,vecCodons])
+    matMutationTypes = matrix( codonValues ,ncol=lenSeq,nrow=4, byrow=FALSE)
+    dimnames( matMutationTypes ) =  list(NUCLEOTIDES,1:(ncol(matMutationTypes)))
+    return(matMutationTypes)   
+  }  
+  # Returns the CODON_TABLE column of a single codon reshaped, column by
+  # column, into a 4 x 3 matrix
+  computeMutationTypesFast <- function(param_strSeq){
+    codonColumn = CODON_TABLE[,param_strSeq]
+    matMutationTypes = matrix( codonColumn ,ncol=3,nrow=4, byrow=FALSE)
+    return(matMutationTypes)   
+  }  
+  # Classifies every row of a 2-column matrix of codons by calling
+  # mutationType() with the codon of column 2 as first argument and the codon
+  # of column 1 as second argument
+  mutationTypeOptimized <- function( matOfCodons ){
+    classifyRow <- function(codonPair){
+      mutationType(codonPair[2],codonPair[1])
+    }
+    apply( matOfCodons,1,classifyRow )
+  }  
+
+  # Returns the 12 codons obtained by putting each entry of NUCLEOTIDES at
+  # each of the 3 positions of the given codon (entries 1-4 vary position 1,
+  # 5-8 position 2, 9-12 position 3). The codon itself is therefore included
+  # once per position.
+  permutateAllCodon <- function(codon){
+    # 12 identical rows, one codon character per column
+    matCodons = matrix(s2c(codon),nrow=12,ncol=3,byrow=TRUE)
+    for(codonPos in 1:3){
+      blockRows = (4*codonPos-3):(4*codonPos)
+      matCodons[blockRows,codonPos] = NUCLEOTIDES
+    }
+    apply(matCodons,1,c2s)
+  }
+
+  # Given two codons, tells you if the mutation is R or S (based on your definition)
+  #
+  # Returns NA when either codon, or the amino acid either codon translates
+  # to, is NA. Otherwise the result depends on the enclosing-scope testID:
+  #   testID == 5 : "S" if the codons are equal; else "R" when the differing
+  #                 nucleotide of codonFrom is "C", "S" when it is "G",
+  #                 "Stop" for anything else
+  #   testID == 4 : "Stop" if either amino acid is "*"; else "R" when
+  #                 translateAminoAcidToTraitChange() differs between the two
+  #                 amino acids, "S" otherwise
+  #   otherwise   : "Stop" if either amino acid is "*"; else "R" when the
+  #                 amino acids differ, "S" otherwise
+  mutationType <- function(codonFrom,codonTo){
+    # Shared NA guard (was repeated in every branch). `||` short-circuits so
+    # NA codons are never handed to the translation.
+    if( is.na(codonFrom) || is.na(codonTo) ){
+      return(NA)
+    }
+    # Translate once instead of on every comparison
+    aaFrom = translateCodonToAminoAcid(codonFrom)
+    aaTo = translateCodonToAminoAcid(codonTo)
+    if( is.na(aaFrom) || is.na(aaTo) ){
+      return(NA)
+    }
+
+    if(testID==5){
+      if(codonFrom==codonTo){
+        return("S")
+      }
+      nucsFrom = s2c(codonFrom)
+      nucsTo = s2c(codonTo)
+      # Nucleotide of the source codon at the mutated position
+      nucOfI = nucsFrom[which(nucsTo!=nucsFrom)]
+      if(nucOfI=="C"){
+        return("R")
+      }else if(nucOfI=="G"){
+        return("S")
+      }
+      return("Stop")
+    }
+
+    # A stop codon on either side overrides the R/S classification, so test
+    # it first; the trait lookup is then never attempted on "*".
+    if(aaTo=="*" || aaFrom=="*"){
+      return("Stop")
+    }
+    if(testID==4){
+      isReplacement = translateAminoAcidToTraitChange(aaFrom) != translateAminoAcidToTraitChange(aaTo)
+    }else{
+      isReplacement = aaFrom != aaTo
+    }
+    if(isReplacement){
+      return("R")
+    }
+    return("S")
+  }
+
+  
+  # Given a matrix of targeting values and its matching matrix of mutation
+  # types, returns the vector c(Exp_R_CDR, Exp_S_CDR, Exp_R_FWR, Exp_S_FWR):
+  # the targeting summed (NA ignored) over the "R" / "S" cells that fall in
+  # the region flagged by the enclosing-scope masks CDR_Nuc_Mat / FWR_Nuc_Mat.
+  computeExpected <- function(paramTargeting,paramMutationTypes){
+    # Sum of targeting over the given cells that lie inside a region mask
+    sumInRegion <- function(cellPos,regionMask){
+      sum(paramTargeting[ cellPos[which(regionMask[cellPos]==TRUE)] ],na.rm=TRUE)
+    }
+    # Replacements
+    RPos = which(paramMutationTypes=="R")
+    # Silents
+    SPos = which(paramMutationTypes=="S")
+
+    Exp_R_FWR = sumInRegion(RPos,FWR_Nuc_Mat)
+    Exp_R_CDR = sumInRegion(RPos,CDR_Nuc_Mat)
+    Exp_S_FWR = sumInRegion(SPos,FWR_Nuc_Mat)
+    Exp_S_CDR = sumInRegion(SPos,CDR_Nuc_Mat)
+
+    return(c(Exp_R_CDR,Exp_S_CDR,Exp_R_FWR,Exp_S_FWR))
+  }
+  
+  # Count the mutations in a sequence
+  # each mutation is treated independently
+  #
+  # rev_in_matrix: 2-row character matrix, one column per position.
+  # The two rows are swapped (row 2 first, row 1 second) and the resulting
+  # matrix is handed to analyzeMutations2NucUri(), whose result is returned.
+  # Replacing "-" / "N" of one row by the other row's character before the
+  # call was already disabled (commented out) and is not performed.
+  analyzeMutations2NucUri_website <- function( rev_in_matrix ){
+    secondRow = rev_in_matrix[2,]
+    firstRow = rev_in_matrix[1,]
+    swapped = matrix(c( secondRow, firstRow ),2,length(secondRow),byrow=TRUE)
+    analyzeMutations2NucUri( swapped )
+  }
+
+  # Classifies every differing position between the two rows of in_matrix.
+  #
+  # in_matrix: 2-row character matrix, one column per nucleotide position.
+  #1 = GL 
+  #2 = Seq
+  # NOTE(review): the two lines above label row 1 as GL and row 2 as Seq, but
+  # the code below reads row 2 into paramGL and row 1 into paramSeq - confirm
+  # which labelling is intended.
+  # Returns a vector of mutationType() results named by position (NA results
+  # removed), or NA when the rows are identical or nothing could be classified.
+  analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=T) ){
+    paramGL = in_matrix[2,]
+    paramSeq = in_matrix[1,]
+    # paramSeqUri is not used further in this function
+    paramSeqUri = paramGL
+    #mutations = apply(rbind(paramGL,paramSeq), 2, function(x){!x[1]==x[2]})
+    mutations_val = paramGL != paramSeq   
+    if(any(mutations_val)){
+      mutationPos = {1:length(mutations_val)}[mutations_val]  
+      # Ignore positions whose codon contains an "N" in paramSeq
+      mutationPos = mutationPos[sapply(mutationPos, function(x){!any(paramSeq[getCodonPos(x)]=="N")})]
+      length_mutations =length(mutationPos)
+      mutationInfo = rep(NA,length_mutations)
+      if(any(mutationPos)){  
+
+        pos<- mutationPos
+        # Codon positions of all mutations, flattened: 3 entries per mutation
+        pos_array<-array(sapply(pos,getCodonPos))
+        codonGL =  paramGL[pos_array]
+        
+        # Each mutation is evaluated on its own: take the paramGL codon and
+        # replace only the mutated position with the paramSeq nucleotide
+        codonSeq = sapply(pos,function(x){
+                                  seqP = paramGL[getCodonPos(x)]
+                                  muCodonPos = {x-1}%%3+1 
+                                  seqP[muCodonPos] = paramSeq[x]
+                                  return(seqP)
+                                })      
+        GLcodons =  apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
+        Seqcodons =   apply(codonSeq,2,c2s)
+        mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})     
+        names(mutationInfo) = mutationPos
+    }
+    # (the brace above closes `if(any(mutationPos))`, despite its indentation)
+    if(any(!is.na(mutationInfo))){
+      return(mutationInfo[!is.na(mutationInfo)])    
+    }else{
+      return(NA)
+    }
+    
+    
+    }else{
+      return (NA)
+    }
+  }
+  
+  # Counts mutations per type and region.
+  #
+  # mu: vector of mutation types ("R", "S", ...) whose names are nucleotide
+  #     positions, or NA when there are no mutations.
+  # Returns c(R_CDR, S_CDR, R_FWR, S_FWR): the number of "R" / "S" mutations
+  # whose position is flagged in the enclosing-scope masks CDR_Nuc / FWR_Nuc.
+  # All four counts are 0 when mu holds no non-NA entry.
+  processNucMutations2 <- function(mu){
+    # `mu` is usually a vector, and if() on a condition of length > 1 is an
+    # error since R 4.2, so drop NA entries and test the remaining length.
+    mu <- mu[!is.na(mu)]
+    if(length(mu)==0){
+      return(rep(0,4))
+    }
+
+    # Number of mutations of one type inside a region mask; positions come
+    # from the names of `mu`
+    countInRegion <- function(type,regionMask){
+      nucNumbs <- as.numeric(names(mu)[mu==type])
+      sum(as.integer(regionMask[nucNumbs]),na.rm=TRUE)
+    }
+
+    retVec <- c( countInRegion("R",CDR_Nuc),
+                 countInRegion("S",CDR_Nuc),
+                 countInRegion("R",FWR_Nuc),
+                 countInRegion("S",FWR_Nuc) )
+    retVec[is.na(retVec)] <- 0
+    return(as.numeric(retVec))
+  }        
+  
+  
+  ## Z-score Test
+  #
+  # mat:  numeric matrix, one row per sequence. Columns 1-4 are read as
+  #       observed counts and columns 5-8 as the matching expected values,
+  #       both in the order R_CDR, S_CDR, R_FWR, S_FWR produced elsewhere in
+  #       this file.
+  # test: "Focused", "Local" or "Imbalanced"; selects which columns enter the
+  #       expected proportion P and the total count. Any other value leaves
+  #       the result all NA.
+  # Returns a matrix with one row per row of mat and 2 columns (CDR, FWR)
+  # holding (observed - N*P) / sqrt(N*P*(1-P)); NaN results become NA.
+  # Single-row input is supported (drop=FALSE keeps the sub-matrices 2-d).
+  # NOTE(review): for "Imbalanced" the observed value is column 1 (resp. 3)
+  # only, whereas computeBayesianScore() uses the sum of columns 1:2 (resp.
+  # 3:4) - kept as is, confirm which is intended.
+  computeZScore <- function(mat, test="Focused"){
+    matRes <- matrix(NA,ncol=2,nrow=(nrow(mat)))
+
+    # Expected proportion: numerator(x) / sum(x) over the given columns
+    expectedProportion <- function(expCols,numerator){
+      apply(mat[,expCols,drop=FALSE],1,function(x){ numerator(x)/sum(x) })
+    }
+    # z-score of observed column obsCol given P and the columns making up N
+    zScore <- function(P,countCols,obsCol){
+      R_mean <- P * apply(mat[,countCols,drop=FALSE],1,sum)
+      R_sd <- sqrt(R_mean*(1-P))
+      (mat[,obsCol]-R_mean)/R_sd
+    }
+    first <- function(x){ x[1] }
+
+    if(test=="Focused"){
+      #Z_Focused_CDR
+      matRes[,1] <- zScore( expectedProportion(c(5,6,8),first), c(1,2,4), 1 )
+      #Z_Focused_FWR
+      matRes[,2] <- zScore( expectedProportion(c(7,6,8),first), c(3,2,4), 3 )
+    }
+
+    if(test=="Local"){
+      #Z_Local_CDR
+      matRes[,1] <- zScore( expectedProportion(c(5,6),first), c(1,2), 1 )
+      #Z_Local_FWR
+      matRes[,2] <- zScore( expectedProportion(c(7,8),first), c(3,4), 3 )
+    }
+
+    if(test=="Imbalanced"){
+      #Z_Imbalanced_CDR
+      matRes[,1] <- zScore( expectedProportion(5:8,function(x){ x[1]+x[2] }), 1:4, 1 )
+      #Z_Imbalanced_FWR
+      matRes[,2] <- zScore( expectedProportion(5:8,function(x){ x[3]+x[4] }), 1:4, 3 )
+    }
+
+    matRes[is.nan(matRes)] <- NA
+    return(matRes)
+  }
+
+  # Return a p-value for a z-score
+  #
+  # z > 0 gives 1 - pnorm(z); z < 0 gives -pnorm(z), i.e. the value carries
+  # the sign of z; z == 0 gives 0.5; NA or NaN gives NA.
+  z2p <- function(z){
+    if( is.nan(z) || is.na(z) ){
+      return(NA)
+    }
+    if(z>0){
+      return(1 - pnorm(z,0,1))
+    }
+    if(z<0){
+      return(-1 * pnorm(z,0,1))
+    }
+    return(0.5)
+  }    
+  
+  
+  ## Bayesian  Test
+
+  # Fitted parameters for the Bayesian framework: 60 values. calculate_bayes()
+  # picks entry min(N,60) and adds it to both shape parameters of dbeta().
+BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986)
+  # Sorted evaluation grid used as the default `i` of calculate_bayes().
+  # It is the union of three pieces: the first 200 values of
+  # 2^seq(-39,0,length.out=201)/2, the values (k+0.5)/100 for k in 0..99
+  # except k = 12, and 1 minus all 201 values of 2^seq(-39,0,length.out=201)/2.
+  CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2))
+  
+  # Given x, N & p, returns a pdf 
+  #
+  # x, N, p:      recycled to a common length; N is used together with x as
+  #               dbeta() shape parameters and p enters the log-odds ratio
+  # i:            grid of values at which dbeta() is evaluated (default CONST_i)
+  # max_sigma,
+  # length_sigma: the result is given on
+  #               seq(-max_sigma, max_sigma, length.out=length_sigma)
+  # Returns a numeric vector of length length_sigma, or NA when N contains 0
+  # or the intermediate values contain NA.
+  calculate_bayes <- function ( x=3, N=10, p=0.33,
+                                i=CONST_i,
+                                max_sigma=20,length_sigma=4001
+                              ){
+    if(!0%in%N){
+      # Recycle the three inputs to the same length
+      G <- max(length(x),length(N),length(p))
+      x=array(x,dim=G)
+      N=array(N,dim=G)
+      p=array(p,dim=G)
+      # Output grid
+      sigma_s<-seq(-max_sigma,max_sigma,length.out=length_sigma)
+      # Log of the odds of i relative to the odds of p
+      sigma_1<-log({i/{1-i}}/{p/{1-p}})
+      # BAYESIAN_FITTED has 60 entries, so the index is capped at 60
+      index<-min(N,60)
+      y<-dbeta(i,x+BAYESIAN_FITTED[index],N+BAYESIAN_FITTED[index]-x)*(1-p)*p*exp(sigma_1)/({1-p}^2+2*p*{1-p}*exp(sigma_1)+{p^2}*exp(2*sigma_1))
+      if(!sum(is.na(y))){
+        # Interpolate onto the output grid, then scale so that the values
+        # times the grid spacing sum to 1
+        tmp<-approx(sigma_1,y,sigma_s)$y
+        tmp/sum(tmp)/{2*max_sigma/{length_sigma-1}}
+      }else{
+        return(NA)
+      }
+    }else{
+      return(NA)
+    }
+  }  
+  # Given a mat of observed & expected, return a list of CDR & FWR pdf for selection
+  #
+  # mat:  numeric matrix, one row per sequence; columns 1-4 are read as
+  #       observed counts and columns 5-8 as the matching expected values
+  #       (order R_CDR, S_CDR, R_FWR, S_FWR as produced elsewhere in this file)
+  # test: "Focused", "Local", "Imbalanced" or "ImbalancedSilent"; selects the
+  #       columns that make up X (observed), N (total) and P (expected
+  #       proportion) handed to calculate_bayes(). Any other value is an error.
+  # max_sigma, length_sigma: forwarded to calculate_bayes()
+  # Returns list("CDR"=..., "FWR"=...) with one calculate_bayes() result per
+  # row of mat.
+  computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){
+    # A single row would be dropped to a vector by mat[,cols]; duplicate it
+    # and keep only the first result at the end.
+    flagOneSeq = FALSE
+    if(nrow(mat)==1){
+      mat=rbind(mat,mat)
+      flagOneSeq = TRUE
+    }
+
+    # Row-wise sum over the given columns
+    rowTotal <- function(cols){
+      apply(mat[,cols],1,function(x){(sum(x))})
+    }
+    # Row-wise numerator(x)/sum(x) over the given columns
+    expectedProportion <- function(cols,numerator){
+      apply(mat[,cols],1,function(x){ numerator(x)/sum(x) })
+    }
+    first <- function(x){ x[1] }
+    second <- function(x){ x[2] }
+
+    # One posterior per row. A dummy entry with N=0 is appended and its result
+    # dropped again: calculate_bayes() returns NA for it, which presumably
+    # serves to make apply() return a list instead of simplifying to a matrix.
+    posteriors <- function(X,N,P){
+      X = c(X,0)
+      N = c(N,0)
+      P = c(P,0.5)
+      res = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)})
+      res[-length(res)]
+    }
+
+    if(test=="Focused"){
+      bayesCDR = posteriors( X=mat[,1], N=rowTotal(c(1,2,4)), P=expectedProportion(c(5,6,8),first) )
+      bayesFWR = posteriors( X=mat[,3], N=rowTotal(c(3,2,4)), P=expectedProportion(c(7,6,8),first) )
+    }else if(test=="Local"){
+      bayesCDR = posteriors( X=mat[,1], N=rowTotal(c(1,2)), P=expectedProportion(c(5,6),first) )
+      bayesFWR = posteriors( X=mat[,3], N=rowTotal(c(3,4)), P=expectedProportion(c(7,8),first) )
+    }else if(test=="Imbalanced"){
+      bayesCDR = posteriors( X=rowTotal(c(1:2)), N=rowTotal(c(1:4)), P=expectedProportion(c(5:8),function(x){ x[1]+x[2] }) )
+      bayesFWR = posteriors( X=rowTotal(c(3:4)), N=rowTotal(c(1:4)), P=expectedProportion(c(5:8),function(x){ x[3]+x[4] }) )
+    }else if(test=="ImbalancedSilent"){
+      bayesCDR = posteriors( X=mat[,2], N=rowTotal(c(2,4)), P=expectedProportion(c(6,8),first) )
+      bayesFWR = posteriors( X=mat[,4], N=rowTotal(c(2,4)), P=expectedProportion(c(6,8),second) )
+    }else{
+      # Previously this fell through to "object 'bayesCDR' not found"
+      stop("Unknown test: ", test)
+    }
+
+    if(flagOneSeq==TRUE){
+      bayesCDR = bayesCDR[1]  
+      bayesFWR = bayesFWR[1]
+    }
+    return( list("CDR"=bayesCDR, "FWR"=bayesFWR) )
+  }
+  
+  ## Convolution
+
+  # Splits G items into chunks. The chunk size is the power of two nearest
+  # (in the exponent) to sqrt(G); all chunks have that size except the last,
+  # which also takes the remainder. Returns the vector of chunk sizes.
+  break2chunks<-function(G=1000){
+    chunkSize<-2^round(log(sqrt(G),2),0)
+    nChunks<-floor(G/chunkSize)
+    lastChunk<-chunkSize+G-(nChunks*chunkSize)
+    return(c(rep(chunkSize,nChunks-1),lastChunk))
+  }  
+  
+  # Decomposes G into a sum of powers of two and returns the exponents,
+  # largest first (e.g. 100 = 2^6 + 2^5 + 2^2 gives 6, 5, 2)
+  PowersOfTwo <- function(G=100){
+    exponents <- array()
+    remaining <- G
+    nFound <- 0
+    while(remaining > 0){
+      nFound <- nFound + 1
+      # Largest power of two not exceeding what is left
+      exponents[nFound] <- floor( log2(remaining) )
+      remaining <- remaining - 2^exponents[nFound]
+    }
+    return(exponents)
+  }
+  
+  # Convolves the columns of `cons` pairwise with weighted_conv(), halving the
+  # number of active columns at every level, and returns column 1 of the
+  # result. The number of levels is taken from log(ncol(cons),2).
+  convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){
+    nCols = ncol(cons)
+    if(nCols>1){
+      for(level in log(nCols,2):1){
+        # Columns (1,2) -> 1, (3,4) -> 2, ... processed left to right so that
+        # a column is only overwritten after it has been consumed
+        for(right in seq(from=2,to=2^level,by=2)){
+          cons[,right/2] <- weighted_conv(cons[,right],cons[,right-1],length_sigma=length_sigma)
+        }
+      }
+    }
+    return( cons[,1] )
+  }
+  
+  # Splits the columns of `cons` into consecutive groups whose sizes are the
+  # powers of two returned by PowersOfTwo(), and reduces every group to one
+  # column with convolutionPowersOfTwo().
+  #
+  # cons: matrix with one column per item, or a plain vector (then G, default
+  #       1, is used as the number of columns)
+  # Returns list( matG, groups ): matG has one column per group, groups holds
+  # the exponents (group i covers 2^groups[i] columns).
+  convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){
+    # ncol() is NULL for a plain vector, in which case G keeps its argument value
+    if(length(ncol(cons))) G<-ncol(cons)
+    groups <- PowersOfTwo(G)
+    matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G )
+    startIndex = 1
+    for( i in 1:length(groups) ){
+      stopIndex <- 2^groups[i] + startIndex - 1
+      if(stopIndex!=startIndex){
+        matG[,i] <- convolutionPowersOfTwo( cons[,startIndex:stopIndex], length_sigma=length_sigma )
+        startIndex = stopIndex + 1
+      }
+      else {
+        # Group of a single column (exponent 0): copied as is. startIndex is
+        # not advanced here; PowersOfTwo() returns decreasing exponents, so
+        # this can only be the last group.
+        if(G>1) matG[,i] <- cons[,startIndex:stopIndex]
+        else matG[,i] <- cons
+        #startIndex = stopIndex + 1
+      }
+    }
+    return( list( matG, groups ) )
+  }
+  
+  # Convolution of two vectors after resampling them to lengths in ratio w.
+  #
+  # x, y:         numeric vectors
+  # w:            length of (resampled) y relative to (resampled) x
+  # m:            minimum length; if x or y (also scaled by w) is shorter,
+  #               both are resampled by linear interpolation
+  # length_sigma: length of the returned vector
+  # Returns the open convolution, interpolated to length_sigma points,
+  # with non-positive values set to 0 and scaled to sum to 1.
+  weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
+    lx<-length(x)
+    ly<-length(y)
+    tooShort<-{lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}
+    if(tooShort){
+      # Resample both; the shorter of the two gets m points
+      nY<-if(w<1) m else m*w
+      nX<-if(w<1) m/w else m
+      y1<-approx(1:ly,y,seq(1,ly,length.out=nY))$y
+      x1<-approx(1:lx,x,seq(1,lx,length.out=nX))$y
+      lx<-length(x1)
+      ly<-length(y1)
+    }else{
+      # x is long enough: keep it and resample only y
+      x1<-x
+      y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y
+      ly<-length(y1)
+    }
+    nOut<-lx+ly-1
+    tmp<-approx(x=1:nOut,y=convolve(x1,rev(y1),type="open"),xout=seq(1,nOut,length.out=length_sigma))$y
+    tmp[tmp<=0] = 0
+    return(tmp/sum(tmp))
+  }
+  
+  # Combines the group columns produced by convolutionPowersOfTwoByTwos()
+  # into one vector.
+  #
+  # listMatG: list( matG, groups ); column i of matG stands for 2^groups[i]
+  #           items
+  # Starting from column 1, every further column is folded in with
+  # weighted_conv(), weighted by its item count relative to the number of
+  # items accumulated so far.
+  calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
+    matG <- listMatG[[1]]
+    groups <- listMatG[[2]]
+    resConv <- matG[,1]
+    denom <- 2^groups[1]
+    for(k in seq_along(groups)[-1]){
+      resConv <- weighted_conv(resConv, matG[,k], w= {{2^groups[k]}/denom} ,length_sigma=length_sigma)
+      denom <- denom + 2^groups[k]
+    }
+    return(resConv)
+  }
+  
+  # Given a list of PDFs, returns a convoluted PDF    
+  #
+  # listPosteriors: list of pdf vectors; NA entries are dropped
+  # Threshold:      up to this many pdfs the power-of-two convolution is used
+  #                 directly, above it fastConv()
+  # Returns NA for an empty list, the pdf itself for a single entry, otherwise
+  # the combined pdf scaled so that its values times the grid spacing
+  # 2*max_sigma/(length_sigma-1) sum to 1.
+  groupPosteriors <- function( listPosteriors, max_sigma=20, length_sigma=4001 ,Threshold=2 ){  
+    listPosteriors = listPosteriors[ !is.na(listPosteriors) ]
+    nPosteriors<-length(listPosteriors)
+    if(nPosteriors==0) return(NA)
+    if(nPosteriors==1) return(listPosteriors[[1]])
+
+    # One pdf per column
+    cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),nPosteriors)
+    if(nPosteriors<=Threshold){
+      listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma)
+      y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma)
+    }else{
+      y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma )
+    }
+    return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
+  }
+
+  # Combines the columns of `cons` chunk by chunk.
+  #
+  # The columns are split with break2chunks(); every chunk is reduced with
+  # convolutionPowersOfTwoByTwos(), and the chunk results are then combined.
+  # Returns the combined result as a plain vector.
+  # NOTE(review): max_sigma is not used in the body, and length_sigma is not
+  # forwarded to the helpers (they run with their own default) - confirm.
+  fastConv<-function(cons, max_sigma=20, length_sigma=4001){
+    chunks<-break2chunks(G=ncol(cons))
+    # Special case: 3 columns are split as 2 + 1
+    if(ncol(cons)==3) chunks<-2:1
+    # First and last column index of every chunk, one row per chunk
+    index_chunks_end <- cumsum(chunks)
+    index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1)
+    index_chunks <- cbind(index_chunks_start,index_chunks_end)
+    
+    # case = number of chunks whose size differs from the first chunk.
+    # With case==1 the last chunk is handled separately below, so `End`
+    # stops one chunk earlier (length(index_chunks)/2 is the chunk count).
+    case <- sum(chunks!=chunks[1])
+    if(case==1) End <- max(1,((length(index_chunks)/2)-1))
+    else End <- max(1,((length(index_chunks)/2)))
+    
+    # Reduce each of the first `End` chunks
+    firsts <- sapply(1:End,function(i){
+          	    indexes<-index_chunks[i,1]:index_chunks[i,2]
+          	    convolutionPowersOfTwoByTwos(cons[ ,indexes])[[1]]
+          	  })
+    if(case==0){
+    	result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts) )
+    }else if(case==1){
+      # Reduce the differently sized last chunk on its own ...
+      last<-list(calculate_bayesGHelper(
+      convolutionPowersOfTwoByTwos( cons[ ,index_chunks[length(index_chunks)/2,1]:index_chunks[length(index_chunks)/2,2]] )
+                                      ),0)
+      result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts))
+      # ... then merge it with the others, each side weighted through the
+      # log2 of the number of columns it covers
+      result<-calculate_bayesGHelper(
+        list(
+          cbind(
+          result_first,last[[1]]),
+          c(log(index_chunks_end[length(index_chunks)/2-1],2),log(index_chunks[length(index_chunks)/2,2]-index_chunks[length(index_chunks)/2,1]+1,2))
+        )
+      )
+    }
+    # NOTE(review): `result` is only assigned for case 0 and 1
+    return(as.vector(result))
+  }
+    
+  # Computes the 95% CI for a pdf
+  #   Pdf          : density values, one per grid point
+  #   low, up      : cumulative probabilities bounding the interval
+  #   max_sigma    : the grid spans [-max_sigma, max_sigma]
+  #   length_sigma : number of grid points; Pdf must have exactly this length
+  # Returns c(lower, upper) on the sigma grid, or NA if Pdf has the wrong length.
+  calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
+    if(length(Pdf)!=length_sigma) return(NA)
+    sigma_s <- seq(-max_sigma,max_sigma,length.out=length_sigma)
+    cdf <- cumsum(Pdf)
+    cdf <- cdf/cdf[length(cdf)]
+    # findInterval() returns 0 when the probability lies below cdf[1]. Without
+    # clamping, an index of 0 drops the bound and an index of -1 selects every
+    # grid point but the first, so the result is no longer a pair of bounds.
+    lowerIdx <- max(findInterval(low,cdf)-1, 1)
+    upperIdx <- max(findInterval(up,cdf), 1)
+    return( c(sigma_s[lowerIdx] , sigma_s[upperIdx]) )
+  }
+  
+  # Computes a mean for a pdf
+  #   Pdf must hold length_sigma values on the grid [-max_sigma, max_sigma];
+  #   any other length yields NA.
+  #   The value comes out of a matrix product and is therefore a 1x1 matrix.
+  calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){
+    if(length(Pdf)==length_sigma){
+      grid <- seq(-max_sigma,max_sigma,length.out=length_sigma)
+      # number of grid steps per unit of sigma
+      pointsPerUnit <- (length_sigma-1)/2/max_sigma
+      weightedSum <- Pdf%*%grid
+      return( weightedSum/pointsPerUnit )
+    }
+    NA
+  }
+  
+  # Returns the mean, and the 95% CI for a pdf
+  #   Pdf : density on the sigma grid, or NA when no density is available
+  #   low, up, max_sigma, length_sigma : forwarded to calcBayesCI()/calcBayesMean()
+  # Returns c(mean, lower, upper); three NAs when Pdf is empty or starts with NA.
+  calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
+    # Only the first element is inspected: is.na() on a whole density gives a
+    # vector condition, which older R truncated to its first element and which
+    # is an error since R 4.2.
+    if(length(Pdf)==0 || is.na(Pdf[1]))
+     return(rep(NA,3))
+    bCI <- calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma)
+    bMean <- calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma)
+    return(c(bMean, bCI))
+  }
+
+  # Computes the p-value of a pdf
+  #   Sums the density over the lower half of the grid plus half of the centre
+  #   point and rescales by the grid spacing. Values above 0.5 are reported as
+  #   value-1, i.e. as a negative number. Returns NA unless Pdf has more than
+  #   one element.
+  computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){
+    if(length(Pdf)<=1) return(NA)
+    pointsPerUnit <- (length_sigma-1)/2/max_sigma
+    belowCentre <- 1:((length_sigma-1)/2)
+    centre <- (length_sigma+1)/2
+    pVal <- (sum(Pdf[belowCentre]) + Pdf[centre]/2)/pointsPerUnit
+    if(pVal>0.5) pVal <- pVal-1
+    return(pVal)
+  }
+  
+  # Compute p-value of two distributions
+  #   dens1, dens2 : densities on a common grid; both are normalised to sum 1
+  #   sigma_S, N   : not used by the active code; they only appear in the
+  #                  disabled sampling variant kept at the end
+  #   Accumulates dens1[i] times the cumulative mass of dens2 up to the middle
+  #   of bin i. Results above 0.5 are returned as value-1. Returns NA unless
+  #   both densities have more than one element.
+  compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
+    if(!(length(dens1)>1 & length(dens2)>1)) return(NA)
+    pdf1 <- dens1/sum(dens1)
+    pdf2 <- dens2/sum(dens2)
+    # cumulative mass of pdf2 evaluated at the centre of each bin
+    midCdf2 <- cumsum(pdf2)-pdf2/2
+    idx <- 1:length(pdf1)
+    overlap <- sum(pdf1[idx]*midCdf2[idx])
+    if(overlap>0.5) overlap <- overlap-1
+    return( overlap )
+    #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N)
+  }
+  
+  # Get the number of sequences contributing to the sigma (i.e. sequences with
+  # mutations).
+  #   matMutations : one row per sequence; columns 1-4 are summed as listed below
+  #   test         : 1 = focused, 2 = local (4 is treated as 2); any other
+  #                  value leaves both counts at 0
+  # Returns c(CDR=..., FWR=...): the number of rows whose column sum is > 0.
+  numberOfSeqsWithMutations <- function(matMutations,test=1){
+    if(test==4) test <- 2
+    # columns summed per sequence for the CDR and the FWR count of each test
+    columnSets <- list(list(cdr=c(1,2,4), fwr=c(3,4,2)),   # test 1: focused
+                       list(cdr=c(1,2),   fwr=c(3,4)))     # test 2: local
+    countMutated <- function(cols){
+      perSeq <- apply(matMutations, 1, function(x){ sum(x[cols]) })
+      if( any(which(perSeq>0)) ) sum(perSeq>0) else 0
+    }
+    cdrSeqs <- 0
+    fwrSeqs <- 0
+    if(test==1 || test==2){
+      cdrSeqs <- countMutated(columnSets[[test]]$cdr)
+      fwrSeqs <- countMutated(columnSets[[test]]$fwr)
+    }
+    return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs))
+  }
+
+
+
+# Background colour for a result, shaded by significance.
+#   sigmaVal : only its sign is used (negative -> green palette, otherwise
+#              red). When NA it is taken from sign(pVal).
+#   pVal     : signed p-value; the shade is chosen from abs(pVal).
+# Returns NA when both arguments are NA, white ("#FFFFFF") when pVal is NA,
+# 0 or 1, and otherwise a hex colour that gets more saturated as abs(pVal)
+# reaches 0.1, 0.05, 0.01 and 0.005.
+shadeColor <- function(sigmaVal=NA,pVal=NA){
+  if(is.na(sigmaVal) && is.na(pVal)) return(NA)
+  if(is.na(sigmaVal)) sigmaVal <- sign(pVal)
+  if(is.na(pVal) || pVal==1 || pVal==0) return("#FFFFFF")
+  greens <- c("#CCFFCC","#99FF99","#66FF66","#33FF33","#00FF00")
+  reds   <- c("#FFCCCC","#FF9999","#FF6666","#FF3333","#FF0000")
+  # Number of thresholds reached. Both palettes use inclusive bounds; the red
+  # branch used a strict "<" at 0.005 before, unlike the green one.
+  shade <- 1 + sum(abs(pVal) <= c(0.1,0.05,0.01,0.005))
+  if(sigmaVal<0) greens[shade] else reds[shade]
+}
+
+
+
+# Converts fractional offsets into a coordinate pair to the left of and above
+# the current plot region (read from par()$usr).
+#   xfrac, yfrac : offsets as fractions of the plot width / height
+#   log          : FALSE leaves both values as they are; 1 raises 10 to the
+#                  x value, 2 to the y value, 3 to both
+# Returns c(x=..., y=...).
+plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){
+  usr <- par()$usr
+  xLinear <- usr[1]-(usr[2]-usr[1])*xfrac
+  yLinear <- usr[4]+(usr[4]-usr[3])*yfrac
+  if(!log){
+    x <- xLinear
+    y <- yLinear
+  }else{
+    if(log==2){ x <- xLinear;    y <- 10^yLinear }
+    if(log==1){ x <- 10^xLinear; y <- yLinear }
+    if(log==3){ x <- 10^xLinear; y <- 10^yLinear }
+  }
+  return(c("x"=x,"y"=y))
+}
+
+# SHMulation
+
+  # Based on targeting, introduce a single mutation & then update the targeting
+  #   Works on the global simulation state: samples one entry of seqTargeting,
+  #   writes the chosen nucleotide into the global seqSim and refreshes the
+  #   neighbourhood through updateMutabilityNTargeting().
+  #   Returns the mutated nucleotide position.
+  oneMutation <- function(){
+    # as.vector() lays the targeting values out in runs of 4 per position, so
+    # one draw fixes both the position and the nucleotide it mutates to
+    targetIdx <- sample(1:(seqGermlineLen*4),1,replace=FALSE,prob=as.vector(seqTargeting))
+    nucPos <- ceiling(targetIdx/4)          # Nucleotide number
+    nucKind <- targetIdx - 4*(nucPos-1)     # Nuc the position mutates to
+    #mutate the simulation sequence
+    simChars <- s2c(seqSim)
+    simChars[nucPos] <- NUCLEOTIDES[nucKind]
+    seqSim <<- c2s(simChars)
+    #update Mutability, Targeting & MutationsTypes
+    updateMutabilityNTargeting(nucPos)
+    return(nucPos)
+  }
+  
+  # Refreshes the global simulation state around a freshly mutated position.
+  #   seqMutability and seqTargeting are recomputed for the window
+  #   position-2 .. position+2 (clipped to the sequence), seqMutationTypes for
+  #   the codon holding the position. Targeting of entries typed "Stop" is set
+  #   to 0 and entries typed "R" are rescaled with the CDR/FWR selection terms.
+  #   All results are written to the globals with <<-.
+  updateMutabilityNTargeting <- function(position){
+    winStart <- max((position-2),1)
+    winEnd <- min((position+2),nchar(seqSim))
+    firstInContext <- min(winStart,3)
+    win <- winStart:winEnd
+    #mutability - update locally, from the context position-4 .. position+4
+    contextMutability <- computeMutabilities(substr(seqSim,position-4,position+4))
+    seqMutability[win] <<- contextMutability[(firstInContext):(winEnd-winStart+firstInContext)]
+    #targeting - compute locally
+    seqTargeting[,win] <<- computeTargeting(substr(seqSim,winStart,winEnd),seqMutability[win])
+    seqTargeting[is.na(seqTargeting)] <<- 0
+    # positions from the first codon of the window through its last codon
+    windowCodons <- seq(getCodonPos(winStart)[1],getCodonPos(winEnd)[3])
+    # mutation types are only recomputed for the codon that was hit
+    hitCodon <- getCodonPos(position)
+    seqMutationTypes[,hitCodon] <<- computeMutationTypesFast( substr(seqSim,hitCodon[1],hitCodon[3]) )
+    # Stop = 0
+    isStop <- seqMutationTypes[,windowCodons]=="Stop"
+    if(any(isStop,na.rm=TRUE)){
+      seqTargeting[,windowCodons][isStop] <<- 0
+    }
+    #Selection: linear indices into seqTargeting of the "R" entries in the window
+    selectedPos <- (winStart*4-4)+(which(seqMutationTypes[,win]=="R"))
+    # CDR
+    selectedCDR <- selectedPos[which(matCDR[selectedPos]==TRUE)]
+    seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR] * exp(selCDR) / baseLineCDR_K
+    # FWR
+    selectedFWR <- selectedPos[which(matFWR[selectedPos]==TRUE)]
+    seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR] * exp(selFWR) / baseLineFWR_K
+  }
+  
+
+
+  # Validate the mutation: if the mutation has not been sampled before validate it, else discard it.
+  #   Works on the globals mutatedPos, mutatedPositions and
+  #   uniqueMutationsIntroduced. A new position is appended and counted. A
+  #   position seen before is removed again (and the counter decreased) when
+  #   seqSim now matches seqGermline at that position.
+  validateMutation <- function(){
+    alreadySeen <- mutatedPos%in%mutatedPositions
+    if(alreadySeen){
+      backToGermline <- substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos)
+      if(backToGermline){ # back to germline mutation
+        mutatedPositions <<- mutatedPositions[-which(mutatedPositions==mutatedPos)]
+        uniqueMutationsIntroduced <<- uniqueMutationsIntroduced - 1
+      }
+    }else{ # it's a new mutation
+      uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1
+      mutatedPositions[uniqueMutationsIntroduced] <<- mutatedPos
+    }
+  }
+  
+  
+  
+  # Places text (labels) at normalized coordinates
+  #   xfrac, yfrac : offsets from the top-left corner of the plot region, as
+  #                  fractions of its width / height
+  #   log          : FALSE uses the values as they are; 1 raises 10 to the x
+  #                  value, 2 to the y value, 3 to both
+  #   w            : the text to draw; cex, adj and thecol are passed to text()
+  #   Clipping is switched off (par(xpd=TRUE)) while drawing and set to
+  #   par(xpd=FALSE) afterwards.
+  myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){
+    par(xpd=TRUE)
+    usr <- par()$usr
+    xLinear <- usr[1]-(usr[2]-usr[1])*xfrac
+    yLinear <- usr[4]+(usr[4]-usr[3])*yfrac
+    if(!log){
+      text(xLinear,yLinear,w,cex=cex,adj=adj,col=thecol)
+    }else{
+      if(log==2) text(xLinear,10^yLinear,w,cex=cex,adj=adj,col=thecol)
+      if(log==1) text(10^xLinear,yLinear,w,cex=cex,adj=adj,col=thecol)
+      if(log==3) text(10^xLinear,10^yLinear,w,cex=cex,adj=adj,col=thecol)
+    }
+    par(xpd=FALSE)
+  }
+  
+  
+  
+  # Count the mutations in a sequence
+  #   Compares the input sequence (matInput[inputMatrixIndex,1]) with its
+  #   germline (matInput[inputMatrixIndex,2]) nucleotide by nucleotide and
+  #   classifies every difference with mutationType().
+  #   model            : 1 keeps only mutations typed "S"
+  #   multipleMutation : 1 drops mutations in codons that carry more than one
+  #                      mutation and overwrites those codons with "N" in the
+  #                      global matInput
+  #   seqWithStops     : 1 drops mutations whose observed codon is typed "Stop";
+  #                      2 discards everything when no codon is typed "Stop";
+  #                      3 discards everything when one is
+  #   Returns the mutation types named by nucleotide position, or NA when
+  #   nothing is left.
+  analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){
+    paramGL = s2c(matInput[inputMatrixIndex,2])
+    paramSeq = s2c(matInput[inputMatrixIndex,1])
+    #if( any(paramSeq=="N") ){
+    #  gapPos_Seq =  which(paramSeq=="N")
+    #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
+    #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
+    #}
+    mutations_val = paramGL != paramSeq
+    if(!any(mutations_val)) return(NA)
+    mutationPos = which(mutations_val)
+    length_mutations = length(mutationPos)
+    # nucleotide positions of the codon around every mutated position
+    pos_array <- array(sapply(mutationPos,getCodonPos))
+    codonGL = paramGL[pos_array]
+    codonSeqWhole = paramSeq[pos_array]
+    # germline codon with only the single mutation at position x applied
+    codonSeq = sapply(mutationPos,function(x){
+      seqP = paramGL[getCodonPos(x)]
+      muCodonPos = {x-1}%%3+1
+      seqP[muCodonPos] = paramSeq[x]
+      return(seqP)
+    })
+    GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
+    SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
+    Seqcodons = apply(codonSeq,2,c2s)
+    # type of every mutation taken on its own ...
+    mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
+    names(mutationInfo) = mutationPos
+    # ... and type of the observed codon with all of its mutations together
+    mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
+    names(mutationInfoWhole) = mutationPos
+    mutationInfo <- mutationInfo[!is.na(mutationInfo)]
+    mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
+    if(!any(!is.na(mutationInfo))) return(NA)
+    #Filter based on Stop (at the codon level)
+    if(seqWithStops==1){
+      # despite its name this holds the positions whose codon is NOT typed "Stop"
+      nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
+      mutationInfo = mutationInfo[nucleotidesAtStopCodons]
+      # NOTE(review): this subsets mutationInfo again rather than
+      # mutationInfoWhole; left unchanged - confirm the intent
+      mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
+    }else{
+      countStops = sum(mutationInfoWhole=="Stop")
+      if(seqWithStops==2 & countStops==0) mutationInfo = NA
+      if(seqWithStops==3 & countStops>0) mutationInfo = NA
+    }
+    if(!any(!is.na(mutationInfo))) return(NA)
+    #Filter mutations based on multipleMutation
+    # Only the first entry is tested: the former vector condition was truncated
+    # to its first element by older R and is an error since R 4.2.
+    if(multipleMutation==1 && !is.na(mutationInfo[1])){
+      mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
+      tableMutationCodons <- table(mutationCodons)
+      codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
+      if(any(codonsWithMultipleMutations)){
+        #remove the nucleotide mutations in the codons with multiple mutations
+        mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
+        #replace those codons with Ns in the input sequence
+        paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
+        matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
+      }
+    }
+    #Filter mutations based on the model
+    # any() on the type labels coerces them to logical; the combined test is
+    # FALSE only when no mutation is left
+    if(!(any(mutationInfo)==TRUE | is.na(any(mutationInfo)))) return(NA)
+    if(model==1 && !is.na(mutationInfo[1])){
+      mutationInfo <- mutationInfo[mutationInfo=="S"]
+    }
+    if(any(mutationInfo)==TRUE | is.na(any(mutationInfo))) return(mutationInfo)
+    return(NA)
+  }
+
+   # Variant of analyzeMutations() that does not touch the global matInput.
+   #   inputArray       : inputArray[1] is the input sequence, inputArray[2]
+   #                      its germline
+   #   model            : 1 keeps only mutations typed "S"
+   #   multipleMutation : 1 drops mutations in codons that carry more than one
+   #                      mutation and replaces those codons with "N" in the
+   #                      returned sequence
+   #   seqWithStops     : 1 drops mutations whose observed codon is typed
+   #                      "Stop"; 2 discards everything when no codon is typed
+   #                      "Stop"; 3 discards everything when one is
+   #   Returns list(mutation types named by position or NA, input sequence).
+   analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){
+    paramGL = s2c(inputArray[2])
+    paramSeq = s2c(inputArray[1])
+    inputSeq <- inputArray[1]
+    #if( any(paramSeq=="N") ){
+    #  gapPos_Seq =  which(paramSeq=="N")
+    #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
+    #  paramSeq[gapPos_Seq_ToReplace] =  paramGL[gapPos_Seq_ToReplace]
+    #}
+    mutations_val = paramGL != paramSeq
+    if(!any(mutations_val)) return(list(NA,inputSeq))
+    mutationPos = which(mutations_val)
+    length_mutations = length(mutationPos)
+    # nucleotide positions of the codon around every mutated position
+    pos_array <- array(sapply(mutationPos,getCodonPos))
+    codonGL = paramGL[pos_array]
+    codonSeqWhole = paramSeq[pos_array]
+    # germline codon with only the single mutation at position x applied
+    codonSeq = sapply(mutationPos,function(x){
+      seqP = paramGL[getCodonPos(x)]
+      muCodonPos = {x-1}%%3+1
+      seqP[muCodonPos] = paramSeq[x]
+      return(seqP)
+    })
+    GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
+    SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s)
+    Seqcodons = apply(codonSeq,2,c2s)
+    # type of every mutation taken on its own ...
+    mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
+    names(mutationInfo) = mutationPos
+    # ... and type of the observed codon with all of its mutations together
+    mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
+    names(mutationInfoWhole) = mutationPos
+    mutationInfo <- mutationInfo[!is.na(mutationInfo)]
+    mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)]
+    if(!any(!is.na(mutationInfo))) return(list(NA,inputSeq))
+    #Filter based on Stop (at the codon level)
+    if(seqWithStops==1){
+      # despite its name this holds the positions whose codon is NOT typed "Stop"
+      nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"])
+      mutationInfo = mutationInfo[nucleotidesAtStopCodons]
+      # NOTE(review): this subsets mutationInfo again rather than
+      # mutationInfoWhole; left unchanged - confirm the intent
+      mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons]
+    }else{
+      countStops = sum(mutationInfoWhole=="Stop")
+      if(seqWithStops==2 & countStops==0) mutationInfo = NA
+      if(seqWithStops==3 & countStops>0) mutationInfo = NA
+    }
+    if(!any(!is.na(mutationInfo))) return(list(NA,inputSeq))
+    #Filter mutations based on multipleMutation
+    # Only the first entry is tested: the former vector condition was truncated
+    # to its first element by older R and is an error since R 4.2.
+    if(multipleMutation==1 && !is.na(mutationInfo[1])){
+      mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole)))
+      tableMutationCodons <- table(mutationCodons)
+      codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1]))
+      if(any(codonsWithMultipleMutations)){
+        #remove the nucleotide mutations in the codons with multiple mutations
+        mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)]
+        #replace those codons with Ns in the input sequence
+        paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N"
+        #matInput[inputMatrixIndex,1] <<- c2s(paramSeq)
+        inputSeq <- c2s(paramSeq)
+      }
+    }
+    #Filter mutations based on the model
+    # any() on the type labels coerces them to logical; the combined test is
+    # FALSE only when no mutation is left
+    if(!(any(mutationInfo)==TRUE | is.na(any(mutationInfo)))) return(list(NA,inputSeq))
+    if(model==1 && !is.na(mutationInfo[1])){
+      mutationInfo <- mutationInfo[mutationInfo=="S"]
+    }
+    if(any(mutationInfo)==TRUE | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq))
+    return(list(NA,inputSeq))
+   }
+ 
+  # triMutability Background Count
+  #   Builds per-fivemer mutability for one row of the global matInput.
+  #   Column 1 is the input sequence, column 2 the germline; "-" characters
+  #   are removed from both. Mutations come from analyzeMutationsFixed().
+  #   model         : 0 counts all mutations, 1 only those typed "S"
+  #   stopMutations : 1 leaves out mutations typed "Stop"
+  #   multipleMutation, seqWithStops : passed on to analyzeMutationsFixed()
+  #   Returns list(seqMutability, numbMutations, seqMutabilityCount,
+  #   BackgroundMatrix), or NULL when there is nothing to count.
+  buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
+    
+    #rowOrigMatInput = matInput[inputMatrixIndex,]    
+    seqGL =  gsub("-", "", matInput[inputMatrixIndex,2])
+    seqInput = gsub("-", "", matInput[inputMatrixIndex,1])    
+    #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL)
+    tempInput <- cbind(seqInput,seqGL)
+    seqLength = nchar(seqGL)      
+    list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops)
+    mutationCount <- list_analyzeMutationsFixed[[1]]
+    # the sequence comes back with multiply-mutated codons replaced by "N"
+    seqInput <- list_analyzeMutationsFixed[[2]]
+    # all three accumulators start as copies of the global mutabilityMatrix
+    BackgroundMatrix = mutabilityMatrix
+    MutationMatrix = mutabilityMatrix    
+    MutationCountMatrix = mutabilityMatrix    
+    # Only the first entry is tested: the former vector condition was truncated
+    # to its first element by older R and is an error since R 4.2.
+    if(!is.na(mutationCount[1])){
+      if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ 
+                  
+        # every fivemer of the germline and the same window of the input sequence
+        fivermerStartPos = 1:(seqLength-4)
+        fivemerLength <- length(fivermerStartPos)
+        fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
+        fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4))
+    
+        #Background
+        for(fivemerIndex in 1:fivemerLength){
+          fivemer = fivemerGL[fivemerIndex]
+          if(!any(grep("N",fivemer))){
+            # reading-frame codon that contains the middle nucleotide of the fivemer
+            fivemerCodonPos = fivemerCodon(fivemerIndex)
+            fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) 
+            fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3])          
+            
+            # All mutations model
+            #if(!any(grep("N",fivemerReadingFrameCodon))){
+              if(model==0){
+                if(stopMutations==0){
+                  if(!any(grep("N",fivemerReadingFrameCodonInputSeq)))
+                    BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1)              
+                }else{
+                  if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){
+                    positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1]
+                    BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon])
+                  }
+                }
+              }else{ # Only silent mutations
+                if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){
+                  positionWithinCodon = which(fivemerCodonPos==3)
+                  BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon])
+                }
+              }
+            #}
+          }
+        }
+        
+        #Mutations
+        if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
+        if(model==1) mutationCount = mutationCount[mutationCount=="S"]  
+        mutationPositions = as.numeric(names(mutationCount))
+        # keep only positions that have two neighbours on either side, i.e. that
+        # are the centre of a complete fivemer
+        mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)]
+        mutationPositions =  mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
+        countMutations = 0 
+        for(mutationPosition in mutationPositions){
+          # the fivemer centred on the mutation starts two positions earlier
+          fivemerIndex = mutationPosition-2
+          fivemer = fivemerSeq[fivemerIndex]
+          GLfivemer = fivemerGL[fivemerIndex]
+          fivemerCodonPos = fivemerCodon(fivemerIndex)
+          fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) 
+          fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3])
+          if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){
+            if(model==0){
+                countMutations = countMutations + 1              
+                MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1)
+                MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)             
+            }else{
+              if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){
+                  countMutations = countMutations + 1
+                  positionWithinCodon = which(fivemerCodonPos==3)
+                  glNuc =  substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
+                  inputNuc =  substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
+                  MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc])
+                  MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1)                                    
+              }                
+            }                  
+          }              
+        }
+        
+        # mutations per unit of background, rescaled so the entries sum to 1
+        seqMutability = MutationMatrix/BackgroundMatrix
+        seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
+        #cat(inputMatrixIndex,"\t",countMutations,"\n")
+        return(list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))      
+        
+      }        
+    }
+  
+  }  
+  
+  #Returns the codon position containing the middle nucleotide
+  #  fivemerIndex : start position of the fivemer in the sequence
+  #  The result is the offset range within the fivemer (1:3, 2:4 or 3:5),
+  #  selected by fivemerIndex modulo 3.
+  fivemerCodon <- function(fivemerIndex){
+    switch(fivemerIndex%%3 + 1,
+           2:4,   # remainder 0
+           1:3,   # remainder 1
+           3:5)   # remainder 2
+  }
+
+  #returns probability values for one mutation in codons resulting in R, S or Stop
+  #  typeOfMutation : the mutationType() label to accumulate
+  #  Returns a 125 x 3 matrix (codons over NUCLEOTIDES plus "N" by codon
+  #  position). Each cell sums the substitution rates of the three possible
+  #  mutations at that position whose type equals typeOfMutation. Codons
+  #  containing "N" stay 0.
+  probMutations <- function(typeOfMutation){
+    codonNames <- words(alphabet = c(NUCLEOTIDES,"N"), length=3)
+    matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(codonNames,c(1:3)))
+    for(codon in rownames(matMutationProb)){
+      if( grepl("N",codon) ) next
+      codonChars <- s2c(codon)
+      for(muPos in 1:3){
+        # three copies of the codon, one per nucleotide the position can change to
+        matCodon <- matrix(rep(codonChars,3),nrow=3,ncol=3,byrow=TRUE)
+        glNuc <- matCodon[1,muPos]
+        matCodon[,muPos] <- canMutateTo(glNuc)
+        substitutionRate <- substitution[glNuc,matCodon[,muPos]]
+        mutantCodons <- apply(matCodon,1,c2s)
+        typeOfMutations <- sapply(mutantCodons,function(mutant){mutationType(c2s(codon),c2s(mutant))},USE.NAMES=FALSE)
+        matMutationProb[codon,muPos] <- sum(substitutionRate[typeOfMutations==typeOfMutation])
+      }
+    }
+    return(matMutationProb)
+  }
+  
+  
+  
+  
+#Mapping Trinucleotides to fivemers
+#  triMutability : table with one row per trinucleotide (row names are set
+#                  from triMutability_Names) and one column per position of
+#                  the mutated nucleotide within the trinucleotide
+#  The score of a fivemer adds the entries of the three trinucleotides that
+#  contain its middle nucleotide. Scores are rescaled to sum to 1.
+mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){
+  rownames(triMutability) <- triMutability_Names
+  fivemerWords <- as.character(words(alphabet=NUCLEOTIDES,length=5))
+  scoreWord <- function(Word){
+    sum( c(triMutability[substring(Word,3,5),1],    # middle nucleotide is first
+           triMutability[substring(Word,2,4),2],    # ... second
+           triMutability[substring(Word,1,3),3]),   # ... third
+         na.rm=TRUE)
+  }
+  Fivemer <- sapply(fivemerWords,scoreWord)
+  return(Fivemer/sum(Fivemer))
+}
+
+# Collapses fivemer values to trinucleotides.
+#   Fivemer  : values named by fivemer
+#   Weights  : weights in the same order as Fivemer
+#   position : place of the middle nucleotide inside the trinucleotide (1-3)
+#   NUC      : only fivemers with this middle nucleotide are used
+# Returns the weighted mean per trinucleotide (NAs removed).
+collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
+  centreMatches <- substring(names(Fivemer),3,3)==NUC
+  triLabels <- substring(names(Fivemer[centreMatches]),(4-position),(6-position))
+  weightedMeanAt <- function(i) weighted.mean(Fivemer[i],Weights[i],na.rm=TRUE)
+  tapply(which(centreMatches),triLabels,weightedMeanAt)
+}
+
+
+
+# Sums the weights of the fivemers behind each trinucleotide.
+#   Fivemer  : values named by fivemer (only the names are used)
+#   Weights  : weights in the same order as Fivemer
+#   position : place of the middle nucleotide inside the trinucleotide (1-3)
+#   NUC      : only fivemers with this middle nucleotide are used
+CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){
+  centreMatches <- substring(names(Fivemer),3,3)==NUC
+  triLabels <- substring(names(Fivemer[centreMatches]),(4-position),(6-position))
+  weightSumAt <- function(i) sum(Weights[i],na.rm=TRUE)
+  tapply(which(centreMatches),triLabels,weightSumAt)
+}
+
+#Uses the real counts of the mutated fivemers
+#  Fivemer  : values named by fivemer (only the names are used)
+#  Counts   : counts in the same order as Fivemer
+#  position : place of the middle nucleotide inside the trinucleotide (1-3)
+#  NUC      : only fivemers with this middle nucleotide are used
+#  Returns the summed counts per trinucleotide (NAs removed).
+CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){
+  centreMatches <- substring(names(Fivemer),3,3)==NUC
+  triLabels <- substring(names(Fivemer[centreMatches]),(4-position),(6-position))
+  countSumAt <- function(i) sum(Counts[i],na.rm=TRUE)
+  tapply(which(centreMatches),triLabels,countSumAt)
+}
+
+# Simulated confidence bounds for the proportions of a count vector.
+#   x     : observed counts per category
+#   M     : number of multinomial samples drawn
+#   alpha : error level, divided by 2*(number of categories - 1)
+# Returns a 2-row matrix (lower and upper quantile) with one column per
+# category; all zeros when the counts sum to 0.
+bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){
+  N<-sum(x)
+  if(!N) return(matrix(0,2,length(x)))
+  k<-length(x)-1
+  draws<-rmultinom(M, size = N, prob=x/N)
+  proportions<-apply(draws,2,function(y)y/N)
+  apply(proportions,1,function(y)quantile(y,c(alpha/2/k,1-alpha/2/k)))
+}
+
+
+
+
+# Simulated bounds for category proportions in subsamples of size n.
+#   x     : observed counts per category
+#   n     : number of items drawn (without replacement) per subsample
+#   M     : number of subsamples
+#   alpha : error level, divided by 2*(number of categories - 1)
+# Returns a 2-row matrix (lower and upper quantile), one column per category.
+bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){
+  k<-length(x)
+  # one entry per observed item, labelled with its category number
+  urn<-rep(1:k,x)
+  draws<-sapply(1:M,function(i)sample(urn,n))
+  countCategories<-function(values)sapply(1:k,function(i)sum(values==i))
+  # draws is a matrix (one column per subsample) for n>1 and a vector for n==1
+  if(n>1)freq<-sapply(1:M,function(j)countCategories(draws[,j]))/n
+  if(n==1)freq<-sapply(1:M,function(j)countCategories(draws[j]))/n
+  tailProb<-alpha/2/(k-1)
+  apply(freq,1,function(z)quantile(z,c(tailProb,1-tailProb)))
+}
+
+
+
+# Simulated, signed p-value per category for observing x_obs in a subsample.
+#   x     : reference counts per category
+#   M     : number of subsamples drawn without replacement
+#   x_obs : observed counts; its sum is the subsample size
+# For every category the share of subsamples with a count >= and <= the
+# observed one is computed. The smaller of the two is returned; it is
+# negated when it is the "<=" share (ties count as "<=").
+p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){
+  n<-sum(x_obs)
+  k<-length(x)
+  # one entry per reference item, labelled with its category number
+  urn<-rep(1:k,x)
+  # sample.int() + indexing is what sample() does for longer vectors, but it
+  # also stays correct when the urn holds a single item (sample(3,1) would
+  # draw from 1:3 instead)
+  draws<-sapply(1:M,function(i)urn[sample.int(length(urn),n)])
+  if(n>1)counts<-sapply(1:M,function(j)tabulate(draws[,j],nbins=k))
+  if(n==1)counts<-sapply(1:M,function(j)tabulate(draws[j],nbins=k))
+  counts<-matrix(counts,nrow=k)
+  # all categories of x are evaluated; this used to be hard-coded as 1:3
+  categories<-seq_len(k)
+  pUpper<-sapply(categories,function(i)sum(counts[i,]>=x_obs[i])/M)
+  pLower<-sapply(categories,function(i)sum(counts[i,]<=x_obs[i])/M)
+  ifelse(pUpper>=pLower,-pLower,pUpper)
+}
+
+#"D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA"
+# Remove SNPs from IMGT germline segment alleles
+#   repertoireInFile  : FASTA file to read; field 2 of the "|"-separated
+#                       header is taken as the allele name and its part
+#                       before "*" as the gene segment
+#   repertoireOutFile : FASTA file to write
+# Within the first 320 positions, every position where an allele differs from
+# the first allele of its gene segment (ignoring "." on either side) is
+# replaced by "N" in all alleles of that segment.
+generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){
+  repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=TRUE,set.attributes=FALSE,forceDNAtolower=FALSE)
+  alleleOf <- function(header) strsplit(header,"|",fixed=TRUE)[[1]][2]
+  segmentOf <- function(allele) strsplit(allele,"*",fixed=TRUE)[[1]][1]
+  alleleNames <- sapply(names(repertoireIn),alleleOf)
+  # per gene segment: sorted positions that vary between its alleles
+  SNPs <- tapply(repertoireIn,sapply(alleleNames,segmentOf),function(alleles){
+    firstSeq <- s2c(alleles[[1]])[1:320]
+    differing <- NULL
+    for(allele in alleles){
+      iSeq <- s2c(allele)[1:320]
+      differing <- c(differing,which(firstSeq!=iSeq & firstSeq!="." & iSeq!="."))
+    }
+    return(sort(unique(differing)))
+  })
+  repertoireOut <- lapply(names(repertoireIn), function(repertoireName){
+    geneSegmentName <- segmentOf(alleleOf(repertoireName))
+    alleleSeq <- s2c(repertoireIn[[repertoireName]])
+    alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N"
+    c2s(alleleSeq)
+  })
+  names(repertoireOut) <- names(repertoireIn)
+  write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile)
+}
+
+
+
+
+
+
+############
+# Group-level "focused" densities for the rows selected by indexes.
+#   param_resultMat : columns 1-4 are used as x / N inputs, columns 5-8 to
+#                     derive p, for calculate_bayesG()
+#   CDR: x = column 1, N = columns 1+2+4, p = e1/(e1+e2+e4)
+#   FWR: x = column 3, N = columns 3+2+4, p = e3/(e3+e2+e4)
+#   where e1..e4 are columns 5..8.
+# Returns list(BayesGDist_Focused_CDR, BayesGDist_Focused_FWR).
+groupBayes2 = function(indexes, param_resultMat){
+  expectedCols <- param_resultMat[indexes,5:8]
+  cdrTotal <- apply(param_resultMat[indexes,c(1,2,4)],1,sum,na.rm=TRUE)
+  cdrProb  <- apply(expectedCols,1,function(x){x[1]/(x[1]+x[2]+x[4])})
+  fwrTotal <- apply(param_resultMat[indexes,c(3,2,4)],1,sum,na.rm=TRUE)
+  fwrProb  <- apply(expectedCols,1,function(x){x[3]/(x[3]+x[2]+x[4])})
+  # disabled variants, kept for reference:
+  #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])}))
+  #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])}))
+  #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])}))
+  #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])}))
+  list("BayesGDist_Focused_CDR"=calculate_bayesG( x=param_resultMat[indexes,1], N=cdrTotal, p=cdrProb),
+       "BayesGDist_Focused_FWR"=calculate_bayesG( x=param_resultMat[indexes,3], N=fwrTotal, p=fwrProb))
+}
+
+
+# Combines the per-sequence results of calculate_bayes() into one density.
+#   x, N, p      : recycled to a common length; entries with N<=0 or p<=0
+#                  are dropped
+#   max_sigma, length_sigma : grid definition
+# Returns NA when no entry is left and the single calculate_bayes() result
+# when one is left. Otherwise all results are convolved and rescaled by the
+# grid spacing.
+calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){
+  nGroups <- max(length(x),length(N),length(p))
+  x <- array(x,dim=nGroups)
+  N <- array(N,dim=nGroups)
+  p <- array(p,dim=nGroups)
+  informative <- N>0 & p>0
+  x <- x[informative]
+  N <- N[informative]
+  p <- p[informative]
+  nGroups <- length(x)
+  if(!nGroups) return(NA)
+  if(nGroups==1){
+    return(calculate_bayes(x=x[1],N=N[1],p=p[1],max_sigma=max_sigma,length_sigma=length_sigma))
+  }
+  # one column per remaining entry
+  cons <- array( dim=c(length_sigma,nGroups) )
+  for(g in 1:nGroups) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma)
+  y <- calculate_bayesGHelper(convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma),length_sigma=length_sigma)
+  return( y/sum(y)/(2*max_sigma/(length_sigma-1)) )
+}
+
+
+# Folds the columns of a convolution result into a single vector.
+#   listMatG     : list(matrix whose columns are combined left to right,
+#                       vector of exponents; column i carries weight 2^groups[i])
+#   length_sigma : grid size passed on to weighted_conv()
+# With a single group the first column is returned unchanged.
+calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){
+  columns <- listMatG[[1]]
+  exponents <- listMatG[[2]]
+  merged <- columns[,1]
+  mergedWeight <- 2^exponents[1]
+  for(k in seq_along(exponents)[-1]){
+    nextWeight <- 2^exponents[k]
+    # weight of the incoming column relative to everything merged so far
+    merged <- weighted_conv(merged, columns[,k], w=nextWeight/mergedWeight, length_sigma=length_sigma)
+    mergedWeight <- mergedWeight + nextWeight
+  }
+  return(merged)
+}
+
+weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){
+  # Convolve x with y after resampling them to lengths in the ratio 1:w, then interpolate the result onto length_sigma points.
+  resample <- function(v,nOut) approx(1:length(v),v,seq(1,length(v),length.out=nOut))$y
+  nx <- length(x)
+  ny <- length(y)
+  # With fewer than m points in either input (raw or scaled by w), both are resampled so that the shorter one has m points.
+  tooShort <- (nx<m) | ((nx*w)<m) | (ny<m) | ((ny*w)<m)
+  if(tooShort){
+    if(w<1){
+      nOutX <- m/w
+      nOutY <- m
+    }else{
+      nOutX <- m
+      nOutY <- m*w
+    }
+    x1 <- resample(x,nOutX)
+    y1 <- resample(y,nOutY)
+  }else{
+    x1 <- x
+    y1 <- resample(y,floor(nx*w))
+  }
+  nOpen <- length(x1)+length(y1)-1
+  conv <- approx(x=1:nOpen,y=convolve(x1,rev(y1),type="open"),xout=seq(1,nOpen,length.out=length_sigma))$y
+  conv[conv<=0] <- 0
+  conv/sum(conv)
+}
+
+########################
+
+
+
+
+# Template of four zero counters named by NUCLEOTIDES; copied by buildMutabilityModelONE for its tallies.
+mutabilityMatrixONE <- setNames(rep(0,4), NUCLEOTIDES)
+
+  # 1-mer mutability: observed mutations per germline base relative to a background count, for one row of matInput
+  buildMutabilityModelONE <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){
+    # model 0 = all mutations, 1 = silent ("S") only; stopMutations 1 = leave out "Stop" mutations. Returns the list built in return() below, or NULL when mutationCount is unusable.
+    # Side effect: the gap-stripped sequences are written back into matInput of the enclosing environment (<<-) before analyzeMutations() is called.
+    seqGL =  gsub("-", "", matInput[inputMatrixIndex,2])
+    seqInput = gsub("-", "", matInput[inputMatrixIndex,1])    
+    matInput[inputMatrixIndex,] <<- c(seqInput,seqGL)
+    seqLength = nchar(seqGL)      
+    mutationCount <- analyzeMutations(inputMatrixIndex, model, multipleMutation, seqWithStops)
+    BackgroundMatrix = mutabilityMatrixONE
+    MutationMatrix = mutabilityMatrixONE    
+    MutationCountMatrix = mutabilityMatrixONE    
+    if(length(mutationCount)>0 && !is.na(mutationCount[1])){ # scalar test on the first element; a whole-vector condition is an error in R >= 4.2
+      if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ 
+        # Split both sequences into single characters so that positions can be indexed.
+#         ONEmerStartPos = 1:(seqLength)
+#         ONEmerLength <- length(ONEmerStartPos)
+        ONEmerGL <- s2c(seqGL)
+        ONEmerSeq <- s2c(seqInput)
+    
+        # Background: every germline position that is not "N" (and whose input codon has no "N") adds to the count of its germline base
+        for(ONEmerIndex in 1:seqLength){
+          ONEmer = ONEmerGL[ONEmerIndex]
+          if(ONEmer!="N"){
+            ONEmerCodonPos = getCodonPos(ONEmerIndex)
+            ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos]) 
+            ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] )         
+            # The amount added is 1, or a probNonStopMutations / probSMutations entry, depending on model and stopMutations.
+            # All mutations model
+            #if(!any(grep("N",ONEmerReadingFrameCodon))){
+              if(model==0){
+                if(stopMutations==0){
+                  if(!any(grep("N",ONEmerReadingFrameCodonInputSeq)))
+                    BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1)              
+                }else{
+                  if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){
+                    positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1]
+                    BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon])
+                  }
+                }
+              }else{ # Only silent mutations
+                if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){
+                  positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
+                  BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon])
+                }
+              }
+            }
+          }
+        }
+        # NOTE(review): the brace just above closes the model/stopMutations guard (the disabled `if` further up left one closing brace behind), so the tally below also runs when no background was counted; the indentation suggests it was meant to sit inside the guard - confirm.
+        #Mutations: keep only the mutation types relevant to the chosen model
+        if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"]
+        if(model==1) mutationCount = mutationCount[mutationCount=="S"]  
+        mutationPositions = as.numeric(names(mutationCount))
+        mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)] # mutations at the first two and last two positions are ignored
+        mutationPositions =  mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)]
+        countMutations = 0 
+        for(mutationPosition in mutationPositions){
+          ONEmerIndex = mutationPosition
+          ONEmer = ONEmerSeq[ONEmerIndex]
+          GLONEmer = ONEmerGL[ONEmerIndex]
+          ONEmerCodonPos = getCodonPos(ONEmerIndex)
+          ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos])  
+          ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos])  
+          if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){
+            if(model==0){
+                countMutations = countMutations + 1              
+                MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1)
+                MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)             
+            }else{
+              if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){
+                  countMutations = countMutations + 1
+                  positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)
+                  glNuc =  substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon)
+                  inputNuc =  substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon)
+                  MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc])
+                  MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1)                                    
+              }                
+            }                  
+          }              
+        }
+        # Per-base ratio mutations/background, rescaled by its sum (NA/NaN entries are skipped in the sum).
+        seqMutability = MutationMatrix/BackgroundMatrix
+        seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE)
+        #cat(inputMatrixIndex,"\t",countMutations,"\n")
+        return(list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix))      
+#         tmp<-list("seqMutability"  = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix)
+      }        
+    }
+  
+################
+# $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $
+
+# Space-trimming helpers: only the space character is removed; tabs and other whitespace are left alone.
+# S3 generic - the method is picked by the class of `s`.
+trim <- function(s, recode.factor=TRUE, ...) UseMethod("trim", s)
+
+# Any class without a method of its own is returned unchanged.
+trim.default <- function(s, recode.factor=TRUE, ...) s
+
+# Character vectors: remove the run of spaces at the start and at the end of every element.
+trim.character <- function(s, recode.factor=TRUE, ...) {
+  noLeading <- sub(pattern="^ +", replacement="", x=s)
+  sub(pattern=" +$", replacement="", x=noLeading)
+}
+
+# Factors: trim the level labels and, when recode.factor is TRUE, pass the result through reorder.factor.
+# reorder.factor is not defined in this part of the file - presumably supplied by a package such as gdata; verify.
+trim.factor <- function(s, recode.factor=TRUE, ...) {
+  levels(s) <- trim(levels(s))
+  if(!recode.factor) return(s)
+  # Extra arguments are forwarded; `sort` is set to the function sort unless the caller supplied one.
+  reorderArgs <- list(x=s, ...)
+  if(is.null(reorderArgs$sort)) reorderArgs$sort <- sort
+  do.call(what=reorder.factor, args=reorderArgs)
+}
+
+# Lists: trim every element, forwarding recode.factor and any extra arguments.
+trim.list <- function(s, recode.factor=TRUE, ...) lapply(s, trim, recode.factor=recode.factor, ...)
+
+# Data frames: trim column by column while keeping the data-frame structure.
+trim.data.frame <- function(s, recode.factor=TRUE, ...) {
+  s[] <- trim.list(s, recode.factor=recode.factor, ...)
+  s
+}
+#######################################
+# Compute the expected R and S values for each sequence-germline pair, codon by codon
+getExpectedIndividualByCodon <- function(matInput){
+  # Expected "R" and "S" targeting per codon, for every row of matInput.
+  #
+  # Arguments
+  #   matInput  character matrix; column 1 holds the input sequence and column 2 the germline
+  #             sequence of each pair (this is how the columns are indexed below).
+  #
+  # Looked up outside this function
+  #   readEnd   last nucleotide position analysed; codons are the windows 1:3, 4:6, ... of
+  #             1:readEnd.
+  #   computeMutabilities(), computeTargeting(), computeMutationTypes()
+  #             helpers defined elsewhere; their results are only indexed here.
+  #   s2c(), rollapply(), as.zoo()
+  #             must already be attached (presumably from seqinr and zoo - verify).
+  #
+  # Value
+  #   list(Expected_R=, Expected_S=): two matrices with one row per row of matInput and
+  #   readEnd/3 columns (one per codon).
+  #
+  # Notes
+  #   * The parallel and the serial case share one pipeline; only the iterator differs, so the
+  #     two cases cannot drift apart. mclapply is looked up only when "multicore" is on the
+  #     search path.
+  #   * Index vectors are built with 1:n, so a matInput without rows is not supported.
+  #   * The per-row tables are indexed by nucleotide position and therefore need at least
+  #     readEnd columns.
+  #   * TRUE is spelled out instead of T, which can be reassigned.
+
+  if( any(grep("multicore",search())) ){
+    applyFun <- mclapply
+  }else{
+    applyFun <- lapply
+  }
+  # One index per row of matInput.
+  rowIdx <- 1:nrow(matInput)
+
+  # Step 1 - mutability of every distinct germline, computed once per unique germline string.
+  facGL <- factor(matInput[,2])
+  facLevels <- levels(facGL)
+  LisGLs_MutabilityU <- applyFun(1:length(facLevels), function(k){
+    computeMutabilities(facLevels[k])
+  })
+
+  # Position of each row's germline within facLevels.
+  facIndex <- match(facGL,facLevels)
+
+  # Step 2 - per row: germline mutability times a mask that is 1 where the input sequence has
+  # a character other than "N" and NA elsewhere.
+  LisGLs_Mutability <- applyFun(rowIdx, function(r){
+    cInput <- rep(NA,nchar(matInput[r,1]))
+    cInput[s2c(matInput[r,1])!="N"] <- 1
+    LisGLs_MutabilityU[[facIndex[r]]] * cInput
+  })
+
+  # Step 3 - per row: targeting computed from the germline and the masked mutability.
+  LisGLs_Targeting <- applyFun(rowIdx, function(r){
+    computeTargeting(matInput[r,2],LisGLs_Mutability[[r]])
+  })
+
+  # Step 4 - per row: table of mutation types for the germline (compared with "R"/"S" below).
+  LisGLs_MutationTypes <- applyFun(rowIdx, function(r){
+    computeMutationTypes(matInput[r,2])
+  })
+
+  # Step 5 - per row and codon: sum of the targeting entries whose mutation type equals
+  # mutType. codonNucs is the window of three nucleotide positions handed over by rollapply.
+  sumTargetingByCodon <- function(r,mutType){
+    rollapply(as.zoo(1:readEnd),width=3,by=3,
+              function(codonNucs){
+                hits <- which(LisGLs_MutationTypes[[r]][,codonNucs]==mutType)
+                sum( LisGLs_Targeting[[r]][,codonNucs][hits], na.rm=TRUE )
+              }
+    )
+  }
+
+  # Expected "R" sum per codon.
+  LisGLs_R_Exp <- applyFun(rowIdx, function(r){
+    sumTargetingByCodon(r,"R")
+  })
+
+  # Expected "S" sum per codon.
+  LisGLs_S_Exp <- applyFun(rowIdx, function(r){
+    sumTargetingByCodon(r,"S")
+  })
+
+  # Step 6 - reshape to one row per sequence and one column per codon (filled row by row).
+  Exp_R <- matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,byrow=TRUE)
+  Exp_S <- matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,byrow=TRUE)
+
+  return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) )
+}
+
+# getObservedMutationsByCodon <- function(listMutations){
+#   numbSeqs <- length(listMutations) 
+#   obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3))))
+#   obsMu_S <- obsMu_R
+#   temp <- mclapply(1:length(listMutations), function(i){
+#     arrMutations = listMutations[[i]]
+#     RPos = as.numeric(names(arrMutations)[arrMutations=="R"])
+#     RPos <- sapply(RPos,getCodonNumb)                                                                    
+#     if(any(RPos)){
+#       tabR <- table(RPos)
+#       obsMu_R[i,as.numeric(names(tabR))] <<- tabR
+#     }                                    
+#     
+#     SPos = as.numeric(names(arrMutations)[arrMutations=="S"])
+#     SPos <- sapply(SPos,getCodonNumb)
+#     if(any(SPos)){
+#       tabS <- table(SPos)
+#       obsMu_S[i,names(tabS)] <<- tabS
+#     }                                          
+#   }
+#   )
+#   return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) 
+# }
+
+# Count observed "R" and "S" mutations per codon: one row per entry of listMutations, readEnd/3 columns (readEnd and getCodonNumb are looked up outside this function).
+getObservedMutationsByCodon <- function(listMutations){
+  numbSeqs <- length(listMutations)
+  nCodons <- readEnd/3
+  obsMu_R <- matrix(0,nrow=numbSeqs,ncol=nCodons,dimnames=list(c(1:numbSeqs),c(1:nCodons)))
+  obsMu_S <- obsMu_R
+  for(seqIdx in 1:length(listMutations)){
+    # Each entry is a vector of mutation types whose names are nucleotide positions.
+    muts <- listMutations[[seqIdx]]
+    codonsR <- sapply(as.numeric(names(muts)[muts=="R"]),getCodonNumb)
+    if(any(codonsR)){
+      countsR <- table(codonsR)
+      obsMu_R[seqIdx,as.numeric(names(countsR))] <- countsR   # columns addressed by codon number
+    }
+    codonsS <- sapply(as.numeric(names(muts)[muts=="S"]),getCodonNumb)
+    if(any(codonsS)){
+      countsS <- table(codonsS)
+      obsMu_S[seqIdx,names(countsS)] <- countsS               # columns addressed by column name
+    }
+  }
+  # Both matrices carry "1".."n" row and column names.
+  list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S )
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/Baseline_Main.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,388 @@
+#########################################################################################
+# License Agreement
+# 
+# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE 
+# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER 
+# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE 
+# OR COPYRIGHT LAW IS PROHIBITED.
+# 
+# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE 
+# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED 
+# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN 
+# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
+#
+# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
+# Coded by: Mohamed Uduman & Gur Yaari
+# Copyright 2012 Kleinstein Lab
+# Version: 1.3 (01/23/2014)
+#########################################################################################
+
+op <- options();
+options(showWarnCalls=FALSE, showErrorCalls=FALSE, warn=-1)
+library('seqinr')
+if( F & Sys.info()[1]=="Linux"){
+  library("multicore")
+}
+
+# Load functions and initialize global variables
+source("Baseline_Functions.r")
+
+# Initialize parameters with user-provided command-line arguments (positional; see the note on each line)
+  arg <- commandArgs(TRUE)                       
+  #arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample")
+  #arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample")
+  #arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu")
+  testID <- as.numeric(arg[1])                    # 1 = Focused, 2 = Local, 3 = Imbalanced, 4 = ImbalancedSilent, 5 = trait-change test
+  species <- as.numeric(arg[2])                   # 1 = Human. 2 = Mouse
+  substitutionModel <- as.numeric(arg[3])         # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS
+  mutabilityModel <- as.numeric(arg[4])           # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 2002)  , 5 = FiveS
+  clonal <- as.numeric(arg[5])                    # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations
+  fixIndels <- as.numeric(arg[6])                 # 0 = Do nothing, 1 = Try and fix Indels
+  region <- as.numeric(strsplit(arg[7],":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3
+  inputFilePath <- arg[8]                         # Full path to input file
+  outputPath <- arg[9]                            # Full path to location of output files
+  outputID <- arg[10]                             # ID for session output  
+  # Optional 11th argument: trait-change model, only consulted when testID is 5 (defaults to 1).
+  # arg[11] is tested directly: wrapped in any() the string was coerced to logical, which is NA for a numeric string, so a supplied value was ignored.
+  if(testID==5){
+    traitChangeModel <- 1
+    if( !is.na(arg[11]) ) traitChangeModel <- as.numeric(arg[11])    # 1 <- Chothia 1998
+    initializeTraitChange(traitChangeModel)    
+  }
+  
+# Initialize other parameters/variables
+
+  # Initialize the codon table ( definitions of R/S )
+  computeCodonTable(testID)
+
+  # Test name: 2, 3 and 4 have their own label, every other testID is labelled "Focused"
+  testName <- if(testID==2){
+    "Local"
+  }else if(testID==3){
+    "Imbalanced"
+  }else if(testID==4){
+    "ImbalancedSilent"
+  }else{
+    "Focused"
+  }
+  # Indel placeholders initialization
+  indelPos <- delPos <- insPos <- NULL
+  # Initialize the substitution (transition) & mutability matrices
+  substitution <- initializeSubstitutionMatrix(substitutionModel,species)
+  mutability <- initializeMutabilityMatrix(mutabilityModel,species)
+  
+  # FWR/CDR boundaries
+  # region follows "StartPos:LastNucleotideF1:C1:F2:C2:F3:C3" (see the argument list above). When the
+  # 7th value is missing, flagTrim is set and the 6th value is reused, so the last segment is empty.
+  flagTrim <- is.na(region[7])
+  if(flagTrim){
+    region[7] <- region[6]
+  }
+
+  # Lowest and highest boundary, taken before the boundaries are re-based.
+  readStart <- min(region,na.rm=TRUE)
+  readEnd <- max(region,na.rm=TRUE)
+  # Re-base the boundaries so that the lowest one becomes 1.
+  # NOTE(review): readEnd is not re-based along with region - confirm this is intended.
+  if(readStart>1){
+    region <- region - (readStart - 1)
+  }
+
+  region_Cod <- region
+  # Boundaries in nucleotides: 3*k-2 for the first value, 3*k for the other six.
+  region_Nuc <- c( (region[1]*3-2) , (region[2:7]*3) )
+
+  # The read window is converted to nucleotide positions in the same way.
+  readStart <- (readStart*3)-2
+  readEnd <- (readEnd*3)
+
+    # Per-nucleotide masks. Six segments alternate FWR, CDR, FWR, CDR, FWR, CDR; their lengths are the
+    # second boundary followed by the differences between consecutive boundaries.
+    FWR_Nuc <- rep( c(TRUE,FALSE,TRUE,FALSE,TRUE,FALSE),
+                    times=c(region_Nuc[2],diff(region_Nuc[2:7])) )
+    CDR_Nuc <- !FWR_Nuc
+    # The same masks repeated on 4 rows.
+    FWR_Nuc_Mat <- matrix( rep(FWR_Nuc,4), ncol=length(FWR_Nuc), nrow=4, byrow=TRUE)
+    CDR_Nuc_Mat <- matrix( rep(CDR_Nuc,4), ncol=length(CDR_Nuc), nrow=4, byrow=TRUE)
+
+    # Per-codon masks, built the same way from the codon boundaries.
+    FWR_Codon <- rep( c(TRUE,FALSE,TRUE,FALSE,TRUE,FALSE),
+                      times=c(region[2],diff(region[2:7])) )
+    CDR_Codon <- !FWR_Codon
+
+
+# Read input FASTA file
+  # A read error is reported on stdout in the "Error|..." format and ends the R session
+  inputFASTA <- tryCatch(
+    baseline.read.fasta(inputFilePath, seqtype="DNA",as.string=TRUE,set.attributes=FALSE,forceDNAtolower=FALSE),
+    error = function(ex){
+      cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
+      q()
+    }
+  )
+  # A result of length 1 is rejected with the same message
+  if (length(inputFASTA)==1) {
+    cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n")
+    q()
+  }
+
+  # Process sequence IDs/names
+  names(inputFASTA) <- sapply(names(inputFASTA),trim)
+
+  # Strip tabs from the last record, then convert non nucleotide characters to N
+  inputFASTA[length(inputFASTA)] <- gsub("\t","",inputFASTA[length(inputFASTA)])
+  inputFASTA <- lapply(inputFASTA,replaceNonFASTAChars)
+
+  # Process the FASTA file and convert to Matrix[inputSequence, germlineSequence]
+  processedInput <- processInputAdvanced(inputFASTA)
+  matInput <- processedInput[[1]]
+  germlines <- processedInput[[2]]
+  groups <- processedInput[[3]]
+  lenGermlines <- length(unique(germlines))
+  lenGroups <- length(unique(groups))
+  rm(processedInput, inputFASTA)
+
+#   # remove clones with less than 2 seqeunces
+#   tableGL <- table(germlines)
+#   singletons <- which(tableGL<8)
+#   rowsToRemove <- match(singletons,germlines)
+#   if(any(rowsToRemove)){    
+#     matInput <- matInput[-rowsToRemove,]
+#     germlines <- germlines[-rowsToRemove]    
+#     groups <- groups[-rowsToRemove]
+#   }
+# 
+#   # remove unproductive seqs
+#   nonFuctionalSeqs <- sapply(rownames(matInput),function(x){any(grep("unproductive",x))})
+#   if(any(nonFuctionalSeqs)){
+#     if(sum(nonFuctionalSeqs)==length(germlines)){
+#       write.table("Unproductive",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
+#       q()      
+#     }
+#     matInput <- matInput[-which(nonFuctionalSeqs),]
+#     germlines <- germlines[-which(nonFuctionalSeqs)]
+#     germlines[1:length(germlines)] <- 1:length(germlines)
+#     groups <- groups[-which(nonFuctionalSeqs)]
+#   }
+# 
+#   if(class(matInput)=="character"){
+#     write.table("All unproductive seqs",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
+#     q()    
+#   }
+#   
+#   if(nrow(matInput)<10 | is.null(nrow(matInput))){
+#     write.table(paste(nrow(matInput), "seqs only",sep=""),file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
+#     q()
+#   }
+
+# Replace leading & trailing "-" with "N"
+  matInput <- t(apply(X=matInput, MARGIN=1, FUN=replaceLeadingTrailingDashes, readEnd))
+
+  # Trim (nucleotide) input sequences to the last codon (currently disabled)
+  #matInput[,1] <- apply(matrix(matInput[,1]),1,trimToLastCodon) 
+
+#   # Check for Indels
+#   if(fixIndels){
+#     delPos <- fixDeletions(matInput)
+#     insPos <- fixInsertions(matInput)
+#   }else{
+#     # Check for indels
+#     indelPos <- checkForInDels(matInput)
+#     indelPos <- apply(cbind(indelPos[[1]],indelPos[[2]]),1,function(x){(x[1]==T & x[2]==T)})
+#   }
+
+  # If indels are present, remove mutations in the sequence & throw warning at end (currently disabled)
+  #matInput[indelPos,] <- apply(matrix(matInput[indelPos,],nrow=sum(indelPos),ncol=2),1,function(x){x[1]=x[2]; return(x) })
+
+  colnames(matInput) <- c("Input","Germline")
+
+  # If sequences are clonal, create one effective sequence per clone (rows sharing a germlines value) & reduce germlines/groups to one entry per clone
+  germlinesOriginal = NULL
+  if(clonal){ # clonal is 0, 1 or 2 (see the argument list); any non-zero value enables collapsing
+    germlinesOriginal <- germlines
+    collapseCloneResults <- tapply(1:nrow(matInput),germlines,function(i){ # i = row indices of one clone
+                                                                collapseClone(matInput[i,1],matInput[i[1],2],readEnd,nonTerminalOnly=(clonal-1))
+                                                              })
+    matInput = t(sapply(collapseCloneResults,function(x){return(x[[1]])})) # element 1 of every result becomes that clone's row of matInput
+    names_groups = tapply(groups,germlines,function(x){names(x[1])})  
+    groups = tapply(groups,germlines,function(x){array(x[1],dimnames=names(x[1]))})  
+    names(groups) = names_groups
+    # germlines is reduced last: the statements above still split by the original, per-sequence germlines
+    names_germlines =  tapply(germlines,germlines,function(x){names(x[1])})  
+    germlines = tapply(   germlines,germlines,function(x){array(x[1],dimnames=names(x[1]))}   )
+    names(germlines) = names_germlines
+    matInputErrors = sapply(collapseCloneResults,function(x){return(x[[2]])})  # element 2 of every result; summed at the end of the script to decide on the clone warning
+  }
+
+
+# Selection Analysis
+
+  
+#  if (length(germlines)>sequenceLimit) {
+#    # Code to parallelize processing goes here
+#    stop( paste("Error: Cannot process more than ", Upper_limit," sequences",sep="") )
+#  }
+
+#  if (length(germlines)<sequenceLimit) {}
+  
+    # Compute expected mutation frequencies
+    matExpected <- getExpectedIndividual(matInput)
+
+    # Count observed number of mutations in the different regions.
+    # Every sequence is laid out against its germline as a 2-row character matrix
+    # (row 1 = input, row 2 = germline) and handed to analyzeMutations2NucUri();
+    # if that call raises an error the sequence contributes NA instead of stopping the run.
+    mutations <- lapply( 1:nrow(matInput), function(seqIdx){
+      inputChars <- s2c(matInput[seqIdx,1])
+      germlineChars <- s2c(matInput[seqIdx,2])
+      pairMat <- matrix(c(inputChars,germlineChars),nrow=2,ncol=length(inputChars),byrow=TRUE)
+      tryCatch(
+        analyzeMutations2NucUri(pairMat),
+        error = function(ex){
+          NA
+        }
+      )
+    })
+    # processNucMutations2() results, transposed so that each sequence is one row
+    matObserved <- t(sapply( mutations, processNucMutations2 ))
+    # The result is stored under the function's own name (if the function lives in this environment, the assignment replaces it)
+    numberOfSeqsWithMutations <- numberOfSeqsWithMutations(matObserved, testID)
+
+    #if(sum(numberOfSeqsWithMutations)==0){
+    #  write.table("No mutated sequences",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T)
+    #  q()      
+    #}
+
+    # Observed columns first, expected columns after them
+    matMutationInfo <- cbind(matObserved,matExpected)
+    rm(matObserved,matExpected)
+    
+     
+    # Bayesian PDFs: element 1 of the result is used as the CDR PDFs, element 2 as the FWR PDFs
+    bayes_pdf <- computeBayesianScore(matMutationInfo, test=testName, max_sigma=20,length_sigma=4001)
+    bayesPDF_cdr <- bayes_pdf[[1]]
+    bayesPDF_fwr <- bayes_pdf[[2]]
+    rm(bayes_pdf)
+
+    # Pool the per-sequence PDFs by germline ...
+    bayesPDF_germlines_cdr <- tapply(bayesPDF_cdr,germlines,groupPosteriors,length_sigma=4001)
+    bayesPDF_germlines_fwr <- tapply(bayesPDF_fwr,germlines,groupPosteriors,length_sigma=4001)
+    # ... and by group
+    bayesPDF_groups_cdr <- tapply(bayesPDF_cdr,groups,groupPosteriors,length_sigma=4001)
+    bayesPDF_groups_fwr <- tapply(bayesPDF_fwr,groups,groupPosteriors,length_sigma=4001)
+
+    # With more than one group, append an extra group that pools the per-group PDFs
+    if(lenGroups>1){
+      groups <- c(groups,lenGroups+1)
+      names(groups)[length(groups)] <- "All sequences combined"
+      bayesPDF_groups_cdr[[lenGroups+1]] <- groupPosteriors(bayesPDF_groups_cdr,length_sigma=4001)
+      bayesPDF_groups_fwr[[lenGroups+1]] <- groupPosteriors(bayesPDF_groups_fwr,length_sigma=4001)
+    }
+
+    # Bayesian outputs: one row of calcBayesOutputInfo() values per PDF
+    bayes_cdr <- t(sapply(bayesPDF_cdr,calcBayesOutputInfo))
+    bayes_fwr <- t(sapply(bayesPDF_fwr,calcBayesOutputInfo))
+    bayes_germlines_cdr <- t(sapply(bayesPDF_germlines_cdr,calcBayesOutputInfo))
+    bayes_germlines_fwr <- t(sapply(bayesPDF_germlines_fwr,calcBayesOutputInfo))
+    bayes_groups_cdr <- t(sapply(bayesPDF_groups_cdr,calcBayesOutputInfo))
+    bayes_groups_fwr <- t(sapply(bayesPDF_groups_fwr,calcBayesOutputInfo))
+
+    # P-values: one computeSigmaP() value per PDF
+    simgaP_cdr <- sapply(bayesPDF_cdr,computeSigmaP)
+    simgaP_fwr <- sapply(bayesPDF_fwr,computeSigmaP)
+    simgaP_germlines_cdr <- sapply(bayesPDF_germlines_cdr,computeSigmaP)
+    simgaP_germlines_fwr <- sapply(bayesPDF_germlines_fwr,computeSigmaP)
+    simgaP_groups_cdr <- sapply(bayesPDF_groups_cdr,computeSigmaP)
+    simgaP_groups_fwr <- sapply(bayesPDF_groups_fwr,computeSigmaP)
+    
+    
+    #Format output
+    # matOutput has 18 columns: 1 Type, 2 ID, 3:6 observed counts, 7:10 expected frequencies, 11:16 Sigma/CIlower/CIupper for CDR then FWR, 17:18 P values (see colnames below)
+    # Rows selected through indelPos are blanked (nothing is selected while indelPos is NULL); then columns 5:8 of every row are rescaled to sum to 1 and rounded to 3 decimal places
+    matMutationInfo[germlinesOriginal[indelPos],] = NA
+    if(nrow(matMutationInfo)==1){
+      matMutationInfo[5:8] = round(matMutationInfo[,5:8]/sum(matMutationInfo[,5:8],na.rm=TRUE),3)
+    }else{
+      matMutationInfo[,5:8] = t(round(apply(matMutationInfo[,5:8],1,function(x){ return(x/sum(x,na.rm=TRUE)) }),3))
+    }
+    # listPDFs gets one CDR/FWR pair per output row, stored at the same row number
+    listPDFs = list()
+    nRows = length(unique(groups)) + length(unique(germlines)) + length(groups)
+    # Rows are written group by group: the group row, then for each of its germlines the germline row followed by its sequence rows
+    matOutput = matrix(NA,ncol=18,nrow=nRows)
+    rowNumb = 1
+    for(G in unique(groups)){
+      # Group row: only Type, ID and the summary columns 11:18 are filled
+      matOutput[rowNumb,c(1,2,11:18)] = c("Group",names(groups)[groups==G][1],bayes_groups_cdr[G,],bayes_groups_fwr[G,],simgaP_groups_cdr[G],simgaP_groups_fwr[G])
+      listPDFs[[rowNumb]] = list("CDR"=bayesPDF_groups_cdr[[G]],"FWR"=bayesPDF_groups_fwr[[G]])
+      names(listPDFs)[rowNumb] = names(groups[groups==paste(G)])[1]
+      # gs is NA when the group has no matching germlines entry (e.g. a group appended after germlines was built); such a group gets no germline or sequence rows
+      gs = unique(germlines[groups==G])
+      rowNumb = rowNumb+1
+      if( length(gs)>0 && !is.na(gs[1]) ){ # scalar test on the first element; a whole-vector condition is an error in R >= 4.2
+        for( g in gs ){
+          matOutput[rowNumb,c(1,2,11:18)] = c("Germline",names(germlines)[germlines==g][1],bayes_germlines_cdr[g,],bayes_germlines_fwr[g,],simgaP_germlines_cdr[g],simgaP_germlines_fwr[g])
+          listPDFs[[rowNumb]] = list("CDR"=bayesPDF_germlines_cdr[[g]],"FWR"=bayesPDF_germlines_fwr[[g]])
+          names(listPDFs)[rowNumb] = names(germlines[germlines==paste(g)])[1]
+          rowNumb = rowNumb+1
+          indexesOfInterest = which(germlines==g)
+          numbSeqsOfInterest =  length(indexesOfInterest)
+          rowNumb = seq(rowNumb,rowNumb+(numbSeqsOfInterest-1)) # rowNumb temporarily holds all sequence rows of this germline
+          matOutput[rowNumb,] = matrix(   c(  rep("Sequence",numbSeqsOfInterest),
+                                              rownames(matInput)[indexesOfInterest],
+                                              c(matMutationInfo[indexesOfInterest,1:4]),
+                                              c(matMutationInfo[indexesOfInterest,5:8]),
+                                              c(bayes_cdr[indexesOfInterest,]),
+                                              c(bayes_fwr[indexesOfInterest,]),
+                                              c(simgaP_cdr[indexesOfInterest]),
+                                              c(simgaP_fwr[indexesOfInterest])                                              
+          ), ncol=18, nrow=numbSeqsOfInterest,byrow=FALSE)
+          increment=0
+          for( ioi in indexesOfInterest){
+            listPDFs[[min(rowNumb)+increment]] =  list("CDR"=bayesPDF_cdr[[ioi]] , "FWR"=bayesPDF_fwr[[ioi]])
+            names(listPDFs)[min(rowNumb)+increment] = rownames(matInput)[ioi]
+            increment = increment + 1
+          }
+          rowNumb=max(rowNumb)+1
+
+        }
+      }
+    }
+    colsToFormat = 11:18
+    matOutput[,colsToFormat] = formatC(  matrix(as.numeric(matOutput[,colsToFormat]), nrow=nrow(matOutput), ncol=length(colsToFormat)) ,  digits=3)
+    matOutput[matOutput== " NaN"] = NA
+
+    # Column names: the last eight are prefixed with the test name
+    # Two files are written to outputPath: <outputID>.txt (the table) and <outputID>.RData (listPDFs)
+    colnames(matOutput) = c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S",
+                            "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S",
+                            paste( rep(testName,6), rep(c("Sigma","CIlower","CIupper"),2),rep(c("CDR","FWR"),each=3), sep="_"),
+                            paste( rep(testName,2), rep("P",2),c("CDR","FWR"), sep="_")
+    )
+    fileName = paste(outputPath,outputID,".txt",sep="")
+    write.table(matOutput,file=fileName,quote=FALSE,sep="\t",row.names=TRUE,col.names=NA)
+    fileName = paste(outputPath,outputID,".RData",sep="")
+    save(listPDFs,file=fileName)
+
+# Optional HTML warnings for the status line below; each stays FALSE when there is nothing to report.
+indelWarning <- FALSE
+if(sum(indelPos)>0){
+  indelWarning <- paste0( "<P>Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis.", "<UL>" )
+  for(indels in names(indelPos)[indelPos]){
+    indelWarning <- paste0( indelWarning , "<LI>", indels, "</LI>" )
+  }
+  indelWarning <- paste0( indelWarning , "</UL></P>" )
+}
+# Clone warning: only evaluated when clonal is exactly 1; lists the clones flagged in matInputErrors.
+cloneWarning <- FALSE
+if(clonal==1){
+  if(sum(matInputErrors)>0){
+    cloneWarning <- paste0( "<P>Warning: The following clones have sequences of unequal length.", "<UL>" )
+    for(clone in names(matInputErrors)[matInputErrors]){
+      cloneWarning <- paste0( cloneWarning , "<LI>", names(germlines)[as.numeric(clone)], "</LI>" )
+    }
+    cloneWarning <- paste0( cloneWarning , "</UL></P>" )
+  }
+}
+# Final status line: Success|<outputID>|<indel warning or FALSE>|<clone warning or FALSE>
+cat(paste("Success",outputID,indelWarning,cloneWarning,sep="|"))
Binary file shm_csr/baseline/FiveS_Mutability.RData has changed
Binary file shm_csr/baseline/FiveS_Substitution.RData has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,703 @@
+>IGHV1-18*01
+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-18*02
+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc
+>IGHV1-18*03
+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga
+>IGHV1-18*04
+caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-2*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
+>IGHV1-2*02
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-2*03
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-2*04
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
+>IGHV1-2*05
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
+>IGHV1-24*01
+caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
+>IGHV1-3*01
+caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga
+>IGHV1-3*02
+caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga
+>IGHV1-38-4*01
+caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca
+>IGHV1-45*01
+cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana
+>IGHV1-45*02
+cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata
+>IGHV1-45*03
+.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga
+>IGHV1-46*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-46*02
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-46*03
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga
+>IGHV1-58*01
+caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
+>IGHV1-58*02
+caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
+>IGHV1-68*01
+caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata
+>IGHV1-69*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*02
+caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1-69*03
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc
+>IGHV1-69*04
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*05
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1-69*06
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*07
+.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag
+>IGHV1-69*08
+caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*09
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*10
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*11
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*12
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*13
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69*14
+caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-69-2*01
+gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
+>IGHV1-69-2*02
+.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag
+>IGHV1-69D*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1-8*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
+>IGHV1-8*02
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
+>IGHV1-NL1*01
+caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga
+>IGHV1/OR15-1*01
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga
+>IGHV1/OR15-1*02
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
+>IGHV1/OR15-1*03
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga
+>IGHV1/OR15-1*04
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
+>IGHV1/OR15-2*01
+caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga
+>IGHV1/OR15-2*02
+caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
+>IGHV1/OR15-2*03
+caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
+>IGHV1/OR15-3*01
+caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1/OR15-3*02
+caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
+>IGHV1/OR15-3*03
+caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1/OR15-4*01
+caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
+>IGHV1/OR15-5*01
+.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
+>IGHV1/OR15-5*02
+caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
+>IGHV1/OR15-9*01
+caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga
+>IGHV1/OR21-1*01
+caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga
+>IGHV2-10*01
+caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac
+>IGHV2-26*01
+caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac
+>IGHV2-5*01
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-5*02
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-5*03
+................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt
+>IGHV2-5*04|
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac
+>IGHV2-5*05
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-5*06
+cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga
+>IGHV2-5*08
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-5*09
+caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-70*01
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2-70*02
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*03
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*04
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac
+>IGHV2-70*05
+..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga
+>IGHV2-70*06
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*07
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*08
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
+>IGHV2-70*09
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg
+>IGHV2-70*10
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2-70*11
+cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2-70*12
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
+>IGHV2-70*13
+caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac
+>IGHV2-70D*04
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2-70D*14
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
+>IGHV2/OR16-5*01
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag
+>IGHV3-11*01
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-11*03
+caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga
+>IGHV3-11*04
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-11*05
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-11*06
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-13*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
+>IGHV3-13*02
+gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
+>IGHV3-13*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga
+>IGHV3-13*04
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
+>IGHV3-13*05
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
+>IGHV3-15*01
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*02
+gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*03
+gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*04
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*05
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*06
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*07
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
+>IGHV3-15*08
+gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
+>IGHV3-16*01
+gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
+>IGHV3-16*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
+>IGHV3-19*01
+acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa
+>IGHV3-20*01
+gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
+>IGHV3-20*02
+gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
+>IGHV3-21*01
+gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-21*02
+gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-21*03
+gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga
+>IGHV3-21*04
+gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-22*01
+gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
+>IGHV3-22*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
+>IGHV3-23*01
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23*02
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23*03
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23*04
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23*05
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa
+>IGHV3-23D*01
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-23D*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
+>IGHV3-25*01
+gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
+>IGHV3-25*02
+gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
+>IGHV3-25*03
+gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga
+>IGHV3-25*04
+gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga
+>IGHV3-25*05
+gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
+>IGHV3-29*01
+gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
+>IGHV3-30*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*02
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30*03
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*04
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*05
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga
+>IGHV3-30*06
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*07
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*08
+caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
+>IGHV3-30*09
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*10
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*11
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*12
+caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*13
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*14
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*15
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*16
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*17
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30*18
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30*19
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30-2*01
+gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca
+>IGHV3-30-22*01
+gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc
+>IGHV3-30-3*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30-3*02
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30-3*03
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-30-33*01
+gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg
+>IGHV3-30-42*01
+gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
+>IGHV3-30-5*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30-5*02
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-30-52*01
+gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg
+>IGHV3-32*01
+gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc
+>AIGHV3-33*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-33*02
+caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-33*03
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-33*04
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-33*05
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-33*06
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3-33-2*01
+gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca
+>IGHV3-35*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa
+>IGHV3-38*01|
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata
+>IGHV3-38*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
+>IGHV3-38*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
+>IGHV3-38-3*01
+gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga
+>IGHV3-43*01
+gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
+>IGHV3-43*02
+gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
+>IGHV3-43D*01
+gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata
+>IGHV3-47*01
+gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga
+>IGHV3-47*02
+gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga
+>IGHV3-48*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-48*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-48*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
+>IGHV3-48*04
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-49*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-49*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-49*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-49*04
+gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-49*05
+gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
+>IGHV3-52*01
+gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg
+>IGHV3-52*02
+gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
+>IGHV3-52*03
+gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
+>IGHV3-53*01
+gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-53*02
+gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-53*03
+gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga
+>IGHV3-53*04
+gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-54*01
+gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
+>IGHV3-54*02
+gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg
+>IGHV3-54*04
+gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
+>IGHV3-62*01
+gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga
+>IGHV3-63*01
+gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt
+>IGHV3-63*02
+gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa
+>IGHV3-64*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
+>IGHV3-64*02
+gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
+>IGHV3-64*03
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
+>IGHV3-64*04
+caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-64*05
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
+>IGHV3-64D*06
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
+>IGHV3-66*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-66*02
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
+>IGHV3-66*03
+gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-66*04
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca
+>IGHV3-69-1*01
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-69-1*02
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
+>IGHV3-7*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-7*02
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga
+>IGHV3-7*03
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-71*01
+gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
+>IGHV3-71*02
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga
+>IGHV3-71*03
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
+>IGHV3-72*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga
+>IGHV3-72*02
+....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat
+>IGHV3-73*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
+>IGHV3-73*02
+gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
+>IGHV3-74*01
+gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
+>IGHV3-74*02
+gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga
+>IGHV3-74*03
+gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
+>IGHV3-9*01
+gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
+>IGHV3-9*02
+gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
+>IGHV3-9*03
+gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata
+>IGHV3-NL1*01
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
+>IGHV3/OR15-7*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga
+>IGHV3/OR15-7*02
+gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
+>IGHV3/OR15-7*03
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
+>IGHV3/OR15-7*05
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga
+>IGHV3/OR16-10*01
+gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
+>IGHV3/OR16-10*02
+gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
+>IGHV3/OR16-10*03
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga
+>IGHV3/OR16-12*01
+gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga
+>IGHV3/OR16-13*01
+gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
+>IGHV3/OR16-14*01
+gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
+>IGHV3/OR16-15*01
+gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa
+>IGHV3/OR16-15*02
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga
+>IGHV3/OR16-16*01
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga
+>IGHV3/OR16-6*02
+gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
+>IGHV3/OR16-8*01
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa
+>IGHV3/OR16-8*02
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca
+>IGHV3/OR16-9*01
+gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa
+>IGHV4-28*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-28*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-28*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga
+>IGHV4-28*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga
+>IGHV4-28*05
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-28*06
+caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa
+>IGHV4-28*07
+caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-30-2*01
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
+>IGHV4-30-2*02
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
+>IGHV4-30-2*03
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca
+>IGHV4-30-2*04
+...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-30-2*05
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-2*06
+cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
+>XIGHV4-30-4*04
+caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg
+>IGHV4-30-4*05
+..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*06
+...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
+>IGHV4-30-4*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
+>IGHV4-31*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-31*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-31*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-31*04
+caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
+>IGHV4-31*05
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg
+>IGHV4-31*06
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
+>IGHV4-31*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
+>IGHV4-31*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
+>IGHV4-31*09
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-31*10
+caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga
+>IGHV4-34*01
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-34*02
+caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-34*03
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-34*04
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-34*05
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-34*06
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-34*07
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-34*08
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg
+>IGHV4-34*09
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-34*10
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
+>IGHV4-34*11
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
+>IGHV4-34*12
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga
+>IGHV4-34*13
+...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
+>IGHV4-38-2*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga
+>IGHV4-38-2*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-39*01
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca
+>IGHV4-39*02
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga
+>IGHV4-39*03
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
+>IGHV4-39*04
+..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac
+>IGHV4-39*05
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
+>IGHV4-39*06
+cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-39*07
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-4*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
+>IGHV4-4*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-4*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-4*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-4*05
+caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-4*06
+............................................................
+...............tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-4*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-4*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-55*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
+>IGHV4-55*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
+>IGHV4-55*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-55*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-55*05
+caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
+>IGHV4-55*06
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg
+>IGHV4-55*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
+>IGHV4-55*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-55*09
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
+>IGHV4-59*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-59*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-59*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
+>IGHV4-59*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
+>IGHV4-59*05
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
+>IGHV4-59*06
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
+>IGHV4-59*07
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga
+>IGHV4-59*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca
+>IGHV4-59*09
+...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg
+>IGHV4-59*10
+caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
+>IGHV4-61*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-61*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
+>IGHV4-61*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4-61*04
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg
+>IGHV4-61*05
+cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga
+>IGHV4-61*06
+...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
+>IGHV4-61*07
+...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca
+>IGHV4-61*08
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
+>IGHV4/OR15-8*01
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4/OR15-8*02
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV4/OR15-8*03
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
+>IGHV5-10-1*01
+gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-10-1*02
+gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca
+>IGHV5-10-1*03
+gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-10-1*04
+gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-51*01
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
+>IGHV5-51*02
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
+>IGHV5-51*03
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-51*04
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
+>IGHV5-51*05
+.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg
+>IGHV5-78*01
+gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga
+>IGHV6-1*01
+caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
+>IGHV6-1*02
+caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
+>IGHV7-34-1*01
+...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
+>IGHV7-34-1*02
+...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
+>IGHV7-4-1*01
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga
+>IGHV7-4-1*02
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
+>IGHV7-4-1*03
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg
+>IGHV7-4-1*04
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
+>IGHV7-4-1*05
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga
+>AIGHV7-40*03|
+ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga
+>IGHV7-81*01
+caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/IMGTVHreferencedataset20161215.fa	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,1 @@
+>IGHV1-18*01
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-18*02
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc
>IGHV1-18*03
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1-18*04
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
>IGHV1-2*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*04
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*05
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
>IGHV1-24*01
caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
>IGHV1-3*01
caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga
>IGHV1-3*02
caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga
>IGHV1-38-4*01
caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca
>IGHV1-45*01
cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana
>IGHV1-45*02
cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata
>IGHV1-45*03
.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga
>IGHV1-46*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-46*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-46*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga
>IGHV1-58*01
caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
>IGHV1-58*02
caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
>IGHV1-68*01
caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata
>IGHV1-69*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*02
caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1-69*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc
>IGHV1-69*04
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*05
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1-69*06
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*07
.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag
>IGHV1-69*08
caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*09
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*10
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*11
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*12
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*13
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*14
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69-2*01
gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
>IGHV1-69-2*02
.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag
>IGHV1-69D*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-8*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
>IGHV1-8*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
>IGHV1-NL1*01
caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga
>IGHV1/OR15-1*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga
>IGHV1/OR15-1*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
>IGHV1/OR15-1*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga
>IGHV1/OR15-1*04
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
>IGHV1/OR15-2*01
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-2*02
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-2*03
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-3*01
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-3*02
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1/OR15-3*03
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-4*01
caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-5*01
.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
>IGHV1/OR15-5*02
caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
>IGHV1/OR15-9*01
caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga
>IGHV1/OR21-1*01
caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga
>IGHV2-10*01
caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac
>IGHV2-26*01
caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac
>IGHV2-5*01
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*02
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*03
................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt
>IGHV2-5*04
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac
>IGHV2-5*05
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*06
cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga
>IGHV2-5*08
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*09
caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-70*01
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*02
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*03
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*04
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac
>IGHV2-70*05
..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga
>IGHV2-70*06
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*07
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*08
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*09
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg
>IGHV2-70*10
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*11
cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*12
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-70*13
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac
>IGHV2-70D*04
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70D*14
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2/OR16-5*01
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag
>IGHV3-11*01
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-11*03
caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga
>IGHV3-11*04
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-11*05
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-11*06
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-13*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*02
gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga
>IGHV3-13*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*05
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-15*01
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*02
gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*03
gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*04
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*05
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*06
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*07
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*08
gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
>IGHV3-16*01
gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-16*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-19*01
acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-20*01
gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
>IGHV3-20*02
gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
>IGHV3-21*01
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-21*02
gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-21*03
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga
>IGHV3-21*04
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-22*01
gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
>IGHV3-22*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
>IGHV3-23*01
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*02
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*03
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*05
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa
>IGHV3-23D*01
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-25*01
gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-25*02
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-25*03
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga
>IGHV3-25*04
gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga
>IGHV3-25*05
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-29*01
gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
>IGHV3-30*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*04
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*05
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga
>IGHV3-30*06
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*07
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*08
caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
>IGHV3-30*09
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*10
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*11
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*12
caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*13
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*14
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*15
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*16
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*17
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*18
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30*19
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-2*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca
>IGHV3-30-22*01
gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc
>IGHV3-30-3*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-3*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-3*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-33*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg
>IGHV3-30-42*01
gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
>IGHV3-30-5*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-5*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-52*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg
>IGHV3-32*01
gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc
>IGHV3-33*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*02
caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-33*04
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*05
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*06
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-33-2*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca
>IGHV3-35*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa
>IGHV3-38*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata
>IGHV3-38*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
>IGHV3-38*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
>IGHV3-38-3*01
gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga
>IGHV3-43*01
gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-43*02
gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-43D*01
gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-47*01
gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga
>IGHV3-47*02
gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga
>IGHV3-48*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-48*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga
>IGHV3-48*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
>IGHV3-48*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-49*01
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*02
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*03
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*04
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*05
gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-52*01
gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg
>IGHV3-52*02
gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
>IGHV3-52*03
gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
>IGHV3-53*01
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-53*02
gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-53*03
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga
>IGHV3-53*04
gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga
>IGHV3-54*01
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
>IGHV3-54*02
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg
>IGHV3-54*04
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
>IGHV3-62*01
gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga
>IGHV3-63*01
gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt
>IGHV3-63*02
gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa
>IGHV3-64*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
>IGHV3-64*02
gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
>IGHV3-64*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-64*04
caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-64*05
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-64D*06
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-66*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-66*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
>IGHV3-66*03
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-66*04
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca
>IGHV3-69-1*01
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-69-1*02
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
>IGHV3-7*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-7*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga
>IGHV3-7*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-71*01
gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-71*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga
>IGHV3-71*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-72*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga
>IGHV3-72*02
....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat
>IGHV3-73*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
>IGHV3-73*02
gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
>IGHV3-74*01
gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
>IGHV3-74*02
gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga
>IGHV3-74*03
gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
>IGHV3-9*01
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
>IGHV3-9*02
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
>IGHV3-9*03
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata
>IGHV3-NL1*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3/OR15-7*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*02
gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*05
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga
>IGHV3/OR16-10*01
gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
>IGHV3/OR16-10*02
gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
>IGHV3/OR16-10*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga
>IGHV3/OR16-12*01
gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga
>IGHV3/OR16-13*01
gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
>IGHV3/OR16-14*01
gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
>IGHV3/OR16-15*01
gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa
>IGHV3/OR16-15*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga
>IGHV3/OR16-16*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga
>IGHV3/OR16-6*02
gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
>IGHV3/OR16-8*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa
>IGHV3/OR16-8*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca
>IGHV3/OR16-9*01
gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa
>IGHV4-28*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga
>IGHV4-28*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga
>IGHV4-28*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*06
caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*07
caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-30-2*01
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-30-2*02
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
>IGHV4-30-2*03
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca
>IGHV4-30-2*04
...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-30-2*05
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-2*06
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-30-4*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-30-4*04
caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg
>IGHV4-30-4*05
..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*06
...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-31*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*04
caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
>IGHV4-31*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg
>IGHV4-31*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-31*10
caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga
>IGHV4-34*01
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*02
caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*03
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*04
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*05
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*06
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*07
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*08
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg
>IGHV4-34*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-34*10
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-34*11
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
>IGHV4-34*12
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga
>IGHV4-34*13
...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-38-2*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga
>IGHV4-38-2*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-39*01
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca
>IGHV4-39*02
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga
>IGHV4-39*03
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
>IGHV4-39*04
..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac
>IGHV4-39*05
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-39*06
cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-39*07
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
>IGHV4-4*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*05
caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*06
...........................................................................tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-55*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-55*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-55*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg
>IGHV4-55*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
>IGHV4-55*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-55*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-59*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-59*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-59*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
>IGHV4-59*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-59*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-59*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
>IGHV4-59*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga
>IGHV4-59*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca
>IGHV4-59*09
...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg
>IGHV4-59*10
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-61*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-61*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-61*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-61*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg
>IGHV4-61*05
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga
>IGHV4-61*06
...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-61*07
...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca
>IGHV4-61*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV5-10-1*01
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-10-1*02
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca
>IGHV5-10-1*03
gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-10-1*04
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*01
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
>IGHV5-51*02
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
>IGHV5-51*03
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*04
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*05
.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg
>IGHV5-78*01
gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga
>IGHV6-1*01
caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
>IGHV6-1*02
caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
>IGHV7-34-1*01
...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
>IGHV7-34-1*02
...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
>IGHV7-4-1*01
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga
>IGHV7-4-1*02
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
>IGHV7-4-1*03
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg
>IGHV7-4-1*04
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
>IGHV7-4-1*05
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga
>IGHV7-40*03
ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga
>IGHV7-81*01
caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/IMGTVHreferencedataset20161215.fasta	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,1 @@
+>IGHV1-18*01
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-18*02
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc
>IGHV1-18*03
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1-18*04
caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
>IGHV1-2*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*04
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga
>IGHV1-2*05
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga
>IGHV1-24*01
caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
>IGHV1-3*01
caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga
>IGHV1-3*02
caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga
>IGHV1-38-4*01
caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca
>IGHV1-45*01
cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana
>IGHV1-45*02
cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata
>IGHV1-45*03
.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga
>IGHV1-46*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-46*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-46*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga
>IGHV1-58*01
caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
>IGHV1-58*02
caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga
>IGHV1-68*01
caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata
>IGHV1-69*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*02
caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1-69*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc
>IGHV1-69*04
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*05
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1-69*06
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*07
.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag
>IGHV1-69*08
caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*09
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*10
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*11
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*12
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*13
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69*14
caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-69-2*01
gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga
>IGHV1-69-2*02
.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag
>IGHV1-69D*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1-8*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
>IGHV1-8*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg
>IGHV1-NL1*01
caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga
>IGHV1/OR15-1*01
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga
>IGHV1/OR15-1*02
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
>IGHV1/OR15-1*03
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga
>IGHV1/OR15-1*04
caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga
>IGHV1/OR15-2*01
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-2*02
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-2*03
caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga
>IGHV1/OR15-3*01
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-3*02
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga
>IGHV1/OR15-3*03
caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-4*01
caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga
>IGHV1/OR15-5*01
.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
>IGHV1/OR15-5*02
caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga
>IGHV1/OR15-9*01
caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga
>IGHV1/OR21-1*01
caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga
>IGHV2-10*01
caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac
>IGHV2-26*01
caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac
>IGHV2-5*01
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*02
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*03
................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt
>IGHV2-5*04
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac
>IGHV2-5*05
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*06
cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga
>IGHV2-5*08
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-5*09
caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-70*01
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*02
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*03
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*04
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac
>IGHV2-70*05
..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga
>IGHV2-70*06
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*07
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*08
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg
>IGHV2-70*09
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg
>IGHV2-70*10
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*11
cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70*12
cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac
>IGHV2-70*13
caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac
>IGHV2-70D*04
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2-70D*14
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac
>IGHV2/OR16-5*01
caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag
>IGHV3-11*01
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-11*03
caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga
>IGHV3-11*04
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-11*05
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-11*06
caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-13*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*02
gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga
>IGHV3-13*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-13*05
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga
>IGHV3-15*01
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*02
gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*03
gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*04
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*05
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*06
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*07
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga
>IGHV3-15*08
gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
>IGHV3-16*01
gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-16*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-19*01
acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa
>IGHV3-20*01
gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
>IGHV3-20*02
gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga
>IGHV3-21*01
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-21*02
gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-21*03
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga
>IGHV3-21*04
gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-22*01
gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
>IGHV3-22*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga
>IGHV3-23*01
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*02
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*03
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-23*05
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa
>IGHV3-23D*01
gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga
>IGHV3-25*01
gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-25*02
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-25*03
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga
>IGHV3-25*04
gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga
>IGHV3-25*05
gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga
>IGHV3-29*01
gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
>IGHV3-30*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*04
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*05
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga
>IGHV3-30*06
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*07
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*08
caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
>IGHV3-30*09
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*10
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*11
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*12
caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*13
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*14
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*15
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*16
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*17
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30*18
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30*19
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-2*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca
>IGHV3-30-22*01
gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc
>IGHV3-30-3*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-3*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-3*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-30-33*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg
>IGHV3-30-42*01
gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt
>IGHV3-30-5*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-5*02
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-30-52*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg
>IGHV3-32*01
gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc
>IGHV3-33*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*02
caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*03
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-33*04
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*05
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-33*06
caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga
>IGHV3-33-2*01
gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca
>IGHV3-35*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa
>IGHV3-38*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata
>IGHV3-38*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
>IGHV3-38*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata
>IGHV3-38-3*01
gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga
>IGHV3-43*01
gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-43*02
gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-43D*01
gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata
>IGHV3-47*01
gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga
>IGHV3-47*02
gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga
>IGHV3-48*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-48*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga
>IGHV3-48*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
>IGHV3-48*04
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-49*01
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*02
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*03
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*04
gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-49*05
gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga
>IGHV3-52*01
gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg
>IGHV3-52*02
gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
>IGHV3-52*03
gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga
>IGHV3-53*01
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-53*02
gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-53*03
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga
>IGHV3-53*04
gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga
>IGHV3-54*01
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
>IGHV3-54*02
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg
>IGHV3-54*04
gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt
>IGHV3-62*01
gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga
>IGHV3-63*01
gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt
>IGHV3-63*02
gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa
>IGHV3-64*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
>IGHV3-64*02
gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga
>IGHV3-64*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-64*04
caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-64*05
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-64D*06
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga
>IGHV3-66*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-66*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga
>IGHV3-66*03
gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga
>IGHV3-66*04
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca
>IGHV3-69-1*01
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-69-1*02
gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga
>IGHV3-7*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-7*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga
>IGHV3-7*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-71*01
gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga
>IGHV3-71*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga
>IGHV3-71*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga
>IGHV3-72*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga
>IGHV3-72*02
....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat
>IGHV3-73*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
>IGHV3-73*02
gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca
>IGHV3-74*01
gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
>IGHV3-74*02
gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga
>IGHV3-74*03
gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga
>IGHV3-9*01
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
>IGHV3-9*02
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata
>IGHV3-9*03
gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata
>IGHV3-NL1*01
caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga
>IGHV3/OR15-7*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*02
gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*03
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga
>IGHV3/OR15-7*05
gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga
>IGHV3/OR16-10*01
gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
>IGHV3/OR16-10*02
gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga
>IGHV3/OR16-10*03
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga
>IGHV3/OR16-12*01
gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga
>IGHV3/OR16-13*01
gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
>IGHV3/OR16-14*01
gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga
>IGHV3/OR16-15*01
gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa
>IGHV3/OR16-15*02
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga
>IGHV3/OR16-16*01
gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga
>IGHV3/OR16-6*02
gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg
>IGHV3/OR16-8*01
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa
>IGHV3/OR16-8*02
gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca
>IGHV3/OR16-9*01
gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa
>IGHV4-28*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga
>IGHV4-28*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga
>IGHV4-28*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*06
caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa
>IGHV4-28*07
caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-30-2*01
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-30-2*02
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
>IGHV4-30-2*03
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca
>IGHV4-30-2*04
...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-30-2*05
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-2*06
cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-30-4*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-30-4*04
caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg
>IGHV4-30-4*05
..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*06
...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga
>IGHV4-30-4*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-31*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-31*04
caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
>IGHV4-31*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg
>IGHV4-31*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg
>IGHV4-31*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-31*10
caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga
>IGHV4-34*01
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*02
caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*03
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*04
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*05
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-34*06
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*07
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-34*08
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg
>IGHV4-34*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-34*10
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-34*11
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
>IGHV4-34*12
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga
>IGHV4-34*13
...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg
>IGHV4-38-2*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga
>IGHV4-38-2*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-39*01
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca
>IGHV4-39*02
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga
>IGHV4-39*03
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
>IGHV4-39*04
..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac
>IGHV4-39*05
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-39*06
cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-39*07
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga
>IGHV4-4*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*05
caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-4*06
...........................................................................tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-4*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-55*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-55*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-55*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg
>IGHV4-55*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg
>IGHV4-55*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg
>IGHV4-55*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4-55*09
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa
>IGHV4-59*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-59*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-59*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg
>IGHV4-59*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-59*05
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg
>IGHV4-59*06
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg
>IGHV4-59*07
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga
>IGHV4-59*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca
>IGHV4-59*09
...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg
>IGHV4-59*10
caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata
>IGHV4-61*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-61*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga
>IGHV4-61*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4-61*04
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg
>IGHV4-61*05
cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga
>IGHV4-61*06
...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga
>IGHV4-61*07
...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca
>IGHV4-61*08
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*01
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*02
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV4/OR15-8*03
caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga
>IGHV5-10-1*01
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-10-1*02
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca
>IGHV5-10-1*03
gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-10-1*04
gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*01
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
>IGHV5-51*02
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca
>IGHV5-51*03
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*04
gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga
>IGHV5-51*05
.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg
>IGHV5-78*01
gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga
>IGHV6-1*01
caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
>IGHV6-1*02
caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga
>IGHV7-34-1*01
...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
>IGHV7-34-1*02
...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta
>IGHV7-4-1*01
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga
>IGHV7-4-1*02
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
>IGHV7-4-1*03
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg
>IGHV7-4-1*04
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga
>IGHV7-4-1*05
caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga
>IGHV7-40*03
ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga
>IGHV7-81*01
caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/baseline_url.txt	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,1 @@
+http://selection.med.yale.edu/baseline/
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/comparePDFs.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,225 @@
+options("warn"=-1)  # a negative warn level makes R ignore all warnings for the rest of the script
+
+#from http://selection.med.yale.edu/baseline/Archive/Baseline%20Version%201.3/Baseline_Functions_Version1.3.r
+# Compute p-value of two distributions
+#
+# Both densities are normalised to sum to 1 and are expected to be tabulated on
+# the same grid. The statistic is sum(dens1 * C2), where C2 is the cumulative
+# sum of dens2 taken at bin mid-points (so equal positions count for one half).
+# A statistic above 0.5 is shifted down by 1, so the returned value lies in
+# (-0.5, 0.5].
+#
+# sigma_S, N: not used by this implementation; kept so existing calls still work
+#             (they belong to the sampling-based variant noted at the end).
+# dens1, dens2: numeric density vectors.
+#
+# Returns NA when either density has length <= 1, or when the statistic is
+# undefined (e.g. a density summing to zero, NA entries, or dens1 longer than
+# dens2); previously the latter cases aborted inside the `if`.
+compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
+  #print(c(length(dens1),length(dens2)))
+  if(length(dens1)<=1 || length(dens2)<=1){
+    return(NA)
+  }
+  dens1<-dens1/sum(dens1)
+  dens2<-dens2/sum(dens2)
+  # Cumulative distribution of dens2 evaluated at the middle of each bin.
+  cum2 <- cumsum(dens2)-dens2/2
+  # cum2 is indexed by position instead of being recycled, so a length
+  # mismatch produces NA rather than a silently wrong sum.
+  tmp <- sum(dens1*cum2[seq_along(dens1)])
+  #print(tmp)
+  if(is.na(tmp)){
+    return(NA)
+  }
+  if(tmp>0.5)tmp<-tmp-1
+  return( tmp )
+  #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N)
+}  
+
+
+# grid supplies every drawing primitive used below. library() aborts immediately
+# when the package is missing, whereas require() would only return FALSE.
+library("grid")
+arg <- commandArgs(TRUE)
+#arg <- c("300143","4","5")
+# Drop any "clonal" marker from the arguments. The original statement computed
+# this subset without assigning it, so the marker was never actually removed.
+arg <- arg[arg != "clonal"]
+# Echo the arguments: the original bare expression auto-printed exactly this
+# vector when run through Rscript, so the script's stdout stays the same.
+print(arg)
+# Positional arguments: 1 = .RData file to load, 2 = PDF file to write,
+# 3.. = row identifiers, optionally prefixed with "chkbx".
+input <- arg[1]
+output <- arg[2]
+# gsub() is vectorised, so no per-element apply is needed.
+rowIDs <- as.numeric(gsub("chkbx","",arg[3:(max(3,length(arg)))]))
+
+numbSeqs <- length(rowIDs)
+
+# rowIDs[1] is NA when no identifier was supplied or when it is not numeric.
+if ( is.na(rowIDs[1]) || numbSeqs>10 ) {
+  stop( paste("Error: Please select between one and 10 seqeunces to compare.") )
+}
+
+#load( paste("output/",sessionID,".RData",sep="") )
+# Restores the objects saved in `input`; the rest of the script relies on this
+# providing `listPDFs`, which is not defined anywhere else in this file.
+load( input )
+#input
+
+# x grid that the plotting functions pair with the tabulated densities.
+xMarks <- seq(-20,20,length.out=4001)
+
+# Draw the CDR and FWR densities of two `listPDFs` entries in the current
+# grid viewport.
+#
+# CDR curves are drawn in the upper half (palette colour 2) and FWR curves are
+# mirrored into the lower half (palette colour 4). Curves of `pdf1` are solid,
+# curves of `pdf2` dashed (lty=2). When pdf1 == pdf2 and both regions hold a
+# density, the CDR-versus-FWR value from compareTwoDistsFaster() is printed in
+# the top-left corner.
+#
+# pdf1, pdf2: indices into the global `listPDFs`; the "CDR" and "FWR" elements
+#             of each entry are read.
+# Sample:     number of x positions at which the curves are evaluated.
+# cex:        text scaling passed to the grid graphics parameters.
+# xlim:       optional c(min, max) for the x axis. When it is not a vector of
+#             length 2 the range is derived from where any of the four
+#             densities exceeds 0.001, and always includes 0.
+# xMarks:     x grid matching the tabulated densities.
+#
+# Called for its drawing side effect; the two viewports it pushes are popped
+# again before returning.
+plot_grid_s<-function(pdf1,pdf2,Sample=100,cex=1,xlim=NULL,xMarks = seq(-20,20,length.out=4001)){
+  # `[` followed by `[[1]]` yields NULL (not an error) for an unknown index.
+  first <- listPDFs[pdf1][[1]]
+  second <- listPDFs[pdf2][[1]]
+
+  yMax = max(c(abs(as.numeric(unlist(listPDFs[pdf1]))),abs(as.numeric(unlist(listPDFs[pdf2]))),0),na.rm=TRUE) * 1.1
+
+  # Fixed: the original tested length(xlim==2), i.e. the length of a logical
+  # vector, which was also true for an xlim of length 1 or 3.
+  if(length(xlim)==2){
+    xMin=xlim[1]
+    xMax=xlim[2]
+  } else {
+    # First / last grid position at which a density exceeds the threshold
+    # (NA or empty when it never does; both are ignored by min/max below).
+    first_above <- function(dens){
+      xMarks[dens>0.001][1]
+    }
+    last_above <- function(dens){
+      above <- xMarks[dens>0.001]
+      above[length(above)]
+    }
+    densities <- list(first[["CDR"]],first[["FWR"]],second[["CDR"]],second[["FWR"]])
+
+    xMin=min(c(unlist(lapply(densities,first_above)),0),na.rm=TRUE)
+    xMax=max(c(unlist(lapply(densities,last_above)),0),na.rm=TRUE)
+  }
+
+  sigma<-approx(xMarks,xout=seq(xMin,xMax,length.out=Sample))$x
+  grid.rect(gp = gpar(col=gray(0.6),fill="white",cex=cex))
+  x <- sigma
+  pushViewport(viewport(x=0.175,y=0.175,width=0.825,height=0.825,just=c("left","bottom"),default.units="npc"))
+  #pushViewport(plotViewport(c(1.8, 1.8, 0.25, 0.25)*cex))
+  # The y scale runs from yMax to -yMax so both halves share one axis.
+  pushViewport(dataViewport(x, c(yMax,-yMax),gp = gpar(cex=cex),extension=c(0.05)))
+  grid.polygon(c(0,0,1,1),c(0,0.5,0.5,0),gp=gpar(col=grey(0.95),fill=grey(0.95)),default.units="npc")
+  grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.9),fill=grey(0.9)),default.units="npc")
+  grid.rect()
+  grid.xaxis(gp = gpar(cex=cex/1.1))
+  yticks = pretty(c(-yMax,yMax),8)
+  yticks = yticks[yticks>(-yMax) & yticks<(yMax)]
+  # Tick labels are absolute values because the lower half is a mirrored density.
+  grid.yaxis(at=yticks,label=abs(yticks),gp = gpar(cex=cex/1.1))
+
+  # Interpolate one density onto the plotted x positions and draw it;
+  # `flip` is 1 for the upper half and -1 for the mirrored lower half.
+  # Densities of length <= 1 are skipped.
+  draw_density <- function(dens,flip,gp){
+    if(length(dens)>1){
+      y<-approx(xMarks,dens,xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y
+      grid.lines(unit(x,"native"), unit(flip*y,"native"),gp=gp)
+    }
+  }
+  draw_density(first[["CDR"]],1,gpar(col=2,lwd=2))
+  draw_density(first[["FWR"]],-1,gpar(col=4,lwd=2))
+  draw_density(second[["CDR"]],1,gpar(col=2,lwd=2,lty=2))
+  draw_density(second[["FWR"]],-1,gpar(col=4,lwd=2,lty=2))
+
+  # Horizontal separator between the halves and a dotted vertical line at x = 0.
+  grid.lines(unit(c(0,1),"npc"), unit(c(0.5,0.5),"npc"),gp=gpar(col=1))
+  grid.lines(unit(c(0,0),"native"), unit(c(0,1),"npc"),gp=gpar(col=1,lwd=1,lty=3))
+
+  grid.text("All", x = unit(-2.5, "lines"), rot = 90,gp = gpar(cex=cex))
+  grid.text( expression(paste("Selection Strength (", Sigma, ")", sep="")) , y = unit(-2.5, "lines"),gp = gpar(cex=cex))
+  
+  if(pdf1==pdf2 && length(second[["FWR"]])>1 && length(second[["CDR"]])>1 ){
+    pCDRFWR = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf1]][["FWR"]])       
+    pval = formatC(as.numeric(pCDRFWR),digits=3)
+    grid.text( substitute(expression(paste(P[CDR/FWR], "=", x, sep="")),list(x=pval))[[2]] , x = unit(0.02, "npc"),y = unit(0.98, "npc"),just=c("left", "top"),gp = gpar(cex=cex*1.2))
+  }
+  grid.text(paste("CDR"), x = unit(0.98, "npc"),y = unit(0.98, "npc"),just=c("right", "top"),gp = gpar(cex=cex*1.5))
+  grid.text(paste("FWR"), x = unit(0.98, "npc"),y = unit(0.02, "npc"),just=c("right", "bottom"),gp = gpar(cex=cex*1.5))
+  popViewport(2)
+}
+#plot_grid_s(1)
+
+
+# Map signed p-values to cell background colours.
+#
+# Values are binned by findInterval() on eleven break points: the five bins
+# below zero get greens (pale at -0.51, pure green just under 0) and the five
+# bins from zero upwards get reds (pure red at 0, pale towards 0.51).
+# Values below -0.51 or at/above 0.51 fall outside the palette.
+#
+# p: numeric vector of p-values. Returns the matching hex colour strings.
+p2col<-function(p=0.01){
+  binEdges <- c(-.51,-0.1,-.05,-0.01,-0.005,0,0.005,0.01,0.05,0.1,0.51)
+  # Amount of the two non-dominant channels, from palest to fully saturated.
+  fade <- c(0.8,0.6,0.4,0.2,0)
+  greens <- rgb(fade,rep(1,5),fade)
+  reds <- rgb(rep(1,5),rev(fade),rev(fade))
+  palette10 <- c(greens,reds)
+  palette10[findInterval(p,binEdges)]
+}
+
+
+# Fill the current viewport with a 2x2 table of p-values comparing the CDR
+# and FWR densities of `pdf1` against those of `pdf2`.
+#
+# Layout: the top row uses the CDR of pdf1, the bottom row its FWR; the left
+# column uses the CDR of pdf2, the right column its FWR. Each quadrant is
+# shaded with p2col() and labelled with the value formatted to 3 digits.
+#
+# pdf1, pdf2: indices into the global `listPDFs`.
+# cex:        text scaling for the labels.
+# upper:      when FALSE nothing is drawn and NULL is returned.
+plot_pvals<-function(pdf1,pdf2,cex=1,upper=TRUE){
+  if(!upper){
+    return(NULL)
+  }
+
+  rowSet <- listPDFs[[pdf1]]
+  colSet <- listPDFs[[pdf2]]
+  # All four comparisons share the same grid and sample-size arguments.
+  compare <- function(a,b){
+    compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=a, dens2=b)
+  }
+  pCDR1FWR2 = compare(rowSet[["CDR"]],colSet[["FWR"]])
+  pFWR1FWR2 = compare(rowSet[["FWR"]],colSet[["FWR"]])
+  pFWR1CDR2 = compare(rowSet[["FWR"]],colSet[["CDR"]])
+  pCDR1CDR2 = compare(rowSet[["CDR"]],colSet[["CDR"]])
+
+  # Shade one quadrant; outline and fill use the same colour.
+  shade <- function(xs,ys,p){
+    tint <- p2col(p)
+    grid.polygon(xs,ys,gp=gpar(col=tint,fill=tint),default.units="npc")
+  }
+  shade(c(0.5,0.5,1,1),c(0,0.5,0.5,0),pFWR1FWR2)
+  shade(c(0.5,0.5,1,1),c(1,0.5,0.5,1),pCDR1FWR2)
+  shade(c(0.5,0.5,0,0),c(1,0.5,0.5,1),pCDR1CDR2)
+  shade(c(0.5,0.5,0,0),c(0,0.5,0.5,0),pFWR1CDR2)
+
+  # Dashed cross separating the four quadrants.
+  grid.lines(c(0,1),0.5,gp=gpar(lty=2,col=gray(0.925)))
+  grid.lines(0.5,c(0,1),gp=gpar(lty=2,col=gray(0.925)))
+
+  # Write one value centred at (x, y) in npc coordinates.
+  label <- function(p,x,y){
+    grid.text(formatC(as.numeric(p),digits=3), x = unit(x, "npc"),y = unit(y, "npc"),just=c("center", "center"),gp = gpar(cex=cex))
+  }
+  label(pFWR1FWR2,0.75,0.25)
+  label(pCDR1FWR2,0.75,0.75)
+  label(pCDR1CDR2,0.25,0.75)
+  label(pFWR1CDR2,0.25,0.25)
+
+ #   grid.text(paste("P = ",formatC(pCDRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.98, "npc"),just=c("center", "top"),gp = gpar(cex=cex))
+ #   grid.text(paste("P = ",formatC(pFWRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.02, "npc"),just=c("center", "bottom"),gp = gpar(cex=cex))
+}
+
+
+##################################################################################
+################## The whole OCD's matrix ########################################
+##################################################################################
+
+# The page is square and grows with the number of selected sequences.
+#pdf(width=4*numbSeqs+1/3,height=4*numbSeqs+1/3)
+pageSide <- 4*numbSeqs+1/3
+pdf( output ,width=pageSide,height=pageSide)
+
+# (numbSeqs+1) x (numbSeqs+1) layout: row 1 is a 4-line header strip, the last
+# column a 4-line label strip; the remaining cells share the space equally.
+matrixLayout <- grid.layout(numbSeqs+1,numbSeqs+1,
+                            widths=unit.c(unit(rep(1,numbSeqs),"null"),unit(4,"lines")),
+                            heights=unit.c(unit(4,"lines"),unit(rep(1,numbSeqs),"null")))
+pushViewport(viewport(x=0.02,y=0.02,just = c("left", "bottom"),width=0.96,height=0.96,layout = matrixLayout))
+
+# Name of the k-th selected entry of listPDFs, cut to 16 characters.
+seqLabel <- function(k){
+  substr(paste(names(listPDFs)[rowIDs[k]]),1,16)
+}
+
+# Header strip (row 1): one cell per selected sequence. Every column except
+# the first also gets the CDR / FWR sub-headings for the p-value quadrants.
+for( k in seq_len(numbSeqs) ){
+  pushViewport(viewport(layout.pos.col = k, layout.pos.row = 1))
+  if(k>1){
+    grid.polygon(c(0,0,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")
+    grid.polygon(c(1,1,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")
+    grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.5)),default.units="npc")
+
+    grid.text(y=.25,x=0.75,"FWR",gp = gpar(cex=1.5),just="center")
+    grid.text(y=.25,x=0.25,"CDR",gp = gpar(cex=1.5),just="center")
+  }
+  grid.rect(gp = gpar(col=grey(0.9)))
+  grid.text(y=.75,seqLabel(k),gp = gpar(cex=2),just="center")
+  popViewport(1)
+}
+
+# Label strip (last column): one cell per selected sequence in rows 2..n+1.
+# Every row except the last also gets rotated CDR / FWR sub-headings.
+for( k in seq_len(numbSeqs) ){
+  pushViewport(viewport(layout.pos.row = k+1, layout.pos.col = numbSeqs+1))
+  if(k<numbSeqs){
+    grid.polygon(c(0,0.5,0.5,0),c(0,0,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc")
+    grid.polygon(c(0,0.5,0.5,0),c(1,1,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc")
+    grid.polygon(c(1,0.5,0.5,1),c(0,0,1,1),gp=gpar(col=grey(0.5)),default.units="npc")
+    grid.text(x=.25,y=0.75,"CDR",gp = gpar(cex=1.5),just="center",rot=270)
+    grid.text(x=.25,y=0.25,"FWR",gp = gpar(cex=1.5),just="center",rot=270)
+  }
+  grid.rect(gp = gpar(col=grey(0.9)))
+  grid.text(x=0.75,seqLabel(k),gp = gpar(cex=2),rot=270,just="center")
+  popViewport(1)
+}
+
+# Cells to the right of the diagonal: p-value tables for each pair (a, b)
+# with b > a. All other cells are visited but left untouched here.
+for( a in seq_len(numbSeqs) ){
+  for( b in seq_len(numbSeqs) ){
+    pushViewport(viewport(layout.pos.col = b, layout.pos.row = a+1))
+    if(b>a){
+      plot_pvals(rowIDs[a],rowIDs[b],cex=2)
+      grid.rect()
+    }
+    popViewport(1)
+  }
+}
+   
+
+# Overall x range in which any selected density exceeds 0.001.
+# NOTE(review): xMin/xMax are not passed to plot_grid_s() below, which derives
+# its own range per panel -- confirm whether xlim=c(xMin,xMax) was intended.
+xMin <- 0
+xMax <- 0.01
+for(pdf1 in rowIDs){
+  for(region in c("CDR","FWR")){
+    aboveThreshold <- xMarks[listPDFs[pdf1][[1]][[region]]>0.001]
+    xMin <- min(c(aboveThreshold[1],xMin),na.rm=TRUE)
+    xMax <- max(c(aboveThreshold[length(aboveThreshold)],xMax),na.rm=TRUE)
+  }
+}
+
+
+
+# Diagonal and the cells below it: density panels. Column a holds sequence a
+# overlaid with each sequence b >= a; the diagonal shows a sequence on its own.
+for( a in seq_len(numbSeqs) ){
+  for( b in a:numbSeqs ){
+    pushViewport(viewport(layout.pos.col = a, layout.pos.row = b+1))
+    grid.rect()
+    plot_grid_s(rowIDs[a],rowIDs[b],cex=1)
+    popViewport(1)
+  }
+}
+
+dev.off() 
+
+# Status line on stdout listing the plotted row identifiers.
+cat("Success", paste(rowIDs,collapse="_"),sep=":")
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/filter.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,55 @@
+# Deduplicate the IMGT sequences on the selected columns and drop the "No results"/"unproductive" ones.
+# Arguments: <summary file> <gapped file> <comma separated selection columns> <output file>
+arg = commandArgs(TRUE)
+summaryfile = arg[1]
+gappedfile = arg[2]
+selection = arg[3]
+output = arg[4]
+print(paste("selection = ", selection))
+
+summarydat = read.table(summaryfile, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote = "")
+gappeddat = read.table(gappedfile, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote = "")
+
+# Rename the "V.DOMAIN."-prefixed functionality columns, when present, to the names used below.
+fix_column_names = function(df){
+    if("V.DOMAIN.Functionality" %in% names(df)){
+        names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality"
+        print("found V.DOMAIN.Functionality, changed")
+    }
+    if("V.DOMAIN.Functionality.comment" %in% names(df)){
+        names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment"
+        print("found V.DOMAIN.Functionality.comment, changed")
+    }
+    return(df)
+}
+
+gappeddat = fix_column_names(gappeddat)
+
+# AA.JUNCTION is attached by row position, so the column must exist and both tables must have the same rows.
+if(!("AA.JUNCTION" %in% names(summarydat)) || nrow(gappeddat) != nrow(summarydat)){
+    stop("Summary file has no AA.JUNCTION column or a different number of rows than the gapped file")
+}
+dat = cbind(gappeddat, AA.JUNCTION = summarydat$AA.JUNCTION)
+
+# VGene/DGene/JGene: the gene name without the "Homsap " prefix and without the "*allele" suffix.
+for(gene in c("V", "D", "J")){
+    allele = dat[[paste0(gene, ".GENE.and.allele")]]
+    dat[[paste0(gene, "Gene")]] = gsub("[*].*", "", gsub("^Homsap ", "", allele))
+}
+
+str(dat) # str() prints by itself and returns NULL; wrapping it in print() only added a stray "NULL"
+
+fields = unlist(strsplit(selection, ","))
+missing.fields = setdiff(fields, names(dat))
+if(length(missing.fields) > 0){
+    stop(paste("Unknown selection column(s):", paste(missing.fields, collapse=", ")))
+}
+dat$past = do.call(paste, c(dat[fields], sep = ":")) # one key per row built from the selected columns
+dat = dat[!duplicated(dat$past), ]
+print(paste("Sequences remaining after duplicate filter:", nrow(dat)))
+
+dat = dat[!(dat$Functionality %in% c("No results", "unproductive")),] # %in% never yields NA, so no all-NA rows are created
+print(paste("Sequences remaining after functionality filter:", nrow(dat)))
+print(paste("Sequences remaining:", nrow(dat)))
+
+write.table(x=dat, file=output, sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/script_imgt.py	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,86 @@
+#import xlrd #avoid dep
+import argparse
+import re
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
+parser.add_argument("--ref", help="Reference file")
+parser.add_argument("--output", help="Output file")
+parser.add_argument("--id", help="ID to be used at the '>>>' line in the output")
+
+args = parser.parse_args()
+
+print "script_imgt.py"
+print "input:", args.input
+print "ref:", args.ref
+print "output:", args.output
+print "id:", args.id
+
+refdic = dict()
+with open(args.ref, 'rU') as ref:
+	currentSeq = ""
+	currentId = ""
+	for line in ref:
+		if line.startswith(">"):
+			if currentSeq is not "" and currentId is not "":
+				refdic[currentId[1:]] = currentSeq
+			currentId = line.rstrip()
+			currentSeq = ""
+		else:
+			currentSeq += line.rstrip()
+	refdic[currentId[1:]] = currentSeq
+
+print "Have", str(len(refdic)), "reference sequences"
+
+vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
+#						r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
+#						r"(IGKV[0-3]D?-[0-9]{1,2})",
+#						r"(IGLV[0-9]-[0-9]{1,2})",
+#						r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)",
+#						r"(TRGV[234589])",
+#						r"(TRDV[1-3])"]
+
+#vPattern = re.compile(r"|".join(vPattern))
+vPattern = re.compile("|".join(vPattern))
+
+def filterGene(s, pattern):
+    if type(s) is not str:
+        return None
+    res = pattern.search(s)
+    if res:
+        return res.group(0)
+    return None
+
+
+
+currentSeq = ""
+currentId = ""
+first=True
+with open(args.input, 'r') as i:
+	with open(args.output, 'a') as o:
+		o.write(">>>" + args.id + "\n")
+		outputdic = dict()
+		for line in i:
+			if first:
+				first = False
+				continue
+			linesplt = line.split("\t")
+			ref = filterGene(linesplt[1], vPattern)
+			if not ref or not linesplt[2].rstrip():
+				continue
+			if ref in outputdic:
+				outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
+			else:
+				outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())]
+		#print outputdic
+		
+		for k in outputdic.keys():
+			if k in refdic:
+				o.write(">>" + k + "\n")
+				o.write(refdic[k] + "\n")
+				for seq in outputdic[k]:
+					#print seq
+					o.write(">" + seq[0] + "\n")
+					o.write(seq[1] + "\n")
+			else:
+				print k + " not in reference, skipping " + k
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/script_xlsx.py	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,58 @@
+import xlrd
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence")
+parser.add_argument("--ref", help="Reference file")
+parser.add_argument("--output", help="Output file")
+
+args = parser.parse_args()
+
+gene_column = 6
+id_column = 7
+seq_column = 8
+LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"]
+
+
+refdic = dict()
+with open(args.ref, 'r') as ref:
+	currentSeq = ""
+	currentId = ""
+	for line in ref.readlines():
+		if line[0] is ">":
+			if currentSeq is not "" and currentId is not "":
+				refdic[currentId[1:]] = currentSeq
+			currentId = line.rstrip()
+			currentSeq = ""
+		else:
+			currentSeq += line.rstrip()
+	refdic[currentId[1:]] = currentSeq
+	
+currentSeq = ""
+currentId = ""
+with xlrd.open_workbook(args.input, 'r') as wb:
+	with open(args.output, 'a') as o:
+		for sheet in wb.sheets():
+			if sheet.cell(1,gene_column).value.find("IGHV") < 0:
+				print "Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name
+				continue
+			o.write(">>>" + sheet.name + "\n")
+			outputdic = dict()
+			for rowindex in range(1, sheet.nrows):
+				ref = sheet.cell(rowindex, gene_column).value.replace(">", "")
+				if ref in outputdic:
+					outputdic[ref] += [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
+				else:
+					outputdic[ref] = [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)]
+			#print outputdic
+			
+			for k in outputdic.keys():
+				if k in refdic:
+					o.write(">>" + k + "\n")
+					o.write(refdic[k] + "\n")
+					for seq in outputdic[k]:
+						#print seq
+						o.write(">" + seq[0] + "\n")
+						o.write(seq[1] + "\n")
+				else:
+					print k + " not in reference, skipping " + k
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/baseline/wrapper.sh	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,92 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+testID=$1
+species=$2
+substitutionModel=$3
+mutabilityModel=$4
+clonal=$5
+fixIndels=$6
+region=$7
+inputs=$8
+inputs=($inputs)
+IDs=$9
+IDs=($IDs)
+ref=${10}
+output=${11}
+selection=${12}
+output_table=${13}
+outID="result"
+
+echo "$PWD"
+
+echo "testID = $testID"
+echo "species = $species"
+echo "substitutionModel = $substitutionModel"
+echo "mutabilityModel = $mutabilityModel"
+echo "clonal = $clonal"
+echo "fixIndels = $fixIndels"
+echo "region = $region"
+echo "inputs = ${inputs[@]}"
+echo "IDs = ${IDs[@]}"
+echo "ref = $ref"
+echo "output = $output"
+echo "outID = $outID"
+
+fasta="$PWD/baseline.fasta"
+
+
+count=0
+for current in ${inputs[@]}
+do
+	f=$(file $current)
+	zipType="Zip archive"
+	if [[ "$f" == *"Zip archive"* ]] || [[ "$f" == *"XZ compressed data"* ]]
+	then
+		id=${IDs[$count]}
+		echo "id=$id"
+		if [[ "$f" == *"Zip archive"* ]] ; then
+			echo "Zip archive"
+			echo "unzip $input -d $PWD/files/"
+			unzip $current -d "$PWD/$id/"
+		elif [[ "$f" == *"XZ compressed data"* ]] ; then
+			echo "ZX archive"
+			echo "tar -xJf $input -C $PWD/files/"
+			mkdir -p "$PWD/$id/files"
+			tar -xJf $current -C "$PWD/$id/files/"
+		fi
+		filtered="$PWD/filtered_${id}.txt"
+		imgt_1_file="`find $PWD/$id -name '1_*.txt'`"
+		imgt_2_file="`find $PWD/$id -name '2_*.txt'`"
+		echo "1_Summary file: ${imgt_1_file}"
+		echo "2_IMGT-gapped file: ${imgt_2_file}"
+		echo "filter.r for $id"
+		Rscript $dir/filter.r ${imgt_1_file} ${imgt_2_file} "$selection" $filtered 2>&1
+		
+		final="$PWD/final_${id}.txt"
+		cat $filtered | cut -f2,4,7 > $final
+		python $dir/script_imgt.py --input $final --ref $ref --output $fasta --id $id
+	else
+		python $dir/script_xlsx.py --input $current --ref $ref --output $fasta
+	fi
+	count=$((count+1))
+done
+workdir="$PWD"
+cd $dir
+echo "file: ${inputs[0]}"
+#Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region ${inputs[0]} $workdir/ $outID 2>&1
+Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region $fasta $workdir/ $outID 2>&1
+
+echo "$workdir/${outID}.txt"
+
+rows=`tail -n +2 $workdir/${outID}.txt | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '`
+rows=($rows)
+#unset rows[${#rows[@]}-1]
+
+cd $dir
+Rscript --verbose $dir/comparePDFs.r $workdir/${outID}.RData $output ${rows[@]} 2>&1
+cp $workdir/result.txt ${output_table}
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/change_o/change_o_url.txt	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,1 @@
+https://changeo.readthedocs.io/en/version-0.4.4/
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/change_o/define_clones.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,15 @@
+# Clone size table: for every clone size, the number of clones of that size and the number of sequences they hold.
+args <- commandArgs(trailingOnly = TRUE)
+input=args[1]
+output=args[2]
+
+change.o = read.table(input, header=TRUE, sep="\t", quote="", stringsAsFactors=FALSE)
+if(!("CLONE" %in% names(change.o))) stop("Input file has no 'CLONE' column") # fail loudly instead of summarising nothing
+
+freq = data.frame(table(change.o$CLONE)) # sequences per clone
+freq2 = data.frame(table(freq$Freq)) # clones per clone size
+
+freq2$final = as.numeric(freq2$Freq) * as.numeric(as.character(freq2$Var1)) # Var1 is a factor: convert via character
+names(freq2) = c("Clone size", "Nr of clones", "Nr of sequences")
+
+write.table(x=freq2, file=output, sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/change_o/define_clones.sh	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,39 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+#usage: define_clones.sh bygroup $input $mode $act $model $norm $sym $link $dist $output $output2
+#       define_clones.sh <any other type> $input $method $output $output2
+
+type=$1
+input=$2
+
+mkdir -p "$PWD/outdir"
+
+cp "$input" "$PWD/input.tab" #file has to have a ".tab" extension
+
+if [ "bygroup" == "$type" ] ; then
+	mode=$3
+	act=$4
+	model=$5
+	norm=$6
+	sym=$7
+	link=$8
+	dist=$9
+	output=${10}
+	output2=${11}
+	
+	DefineClones.py -d "$PWD/input.tab" --nproc 4 --outdir "$PWD/outdir" --outname output --mode "$mode" --act "$act" --model "$model" --dist "$dist" --norm "$norm" --sym "$sym" --link "$link"
+else
+	method=$3
+	output=$4
+	output2=$5
+	#NOTE(review): change_o_url.txt points at Change-O 0.4.4 -- confirm that release still offers the hclust subcommand
+	DefineClones.py hclust -d "$PWD/input.tab" --nproc 4 --outdir "$PWD/outdir" --outname output --method "$method"
+fi
+
+#clone size summary; the call is identical for both modes, so it is made once here
+Rscript "$dir/define_clones.r" "$PWD/outdir/output_clone-pass.tab" "$output2" 2>&1
+
+cp "$PWD/outdir/output_clone-pass.tab" "$output"
+
+rm -rf "$PWD/outdir/"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/change_o/makedb.sh	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,36 @@
+#!/bin/bash
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+input=$1
+noparse=$2
+scores=$3
+regions=$4
+output=$5
+
+if [ "true" == "$noparse" ] ; then
+	noparse="--noparse"
+else
+	noparse=""
+fi
+
+if [ "true" == "$scores" ] ; then
+	scores="--scores"
+else
+	scores=""
+fi
+
+if [ "true" == "$regions" ] ; then
+	regions="--regions"
+else
+	regions=""
+fi
+
+mkdir $PWD/outdir
+
+echo "makedb: $PWD/outdir"
+
+MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions
+
+mv $PWD/outdir/output_db-pass.tab $output
+
+rm -rf $PWD/outdir/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/change_o/select_first_in_clone.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,16 @@
+# Keep only the first row of every distinct CLONE value and name the first column "Sequence.ID".
+args <- commandArgs(trailingOnly = TRUE)
+input.file = args[1]
+output.file = args[2]
+
+print("select_in_first_clone.r")
+print(input.file)
+print(output.file)
+
+input = read.table(input.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
+if(!("CLONE" %in% names(input))) stop("Input file has no 'CLONE' column") # duplicated(NULL) would silently drop every row
+
+input = input[!duplicated(input$CLONE),]
+names(input)[1] = "Sequence.ID"
+
+write.table(input, output.file, quote=FALSE, sep="\t", row.names=FALSE, col.names=TRUE, na="")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/check_unique_id.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,25 @@
+# Make 'Sequence ID' unique. The first argument is checked for repeated IDs; when any exist, every file
+# given as argument is rewritten in place with "_<Sequence number>" appended to its 'Sequence ID' values.
+args <- commandArgs(trailingOnly = TRUE)
+current_file = args[1]
+
+current = read.table(current_file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="", check.names=FALSE)
+
+# Both columns are needed below; previously only 'Sequence number' was verified.
+missing.columns = setdiff(c("Sequence number", "Sequence ID"), names(current))
+if(length(missing.columns) > 0){
+	stop(paste0("First argument doesn't contain the column(s): '", paste(missing.columns, collapse="', '"), "'"))
+}
+
+tbl = table(current[,"Sequence ID"])
+check = any(tbl > 1)
+
+if(check){
+	print("Sequence.ID is not unique for every sequence, adding sequence number to IDs")
+	for(current_file in args){
+		print(paste("Appending 'Sequence number' column to 'Sequence ID' column in", current_file))
+		current = read.table(current_file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="", check.names=FALSE)
+		current[,"Sequence ID"] = paste(current[,"Sequence ID"], current[,"Sequence number"], sep="_")
+		write.table(x = current, file = current_file, quote = FALSE, sep = "\t", na = "", row.names = FALSE, col.names = TRUE)
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/datatypes_conf.xml	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<datatypes>
+    <registration>
+        <datatype extension="imgt_archive" type="galaxy.datatypes.binary:CompressedArchive" display_in_upload="True" subclass="True"/>
+    </registration>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/gene_identification.py	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,226 @@
+import re
+import argparse
+import time
+starttime= int(time.time() * 1000)
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="The 1_Summary file from an IMGT zip file")
+parser.add_argument("--output", help="The annotated output file to be merged back with the summary file")
+
+args = parser.parse_args()
+
+infile = args.input
+#infile = "test_VH-Ca_Cg_25nt/1_Summary_test_VH-Ca_Cg_25nt_241013.txt"
+output = args.output
+#outfile = "identified.txt"
+
+dic = dict() #Sequence ID -> sequence, filled by the first pass over the input below
+total = 0 #number of lines read (header included)
+
+#column positions of "Sequence ID" and "Sequence", looked up in the header line
+first = True
+IDIndex = 0
+seqIndex = 0
+
+with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence
+	for line in f:
+		total += 1
+		linesplt = line.split("\t")
+		if first:
+			print "linesplt", linesplt
+			IDIndex = linesplt.index("Sequence ID") #list.index raises ValueError when the header has no such column
+			seqIndex = linesplt.index("Sequence")
+			first = False
+			continue
+		
+		ID = linesplt[IDIndex]
+		if len(linesplt) < 28: #weird rows without a sequence; NOTE(review): 28 is hard-coded, presumably the width of a complete row -- confirm
+			dic[ID] = ""
+		else:
+			dic[ID] = linesplt[seqIndex] #a repeated ID overwrites the earlier entry
+			
+print "Number of input sequences:", len(dic)
+
+#old cm sequence: gggagtgcatccgccccaacccttttccccctcgtctcctgtgagaattccc
+#old cg sequence: ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggccctgggctgcctggtcaaggactacttccccgaaccggtgacggtgtcgtggaactcaggcgccctgaccag
+
+#reference sequence per constant gene key (ca/cg/ce/cm); the output step of this script labels them IGA/IGG/IGE/IGM
+searchstrings = {"ca": "catccccgaccagccccaaggtcttcccgctgagcctctgcagcacccagccagatgggaacgtggtcatcgcctgcctgg",
+                 "cg": "ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggcc",
+                 "ce": "gcctccacacagagcccatccgtcttccccttgacccgctgctgcaaaaacattccctcc",
+                 "cm": "gggagtgcatccgccccaacc"} #new (shorter) cm sequence
+
+compiledregex = {"ca": [],
+                 "cg": [],
+                 "ce": [],
+                 "cm": []}
+
+#per subclass: position in the reference sequence -> nucleotide found at that position for that subclass
+ca1 = {38: 't', 39: 'g', 48: 'a', 49: 'g', 51: 'c', 68: 'a', 73: 'c'}
+ca2 = {38: 'g', 39: 'a', 48: 'c', 49: 'c', 51: 'a', 68: 'g', 73: 'a'}
+cg1 = {0: 'c', 33: 'a', 38: 'c', 44: 'a', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'}
+cg2 = {0: 'c', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'g', 132: 't'}
+cg3 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'}
+cg4 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'c', 132: 'c'}
+
+#remove last snp for shorter cg sequence --- note, also change varsInCG
+del cg1[132]
+del cg2[132]
+del cg3[132]
+del cg4[132]
+
+#reference sequences are cut into smaller parts of 'chunklength' length, and with 'chunklength' / 2 overlap
+chunklength = 8
+
+#create the chunks of the reference sequence with regular expressions for the variable nucleotides
+for i in range(0, len(searchstrings["ca"]) - chunklength, chunklength / 2): #the stop value of range() is exclusive
+  pos = i
+  chunk = searchstrings["ca"][i:i+chunklength]
+  result = ""
+  varsInResult = 0
+  for c in chunk:
+    if pos in ca1.keys():
+      varsInResult += 1
+      result += "[" + ca1[pos] + ca2[pos] + "]" #character class that accepts the nucleotide of either subclass
+    else:
+      result += c
+    pos += 1
+  compiledregex["ca"].append((re.compile(result), varsInResult)) #(regex, number of variable positions in the chunk)
+#same construction for cg, with the nucleotides of the four subclasses merged into one character class
+for i in range(0, len(searchstrings["cg"]) - chunklength, chunklength / 2):
+  pos = i
+  chunk = searchstrings["cg"][i:i+chunklength]
+  result = ""
+  varsInResult = 0
+  for c in chunk:
+    if pos in cg1.keys():
+      varsInResult += 1
+      result += "[" + "".join(set([cg1[pos], cg2[pos], cg3[pos], cg4[pos]])) + "]"
+    else:
+      result += c
+    pos += 1
+  compiledregex["cg"].append((re.compile(result), varsInResult))
+#cm and ce have no variable positions: plain chunks, with False stored in place of the variable count
+for i in range(0, len(searchstrings["cm"]) - chunklength, chunklength / 2):
+  compiledregex["cm"].append((re.compile(searchstrings["cm"][i:i+chunklength]), False))
+#NOTE(review): only this loop adds 1 to the stop value of range(); confirm the difference with ca/cg/cm is intended
+for i in range(0, len(searchstrings["ce"]) - chunklength + 1, chunklength / 2):
+  compiledregex["ce"].append((re.compile(searchstrings["ce"][i:i+chunklength]), False))
+
+def removeAndReturnMaxIndex(x): #simplifies a list comprehension
+  m = max(x)
+  index = x.index(m)
+  x[index] = 0
+  return index
+  
+
+start_location = dict() #"<ID>_<gene key>" -> str() of a list with the most frequent inferred start positions
+hits = dict() #ID -> counters: matched chunks per gene key and matching variable nucleotides per subclass
+alltotal = 0 #not used in the code of this loop
+for key in compiledregex.keys(): #for ca/cg/cm/ce
+	regularexpressions = compiledregex[key] #get the compiled regular expressions
+	for ID in dic.keys()[0:]: #for every ID
+		if ID not in hits.keys(): #ensure that the dictionairy that keeps track of the hits for every gene exists
+			hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ce_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0}
+		currentIDHits = hits[ID]
+		seq = dic[ID]
+		lastindex = 0 #position in seq of the most recent chunk match; the next chunk is searched from here on
+		start_zero = len(searchstrings[key]) #allows the reference sequence to start before search sequence (start_locations of < 0)
+		start = [0] * (len(seq) + start_zero) #vote count per inferred start position, shifted by start_zero
+		for i, regexp in enumerate(regularexpressions): #for every regular expression
+			relativeStartLocation = lastindex - (chunklength / 2) * i #uses lastindex as it is BEFORE chunk i is searched; NOTE(review): the commented-out print below uses the value after the match -- confirm which is intended
+			if relativeStartLocation >= len(seq):
+				break
+			regex, hasVar = regexp
+			matches = regex.finditer(seq[lastindex:])
+			for match in matches: #for every match with the current regex, only uses the first hit because of the break at the end of this loop
+				lastindex += match.start()
+				start[relativeStartLocation + start_zero] += 1
+				if hasVar: #if the regex has a variable nt in it
+					chunkstart = chunklength / 2 * i #where in the reference does this chunk start
+					chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end
+					if key == "ca": #just calculate the variable nt score for 'ca', cheaper
+						currentIDHits["ca1"] += len([1 for x in ca1 if chunkstart <= x < chunkend and ca1[x] == seq[lastindex + x - chunkstart]])
+						currentIDHits["ca2"] += len([1 for x in ca2 if chunkstart <= x < chunkend and ca2[x] == seq[lastindex + x - chunkstart]])
+					elif key == "cg": #just calculate the variable nt score for 'cg', cheaper
+						currentIDHits["cg1"] += len([1 for x in cg1 if chunkstart <= x < chunkend and cg1[x] == seq[lastindex + x - chunkstart]])
+						currentIDHits["cg2"] += len([1 for x in cg2 if chunkstart <= x < chunkend and cg2[x] == seq[lastindex + x - chunkstart]])
+						currentIDHits["cg3"] += len([1 for x in cg3 if chunkstart <= x < chunkend and cg3[x] == seq[lastindex + x - chunkstart]])
+						currentIDHits["cg4"] += len([1 for x in cg4 if chunkstart <= x < chunkend and cg4[x] == seq[lastindex + x - chunkstart]])
+					else: #key == "cm" #no variable regions in 'cm' or 'ce'
+						pass
+				break #this only breaks when there was a match with the regex, breaking means the 'else:' clause is skipped
+			else: #only runs if there were no hits
+				continue #skips the hit counter below and moves on to the next chunk
+			#print "found ", regex.pattern , "at", lastindex, "adding one to", (lastindex - chunklength / 2 * i), "to the start array of", ID, "gene", key, "it's now:", start[lastindex - chunklength / 2 * i]
+			currentIDHits[key + "_hits"] += 1
+		start_location[ID + "_" + key] = str([(removeAndReturnMaxIndex(start) + 1 - start_zero) for x in range(5) if len(start) > 0 and max(start) > 1]) #at most 5 positions, taken while the highest remaining vote count is above 1
+		#start_location[ID + "_" + key] = str(start.index(max(start)))
+
+
+varsInCA = float(len(ca1.keys()) * 2)
+varsInCG = float(len(cg1.keys()) * 2) - 2 # -2 because the sliding window doesn't hit the first and last nt twice
+varsInCM = 0
+varsInCE = 0
+
+def round_int(val):
+	return int(round(val))
+
+first = True
+seq_write_count=0
+with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence
+	with open(output, 'w') as o:
+		for line in f:
+			total += 1
+			if first:
+				o.write("Sequence ID\tbest_match\tnt_hit_percentage\tchunk_hit_percentage\tstart_locations\n")
+				first = False
+				continue
+			linesplt = line.split("\t")
+			if linesplt[2] == "No results":
+				pass
+			ID = linesplt[1]
+			currentIDHits = hits[ID]
+			possibleca = float(len(compiledregex["ca"]))
+			possiblecg = float(len(compiledregex["cg"]))
+			possiblecm = float(len(compiledregex["cm"]))
+			possiblece = float(len(compiledregex["ce"]))
+			cahits = currentIDHits["ca_hits"]
+			cghits = currentIDHits["cg_hits"]
+			cmhits = currentIDHits["cm_hits"]
+			cehits = currentIDHits["ce_hits"]
+			if cahits >= cghits and cahits >= cmhits and cahits >= cehits: #its a ca gene
+				ca1hits = currentIDHits["ca1"]
+				ca2hits = currentIDHits["ca2"]
+				if ca1hits >= ca2hits:
+					o.write(ID + "\tIGA1\t" + str(round_int(ca1hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n")
+				else:
+					o.write(ID + "\tIGA2\t" + str(round_int(ca2hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n")
+			elif cghits >= cahits and cghits >= cmhits and cghits >= cehits: #its a cg gene
+				cg1hits = currentIDHits["cg1"]
+				cg2hits = currentIDHits["cg2"]
+				cg3hits = currentIDHits["cg3"]
+				cg4hits = currentIDHits["cg4"]
+				if cg1hits >= cg2hits and cg1hits >= cg3hits and cg1hits >= cg4hits: #cg1 gene
+					o.write(ID + "\tIGG1\t" + str(round_int(cg1hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
+				elif cg2hits >= cg1hits and cg2hits >= cg3hits and cg2hits >= cg4hits: #cg2 gene
+					o.write(ID + "\tIGG2\t" + str(round_int(cg2hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
+				elif cg3hits >= cg1hits and cg3hits >= cg2hits and cg3hits >= cg4hits: #cg3 gene
+					o.write(ID + "\tIGG3\t" + str(round_int(cg3hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
+				else: #cg4 gene
+					o.write(ID + "\tIGG4\t" + str(round_int(cg4hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n")
+			else: #its a cm or ce gene
+				if cmhits >= cehits:
+					o.write(ID + "\tIGM\t100\t" + str(round_int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cm"] + "\n")
+				else:
+					o.write(ID + "\tIGE\t100\t" + str(round_int(cehits / possiblece * 100)) + "\t" + start_location[ID + "_ce"] + "\n")
+			seq_write_count += 1
+
+print "Time: %i" % (int(time.time() * 1000) - starttime)
+
+print "Number of sequences written to file:", seq_write_count
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/imgt_loader.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,98 @@
+args <- commandArgs(trailingOnly = TRUE)
+# Arguments: <summary file> <amino acid sequence file> <junction file> <output file>
+summ.file = args[1]
+aa.file = args[2]
+junction.file = args[3]
+out.file = args[4]
+
+summ = read.table(summ.file, sep="\t", header=TRUE, quote="", fill=TRUE)
+aa = read.table(aa.file, sep="\t", header=TRUE, quote="", fill=TRUE)
+junction = read.table(junction.file, sep="\t", header=TRUE, quote="", fill=TRUE)
+stopifnot(nrow(aa) == nrow(summ), nrow(junction) == nrow(summ)) # the columns of the three tables are combined by row position below
+fix_column_names = function(df){
+    if("V.DOMAIN.Functionality" %in% names(df)){
+        names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality"
+        print("found V.DOMAIN.Functionality, changed")
+    }
+    if("V.DOMAIN.Functionality.comment" %in% names(df)){
+        names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment"
+        print("found V.DOMAIN.Functionality.comment, changed")
+    }
+    return(df)
+}
+# Rename the "V.DOMAIN."-prefixed functionality columns, when present, in all three tables.
+summ = fix_column_names(summ)
+aa = fix_column_names(aa)
+junction = fix_column_names(junction)
+
+old_summary_columns=c('Sequence.ID','JUNCTION.frame','V.GENE.and.allele','D.GENE.and.allele','J.GENE.and.allele','CDR1.IMGT.length','CDR2.IMGT.length','CDR3.IMGT.length','Orientation')
+old_sequence_columns=c('CDR1.IMGT','CDR2.IMGT','CDR3.IMGT')
+old_junction_columns=c('JUNCTION')
+# Columns copied unchanged from the summary, sequence and junction tables.
+added_summary_columns=c('Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence')
+added_sequence_columns=c('FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT')
+# 'P3.D3.nt.nb' follows 'P5.D3.nt.nb' below; 'P3.D2.nt.nb' used to be listed a second time there, so the D3 value was never exported.
+added_junction_columns=c('P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION')
+added_junction_columns=c(added_junction_columns, 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D3.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb')
+# 'N2.REGION.nt.nb' is listed in both parts; it is kept that way so the number and order of output columns do not change.
+out=summ[,c("Sequence.ID","JUNCTION.frame","V.GENE.and.allele","D.GENE.and.allele","J.GENE.and.allele")]
+
+out[,"CDR1.Seq"] = aa[,"CDR1.IMGT"]
+out[,"CDR1.Length"] = summ[,"CDR1.IMGT.length"]
+
+out[,"CDR2.Seq"] = aa[,"CDR2.IMGT"]
+out[,"CDR2.Length"] = summ[,"CDR2.IMGT.length"]
+
+out[,"CDR3.Seq"] = aa[,"CDR3.IMGT"]
+out[,"CDR3.Length"] = summ[,"CDR3.IMGT.length"]
+
+out[,"CDR3.Seq.DNA"] = junction[,"JUNCTION"]
+out[,"CDR3.Length.DNA"] = nchar(as.character(junction[,"JUNCTION"]))
+out[,"Strand"] = summ[,"Orientation"]
+out[,"CDR3.Found.How"] = "a"
+
+out[,added_summary_columns] = summ[,added_summary_columns]
+
+out[,added_sequence_columns] = aa[,added_sequence_columns]
+
+out[,added_junction_columns] = junction[,added_junction_columns]
+# Top V/D/J gene: the allele suffix ("*...") is removed first, then everything up to the last space.
+out[,"Top V Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"V.GENE.and.allele"]))
+out[,"Top D Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"D.GENE.and.allele"]))
+out[,"Top J Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"J.GENE.and.allele"]))
+# Fix the output column order; the names assigned afterwards are positional and must stay aligned with this list.
+out = out[,c('Sequence.ID','JUNCTION.frame','Top V Gene','Top D Gene','Top J Gene','CDR1.Seq','CDR1.Length','CDR2.Seq','CDR2.Length','CDR3.Seq','CDR3.Length','CDR3.Seq.DNA','CDR3.Length.DNA','Strand','CDR3.Found.How','Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence','FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT','P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D3.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb')]
+
+names(out) = c('ID','VDJ Frame','Top V Gene','Top D Gene','Top J Gene','CDR1 Seq','CDR1 Length','CDR2 Seq','CDR2 Length','CDR3 Seq','CDR3 Length','CDR3 Seq DNA','CDR3 Length DNA','Strand','CDR3 Found How','Functionality','V-REGION identity %','V-REGION identity nt','D-REGION reading frame','AA JUNCTION','Functionality comment','Sequence','FR1-IMGT','FR2-IMGT','FR3-IMGT','CDR3-IMGT','JUNCTION','J-REGION','FR4-IMGT','P3V-nt nb','N-REGION-nt nb','N1-REGION-nt nb','P5D-nt nb','P3D-nt nb','N2-REGION-nt nb','P5J-nt nb','3V-REGION trimmed-nt nb','5D-REGION trimmed-nt nb','3D-REGION trimmed-nt nb','5J-REGION trimmed-nt nb','N-REGION','N1-REGION','N2-REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D3.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb')
+
+out[,"VDJ Frame"] = as.character(out[,"VDJ Frame"])
+# The masks use %in%, which never yields NA; an NA inside a logical mask makes the data frame assignment fail.
+fltr = out[,"VDJ Frame"] %in% "in-frame"
+if(any(fltr)){
+	out[fltr, "VDJ Frame"] = "In-frame"
+}
+# "null", "out-of-frame" and the empty string are all written as "Out-of-frame".
+fltr = out[,"VDJ Frame"] %in% "null"
+if(any(fltr)){
+	out[fltr, "VDJ Frame"] = "Out-of-frame"
+}
+
+fltr = out[,"VDJ Frame"] %in% "out-of-frame"
+if(any(fltr)){
+	out[fltr, "VDJ Frame"] = "Out-of-frame"
+}
+
+fltr = out[,"VDJ Frame"] %in% ""
+if(any(fltr)){
+	out[fltr, "VDJ Frame"] = "Out-of-frame"
+}
+# Empty gene assignments are written as the literal string "NA".
+for(col in c('Top V Gene','Top D Gene','Top J Gene')){
+	out[,col] = as.character(out[,col])
+	fltr = out[,col] %in% ""
+	if(any(fltr)){
+		out[fltr,col] = "NA"
+	}
+}
+
+write.table(out, out.file, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/merge.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,27 @@
+args <- commandArgs(trailingOnly = TRUE)
+# Usage: merge.r <input.1> <input.2> <fields.1> <fields.2> <field.1> <field.2> <output>
+input.1 = args[1]
+input.2 = args[2]
+# fields.N: "all", or a comma separated list of the columns to keep from input.N
+fields.1 = args[3]
+fields.2 = args[4]
+# field.N: name of the join column in input.N
+field.1 = args[5]
+field.2 = args[6]
+
+output = args[7]
+
+dat1 = read.table(input.1, header=TRUE, sep="\t", quote="", stringsAsFactors=FALSE, fill=TRUE, row.names=NULL)
+if(fields.1 != "all"){
+	fields.1 = unlist(strsplit(fields.1, ","))
+	dat1 = dat1[,fields.1, drop=FALSE] # drop=FALSE: a single selected column must stay a data frame for merge()
+}
+dat2 = read.table(input.2, header=TRUE, sep="\t", quote="", stringsAsFactors=FALSE, fill=TRUE, row.names=NULL)
+if(fields.2 != "all"){
+	fields.2 = unlist(strsplit(fields.2, ","))
+	dat2 = dat2[,fields.2, drop=FALSE]
+}
+
+dat3 = merge(dat1, dat2, by.x=field.1, by.y=field.2)
+
+write.table(dat3, output, sep="\t",quote=FALSE,row.names=FALSE,col.names=TRUE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/merge_and_filter.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,304 @@
+args <- commandArgs(trailingOnly = TRUE)
+
+# --- positional arguments: input tables -------------------------------------
+summaryfile <- args[1]
+sequencesfile <- args[2]
+mutationanalysisfile <- args[3]
+mutationstatsfile <- args[4]
+hotspotsfile <- args[5]
+aafile <- args[6]
+gene_identification_file <- args[7]
+
+# --- positional arguments: output locations ---------------------------------
+output <- args[8]
+before.unique.file <- args[9]
+unmatchedfile <- args[10]
+
+# --- positional arguments: filter settings ----------------------------------
+method <- args[11]
+functionality <- args[12]
+unique.type <- args[13]
+filter.unique <- args[14]
+filter.unique.count <- as.numeric(args[15])
+class.filter <- args[16]
+empty.region.filter <- args[17]
+
+print(paste("filter.unique.count:", filter.unique.count))
+
+# Every input is a tab-separated table with a header row; quoting is disabled
+# and short rows are padded (fill).
+read.input.table <- function(path){
+	read.table(path, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
+}
+
+summ <- read.input.table(summaryfile)
+sequences <- read.input.table(sequencesfile)
+mutationanalysis <- read.input.table(mutationanalysisfile)
+mutationstats <- read.input.table(mutationstatsfile)
+hotspots <- read.input.table(hotspotsfile)
+AAs <- read.input.table(aafile)
+gene_identification <- read.input.table(gene_identification_file)
+
+# Rename the "V.DOMAIN.Functionality" / "V.DOMAIN.Functionality.comment"
+# columns of `df` to "Functionality" / "Functionality.comment" when present,
+# printing a note for each rename. Returns the (possibly renamed) data frame.
+fix_column_names <- function(df){
+    renames <- c("V.DOMAIN.Functionality" = "Functionality",
+                 "V.DOMAIN.Functionality.comment" = "Functionality.comment")
+    for(old.name in names(renames)){
+        hit <- names(df) == old.name
+        if(any(hit)){
+            names(df)[hit] <- renames[[old.name]]
+            print(paste("found ", old.name, ", changed", sep=""))
+        }
+    }
+    df
+}
+
+# Make the Sequence.ID column of `df` unique by appending the row number
+# (separated by a space) to every ID. Returns the modified data frame.
+# seq_len() is used instead of 1:nrow(df) so that an empty data frame is
+# returned unchanged rather than failing on the two-element sequence c(1, 0).
+fix_non_unique_ids = function(df){
+	df$Sequence.ID = paste(df$Sequence.ID, seq_len(nrow(df)))
+	return(df)
+}
+
+# Give every loaded table the same Functionality column names.
+summ <- fix_column_names(summ)
+sequences <- fix_column_names(sequences)
+mutationanalysis <- fix_column_names(mutationanalysis)
+mutationstats <- fix_column_names(mutationstats)
+hotspots <- fix_column_names(hotspots)
+AAs <- fix_column_names(AAs)
+
+if(method == "blastn"){
+	# expected input columns:
+	# qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore
+
+	# keep only the first row listed for each query sequence
+	first.hit <- !duplicated(gene_identification$qseqid)
+
+	# reference length per subject id, used to express the alignment length as
+	# a percentage of the reference
+	ref_length <- data.frame(
+		sseqid=c("ca1", "ca2", "cg1", "cg2", "cg3", "cg4", "cm"),
+		ref.length=c(81,81,141,141,141,141,52)
+	)
+	gene_identification <- merge(gene_identification[first.hit,], ref_length, by="sseqid", all.x=TRUE)
+	gene_identification$chunk_hit_percentage <- (gene_identification$length / gene_identification$ref.length) * 100
+
+	# reduce to the five columns the rest of the script works with
+	gene_identification <- setNames(
+		gene_identification[,c("qseqid", "chunk_hit_percentage", "pident", "qstart", "sseqid")],
+		c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")
+	)
+}
+
+#print("Summary analysis files columns")
+#print(names(summ))
+
+
+
+# Row count of the summary table before any filtering; it is the denominator
+# of the percentages written to the filtering-steps table at the end.
+input.sequence.count <- nrow(summ)
+print(paste("Number of sequences in summary file:", input.sequence.count))
+
+# Log of (step label, remaining rows); one row is appended after each filter.
+filtering.steps <- data.frame(character(0), numeric(0))
+filtering.steps <- rbind(filtering.steps, c("Input", input.sequence.count))
+
+# Both columns are held as character while rows are being appended.
+filtering.steps[,1] <- as.character(filtering.steps[,1])
+filtering.steps[,2] <- as.character(filtering.steps[,2])
+
+# Attach the gene identification columns; sequences without an entry drop out.
+summ <- merge(summ, gene_identification, by="Sequence.ID")
+print(paste("Number of sequences after merging with gene identification:", nrow(summ)))
+
+summ <- summ[summ$Functionality != "No results",]
+print(paste("Number of sequences after 'No results' filter:", nrow(summ)))
+filtering.steps <- rbind(filtering.steps, c("After 'No results' filter", nrow(summ)))
+
+# Optional filter on the Functionality label; any other setting keeps all rows.
+status <- summ$Functionality
+if(functionality == "productive"){
+	summ <- summ[status == "productive (see comment)" | status == "productive",]
+} else if (functionality == "unproductive"){
+	summ <- summ[status == "unproductive (see comment)" | status == "unproductive",]
+} else if (functionality == "remove_unknown"){
+	is.unknown <- status == "No results" | status == "unknown (see comment)" | status == "unknown"
+	summ <- summ[!is.unknown,]
+}
+
+print(paste("Number of sequences after functionality filter:", nrow(summ)))
+filtering.steps <- rbind(filtering.steps, c("After functionality filter", nrow(summ)))
+
+if(FALSE){ # disabled: down-sample to 3% of the rows to speed up debugging
+    set.seed(1)
+    summ <- summ[sample(nrow(summ), floor(nrow(summ) * 0.03)),]
+    print(paste("Number of sequences after sampling 3%:", nrow(summ)))
+
+    filtering.steps <- rbind(filtering.steps, c("Number of sequences after sampling 3%", nrow(summ)))
+}
+
+# Columns of `extra` whose names do not already occur in `base`; the first
+# column name of `base` is exempt so the join key is kept on both sides.
+new.columns <- function(extra, base){
+	extra[,!(names(extra) %in% names(base)[-1])]
+}
+
+print("mutation analysis files columns")
+mutationanalysis.new <- new.columns(mutationanalysis, summ)
+print(names(mutationanalysis.new))
+result <- merge(summ, mutationanalysis.new, by="Sequence.ID")
+print(paste("Number of sequences after merging with mutation analysis file:", nrow(result)))
+
+result <- merge(result, new.columns(mutationstats, result), by="Sequence.ID")
+print(paste("Number of sequences after merging with mutation stats file:", nrow(result)))
+
+print("hotspots files columns")
+hotspots.new <- new.columns(hotspots, result)
+print(names(hotspots.new))
+result <- merge(result, hotspots.new, by="Sequence.ID")
+print(paste("Number of sequences after merging with hotspots file:", nrow(result)))
+
+# Region sequences are attached under a ".seq" suffix; all.x keeps rows of
+# `result` that have no entry in the sequences table.
+print("sequences files columns")
+region.columns <- c("FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")
+print(region.columns)
+
+sequences <- sequences[,c("Sequence.ID", region.columns)]
+names(sequences) <- c("Sequence.ID", paste(region.columns, ".seq", sep=""))
+result <- merge(result, sequences, by="Sequence.ID", all.x=TRUE)
+
+# The CDR3 column of the AA table is attached as CDR3.IMGT.AA.
+AAs <- AAs[,c("Sequence.ID", "CDR3.IMGT")]
+names(AAs) <- c("Sequence.ID", "CDR3.IMGT.AA")
+result <- merge(result, AAs, by="Sequence.ID", all.x=TRUE)
+
+print(paste("Number of sequences in result after merging with sequences:", nrow(result)))
+
+# Strip a leading "Homsap " and everything from the first "*" onwards.
+strip.allele <- function(gene.and.allele){
+	gsub("[*].*", "", gsub("^Homsap ", "", gene.and.allele))
+}
+result$VGene <- strip.allele(result$V.GENE.and.allele)
+result$DGene <- strip.allele(result$D.GENE.and.allele)
+result$JGene <- strip.allele(result$J.GENE.and.allele)
+
+# class.filter has the form "<chunk hit threshold>_<nt hit threshold>".
+splt = strsplit(class.filter, "_")[[1]]
+chunk_hit_threshold = as.numeric(splt[1])
+nt_hit_threshold = as.numeric(splt[2])
+
+higher_than=(result$chunk_hit_percentage >= chunk_hit_threshold & result$nt_hit_percentage >= nt_hit_threshold)
+
+# Rows that fail a threshold get "unmatched," prefixed to their best_match.
+# Rows whose comparison is NA are excluded from the index: an NA in a logical
+# row index makes subscripted assignment on a data frame fail, which used to
+# abort the script as soon as one NA occurred together with a failing row.
+# NOTE(review): NA rows therefore keep their best_match unchanged, as they did
+# before whenever no row failed - confirm whether they should be "unmatched".
+below_threshold = !is.na(higher_than) & !higher_than
+if(any(below_threshold)){
+	result[below_threshold,"best_match"] = paste("unmatched,", result[below_threshold,"best_match"])
+}
+
+# The special setting "101_101" collapses every sequence into one class "all".
+if(class.filter == "101_101"){
+	result$best_match = "all"
+}
+
+write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T)
+
+for(region in c("CDR1", "FR2", "CDR2", "FR3")){
+	region.seq <- result[[paste(region, ".IMGT.seq", sep="")]]
+	print(paste("Number of empty", region, "sequences:", sum(region.seq == "", na.rm=TRUE)))
+}
+
+# Regions that must be non-empty, per empty.region.filter setting.
+# Any other setting skips this filter.
+required.regions <- list(
+	leader = c("FR1", "CDR1", "FR2", "CDR2", "FR3"),
+	FR1 = c("CDR1", "FR2", "CDR2", "FR3"),
+	CDR1 = c("FR2", "CDR2", "FR3"),
+	FR2 = c("CDR2", "FR3")
+)
+if(any(empty.region.filter == names(required.regions))){
+	non.empty <- lapply(required.regions[[empty.region.filter]], function(region){
+		result[[paste(region, ".IMGT.seq", sep="")]] != ""
+	})
+	result <- result[Reduce("&", non.empty),]
+}
+
+print(paste("After removal sequences that are missing a gene region:", nrow(result)))
+filtering.steps <- rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result)))
+
+# Regions that must not contain an "n"/"N", per empty.region.filter setting.
+n.checked.regions <- list(
+	leader = c("FR1", "FR2", "FR3", "CDR1", "CDR2", "CDR3"),
+	FR1 = c("FR2", "FR3", "CDR1", "CDR2", "CDR3"),
+	CDR1 = c("FR2", "FR3", "CDR2", "CDR3"),
+	FR2 = c("FR3", "CDR2", "CDR3")
+)
+if(any(empty.region.filter == names(n.checked.regions))){
+	contains.n <- lapply(n.checked.regions[[empty.region.filter]], function(region){
+		grepl("n|N", result[[paste(region, ".IMGT.seq", sep="")]])
+	})
+	result <- result[!Reduce("|", contains.n),]
+}
+
+print(paste("Number of sequences in result after n filtering:", nrow(result)))
+filtering.steps <- rbind(filtering.steps, c("After N filter", nrow(result)))
+
+# Mutation-count columns that may carry a parenthesised suffix after the number.
+cleanup_columns = c("FR1.IMGT.Nb.of.mutations", 
+                    "CDR1.IMGT.Nb.of.mutations", 
+                    "FR2.IMGT.Nb.of.mutations", 
+                    "CDR2.IMGT.Nb.of.mutations", 
+                    "FR3.IMGT.Nb.of.mutations")
+
+for(col in cleanup_columns){
+  # drop the "(...)" part, convert to numeric, and treat unparsable values as 0
+  result[,col] = gsub("\\(.*\\)", "", result[,col])
+  result[,col] = as.numeric(result[,col])
+  # Only this column is reset. Indexing the rows without a column
+  # (result[rows,] = 0) overwrote every field of the affected rows with 0,
+  # including the sequence ID and the sequences themselves.
+  result[is.na(result[,col]), col] = 0
+}
+
+write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T)
+
+
+if(filter.unique != "no"){
+	clmns <- names(result)
+
+	# Region sequences that make up the uniqueness key, per empty.region.filter.
+	unique.regions <- list(
+		leader = c("FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3"),
+		FR1 = c("CDR1", "FR2", "CDR2", "FR3", "CDR3"),
+		CDR1 = c("FR2", "CDR2", "FR3", "CDR3"),
+		FR2 = c("CDR2", "FR3", "CDR3")
+	)
+
+	if(filter.unique == "remove_vjaa"){
+		# key on V gene, J gene and CDR3 amino acids instead of region sequences
+		result$unique.def <- paste(result$VGene, result$JGene, result$CDR3.IMGT.AA)
+	} else if(any(empty.region.filter == names(unique.regions))){
+		key.columns <- paste(unique.regions[[empty.region.filter]], ".IMGT.seq", sep="")
+		result$unique.def <- do.call(paste, unname(as.list(result[key.columns])))
+	}
+
+	if(grepl("remove", filter.unique)){
+		# keep only keys seen more than once ...
+		seen.repeatedly <- duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=TRUE)
+		result <- result[seen.repeatedly,]
+		# ... and at least filter.unique.count times
+		unique.defs <- data.frame(table(result$unique.def))
+		unique.defs <- unique.defs[unique.defs$Freq >= filter.unique.count,]
+		result <- result[result$unique.def %in% unique.defs$Var1,]
+	}
+
+	if(filter.unique != "remove_vjaa"){
+		# Append the class so identical sequences in different classes are all
+		# kept; the text after the first comma is cut so "unmatched, <class>"
+		# rows share the single label "unmatched".
+		class.label <- gsub(",.*", "", result$best_match)
+		result$unique.def <- paste(result$unique.def, class.label)
+	}
+
+	result <- result[!duplicated(result$unique.def),]
+}
+
+after.unique.file <- gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file)
+write.table(result, after.unique.file, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
+
+filtering.steps <- rbind(filtering.steps, c("After filter unique sequences", nrow(result)))
+
+print(paste("Number of sequences in result after unique filtering:", nrow(result)))
+
+# NOTE(review): this tests `summ` (the table after the functionality filter),
+# not `result` - confirm whether the later filters were meant to be covered.
+if(nrow(summ) == 0){
+	stop("No data remaining after filter")
+}
+
+# Class label without the text after the first comma, so every
+# "unmatched, <class>" row carries the plain label "unmatched".
+result$best_match_class <- gsub(",.*", "", result$best_match)
+
+# Duplicate key: the columns listed in unique.type, joined with ":".
+key.columns <- unlist(strsplit(unique.type, ","))
+result$past <- do.call(paste, c(result[key.columns], sep = ":"))
+
+# Put matched rows first so that, among rows sharing a key, a matched row is
+# the one that survives the duplicated() filter below.
+is.unmatched <- grepl("unmatched", result$best_match)
+result.matched <- result[!is.unmatched,]
+result.unmatched <- result[is.unmatched,]
+result <- rbind(result.matched, result.unmatched)
+
+result <- result[!(duplicated(result$past)), ]
+
+# remove the two helper columns again
+result <- result[,!(names(result) %in% c("past", "best_match_class"))]
+
+print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result)))
+
+filtering.steps <- rbind(filtering.steps, c("After remove duplicates based on filter", nrow(result)))
+
+starts.unmatched <- grepl("^unmatched", result$best_match)
+unmatched <- result[starts.unmatched, c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
+
+print(paste("Number of rows in result:", nrow(result)))
+print(paste("Number of rows in unmatched:", nrow(unmatched)))
+
+matched.sequences <- result[!starts.unmatched,]
+
+write.table(x=matched.sequences, file=gsub("merged.txt$", "filtered.txt", output), sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
+
+matched.sequences.count <- nrow(matched.sequences)
+unmatched.sequences.count <- sum(starts.unmatched)
+if(matched.sequences.count <= unmatched.sequences.count){
+	print("WARNING NO MATCHED (SUB)CLASS SEQUENCES!!")
+}
+
+filtering.steps <- rbind(filtering.steps, c("Number of matched sequences", matched.sequences.count))
+filtering.steps <- rbind(filtering.steps, c("Number of unmatched sequences", unmatched.sequences.count))
+
+# counts were kept as text while appending; make them numeric and add the
+# percentage of the input row count
+filtering.steps[,2] <- as.numeric(filtering.steps[,2])
+filtering.steps$perc <- round(filtering.steps[,2] / input.sequence.count * 100, 2)
+
+write.table(x=filtering.steps, file=gsub("unmatched", "filtering_steps", unmatchedfile), sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)
+
+write.table(x=result, file=output, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
+write.table(x=unmatched, file=unmatchedfile, sep="\t", quote=FALSE, row.names=FALSE, col.names=TRUE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/mutation_column_checker.py	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,27 @@
+import re
+
+mutationMatcher = re.compile("^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")
+
+with open("7_V-REGION-mutation-and-AA-change-table.txt", 'r') as file_handle:
+    first = True
+    fr3_index = -1
+    for i, line in enumerate(file_handle):
+        line_split = line.split("\t")
+        if first:
+            fr3_index = line_split.index("FR3-IMGT")
+            first = False
+            continue
+
+        if len(line_split) < fr3_index:
+            continue
+        
+        fr3_data = line_split[fr3_index]
+        if len(fr3_data) > 5:
+            try:
+                test = [mutationMatcher.match(x).groups() for x in fr3_data.split("|") if x]
+            except:
+                print(line_split[1])
+                print("Something went wrong at line {line} with:".format(line=line_split[0]))
+                #print([x for x in fr3_data.split("|") if not mutationMatcher.match(x)])
+        if i % 100000 == 0:
+            print(i)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/naive_output.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,45 @@
+# Split the "naive" table into one file per class prefix (ca / cg / cm), using
+# the best_match column of the SHM table as the sample label.
+args <- commandArgs(trailingOnly = TRUE)
+
+naive.file <- args[1]
+shm.file <- args[2]
+output.file.ca <- args[3]
+output.file.cg <- args[4]
+output.file.cm <- args[5]
+
+naive <- read.table(naive.file, sep="\t", header=TRUE, quote="", fill=TRUE)
+shm.merge <- read.table(shm.file, sep="\t", header=TRUE, quote="", fill=TRUE)
+
+# Attach best_match to every naive row with a matching ID and call it "Sample".
+final <- merge(naive, shm.merge[,c("Sequence.ID", "best_match")], by.x="ID", by.y="Sequence.ID")
+print(paste("nrow final:", nrow(final)))
+names(final)[names(final) == "best_match"] <- "Sample"
+
+# Missing values in numeric columns are written as 0.
+numeric.columns <- sapply(final, is.numeric)
+final.numeric <- final[,numeric.columns]
+final.numeric[is.na(final.numeric)] <- 0
+final[,numeric.columns] <- final.numeric
+
+# One output file per class prefix. The Replicate column is only added when
+# the subset has rows, so an empty subset is written without it.
+class.outputs <- c(ca = output.file.ca, cg = output.file.cg, cm = output.file.cm)
+for(class.prefix in names(class.outputs)){
+	final.class <- final[grepl(paste("^", class.prefix, sep=""), final$Sample),]
+	if(nrow(final.class) > 0){
+		final.class$Replicate <- 1
+	}
+	write.table(final.class, class.outputs[[class.prefix]], quote=FALSE, sep="\t", row.names=FALSE, col.names=TRUE)
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/new_imgt.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,40 @@
+# Filter every .txt table in a directory, in place, down to the sequences
+# listed in a merged table.
+#
+# Positional arguments:
+#   1  directory holding the .txt tables (each is overwritten)
+#   2  merged table listing the sequences to keep
+#   3  best_match prefix to keep, or "-" to skip the prefix filter
+args <- commandArgs(trailingOnly = TRUE)
+
+imgt.dir = args[1]
+merged.file = args[2]
+gene = args[3]
+
+merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, comment.char="", quote="")
+
+if(!("Sequence.ID" %in% names(merged))){ #change-o db
+	print("Change-O DB changing 'SEQUENCE_ID' to 'Sequence.ID'")
+	# names(merged), not names[merged]: subsetting the function `names` itself
+	# raises an error, so this branch could never complete before
+	names(merged)[names(merged) == "SEQUENCE_ID"] = "Sequence.ID"
+}
+
+if(gene != "-"){
+	merged = merged[grepl(paste("^", gene, sep=""), merged$best_match),]
+}
+
+if("best_match" %in% names(merged)){
+	merged = merged[!grepl("unmatched", merged$best_match),]
+}
+
+# row count of the most recently processed table; reported after the loop
+nrow_dat = 0
+
+# pattern is a regular expression, not a glob: match names ending in ".txt"
+for(f in list.files(imgt.dir, pattern="\\.txt$")){
+	#print(paste("filtering", f))
+	path = file.path(imgt.dir, f)
+	dat = read.table(path, header=T, sep="\t", fill=T, quote="", stringsAsFactors=F, check.names=FALSE, comment.char="")
+	
+	dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,]
+	
+	nrow_dat = nrow(dat)
+	
+	# scalar condition, so && (short-circuit) rather than the elementwise &
+	if(nrow(dat) > 0 && grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." file
+		dat[,grepl("^FR1", names(dat))] = 0
+	}
+	
+	write.table(dat, path, quote=F, sep="\t", row.names=F, col.names=T, na="")
+}
+
+print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences"))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/pattern_plots.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,178 @@
+library(ggplot2)
+library(reshape2)
+library(scales)
+
+args <- commandArgs(trailingOnly = TRUE)
+
+# comma-separated table behind the "SHM overview" table of the html report ("data_sum.txt")
+input.file <- args[1]
+
+# Each plot gets a path stem; the png, txt and pdf outputs share that stem.
+plot1.path <- args[2]
+plot1.png <- paste0(plot1.path, ".png")
+plot1.txt <- paste0(plot1.path, ".txt")
+plot1.pdf <- paste0(plot1.path, ".pdf")
+
+plot2.path <- args[3]
+plot2.png <- paste0(plot2.path, ".png")
+plot2.txt <- paste0(plot2.path, ".txt")
+plot2.pdf <- paste0(plot2.path, ".pdf")
+
+plot3.path <- args[4]
+plot3.png <- paste0(plot3.path, ".png")
+plot3.txt <- paste0(plot3.path, ".txt")
+plot3.pdf <- paste0(plot3.path, ".pdf")
+
+clean.output <- args[5]
+
+# no header row; the first column supplies the row names
+dat <- read.table(input.file, header=FALSE, sep=",", quote="", stringsAsFactors=FALSE, fill=TRUE, row.names=1)
+
+# Columns come in x/y/z triplets: one triplet per class, then "un", then "all".
+classes <- c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE")
+xyz <- c("x", "y", "z")
+class.columns <- paste(rep(classes, each=3), xyz, sep=".")
+un.columns <- paste("un", xyz, sep=".")
+all.columns <- paste("all", xyz, sep=".")
+new.names <- c(class.columns, un.columns, all.columns)
+
+names(dat) <- new.names
+
+# The cleaned table lists the "all" triplet before the "un" triplet.
+clean.dat <- dat[,c(class.columns, all.columns, un.columns)]
+
+write.table(clean.dat, clean.output, quote=FALSE, sep="\t", na="", row.names=TRUE, col.names=NA)
+
+# Two summary rows, each the column-wise sum of a pair of input rows
+# (rows 14+15 and rows 16+17).
+dat["RGYW.WRCY",] <- colSums(dat[c(14,15),], na.rm=TRUE)
+dat["TW.WA",] <- colSums(dat[c(16,17),], na.rm=TRUE)
+
+# Plot 1 uses the z column of every class for those two rows.
+z.columns <- names(dat)[grepl(".z", names(dat))]
+data1 <- dat[c("RGYW.WRCY", "TW.WA"), z.columns]
+names(data1) <- gsub("\\..*", "", names(data1))
+
+# long format: one row per (class, motif) pair
+data1 <- melt(t(data1))
+names(data1) <- c("Class", "Type", "value")
+
+data1$value[is.na(data1$value)] <- 0
+
+data1 <- data1[order(data1$Type),]
+
+write.table(data1, plot1.txt, quote=FALSE, sep="\t", na="", row.names=FALSE, col.names=TRUE)
+
+p <- ggplot(data1, aes(Class, value)) +
+	geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") +
+	ylab("% of mutations") +
+	guides(fill=guide_legend(title=NULL)) +
+	ggtitle("Percentage of mutations in AID and pol eta motives") +
+	theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) +
+	scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4"))
+
+png(filename=plot1.png, width=510, height=300)
+print(p)
+dev.off()
+
+ggsave(plot1.pdf, p)
+
+# Plot 2 starts from the x column of every class for input rows 1 and 5-8.
+x.columns <- names(dat)[grepl("\\.x", names(dat))]
+data2 <- dat[c(1, 5:8),]
+data2 <- data2[,x.columns]
+names(data2) <- gsub(".x", "", names(data2))
+
+total.mutations <- "Number of Mutations (%)"
+gc.transitions <- "Transitions at G C (%)"
+
+# A/T is taken from the z columns of the "Targeting of A T (%)" row.
+data2["A/T",] <- dat["Targeting of A T (%)",names(dat)[grepl("\\.z", names(dat))]]
+
+data2["G/C transitions",] <- round(data2[gc.transitions,] / data2[total.mutations,] * 100, 1)
+
+# The y columns of the transitions row are stored as "mutation.at.gc";
+# transversions are that value minus the transitions.
+data2["mutation.at.gc",] <- dat[gc.transitions,names(dat)[grepl("\\.y", names(dat))]]
+data2["G/C transversions",] <- round((data2["mutation.at.gc",] - data2[gc.transitions,]) / data2[total.mutations,] * 100, 1)
+
+# divisions by zero give NaN/Inf; plot those as 0
+for(row.name in c("G/C transversions", "G/C transitions")){
+	row.values <- unlist(data2[row.name,])
+	data2[row.name, is.nan(row.values) | is.infinite(row.values)] <- 0
+}
+
+data2 <- melt(t(data2[c("A/T","G/C transitions","G/C transversions"),]))
+names(data2) <- c("Class", "Type", "value")
+
+data2$value[is.na(data2$value)] <- 0
+
+data2 <- data2[order(data2$Type),]
+
+write.table(data2, plot2.txt, quote=FALSE, sep="\t", na="", row.names=FALSE, col.names=TRUE)
+
+p <- ggplot(data2, aes(x=Class, y=value, fill=Type)) +
+	geom_bar(position="fill", stat="identity", colour = "black") +
+	scale_y_continuous(labels=percent_format()) +
+	guides(fill=guide_legend(title=NULL)) +
+	ylab("% of mutations") +
+	ggtitle("Relative mutation patterns") +
+	theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) +
+	scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
+
+png(filename=plot2.png, width=480, height=300)
+print(p)
+dev.off()
+
+ggsave(plot2.pdf, p)
+
+# Plot 3 starts from the x column of every class for input rows 5, 6, 8 and 18-21.
+data3 <- dat[c(5, 6, 8, 18:21),]
+data3 <- data3[,names(data3)[grepl("\\.x", names(data3))]]
+names(data3) <- gsub(".x", "", names(data3))
+
+# Denominators are the sums of the "C"+"G" rows and of the "A"+"T" rows.
+gc.total <- data3["C",] + data3["G",]
+at.total <- data3["A",] + data3["T",]
+
+data3["G/C transitions",] <- round(data3["Transitions at G C (%)",] / gc.total * 100, 1)
+
+data3["G/C transversions",] <- round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / gc.total * 100, 1)
+
+data3["A/T",] <- round(data3["Targeting of A T (%)",] / at.total * 100, 1)
+
+# divisions by zero give NaN/Inf; plot those as 0
+for(row.name in c("G/C transitions", "G/C transversions", "A/T")){
+	row.values <- unlist(data3[row.name,])
+	data3[row.name, is.nan(row.values) | is.infinite(row.values)] <- 0
+}
+
+# rows 8-10 are the three rows added above
+data3 <- melt(t(data3[8:10,]))
+names(data3) <- c("Class", "Type", "value")
+
+data3$value[is.na(data3$value)] <- 0
+
+data3 <- data3[order(data3$Type),]
+
+write.table(data3, plot3.txt, quote=FALSE, sep="\t", na="", row.names=FALSE, col.names=TRUE)
+
+p <- ggplot(data3, aes(Class, value)) +
+	geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") +
+	ylab("% of nucleotides") +
+	guides(fill=guide_legend(title=NULL)) +
+	ggtitle("Absolute mutation patterns") +
+	theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) +
+	scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white"))
+
+png(filename=plot3.png, width=480, height=300)
+print(p)
+dev.off()
+
+ggsave(plot3.pdf, p)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/plot_pdf.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,17 @@
+library(ggplot2)
+
+# Save every plot stored in an .RData file to its own file.
+#
+# Positional arguments:
+#   1  .RData file; loading it must define `pdfplots`
+#   2  directory the plots are written to (becomes the working directory)
+args <- commandArgs(trailingOnly = TRUE)
+print(args)
+
+input = args[1]
+outputdir = args[2]
+setwd(outputdir)
+
+load(input)
+
+# presumably a named list of ggplot objects keyed by output file name - TODO confirm
+print(names(pdfplots))
+
+for(n in names(pdfplots)){
+    print(paste("n:", n))
+    # Arguments are named explicitly; the previous call passed the plot first
+    # and relied on partial matching of `file` to `filename`.
+    ggsave(filename=n, plot=pdfplots[[n]])
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/sequence_overview.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,363 @@
+library(reshape2)
+
+args <- commandArgs(trailingOnly = TRUE)
+
+# --- positional arguments ----------------------------------------------------
+before.unique.file <- args[1]
+merged.file <- args[2]
+outputdir <- args[3]
+gene.classes <- unlist(strsplit(args[4], ","))   # comma-separated list
+hotspot.analysis.sum.file <- args[5]
+empty.region.filter <- args[6]
+
+# --- derived output locations -------------------------------------------------
+NToverview.file <- file.path(outputdir, "ntoverview.txt")
+NTsum.file <- file.path(outputdir, "ntsum.txt")
+main.html <- "index.html"
+
+
+# All html pages are written relative to the output directory.
+setwd(outputdir)
+
+before.unique <- read.table(before.unique.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
+merged <- read.table(merged.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
+hotspot.analysis.sum <- read.table(hotspot.analysis.sum.file, header=FALSE, sep=",", fill=TRUE, stringsAsFactors=FALSE, quote="")
+
+#before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]
+
+# Region sequences that are concatenated (space separated) into the per-row
+# sequence key, depending on empty.region.filter.
+key.regions <- list(
+	leader = c("FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3"),
+	FR1 = c("CDR1", "FR2", "CDR2", "FR3", "CDR3"),
+	CDR1 = c("FR2", "CDR2", "FR3", "CDR3"),
+	FR2 = c("CDR2", "FR3", "CDR3")
+)
+if(any(empty.region.filter == names(key.regions))){
+	key.columns <- paste(key.regions[[empty.region.filter]], ".IMGT.seq", sep="")
+	before.unique$seq_conc <- do.call(paste, unname(as.list(before.unique[key.columns])))
+}
+
+IDs <- before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
+IDs$best_match <- as.character(IDs$best_match)
+
+# One row per distinct sequence key with its number of occurrences,
+# ordered alphabetically by key.
+dat <- data.frame(table(before.unique$seq_conc))
+names(dat) <- c("seq_conc", "Freq")
+dat$seq_conc <- factor(dat$seq_conc)
+dat <- dat[order(as.character(dat$seq_conc)),]
+
+#writing html from R...
+# Background colour for one table cell.
+#   logical value (TRUE/FALSE/"T"/"F"/...)  -> green tint if true, red tint if false
+#   numeric value greater than 0            -> grey tint
+#   anything else (0, negatives, text, NA)  -> "white"
+# The value is converted to a number once, with the coercion warning silenced,
+# and compared numerically. Comparing a character value with `> 0` is a string
+# comparison whose result depends on the locale's collation.
+get.bg.color = function(val){
+	if(val %in% c("TRUE", "FALSE", "T", "F")){ #if its a logical value, give the background a green/red color
+		if(as.logical(val)){
+			return("#eafaf1")
+		}
+		return("#f9ebea")
+	}
+	num = suppressWarnings(as.numeric(val))
+	if(!is.na(num) && num > 0){ #if its a numerical value, give it a grey tint if its >0
+		return("#eaecee")
+	}
+	return("white")
+}
+# Wrap a single value in a <td> element whose bgcolor comes from get.bg.color().
+td <- function(val) {
+  cell.colour <- get.bg.color(val)
+  paste0("<td bgcolor='", cell.colour, "'>", val, "</td>")
+}
+# Render the elements of `val` as one <tr> row, one <td> cell per element.
+tr <- function(val) {
+	cells <- sapply(val, td)
+	paste(c("<tr>", cells, "</tr>"), collapse="")
+}
+
+# Anchor element showing `val` and pointing at the page "<clss>_<id>.html".
+make.link <- function(id, clss, val) {
+	target <- paste0(clss, "_", id, ".html")
+	paste0("<a href='", target, "'>", val, "</a>")
+}
+# Render a data frame as an html table, one <tr> per row (built with tr()).
+# Returns the html as a single string.
+# seq_len() keeps an empty data frame from being rendered as the two bogus
+# rows that 1:nrow(df) == c(1, 0) would select, and the rows are collected
+# first and joined once instead of growing the string inside the loop.
+tbl = function(df) {
+	rows = vapply(seq_len(nrow(df)), function(i) tr(df[i,]), character(1))
+	res = paste(c("<table border='1'>", rows), collapse="")
+	paste(res, "</table>")
+}
+
+# Write `txt` to the main html page; appends unless append=FALSE.
+write.main.html <- function(txt, append=TRUE){
+	cat(txt, file=main.html, append=append)
+}
+
+# The first write starts the page from scratch.
+write.main.html("<center><img src=''> Please note that this tab is based on all sequences before filter unique sequences and the remove duplicates based on filters are applied. In this table only sequences occuring more than once are included. </center>", append=FALSE)
+write.main.html("<table border='1' class='pure-table pure-table-striped'>")
+
+# Caption naming the regions that make up the sequence key; no caption is
+# written for any other empty.region.filter setting.
+captions <- c(
+	leader = "<caption>FR1+CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>",
+	FR1 = "<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>",
+	CDR1 = "<caption>FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>",
+	FR2 = "<caption>CDR2+FR3+CDR3 sequences that show up more than once</caption>"
+)
+if(any(empty.region.filter == names(captions))){
+	write.main.html(captions[[empty.region.filter]])
+}
+
+# header row of the overview table
+header.row <- c(
+	"<tr>",
+	"<th>Sequence</th><th>Functionality</th><th>IGA1</th><th>IGA2</th><th>IGG1</th><th>IGG2</th><th>IGG3</th><th>IGG4</th><th>IGM</th><th>IGE</th><th>UN</th>",
+	"<th>total IGA</th><th>total IGG</th><th>total IGM</th><th>total IGE</th><th>number of subclasses</th><th>present in both IGA and IGG</th><th>present in IGA, IGG and IGM</th><th>present in IGA, IGG and IGE</th><th>present in IGA, IGG, IGM and IGE</th><th>IGA1+IGA2</th>",
+	"<th>IGG1+IGG2</th><th>IGG1+IGG3</th><th>IGG1+IGG4</th><th>IGG2+IGG3</th><th>IGG2+IGG4</th><th>IGG3+IGG4</th>",
+	"<th>IGG1+IGG2+IGG3</th><th>IGG2+IGG3+IGG4</th><th>IGG1+IGG2+IGG4</th><th>IGG1+IGG3+IGG4</th><th>IGG1+IGG2+IGG3+IGG4</th>",
+	"</tr>"
+)
+for(chunk in header.row){
+	write.main.html(chunk)
+}
+
+
+
+# counters updated while the sequences are processed
+single.sequences <- 0 # sequence only found once, skipped
+in.multiple <- 0 # same sequence across multiple subclasses
+multiple.in.one <- 0 # same sequence multiple times in one subclass
+unmatched <- 0 # all of the sequences are unmatched
+some.unmatched <- 0 # one or more sequences in a clone are unmatched
+matched <- 0 # should be the same as matched sequences
+
+sequence.id.page <- "by_id.html"
+
+for(i in seq_len(nrow(dat))){ #seq_len: no iterations for an empty dat (1:0 would visit rows 1 and 0)
+	same.seq = IDs$seq_conc == dat[i,c("seq_conc")] #rows of IDs with this seq_conc, computed once per iteration
+	ca1 = IDs[same.seq & grepl("^IGA1", IDs$best_match),]
+	ca2 = IDs[same.seq & grepl("^IGA2", IDs$best_match),]
+	
+	cg1 = IDs[same.seq & grepl("^IGG1", IDs$best_match),]
+	cg2 = IDs[same.seq & grepl("^IGG2", IDs$best_match),]
+	cg3 = IDs[same.seq & grepl("^IGG3", IDs$best_match),]
+	cg4 = IDs[same.seq & grepl("^IGG4", IDs$best_match),]
+	
+	cm = IDs[same.seq & grepl("^IGM", IDs$best_match),]
+	
+	ce = IDs[same.seq & grepl("^IGE", IDs$best_match),]
+	
+	un = IDs[same.seq & grepl("^unmatched", IDs$best_match),]
+	
+	allc = rbind(ca1, ca2, cg1, cg2, cg3, cg4, cm, ce, un)
+	
+	ca1.n = nrow(ca1)
+	ca2.n = nrow(ca2)
+	
+	cg1.n = nrow(cg1)
+	cg2.n = nrow(cg2)
+	cg3.n = nrow(cg3)
+	cg4.n = nrow(cg4)
+	
+	cm.n = nrow(cm)
+	
+	ce.n = nrow(ce)
+	
+	un.n = nrow(un)
+	
+	classes = c(ca1.n, ca2.n, cg1.n, cg2.n, cg3.n, cg4.n, cm.n, ce.n, un.n)
+	
+	classes.sum = sum(classes)
+	
+	if(classes.sum == 1){ #sequence occurs only once: counted, but not reported in the table
+		single.sequences = single.sequences + 1
+		next
+	}
+	
+	if(un.n == classes.sum){ #every occurrence is unmatched: counted, but not reported
+		unmatched = unmatched + 1
+		next
+	}
+	
+	classes.no.un = classes[-length(classes)] #drop the trailing 'unmatched' count
+	
+	in.classes = sum(classes.no.un > 0)
+	
+	matched = matched + in.classes #count in how many subclasses the sequence occurs.
+	
+	if(any(classes == classes.sum)){
+		multiple.in.one = multiple.in.one + 1
+	} else if (un.n > 0) {
+		some.unmatched = some.unmatched + 1
+	} else {
+		in.multiple = in.multiple + 1
+	}
+	
+	id = as.numeric(dat[i,"seq_conc"])
+	
+	functionality = paste(unique(allc[,"Functionality"]), collapse=",")
+	
+	#one detail page per (sub)class in which the sequence occurs; tbl() is defined elsewhere in this script
+	
+	if(ca1.n > 0){
+		cat(tbl(ca1), file=paste("IGA1_", id, ".html", sep=""))
+	}
+
+	if(ca2.n > 0){
+		cat(tbl(ca2), file=paste("IGA2_", id, ".html", sep=""))
+	}
+
+	if(cg1.n > 0){
+		cat(tbl(cg1), file=paste("IGG1_", id, ".html", sep=""))
+	}
+
+	if(cg2.n > 0){
+		cat(tbl(cg2), file=paste("IGG2_", id, ".html", sep=""))
+	}
+
+	if(cg3.n > 0){
+		cat(tbl(cg3), file=paste("IGG3_", id, ".html", sep=""))
+	}
+
+	if(cg4.n > 0){
+		cat(tbl(cg4), file=paste("IGG4_", id, ".html", sep=""))
+	}
+
+	if(cm.n > 0){
+		cat(tbl(cm), file=paste("IGM_", id, ".html", sep=""))
+	}
+
+	if(ce.n > 0){
+		cat(tbl(ce), file=paste("IGE_", id, ".html", sep=""))
+	}
+
+	if(un.n > 0){
+		cat(tbl(un), file=paste("un_", id, ".html", sep=""))
+	}
+	
+	ca1.html = make.link(id, "IGA1", ca1.n)
+	ca2.html = make.link(id, "IGA2", ca2.n)
+	
+	cg1.html = make.link(id, "IGG1", cg1.n)
+	cg2.html = make.link(id, "IGG2", cg2.n)
+	cg3.html = make.link(id, "IGG3", cg3.n)
+	cg4.html = make.link(id, "IGG4", cg4.n)
+	
+	cm.html = make.link(id, "IGM", cm.n)
+	
+	ce.html = make.link(id, "IGE", ce.n)
+	
+	un.html = make.link(id, "un", un.n)
+	
+	#extra columns
+	ca.n = ca1.n + ca2.n
+	
+	cg.n = cg1.n + cg2.n + cg3.n + cg4.n
+	
+	#in.classes
+	
+	in.ca.cg = (ca.n > 0 & cg.n > 0)
+	
+	in.ca.cg.cm = (ca.n > 0 & cg.n > 0 & cm.n > 0)
+	
+	in.ca.cg.ce = (ca.n > 0 & cg.n > 0 & ce.n > 0)
+	
+	in.ca.cg.cm.ce = (ca.n > 0 & cg.n > 0 & cm.n > 0 & ce.n > 0)
+	
+	in.ca1.ca2 = (ca1.n > 0 & ca2.n > 0)
+	
+	in.cg1.cg2 = (cg1.n > 0 & cg2.n > 0)
+	in.cg1.cg3 = (cg1.n > 0 & cg3.n > 0)
+	in.cg1.cg4 = (cg1.n > 0 & cg4.n > 0)
+	in.cg2.cg3 = (cg2.n > 0 & cg3.n > 0)
+	in.cg2.cg4 = (cg2.n > 0 & cg4.n > 0)
+	in.cg3.cg4 = (cg3.n > 0 & cg4.n > 0)
+	
+	in.cg1.cg2.cg3 = (cg1.n > 0 & cg2.n > 0 & cg3.n > 0)
+	in.cg2.cg3.cg4 = (cg2.n > 0 & cg3.n > 0 & cg4.n > 0)
+	in.cg1.cg2.cg4 = (cg1.n > 0 & cg2.n > 0 & cg4.n > 0)
+	in.cg1.cg3.cg4 = (cg1.n > 0 & cg3.n > 0 & cg4.n > 0)
+	
+	in.cg.all = (cg1.n > 0 & cg2.n > 0 & cg3.n > 0 & cg4.n > 0)
+	
+	#rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, un.html)
+	rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, ce.html, un.html)
+	rw = c(rw, ca.n, cg.n, cm.n, ce.n, in.classes, in.ca.cg, in.ca.cg.cm, in.ca.cg.ce, in.ca.cg.cm.ce, in.ca1.ca2, in.cg1.cg2, in.cg1.cg3, in.cg1.cg4, in.cg2.cg3, in.cg2.cg4, in.cg3.cg4, in.cg1.cg2.cg3, in.cg2.cg3.cg4, in.cg1.cg2.cg4, in.cg1.cg3.cg4, in.cg.all)
+	
+	
+
+	cat(tr(rw), file=main.html, append=TRUE)
+	
+	#own index j: the original reused the outer loop variable i here
+	for(j in seq_len(nrow(allc))){ #generate html by id
+		html = make.link(id, allc[j,"best_match"], allc[j,"Sequence.ID"])
+		cat(paste(html, "<br />"), file=sequence.id.page, append=TRUE)
+	}
+}
+
+cat("</table>", file=main.html, append=T)
+#log the counters gathered in the loop above
+print(paste("Single sequences:", single.sequences))
+print(paste("Sequences in multiple subclasses:", in.multiple))
+print(paste("Multiple sequences in one subclass:", multiple.in.one))
+print(paste("Matched with unmatched:", some.unmatched))
+print(paste("Count that should match 'matched' sequences:", matched))
+
+#ACGT overview
+
+#NToverview = merged[!grepl("^unmatched", merged$best_match),]
+NToverview = merged
+#analysed sequence = the IMGT region sequences after the region named by empty.region.filter (paste() joins them with single spaces)
+if(empty.region.filter == "leader"){
+	NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
+} else if(empty.region.filter == "FR1"){
+	NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
+} else if(empty.region.filter == "CDR1"){
+	NToverview$seq = paste(NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
+} else if(empty.region.filter == "FR2"){
+	NToverview$seq = paste(NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
+}
+#per-sequence nucleotide counts (upper and lower case both counted)
+NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
+NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq))
+NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq))
+NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq))
+
+#Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T))
+
+#NToverview = rbind(NToverview, NTsum)
+
+NTresult = data.frame(nt=c("A", "C", "T", "G")) #row order A, C, T, G; new.col.x below is built in the same order
+#per gene class: x = count per nucleotide, y = total of the four counts, z = x as a percentage of y
+for(clazz in gene.classes){
+	print(paste("class:", clazz))
+	NToverview.sub = NToverview[grepl(paste("^", clazz, sep=""), NToverview$best_match),]
+	print(paste("nrow:", nrow(NToverview.sub)))
+	new.col.x = c(sum(NToverview.sub$A), sum(NToverview.sub$C), sum(NToverview.sub$T), sum(NToverview.sub$G))
+	new.col.y = sum(new.col.x)
+	new.col.z = round(new.col.x / new.col.y * 100, 2)
+	
+	tmp = names(NTresult)
+	NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z))
+	names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep=""))
+}
+
+NToverview.tmp = NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")]
+
+names(NToverview.tmp) = c("Sequence.ID", "best_match", "Sequence of the analysed region", "A", "C", "G", "T")
+#the per-sequence table is written before unmatched sequences are removed
+write.table(NToverview.tmp, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T)
+
+NToverview = NToverview[!grepl("unmatched", NToverview$best_match),]
+#"all" columns: same x/y/z computation over every sequence whose best_match does not contain "unmatched"
+new.col.x = c(sum(NToverview$A), sum(NToverview$C), sum(NToverview$T), sum(NToverview$G))
+new.col.y = sum(new.col.x)
+new.col.z = round(new.col.x / new.col.y * 100, 2)
+
+tmp = names(NTresult)
+NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z))
+names(NTresult) = c(tmp, paste("all", c("x", "y", "z"), sep=""))
+#align the column names so the nucleotide rows can be appended below the existing hotspot rows
+names(hotspot.analysis.sum) = names(NTresult)
+
+hotspot.analysis.sum = rbind(hotspot.analysis.sum, NTresult)
+
+write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_clonality.htm	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,144 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+@font-face
+	{font-family:Tahoma;
+	panose-1:2 11 6 4 3 5 4 4 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+p
+	{margin-right:0in;
+	margin-left:0in;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
+	{mso-style-link:"Balloon Text Char";
+	margin:0in;
+	margin-bottom:.0001pt;
+	font-size:8.0pt;
+	font-family:"Tahoma","sans-serif";}
+p.msochpdefault, li.msochpdefault, div.msochpdefault
+	{mso-style-name:msochpdefault;
+	margin-right:0in;
+	margin-left:0in;
+	font-size:12.0pt;
+	font-family:"Calibri","sans-serif";}
+p.msopapdefault, li.msopapdefault, div.msopapdefault
+	{mso-style-name:msopapdefault;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+span.apple-converted-space
+	{mso-style-name:apple-converted-space;}
+span.BalloonTextChar
+	{mso-style-name:"Balloon Text Char";
+	mso-style-link:"Balloon Text";
+	font-family:"Tahoma","sans-serif";}
+.MsoChpDefault
+	{font-size:10.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><b><span lang=EN-GB style='color:black'>References</span></b></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>Gupta,
+Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria,
+Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). <a name="OLE_LINK106"></a><a
+name="OLE_LINK107"></a>Change-O: a toolkit for analyzing large-scale B cell
+immunoglobulin repertoire sequencing data: Table 1. In<span
+class=apple-converted-space>&nbsp;</span><em>Bioinformatics, 31 (20), pp.
+3356–3358.</em><span class=apple-converted-space><i>&nbsp;</i></span>[</span><a
+href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span
+lang=EN-GB style='color:#303030'>doi:10.1093/bioinformatics/btv359</span></a><span
+lang=EN-GB style='color:black'>][</span><a
+href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span
+lang=EN-GB style='color:#303030'>Link</span></a><span lang=EN-GB
+style='color:black'>]</span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>&nbsp;</span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><a name="OLE_LINK110"><u><span lang=EN-GB
+style='color:black'>All, IGA, IGG, IGM and IGE tabs</span></u></a></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>In
+these tabs information on the clonal relation of transcripts can be found. To
+calculate the clonal relation, Change-O is used (Gupta et al, PMID: 26069265).
+Transcripts are considered clonally related if they differ by at most three
+nucleotides in their CDR3 sequence and have the same first V segment (as assigned by
+IMGT). Results are represented in a table format showing the clone size and the
+number of clones or sequences with this clone size. Change-O settings used are
+the </span><span lang=EN-GB>nucleotide Hamming distance substitution model with
+a complete distance of at most three. For clonal assignment the first gene
+segments were used, and the distances were not normalized. In case of
+asymmetric distances, the minimal distance was used.<span style='color:black'> </span></span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>&nbsp;</span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><u><span lang=EN-GB style='color:black'>Overlap
+tab</span></u><span lang=EN-GB style='color:black'> </span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span lang=EN-GB style='color:black'>This
+tab shows in which (sub)class(es) each unique analyzed region
+(based on the exact nucleotide sequence of the analyzed region and the CDR3
+nucleotide sequence) is found. This shows whether the combination
+of the exact same nucleotide sequence of the analyzed region and the CDR3
+sequence can be found in multiple (sub)classes.</span></p>
+
+<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in;
+text-align:justify;background:white'><span style='color:black'><img src=""> Please note that this tab is based on all
+sequences before the "filter unique sequences" and the "remove duplicates based on"
+filters are applied. In this table only sequences occurring more than once are
+included. </span></p>
+
+</div>
+
+</body>
+
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_csr.htm	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,95 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+span.apple-converted-space
+	{mso-style-name:apple-converted-space;}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
+graphs in this tab give insight into the subclass distribution of IGG and IGA
+transcripts. </span><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Human Cµ, C&#945;, C&#947; and C&#949;
+constant genes are assigned using a </span><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>custom script
+specifically designed for human (sub)class assignment in repertoire data as
+described in van Schouwenburg and IJspeert et al, submitted for publication. In
+this script the reference sequences for the subclasses are divided in 8
+nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are
+then individually aligned in the right order to each input sequence. The
+percentage of the chunks identified in each rearrangement is calculated in the
+‘chunk hit percentage’. </span><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>C&#945; and C&#947;
+subclasses are very homologous and only differ in a few nucleotides. To assign
+subclasses the </span><span lang=EN-GB style='font-size:12.0pt;line-height:
+115%;font-family:"Times New Roman","serif"'>‘nt hit percentage’ is calculated.
+This percentage indicates how well the chunks covering the subclass-specific
+nucleotides match with the different subclasses. </span><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Information
+on normal distribution of subclasses in healthy individuals of different ages
+can be found in IJspeert and van Schouwenburg et al, PMID: 27799928.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK100"></a><a
+name="OLE_LINK99"></a><a name="OLE_LINK25"><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGA
+subclass distribution</span></u></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
+chart showing the relative distribution of IGA1 and IGA2 transcripts in the
+sample.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGG
+subclass distribution</span></u></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie
+chart showing the relative distribution of IGG1, IGG2, IGG3 and IGG4
+transcripts in the sample.</span></p>
+
+</div>
+
+</body>
+
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_csr.py	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,508 @@
+import argparse
+import logging
+import sys
+import os
+import re
+
+from collections import defaultdict
+
+def main():
+	parser = argparse.ArgumentParser()
+	parser.add_argument("--input", help="The '7_V-REGION-mutation-and-AA-change-table' and '10_V-REGION-mutation-hotspots' merged together, with an added 'best_match' annotation")
+	parser.add_argument("--genes", help="The genes available in the 'best_match' column")
+	parser.add_argument("--empty_region_filter", help="Where does the sequence start?", choices=['leader', 'FR1', 'CDR1', 'FR2'])
+	parser.add_argument("--output", help="Output file")
+
+	args = parser.parse_args()
+
+	infile = args.input
+	genes = str(args.genes).split(",")
+	empty_region_filter = args.empty_region_filter
+	outfile = args.output
+
+	genedic = dict()
+
+	mutationdic = dict()
+	mutationMatcher = re.compile("^(.)(\d+).(.),?[ ]?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?")
+	mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?.?([A-Z])?(.*)?")
+	mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")
+	mutationMatcher = re.compile("^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")
+	NAMatchResult = (None, None, None, None, None, None, '')
+	geneMatchers = {gene: re.compile("^" + gene + ".*") for gene in genes}
+	linecount = 0
+
+	IDIndex = 0
+	best_matchIndex = 0
+	fr1Index = 0
+	cdr1Index = 0
+	fr2Index = 0
+	cdr2Index = 0
+	fr3Index = 0
+	first = True
+	IDlist = []
+	mutationList = []
+	mutationListByID = {}
+	cdr1LengthDic = {}
+	cdr2LengthDic = {}
+
+	fr1LengthDict = {}
+	fr2LengthDict = {}
+	fr3LengthDict = {}
+
+	cdr1LengthIndex = 0
+	cdr2LengthIndex = 0
+
+	fr1SeqIndex = 0
+	fr2SeqIndex = 0
+	fr3SeqIndex = 0
+
+	tandem_sum_by_class = defaultdict(int)
+	expected_tandem_sum_by_class = defaultdict(float)
+
+	with open(infile, 'ru') as i:
+		for line in i:
+			if first:
+				linesplt = line.split("\t")
+				IDIndex = linesplt.index("Sequence.ID")
+				best_matchIndex = linesplt.index("best_match")
+				fr1Index = linesplt.index("FR1.IMGT")
+				cdr1Index = linesplt.index("CDR1.IMGT")
+				fr2Index = linesplt.index("FR2.IMGT")
+				cdr2Index = linesplt.index("CDR2.IMGT")
+				fr3Index = linesplt.index("FR3.IMGT")
+				cdr1LengthIndex = linesplt.index("CDR1.IMGT.length")
+				cdr2LengthIndex = linesplt.index("CDR2.IMGT.length")
+				fr1SeqIndex = linesplt.index("FR1.IMGT.seq")
+				fr2SeqIndex = linesplt.index("FR2.IMGT.seq")
+				fr3SeqIndex = linesplt.index("FR3.IMGT.seq")
+				first = False
+				continue
+			linecount += 1
+			linesplt = line.split("\t")
+			ID = linesplt[IDIndex]
+			genedic[ID] = linesplt[best_matchIndex]
+			
+			mutationdic[ID + "_FR1"] = []
+			if len(linesplt[fr1Index]) > 5 and empty_region_filter == "leader":
+				mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x]
+
+			mutationdic[ID + "_CDR1"] = []
+			if len(linesplt[cdr1Index]) > 5 and empty_region_filter in ["leader", "FR1"]:
+				mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
+
+			mutationdic[ID + "_FR2"] = []
+			if len(linesplt[fr2Index]) > 5 and empty_region_filter in ["leader", "FR1", "CDR1"]:
+				mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
+
+			mutationdic[ID + "_CDR2"] = []
+			if len(linesplt[cdr2Index]) > 5:
+				mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
+			
+			mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
+
+			mutationdic[ID + "_FR3"] = []
+			if len(linesplt[fr3Index]) > 5:
+				mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
+				
+			mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
+			mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
+
+			try:
+				cdr1Length = int(linesplt[cdr1LengthIndex])
+			except:
+				cdr1Length = 0
+			
+			try:
+				cdr2Length = int(linesplt[cdr2LengthIndex])
+			except:
+				cdr2Length = 0
+
+			#print linesplt[fr2SeqIndex]
+			fr1Length = len(linesplt[fr1SeqIndex]) if empty_region_filter == "leader" else 0
+			fr2Length = len(linesplt[fr2SeqIndex]) if empty_region_filter in ["leader", "FR1", "CDR1"] else 0
+			fr3Length = len(linesplt[fr3SeqIndex])
+
+			cdr1LengthDic[ID] = cdr1Length
+			cdr2LengthDic[ID] = cdr2Length
+
+			fr1LengthDict[ID] = fr1Length
+			fr2LengthDict[ID] = fr2Length
+			fr3LengthDict[ID] = fr3Length
+
+			IDlist += [ID]
+	print "len(mutationdic) =", len(mutationdic)
+
+	with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle:
+		for ID, lst in mutationdic.iteritems():
+			for mut in lst:
+				out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in mut])))
+
+	#tandem mutation stuff
+	tandem_frequency = defaultdict(int)
+	mutation_frequency = defaultdict(int)
+	
+	mutations_by_id_dic = {}
+	first = True
+	mutation_by_id_file = os.path.join(os.path.dirname(outfile), "mutation_by_id.txt")
+	with open(mutation_by_id_file, 'r') as mutation_by_id:
+		for l in mutation_by_id:
+			if first:
+				first = False
+				continue
+			splt = l.split("\t")
+			mutations_by_id_dic[splt[0]] = int(splt[1])
+    
+	tandem_file = os.path.join(os.path.dirname(outfile), "tandems_by_id.txt")
+	with open(tandem_file, 'w') as o:
+		highest_tandem_length = 0
+
+		o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n")
+		for ID in IDlist:
+			mutations = mutationListByID[ID]
+			if len(mutations) == 0:
+				continue
+			last_mut = max(mutations, key=lambda x: int(x[1]))
+
+			last_mut_pos = int(last_mut[1])
+
+			mut_positions = [False] * (last_mut_pos + 1)
+
+			for mutation in mutations:
+				frm, where, to, frmAA, whereAA, toAA, thing = mutation
+				where = int(where)
+				mut_positions[where] = True
+
+			tandem_muts = []
+			tandem_start = -1
+			tandem_length = 0
+			for i in range(len(mut_positions)):
+				if mut_positions[i]:
+					if tandem_start == -1:
+						tandem_start = i
+					tandem_length += 1
+					#print "".join(["1" if x else "0" for x in mut_positions[:i+1]])
+				else:
+					if tandem_length > 1:
+						tandem_muts.append((tandem_start, tandem_length))
+						#print "{0}{1} {2}:{3}".format(" " * (i - tandem_length), "^" * tandem_length, tandem_start, tandem_length)
+					tandem_start = -1
+					tandem_length = 0
+			if tandem_length > 1:  # if the sequence ends with a tandem mutation
+				tandem_muts.append((tandem_start, tandem_length))
+
+			if len(tandem_muts) > 0:
+				if highest_tandem_length < len(tandem_muts):
+					highest_tandem_length = len(tandem_muts)
+
+			region_length = fr1LengthDict[ID] + cdr1LengthDic[ID] + fr2LengthDict[ID] + cdr2LengthDic[ID] + fr3LengthDict[ID]
+			longest_tandem = max(tandem_muts, key=lambda x: x[1]) if len(tandem_muts) else (0, 0)
+			num_mutations = mutations_by_id_dic[ID] # len(mutations)
+			f_num_mutations = float(num_mutations)
+			num_tandem_muts = len(tandem_muts)
+			expected_tandem_muts = f_num_mutations * (f_num_mutations - 1.0) / float(region_length)
+			o.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n".format(ID,
+																str(num_mutations),
+																str(num_tandem_muts),
+																str(region_length),
+																str(round(expected_tandem_muts, 2)),
+																str(longest_tandem[1]),
+																str(tandem_muts)))
+			gene = genedic[ID]
+			if gene.find("unmatched") == -1:
+				tandem_sum_by_class[gene] += num_tandem_muts
+				expected_tandem_sum_by_class[gene] += expected_tandem_muts
+
+				tandem_sum_by_class["all"] += num_tandem_muts
+				expected_tandem_sum_by_class["all"] += expected_tandem_muts
+
+				gene = gene[:3]
+				if gene in ["IGA", "IGG"]:
+					tandem_sum_by_class[gene] += num_tandem_muts
+					expected_tandem_sum_by_class[gene] += expected_tandem_muts
+			else:
+				tandem_sum_by_class["unmatched"] += num_tandem_muts
+				expected_tandem_sum_by_class["unmatched"] += expected_tandem_muts
+
+
+			for tandem_mut in tandem_muts:
+				tandem_frequency[str(tandem_mut[1])] += 1
+			#print "\t".join([ID, str(len(tandem_muts)), str(longest_tandem[1]) , str(tandem_muts)])
+
+	tandem_freq_file = os.path.join(os.path.dirname(outfile), "tandem_frequency.txt")
+	with open(tandem_freq_file, 'w') as o:
+		for frq in sorted([int(x) for x in tandem_frequency.keys()]):
+			o.write("{0}\t{1}\n".format(frq, tandem_frequency[str(frq)]))
+
+	tandem_row = []
+	genes_extra = list(genes)
+	genes_extra.append("all")
+	for x, y, in zip([tandem_sum_by_class[x] for x in genes_extra], [expected_tandem_sum_by_class[x] for x in genes_extra]):
+		if y != 0:
+			tandem_row += [x, round(y, 2), round(x / y, 2)]
+		else:
+			tandem_row += [x, round(y, 2), 0]
+
+	tandem_freq_file = os.path.join(os.path.dirname(outfile), "shm_overview_tandem_row.txt")
+	with open(tandem_freq_file, 'w') as o:
+		o.write("Tandems/Expected (ratio),{0}\n".format(",".join([str(x) for x in tandem_row])))
+
+	#print mutationList, linecount
+
+	AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] and i[5] != ";" else 0)[4]) + 1)  # [4] is the position of the AA mutation, None if silent
+	if AALength < 60:
+		AALength = 64
+
+	AA_mutation = [0] * AALength
+	AA_mutation_dic = {"IGA": AA_mutation[:], "IGG": AA_mutation[:], "IGM": AA_mutation[:], "IGE": AA_mutation[:], "unm": AA_mutation[:], "all": AA_mutation[:]}
+	AA_mutation_empty = AA_mutation[:]
+
+	print "AALength:", AALength
+	aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt"
+	with open(aa_mutations_by_id_file, 'w') as o:
+		o.write("ID\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n")
+		for ID in mutationListByID.keys():
+			AA_mutation_for_ID = AA_mutation_empty[:]
+			for mutation in mutationListByID[ID]:
+				if mutation[4] and mutation[5] != ";":
+					AA_mutation_position = int(mutation[4])
+					try:
+						AA_mutation[AA_mutation_position] += 1
+						AA_mutation_for_ID[AA_mutation_position] += 1
+					except Exception as e:
+						print e
+						print mutation
+						sys.exit()
+					clss = genedic[ID][:3]
+					AA_mutation_dic[clss][AA_mutation_position] += 1
+			o.write(ID + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in AA_mutation_for_ID[1:]]) + "\n")
+
+
+
+	#absent AA stuff
+	absentAACDR1Dic = defaultdict(list)
+	absentAACDR1Dic[5] = range(29,36)
+	absentAACDR1Dic[6] = range(29,35)
+	absentAACDR1Dic[7] = range(30,35)
+	absentAACDR1Dic[8] = range(30,34)
+	absentAACDR1Dic[9] = range(31,34)
+	absentAACDR1Dic[10] = range(31,33)
+	absentAACDR1Dic[11] = [32]
+
+	absentAACDR2Dic = defaultdict(list)
+	absentAACDR2Dic[0] = range(55,65)
+	absentAACDR2Dic[1] = range(56,65)
+	absentAACDR2Dic[2] = range(56,64)
+	absentAACDR2Dic[3] = range(57,64)
+	absentAACDR2Dic[4] = range(57,63)
+	absentAACDR2Dic[5] = range(58,63)
+	absentAACDR2Dic[6] = range(58,62)
+	absentAACDR2Dic[7] = range(59,62)
+	absentAACDR2Dic[8] = range(59,61)
+	absentAACDR2Dic[9] = [60]
+
+	absentAA = [len(IDlist)] * (AALength-1)
+	for k, cdr1Length in cdr1LengthDic.iteritems():
+		for c in absentAACDR1Dic[cdr1Length]:
+			absentAA[c] -= 1
+
+	for k, cdr2Length in cdr2LengthDic.iteritems():
+		for c in absentAACDR2Dic[cdr2Length]:
+			absentAA[c] -= 1
+
+
+	aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/absent_aa_id.txt"
+	with open(aa_mutations_by_id_file, 'w') as o:
+		o.write("ID\tcdr1length\tcdr2length\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n")
+		for ID in IDlist:
+			absentAAbyID = [1] * (AALength-1)
+			cdr1Length = cdr1LengthDic[ID]
+			for c in absentAACDR1Dic[cdr1Length]:
+				absentAAbyID[c] -= 1
+
+			cdr2Length = cdr2LengthDic[ID]
+			for c in absentAACDR2Dic[cdr2Length]:
+				absentAAbyID[c] -= 1
+			o.write(ID + "\t" + str(cdr1Length) + "\t" + str(cdr2Length) + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in absentAAbyID]) + "\n")
+
+	if linecount == 0:
+		print "No data, exiting"
+		with open(outfile, 'w') as o:
+			o.write("RGYW (%)," + ("0,0,0\n" * len(genes)))
+			o.write("WRCY (%)," + ("0,0,0\n" * len(genes)))
+			o.write("WA (%)," + ("0,0,0\n" * len(genes)))
+			o.write("TW (%)," + ("0,0,0\n" * len(genes)))
+		import sys
+
+		sys.exit()
+
+	hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)")
+	RGYWCount = {}
+	WRCYCount = {}
+	WACount = {}
+	TWCount = {}
+
+	#IDIndex = 0
+	ataIndex = 0
+	tatIndex = 0
+	aggctatIndex = 0
+	atagcctIndex = 0
+	first = True
+	with open(infile, 'ru') as i:
+		for line in i:
+			if first:
+				linesplt = line.split("\t")
+				ataIndex = linesplt.index("X.a.t.a")
+				tatIndex = linesplt.index("t.a.t.")
+				aggctatIndex = linesplt.index("X.a.g.g.c.t..a.t.")
+				atagcctIndex = linesplt.index("X.a.t..a.g.c.c.t.")
+				first = False
+				continue
+			linesplt = line.split("\t")
+			gene = linesplt[best_matchIndex]
+			ID = linesplt[IDIndex]
+			RGYW = [(int(x), int(y), z) for (x, y, z) in
+					[hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]]
+			WRCY = [(int(x), int(y), z) for (x, y, z) in
+					[hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]]
+			WA = [(int(x), int(y), z) for (x, y, z) in
+				[hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]]
+			TW = [(int(x), int(y), z) for (x, y, z) in
+				[hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]]
+			RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0, 0, 0, 0
+
+			with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "RGYW.txt"), 'a') as out_handle:
+				for hotspot in RGYW:
+					out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in hotspot])))
+
+			mutationList = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
+			for mutation in mutationList:
+				frm, where, to, AAfrm, AAwhere, AAto, junk = mutation
+				mutation_in_RGYW = any(((start <= int(where) <= end) for (start, end, region) in RGYW))
+				mutation_in_WRCY = any(((start <= int(where) <= end) for (start, end, region) in WRCY))
+				mutation_in_WA = any(((start <= int(where) <= end) for (start, end, region) in WA))
+				mutation_in_TW = any(((start <= int(where) <= end) for (start, end, region) in TW))
+
+				in_how_many_motifs = sum([mutation_in_RGYW, mutation_in_WRCY, mutation_in_WA, mutation_in_TW])
+
+				if in_how_many_motifs > 0:
+					RGYWCount[ID] += (1.0 * int(mutation_in_RGYW)) / in_how_many_motifs
+					WRCYCount[ID] += (1.0 * int(mutation_in_WRCY)) / in_how_many_motifs
+					WACount[ID] += (1.0 * int(mutation_in_WA)) / in_how_many_motifs
+					TWCount[ID] += (1.0 * int(mutation_in_TW)) / in_how_many_motifs
+			
+			mutations_in_motifs_file = os.path.join(os.path.dirname(os.path.abspath(infile)), "mutation_in_motifs.txt")
+			if not os.path.exists(mutation_by_id_file):
+				with open(mutations_in_motifs_file, 'w') as out_handle:
+					out_handle.write("{0}\n".format("\t".join([
+						"Sequence.ID",
+						"mutation_position",
+						"region",
+						"from_nt",
+						"to_nt",
+						"mutation_position_AA",
+						"from_AA",
+						"to_AA",
+						"motif",
+						"motif_start_nt",
+						"motif_end_nt",
+						"rest"
+					])))
+
+			with open(mutations_in_motifs_file, 'a') as out_handle:
+				motif_dic = {"RGYW": RGYW, "WRCY": WRCY, "WA": WA, "TW": TW}
+				for mutation in mutationList:
+					frm, where, to, AAfrm, AAwhere, AAto, junk = mutation
+					for motif in motif_dic.keys():
+							
+						for start, end, region in motif_dic[motif]:
+							if start <= int(where) <= end:
+								out_handle.write("{0}\n".format(
+									"\t".join([
+										ID,
+										where,
+										region,
+										frm,
+										to,
+										str(AAwhere),
+										str(AAfrm),
+										str(AAto),
+										motif,
+										str(start),
+										str(end),
+										str(junk)
+									])
+								))
+
+
+
+	def mean(lst):
+		return (float(sum(lst)) / len(lst)) if len(lst) > 0 else 0.0
+
+
+	def median(lst):
+		lst = sorted(lst)
+		l = len(lst)
+		if l == 0:
+			return 0
+		if l == 1:
+			return lst[0]
+			
+		l = int(l / 2)
+		
+		if len(lst) % 2 == 0:
+			return float(lst[l] + lst[(l - 1)]) / 2.0
+		else:
+			return lst[l]
+
+	funcs = {"mean": mean, "median": median, "sum": sum}
+
+	directory = outfile[:outfile.rfind("/") + 1]
+	value = 0
+	valuedic = dict()
+
+	for fname in funcs.keys():
+		for gene in genes:
+			with open(directory + gene + "_" + fname + "_value.txt", 'r') as v:
+				valuedic[gene + "_" + fname] = float(v.readlines()[0].rstrip())
+		with open(directory + "all_" + fname + "_value.txt", 'r') as v:
+			valuedic["total_" + fname] = float(v.readlines()[0].rstrip())
+		
+
+	def get_xyz(lst, gene, f, fname):
+		x = round(round(f(lst), 1))
+		y = valuedic[gene + "_" + fname]
+		z = str(round(x / float(y) * 100, 1)) if y != 0 else "0"
+		return (str(x), str(y), z)
+
+	dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount}
+	arr = ["RGYW", "WRCY", "WA", "TW"]
+
+	for fname in funcs.keys():
+		func = funcs[fname]
+		foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt"
+		with open(foutfile, 'w') as o:
+			for typ in arr:
+				o.write(typ + " (%)")
+				curr = dic[typ]
+				for gene in genes:
+					geneMatcher = geneMatchers[gene]
+					if valuedic[gene + "_" + fname] is 0:
+						o.write(",0,0,0")
+					else:
+						x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname)
+						o.write("," + x + "," + y + "," + z)
+				x, y, z = get_xyz([y for x, y in curr.iteritems() if not genedic[x].startswith("unmatched")], "total", func, fname)
+				#x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname)
+				o.write("," + x + "," + y + "," + z + "\n")
+
+
+	# for testing
+	seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt"
+	with open(seq_motif_file, 'w') as o:
+		o.write("ID\tRGYW\tWRCY\tWA\tTW\n")
+		for ID in IDlist:
+			#o.write(ID + "\t" + str(round(RGYWCount[ID], 2)) + "\t" + str(round(WRCYCount[ID], 2)) + "\t" + str(round(WACount[ID], 2)) + "\t" + str(round(TWCount[ID], 2)) + "\n")
+			o.write(ID + "\t" + str(RGYWCount[ID]) + "\t" + str(WRCYCount[ID]) + "\t" + str(WACount[ID]) + "\t" + str(TWCount[ID]) + "\n")
+
+if __name__ == "__main__":
+	main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_csr.r	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,561 @@
+library(data.table)
+library(ggplot2)
+library(reshape2)
+
+args <- commandArgs(trailingOnly = TRUE)
+# Positional arguments: input table path, comma-separated gene list, output directory, region filter value.
+input = args[1]
+genes = unlist(strsplit(args[2], ","))
+outputdir = args[3]
+empty.region.filter = args[4]
+setwd(outputdir) # from here on every relative path is resolved against outputdir
+
+#dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F)
+
+dat = data.frame(fread(input, sep="\t", header=T, stringsAsFactors=F)) #fread because read.table suddenly skips certain rows...
+
+if(length(dat$Sequence.ID) == 0){
+  # No sequences: write placeholder mutations/transitions/n files and abort. The working directory is already outputdir (set once above); a second setwd(outputdir) would fail for a relative outputdir.
+  result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5))
+  row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)")
+  write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
+  transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4))
+  row.names(transitionTable) = c("A", "C", "G", "T")
+  transitionTable["A","A"] = NA
+  transitionTable["C","C"] = NA
+  transitionTable["G","G"] = NA
+  transitionTable["T","T"] = NA
+
+  write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA)
+  cat("0", file="n.txt")
+  stop("No data")
+}
+
+cleanup_columns = c("FR1.IMGT.c.a", # columns that are stripped of "(...)" annotations and forced to numeric below
+					"FR1.IMGT.a.c", # was missing from this list although it is summed with the other FR1 substitution columns
+					"FR2.IMGT.g.t",
+					"CDR1.IMGT.Nb.of.nucleotides",
+					"CDR2.IMGT.t.a",
+					"FR1.IMGT.c.g",
+					"CDR1.IMGT.c.t",
+					"FR2.IMGT.a.c",
+					"FR2.IMGT.Nb.of.mutations",
+					"FR2.IMGT.g.c",
+					"FR2.IMGT.a.g",
+					"FR3.IMGT.t.a",
+					"FR3.IMGT.t.c",
+					"FR2.IMGT.g.a",
+					"FR3.IMGT.c.g",
+					"FR1.IMGT.Nb.of.mutations",
+					"CDR1.IMGT.g.a",
+					"CDR1.IMGT.t.g",
+					"CDR1.IMGT.g.c",
+					"CDR2.IMGT.Nb.of.nucleotides",
+					"FR2.IMGT.a.t",
+					"CDR1.IMGT.Nb.of.mutations",
+					"CDR3.IMGT.Nb.of.nucleotides",
+					"CDR1.IMGT.a.g",
+					"FR3.IMGT.a.c",
+					"FR1.IMGT.g.a",
+					"FR3.IMGT.a.g",
+					"FR1.IMGT.a.t",
+					"CDR2.IMGT.a.g",
+					"CDR2.IMGT.Nb.of.mutations",
+					"CDR2.IMGT.g.t",
+					"CDR2.IMGT.a.c",
+					"CDR1.IMGT.t.c",
+					"FR3.IMGT.g.c",
+					"FR1.IMGT.g.t",
+					"FR3.IMGT.g.t",
+					"CDR1.IMGT.a.t",
+					"FR1.IMGT.a.g",
+					"FR3.IMGT.a.t",
+					"FR3.IMGT.Nb.of.nucleotides",
+					"FR2.IMGT.t.c",
+					"CDR2.IMGT.g.a",
+					"FR2.IMGT.t.a",
+					"CDR1.IMGT.t.a",
+					"FR2.IMGT.t.g",
+					"FR3.IMGT.t.g",
+					"FR2.IMGT.Nb.of.nucleotides",
+					"FR1.IMGT.t.a",
+					"FR1.IMGT.t.g",
+					"FR3.IMGT.c.t",
+					"FR1.IMGT.t.c",
+					"CDR2.IMGT.a.t",
+					"FR2.IMGT.c.t",
+					"CDR1.IMGT.g.t",
+					"CDR2.IMGT.t.g",
+					"FR1.IMGT.Nb.of.nucleotides",
+					"CDR1.IMGT.c.g",
+					"CDR2.IMGT.t.c",
+					"FR3.IMGT.g.a",
+					"CDR1.IMGT.a.c",
+					"FR2.IMGT.c.a",
+					"FR3.IMGT.Nb.of.mutations",
+					"FR2.IMGT.c.g",
+					"CDR2.IMGT.g.c",
+					"FR1.IMGT.g.c",
+					"CDR2.IMGT.c.t",
+					"FR3.IMGT.c.a",
+					"CDR1.IMGT.c.a",
+					"CDR2.IMGT.c.g",
+					"CDR2.IMGT.c.a",
+					"FR1.IMGT.c.t",
+					"FR1.IMGT.Nb.of.silent.mutations",
+					"FR2.IMGT.Nb.of.silent.mutations",
+					"FR3.IMGT.Nb.of.silent.mutations",
+					"FR1.IMGT.Nb.of.nonsilent.mutations",
+					"FR2.IMGT.Nb.of.nonsilent.mutations",
+					"FR3.IMGT.Nb.of.nonsilent.mutations")
+
+print("Cleaning up columns")
+# Per column: drop any parenthesised text, convert to numeric, and turn values that fail to parse (NA) into 0.
+for(col in cleanup_columns){
+  dat[,col] = gsub("\\(.*\\)", "", dat[,col])
+  dat[,col] = as.numeric(dat[,col])
+  dat[is.na(dat[,col]),col] = 0
+}
+
+regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3")
+if(empty.region.filter == "FR1") {
+	regions = c("CDR1", "FR2", "CDR2", "FR3")
+} else if (empty.region.filter == "CDR1") {
+	regions = c("FR2", "CDR2", "FR3")
+} else if (empty.region.filter == "FR2") {
+	regions = c("CDR2", "FR3")
+}
+
+pdfplots = list() #save() this later to create the pdf plots in another script (maybe avoids the "address (nil), cause memory not mapped")
+# Sum the named columns of one row (x is a row as handed over by apply(dat, 1, ...)); values are coerced to numeric and NA is ignored.
+sum_by_row = function(x, columns) { sum(as.numeric(x[columns]), na.rm=T) }
+
+print("aggregating data into new columns")
+# Each pair below builds the column names for the selected regions and stores the per-sequence sum in a new column of dat.
+VRegionMutations_columns = paste(regions, ".IMGT.Nb.of.mutations", sep="")
+dat$VRegionMutations =  apply(dat, FUN=sum_by_row, 1, columns=VRegionMutations_columns)
+# The FR3 nucleotide count is replaced by the length of the FR3 sequence before the nucleotide columns are summed.
+VRegionNucleotides_columns = paste(regions, ".IMGT.Nb.of.nucleotides", sep="")
+dat$FR3.IMGT.Nb.of.nucleotides = nchar(dat$FR3.IMGT.seq)
+dat$VRegionNucleotides =  apply(dat, FUN=sum_by_row, 1, columns=VRegionNucleotides_columns)
+
+transitionMutations_columns = paste(rep(regions, each=4), c(".IMGT.a.g", ".IMGT.g.a", ".IMGT.c.t", ".IMGT.t.c"), sep="")
+dat$transitionMutations = apply(dat, FUN=sum_by_row, 1, columns=transitionMutations_columns)
+
+transversionMutations_columns = paste(rep(regions, each=8), c(".IMGT.a.c",".IMGT.c.a",".IMGT.a.t",".IMGT.t.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t",".IMGT.t.g"), sep="")
+dat$transversionMutations = apply(dat, FUN=sum_by_row, 1, columns=transversionMutations_columns)
+
+transitionMutationsAtGC_columns = paste(rep(regions, each=2), c(".IMGT.g.a",".IMGT.c.t"), sep="")
+dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns)
+
+totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.c.g",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.g.a",".IMGT.g.t"), sep="")
+#totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.c.g",".IMGT.g.t"), sep="")
+dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns)
+
+transitionMutationsAtAT_columns = paste(rep(regions, each=2), c(".IMGT.a.g",".IMGT.t.c"), sep="")
+dat$transitionMutationsAtAT = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtAT_columns)
+
+totalMutationsAtAT_columns = paste(rep(regions, each=6), c(".IMGT.a.g",".IMGT.a.c",".IMGT.a.t",".IMGT.t.g",".IMGT.t.c",".IMGT.t.a"), sep="")
+#totalMutationsAtAT_columns = paste(rep(regions, each=5), c(".IMGT.a.g",".IMGT.t.c",".IMGT.a.c",".IMGT.g.c",".IMGT.t.g"), sep="")
+dat$totalMutationsAtAT = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtAT_columns)
+# Silent / non-silent counts are summed separately for the selected FR and CDR regions.
+FRRegions = regions[grepl("FR", regions)]
+CDRRegions = regions[grepl("CDR", regions)]
+
+FR_silentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.silent.mutations", sep="")
+dat$silentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_silentMutations_columns)
+
+CDR_silentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.silent.mutations", sep="")
+dat$silentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_silentMutations_columns)
+
+FR_nonSilentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="")
+dat$nonSilentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_nonSilentMutations_columns)
+
+CDR_nonSilentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="")
+dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns)
+
+mutation.sum.columns = c("Sequence.ID", "VRegionMutations", "VRegionNucleotides", "transitionMutations", "transversionMutations", "transitionMutationsAtGC", "transitionMutationsAtAT", "silentMutationsFR", "nonSilentMutationsFR", "silentMutationsCDR", "nonSilentMutationsCDR")
+write.table(dat[,mutation.sum.columns], "mutation_by_id.txt", sep="\t",quote=F,row.names=F,col.names=T)
+
+# The working directory is still outputdir (set at startup); repeating setwd(outputdir) here would fail for a relative outputdir.
+# The table, including the new summary columns, is written back over the input file.
+write.table(dat, input, sep="\t",quote=F,row.names=F,col.names=T)
+
+base.order.x = data.frame(base=c("A", "C", "G", "T"), order.x=1:4)
+base.order.y = data.frame(base=c("T", "G", "C", "A"), order.y=1:4)
+# calculate_result: for the rows of dat whose best_match matches "^<gene>", fill the i-th column triple of matrx using summary function f (fname is "median", "sum" or "mean"), write the transitions_/matched_/_value/_n files for `name`, draw the transition plots (also stored in the global pdfplots), and return the updated matrx.
+calculate_result = function(i, gene, dat, matrx, f, fname, name){
+	tmp = dat[grepl(paste("^", gene, ".*", sep=""), dat$best_match),]
+	# Group i owns three consecutive matrx columns: x = value, y = denominator, z = percentage (rows 1-7, 10-11) or ratio (rows 8-9).
+	j = i - 1
+	x = (j * 3) + 1
+	y = (j * 3) + 2
+	z = (j * 3) + 3
+
+	if(nrow(tmp) > 0){
+		if(fname == "sum"){
+			matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+			matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
+			matrx[1,z] = round(f(matrx[1,x] / matrx[1,y]) * 100, digits=1)
+		} else {
+			matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+			matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
+			matrx[1,z] = round(f(tmp$VRegionMutations / tmp$VRegionNucleotides) * 100, digits=1)
+		}
+		# Rows 2-7: transitions, transversions and G/C or A/T targeting, each as a percentage of its denominator.
+		matrx[2,x] = round(f(tmp$transitionMutations, na.rm=T), digits=1)
+		matrx[2,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+		matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1)
+
+		matrx[3,x] = round(f(tmp$transversionMutations, na.rm=T), digits=1)
+		matrx[3,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+		matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1)
+
+		matrx[4,x] = round(f(tmp$transitionMutationsAtGC, na.rm=T), digits=1)
+		matrx[4,y] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1)
+		matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
+
+		matrx[5,x] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1)
+		matrx[5,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+		matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
+
+		matrx[6,x] = round(f(tmp$transitionMutationsAtAT, na.rm=T), digits=1)
+		matrx[6,y] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1)
+		matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1)
+
+		matrx[7,x] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1)
+		matrx[7,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1)
+		matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1)
+		# Rows 8-9: non-silent / silent ratio for FR and CDR (a plain ratio, not multiplied by 100).
+		matrx[8,x] = round(f(tmp$nonSilentMutationsFR, na.rm=T), digits=1)
+		matrx[8,y] = round(f(tmp$silentMutationsFR, na.rm=T), digits=1)
+		matrx[8,z] = round(matrx[8,x] / matrx[8,y], digits=1)
+
+		matrx[9,x] = round(f(tmp$nonSilentMutationsCDR, na.rm=T), digits=1)
+		matrx[9,y] = round(f(tmp$silentMutationsCDR, na.rm=T), digits=1)
+		matrx[9,z] = round(matrx[9,x] / matrx[9,y], digits=1)
+
+		if(fname == "sum"){
+			# Rows 10-11 exist only in the "sum" table: nucleotides in FR and in CDR as a share of all counted nucleotides.
+			regions.fr = regions[grepl("FR", regions)]
+			regions.fr = paste(regions.fr, ".IMGT.Nb.of.nucleotides", sep="")
+			regions.cdr = regions[grepl("CDR", regions)]
+			regions.cdr = paste(regions.cdr, ".IMGT.Nb.of.nucleotides", sep="")
+			
+			if(length(regions.fr) > 1){ #in case there is only on FR region (rowSums needs >1 column)
+				matrx[10,x] = round(f(rowSums(tmp[,regions.fr], na.rm=T)), digits=1)
+			} else {
+				matrx[10,x] = round(f(tmp[,regions.fr], na.rm=T), digits=1)
+			}
+			matrx[10,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
+			matrx[10,z] = round(matrx[10,x] / matrx[10,y] * 100, digits=1)
+
+			if(length(regions.cdr) > 1){ #in case there is only on CDR region
+				matrx[11,x] = round(f(rowSums(tmp[,regions.cdr], na.rm=T)), digits=1)
+			} else {
+				matrx[11,x] = round(f(tmp[,regions.cdr], na.rm=T), digits=1)
+			}
+			matrx[11,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1)
+			matrx[11,z] = round(matrx[11,x] / matrx[11,y] * 100, digits=1)
+		}
+	}
+	# 4x4 from-base (rows) x to-base (columns) substitution counts; the diagonal is NA. `zeros` and `nts` are globals defined after this function.
+	transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros)
+	row.names(transitionTable) = c("A", "C", "G", "T")
+	transitionTable["A","A"] = NA
+	transitionTable["C","C"] = NA
+	transitionTable["G","G"] = NA
+	transitionTable["T","T"] = NA
+
+	if(nrow(tmp) > 0){
+		for(nt1 in nts){
+			for(nt2 in nts){
+				if(nt1 == nt2){
+					next
+				}
+				NT1 = LETTERS[letters == nt1]
+				NT2 = LETTERS[letters == nt2]
+				FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="")
+				CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="")
+				FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="")
+				CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="")
+				FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="")
+				if (empty.region.filter == "leader"){
+					transitionTable[NT1,NT2] = sum(tmp[,c(FR1, CDR1, FR2, CDR2, FR3)])
+				} else if (empty.region.filter == "FR1") {
+					transitionTable[NT1,NT2] = sum(tmp[,c(CDR1, FR2, CDR2, FR3)])
+				} else if (empty.region.filter == "CDR1") {
+					transitionTable[NT1,NT2] = sum(tmp[,c(FR2, CDR2, FR3)])
+				} else if (empty.region.filter == "FR2") {
+					transitionTable[NT1,NT2] = sum(tmp[,c(CDR2, FR3)])
+				}
+			}
+		}
+		transition = transitionTable
+		transition$id = names(transition)
+		# Long format (id = from base, variable = to base) joined with the plotting order of the bases.
+		transition2 = melt(transition, id.vars="id")
+
+		transition2 = merge(transition2, base.order.x, by.x="id", by.y="base")
+
+		transition2 = merge(transition2, base.order.y, by.x="variable", by.y="base")
+
+		transition2[is.na(transition2$value),]$value = 0
+		
+		if(any(transition2$value != 0)){ #having a transition table filled with 0 is bad
+			print("Plotting heatmap and transition")
+			png(filename=paste("transitions_stacked_", name, ".png", sep=""))
+			p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar
+			p = p + xlab("From base") + ylab("") + ggtitle("Bargraph transition information") + guides(fill=guide_legend(title=NULL))
+			p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4"))
+			#p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black"))
+			print(p)
+			dev.off()
+			# <<- stores the plot in the global pdfplots list, which is save()d at the end of the script.
+			pdfplots[[paste("transitions_stacked_", name, ".pdf", sep="")]] <<- p
+			
+			png(filename=paste("transitions_heatmap_", name, ".png", sep=""))
+			p = ggplot(transition2, aes(factor(reorder(variable, -order.y)), factor(reorder(id, -order.x)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap
+			p = p + xlab("To base") + ylab("From Base") + ggtitle("Heatmap transition information")  + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
+			print(p)
+			dev.off()
+			
+			pdfplots[[paste("transitions_heatmap_", name, ".pdf", sep="")]] <<- p
+		} else {
+			#print("No data to plot")
+		}
+	}
+	# These files are written even when no sequence matched (transition table of zeros, value 0, n 0).
+	#print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep=""))
+	write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
+	write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", name , "_", fname, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
+	cat(matrx[1,x], file=paste(name, "_", fname, "_value.txt" ,sep=""))
+	cat(nrow(tmp), file=paste(name, "_", fname, "_n.txt" ,sep=""))
+	#print(paste(fname, name, nrow(tmp)))
+	matrx
+}
+nts = c("a", "c", "g", "t")
+zeros=rep(0, 4)
+funcs = c(median, sum, mean)
+fnames = c("median", "sum", "mean")
+# nts and zeros are read as globals inside calculate_result; funcs/fnames pair each summary function with the name used in output file names.
+print("Creating result tables")
+# One mutations_<fname>.txt per summary function: three columns per gene followed by three columns for all matched sequences.
+for(func_idx in seq_along(funcs)){
+	func = funcs[[func_idx]]
+	fname = fnames[[func_idx]]
+	
+	print(paste("Creating table for", fname))
+	# The "sum" table carries two extra rows ("nt in FR", "nt in CDR").
+	rows = 9
+	if(fname == "sum"){
+		rows = 11
+	}
+	matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows)
+	for(gene_idx in seq_along(genes)){ # own index (no shadowing of the outer loop); seq_along skips the loop when genes is empty
+		matrx = calculate_result(gene_idx, genes[gene_idx], dat, matrx, func, fname, genes[gene_idx])
+	}
+	matrx = calculate_result(length(genes) + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all")
+
+	result = data.frame(matrx)
+	if(fname == "sum"){
+		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR")
+	} else {
+		row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)")
+	}
+	write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F)
+}
+
+print("Adding median number of mutations to sum table")
+sum.table <- read.table("mutations_sum.txt", sep = ",", header = FALSE)
+median.table <- read.table("mutations_median.txt", sep = ",", header = FALSE)
+
+# Row 1 of the sum table, then row 1 of the median table, then the remaining ten sum rows.
+new.table <- sum.table[1, ]
+new.table[2, ] <- median.table[1, ]
+new.table[3:12, ] <- sum.table[2:11, ]
+new.table[[1]] <- as.character(new.table[[1]])
+new.table[2, 1] <- "Median of Number of Mutations (%)"
+
+# The combined table replaces mutations_sum.txt.
+write.table(x = new.table, file = "mutations_sum.txt", sep = ",", quote = FALSE, row.names = FALSE, col.names = FALSE)
+
+print("Plotting IGA piechart")
+
+dat <- dat[!grepl("^unmatched", dat$best_match), ]
+
+# From here on dat only holds sequences whose best_match does not start with "unmatched".
+
+genesForPlot <- dat$best_match[grepl("IGA", dat$best_match)]
+
+if (length(genesForPlot) > 0) {
+	genesForPlot <- data.frame(table(genesForPlot))
+	colnames(genesForPlot) <- c("Gene", "Freq")
+	genesForPlot$label <- paste(genesForPlot$Gene, "-", genesForPlot$Freq)
+
+	pc <- ggplot(genesForPlot, aes(x = factor(1), y = Freq, fill = Gene)) +
+		geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels = genesForPlot$label, values = c("IGA1" = "lightblue1", "IGA2" = "blue4")) +
+		coord_polar(theta = "y") + scale_y_continuous(breaks = NULL) +
+		theme(panel.background = element_rect(fill = "white", colour = "black"), text = element_text(size = 16, colour = "black"), axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) +
+		xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclass distribution", "( n =", sum(genesForPlot$Freq), ")"))
+	write.table(genesForPlot, "IGA_pie.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
+
+	png(filename = "IGA.png")
+	print(pc)
+	dev.off()
+
+	pdfplots[["IGA.pdf"]] <- pc
+}
+
+print("Plotting IGG piechart")
+
+genesForPlot <- dat$best_match[grepl("IGG", dat$best_match)]
+
+if (length(genesForPlot) > 0) {
+	genesForPlot <- data.frame(table(genesForPlot))
+	colnames(genesForPlot) <- c("Gene", "Freq")
+	genesForPlot$label <- paste(genesForPlot$Gene, "-", genesForPlot$Freq)
+
+	pc <- ggplot(genesForPlot, aes(x = factor(1), y = Freq, fill = Gene)) +
+		geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels = genesForPlot$label, values = c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred")) +
+		coord_polar(theta = "y") + scale_y_continuous(breaks = NULL) +
+		theme(panel.background = element_rect(fill = "white", colour = "black"), text = element_text(size = 16, colour = "black"), axis.title = element_blank(), axis.text = element_blank(), axis.ticks = element_blank()) +
+		xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclass distribution", "( n =", sum(genesForPlot$Freq), ")"))
+	write.table(genesForPlot, "IGG_pie.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
+
+	png(filename = "IGG.png")
+	print(pc)
+	dev.off()
+
+	pdfplots[["IGG.pdf"]] <- pc
+}
+
+print("Plotting scatterplot")
+
+dat$percentage_mutations <- round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2)
+dat.clss <- dat
+# The copy keeps only the first three characters of best_match, so every sequence is plotted twice: under its full best_match and under that prefix.
+dat.clss$best_match <- substr(dat.clss$best_match, 0, 3)
+
+dat.clss <- rbind(dat, dat.clss)
+
+p <- ggplot(dat.clss, aes(best_match, percentage_mutations)) +
+	geom_point(aes(colour = best_match), position = "jitter") + geom_boxplot(aes(middle = mean(percentage_mutations)), alpha = 0.1, outlier.shape = NA) +
+	xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour = "black"), text = element_text(size = 16, colour = "black")) +
+	scale_fill_manual(values = c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) +
+	scale_colour_manual(guide = guide_legend(title = "Subclass"), values = c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
+
+png(filename = "scatter.png")
+print(p)
+dev.off()
+
+pdfplots[["scatter.pdf"]] <- p
+# The table behind the plot is written from dat, i.e. one row per sequence.
+write.table(dat[, c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
+
+print("Plotting frequency ranges plot")
+# Bin every sequence by its mutation percentage and report, per class (first three characters of best_match), the share of sequences in each bin.
+dat$best_match_class = substr(dat$best_match, 0, 3)
+freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20")
+dat$frequency_bins = cut(dat$percentage_mutations, breaks=c(-Inf, 0, 2,5,10,15,20, Inf), labels=freq_labels)
+
+frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match_class")])
+
+frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match_class", "frequency_bins")])
+
+frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class")
+
+frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
+
+p = ggplot(frequency_bins_data, aes(frequency_bins, frequency))
+p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"))
+p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4"))
+
+png(filename="frequency_ranges.png")
+print(p)
+dev.off()
+
+pdfplots[["frequency_ranges.pdf"]] <- p
+# All plots collected so far are saved for the separate PDF-rendering script.
+save(pdfplots, file="pdfplots.RData")
+
+frequency_bins_data_by_class = frequency_bins_data
+
+frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),]
+# Relabel the bins for the text output. %in% is used instead of == so that an NA bin (NaN percentage) cannot produce an NA row index, which data-frame assignment rejects.
+frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins)
+frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins %in% "20", c("frequency_bins")] = "20 or higher"
+frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins %in% "0", c("frequency_bins")] = "0 or lower"
+
+write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T)
+# Same table again, but grouped by the full best_match value instead of the three-character class.
+frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")])
+
+frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")])
+
+frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match")
+
+frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2)
+
+frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),]
+frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins)
+frequency_bins_data[frequency_bins_data$frequency_bins %in% "20", c("frequency_bins")] = "20 or higher"
+frequency_bins_data[frequency_bins_data$frequency_bins %in% "0", c("frequency_bins")] = "0 or lower"
+
+write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_csr.xml	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,240 @@
+<tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.0">
+	<description></description>
+	<requirements>
+		<requirement type="package" version="2.7">python</requirement>
+		<requirement type="package" version="1.16.0">numpy</requirement>
+		<requirement type="package" version="1.2.0">xlrd</requirement>
+		<requirement type="package" version="3.0.0">r-ggplot2</requirement>
+		<requirement type="package" version="1.4.3">r-reshape2</requirement>
+		<requirement type="package" version="0.5.0">r-scales</requirement>
+		<requirement type="package" version="3.4_5">r-seqinr</requirement>
+		<requirement type="package" version="1.11.4">r-data.table</requirement>
+	</requirements>
+	<command interpreter="bash">
+		#if str ( $filter_unique.filter_unique_select ) == "remove":
+			wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select $filter_unique.filter_unique_clone_count $class_filter_cond.class_filter $empty_region_filter $fast
+		#else:
+			wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select 2 $class_filter_cond.class_filter $empty_region_filter $fast
+		#end if
+	</command>
+	<inputs>
+		<param name="in_file" type="data" format="data" label="IMGT zip file to be analysed" />
+		<param name="empty_region_filter" type="select" label="Sequence starts at" help="" > <!-- single-value select: exactly one option may be marked selected; "leader" was the first of the two previously marked -->
+			<option value="leader" selected="true">Leader: include FR1, CDR1, FR2, CDR2, FR3 in filters</option>
+			<option value="FR1">FR1: include CDR1,FR2,CDR2,FR3 in filters</option>
+			<option value="CDR1">CDR1: include FR2,CDR2,FR3 in filters</option>
+			<option value="FR2">FR2: include CDR2,FR3 in filters</option>
+		</param>
+		<param name="functionality" type="select" label="Functionality filter" help="" >
+			<option value="productive" selected="true">Productive (Productive and Productive see comment)</option>
+			<option value="unproductive">Unproductive (Unproductive and Unproductive see comment)</option>
+			<option value="remove_unknown">Productive and Unproductive (Productive, Productive see comment, Unproductive, Unproductive and Unproductive see comment)</option>
+		</param>
+		<conditional name="filter_unique">
+			<param name="filter_unique_select" type="select" label="Filter unique sequences" help="See below for an example.">
+				<option value="remove" selected="true">Remove uniques (Based on nucleotide sequence + C)</option>
+				<option value="remove_vjaa">Remove uniques (Based on V+J+CDR3 (AA))</option>
+				<option value="keep">Keep uniques (Based on nucleotide sequence + C)</option>
+				<option value="no">No</option>
+			</param>
+			<when value="remove">
+				<param name="filter_unique_clone_count" size="4" type="integer" label="How many sequences should be in a group to keep 1 of them" value="2" min="2"/>
+			</when>
+			<when value="keep"></when>
+			<when value="no"></when>
+		</conditional>
+		<param name="unique" type="select" label="Remove duplicates based on" help="" >
+			<option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
+			<option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option>
+			<option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option>
+			<option value="CDR3.IMGT.AA">CDR3 (AA)</option>
+			
+			<option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3 (nt), C region</option>
+			<option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option>
+			<option value="CDR3.IMGT.seq,best_match_class">CDR3 (nt), C region</option>
+			<option value="CDR3.IMGT.seq">CDR3 (nt)</option>
+			<option value="Sequence.ID" selected="true">Don't remove duplicates</option>
+		</param>
+		<conditional name="class_filter_cond">
+			<param name="class_filter" type="select" label="Human Class/Subclass filter" help="" >
+				<option value="70_70" selected="true">>70% class and >70% subclass</option>
+				<option value="60_55">>60% class and >55% subclass</option>
+				<option value="70_0">>70% class</option>
+				<option value="60_0">>60% class</option>
+				<option value="19_0">>19% class</option>
+				<option value="101_101">Do not assign (sub)class</option>
+			</param>
+			<when value="70_70"></when>
+			<when value="60_55"></when>
+			<when value="70_0"></when>
+			<when value="60_0"></when>
+			<when value="19_0"></when>
+			<when value="101_101"></when>
+		</conditional>
+		<conditional name="naive_output_cond">
+			<param name="naive_output" type="select" label="Output new IMGT archives per class into your history?">
+				<option value="yes">Yes</option>
+				<option value="no" selected="true">No</option>
+			</param>
+			<when value="yes"></when>
+			<when value="no"></when>
+		</conditional>
+		<param name="fast" type="select" label="Fast" help="Skips generating the new ZIP files and Change-O/Baseline" >
+			<option value="yes">Yes</option>
+			<option value="no" selected="true">No</option>
+		</param>
+	</inputs>
+	<outputs>
+		<data format="html" name="out_file" label = "SHM &amp; CSR on ${in_file.name}"/>
+		<data format="imgt_archive" name="naive_output_ca" label = "Filtered IMGT IGA: ${in_file.name}" >
+		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
+		    <filter>class_filter_cond['class_filter'] != "101_101"</filter>
+		</data>
+		<data format="imgt_archive" name="naive_output_cg" label = "Filtered IMGT IGG: ${in_file.name}" >
+		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
+		    <filter>class_filter_cond['class_filter'] != "101_101"</filter>
+		</data>
+		<data format="imgt_archive" name="naive_output_cm" label = "Filtered IMGT IGM: ${in_file.name}" >
+		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
+		    <filter>class_filter_cond['class_filter'] != "101_101"</filter>
+		</data>
+		<data format="imgt_archive" name="naive_output_ce" label = "Filtered IMGT IGE: ${in_file.name}" >
+		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
+		    <filter>class_filter_cond['class_filter'] != "101_101"</filter>
+		</data>
+		<data format="imgt_archive" name="naive_output_all" label = "Filtered IMGT all: ${in_file.name}" >
+		    <filter>naive_output_cond['naive_output'] == "yes"</filter>
+		    <filter>class_filter_cond['class_filter'] == "101_101"</filter>
+		</data>
+	</outputs>
+	<tests>
+		<test>
+			<param name="fast" value="yes"/>
+			<output name="out_file" file="test1.html"/>
+		</test>
+	</tests>
+	<help>
+<![CDATA[
+**References**
+
+Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying selection in high-throughput Immunoglobulin sequencing data sets. In *Nucleic Acids Research, 40 (17), pp. e134–e134.* [`doi:10.1093/nar/gks457`_]
+
+.. _doi:10.1093/nar/gks457: http://dx.doi.org/10.1093/nar/gks457
+
+Gupta, Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria, Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. *In Bioinformatics, 31 (20), pp. 3356–3358.* [`doi:10.1093/bioinformatics/btv359`_]
+
+.. _doi:10.1093/bioinformatics/btv359: http://dx.doi.org/10.1093/bioinformatics/btv359
+
+-----
+
+**Input files**
+
+IMGT/HighV-QUEST .zip and .txz are accepted as input files. The file to be analysed can be selected using the dropdown menu.
+
+.. class:: infomark
+
+Note: Files can be uploaded by using “get data” and “upload file” and selecting “IMGT archive” as a file type. Special characters should be avoided in the file names of the uploaded samples as these can give errors when running the immune repertoire pipeline. Underscores are allowed in the file names.
+
+-----
+
+**Sequence starts at**
+
+Identifies the region which will be included in the analysis (analysed region)
+
+- Sequences which are missing a gene region (FR1/CDR1 etc) in the analysed region are excluded. 
+- Sequences containing an ambiguous base in the analysed region or the CDR3 are excluded. 
+- All other filtering/analysis is based on the analysed region.
+
+-----
+
+**Functionality filter**
+
+Allows filtering on productive rearrangements, unproductive rearrangements or both based on the assignment provided by IMGT. 
+
+**Filter unique sequences**
+
+*Remove unique:*
+
+
+This filter consists of two different steps.
+
+Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” and the CDR3 (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
+
+Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region, the CDR3 and the same (sub)class).
+
+.. class:: infomark
+
+This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
+
+*Keep unique:*
+
+Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
+
+Example of the sequences that are included using either the “remove unique filter” or the “keep unique filter”:
+
++--------------------------+
+|       unique filter      |
++--------+--------+--------+
+| values | remove | keep   |
++--------+--------+--------+
+|   A    |   A    |   A    |
++--------+--------+--------+
+|   A    |   B    |   B    |
++--------+--------+--------+
+|   B    |   D    |   C    |
++--------+--------+--------+
+|   B    |        |   D    |
++--------+--------+--------+
+|   C    |        |        |
++--------+--------+--------+
+|   D    |        |        |
++--------+--------+--------+
+|   D    |        |        |
++--------+--------+--------+
+
+-----
+ 
+**Remove duplicates based on**
+
+Allows the selection of a single sequence per clone. Different definitions of a clone can be chosen. 
+
+.. class:: infomark
+
+Note: The first sequence (in the data set) of each clone is always included in the analysis. When the first sequence is unmatched (no subclass assigned) the first matched sequence will be included. This means that altering the data order (by for instance sorting) can change the sequence which is included in the analysis and therefore slightly influence the results.
+
+-----
+
+**Human Class/Subclass filter**
+
+.. class:: warningmark
+
+Note: This filter should only be applied when analysing human IGH data in which a (sub)class specific sequence is present. Otherwise please select the do not assign (sub)class option to prevent errors when running the pipeline. 
+
+The class percentage is based on the ‘chunk hit percentage’ (see below). The subclass percentage is based on the ‘nt hit percentage’ (see below).
+
+The SHM & CSR pipeline identifies human Cµ, Cα, Cγ and Cε constant genes by dividing the reference sequences for the subclasses (NG_001019) in 8 nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are then individually aligned in the right order to each input sequence. This alignment is used to calculate the chunk hit percentage and the nt hit percentage.
+
+*Chunk hit percentage*: The percentage of the chunks that is aligned 
+
+*Nt hit percentage*: The percentage of chunks covering the subclass specific nucleotide match with the different subclasses. The most stringent filter for the subclass is 70% ‘nt hit percentage’ which means that 5 out of 7 subclass specific nucleotides for Cα or 6 out of 8 subclass specific nucleotides of Cγ should match with the specific subclass.
+The option “>25% class” can be chosen when you are only interested in the class (Cα/Cγ/Cµ/Cε) of your sequences and the length of your sequence is not long enough to assign the subclasses.
+
+-----
+
+**Output new IMGT archives per class into your history?**
+
+If yes is selected, additional output files (one for each class) will be added to the history which contain information of the sequences that passed the selected filtering criteria. These files are in the same format as the IMGT/HighV-QUEST output files and therefore are also compatible with many other analysis programs, such as the Immune repertoire pipeline.  
+
+-----
+
+**Execute**
+
+Upon pressing execute a new analysis is added to your history (right side of the page). Initially this analysis will be grey; once the analysis has started, its colour in the history will change to yellow. When the analysis is finished it will turn green in the history. Now the analysis can be opened by clicking on the eye icon on the analysis of interest. When an analysis turns red an error has occurred when running the analysis. If you click on the analysis title additional information can be found on the analysis. In addition a bug icon appears. Here more information on the error can be found.
+
+]]>
+	</help>
+	<citations>
+		<citation type="doi">10.1093/nar/gks457</citation>
+		<citation type="doi">10.1093/bioinformatics/btv359</citation>
+	</citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_downloads.htm	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,538 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+p.MsoNoSpacing, li.MsoNoSpacing, div.MsoNoSpacing
+	{margin:0in;
+	margin-bottom:.0001pt;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Info</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The complete
+dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Allows downloading of the complete parsed data set.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The filtered
+dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Allows downloading of all parsed IMGT information of all transcripts that
+passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The alignment
+info on the unmatched sequences:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Provides information of the subclass
+alignment of all unmatched sequences. For each sequence the chunk hit
+percentage and the nt hit percentage is shown together with the best matched
+subclass.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Overview</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The SHM Overview
+table as a dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Allows downloading of the SHM Overview
+table as a data set.  </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Motif data per
+sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'> Provides a file that contains information for each
+transcript on the number of mutations present in WA/TW and RGYW/WRCY motifs.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Mutation data
+per sequence ID: </span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'>Provides a file containing information
+on the number of sequenced bases, the number and location of mutations and the
+type of mutations found in each transcript. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Base count for
+every sequence:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'> links to a page showing for each transcript the
+sequence of the analysed region (as dependent on the sequence starts at filter),
+the assigned subclass and the number of sequenced A, C, G and T&rsquo;s.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the percentage of mutations in AID and pol eta motives plot:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Provides a file containing the values used to generate the percentage of
+mutations in AID and pol eta motives plot in the SHM overview tab.</span></p>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
+data used to generate the relative mutation patterns plot:</span></u><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Provides a download with the data used to generate the relative mutation
+patterns plot in the SHM overview tab.</span></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
+data used to generate the absolute mutation patterns plot:</span></u><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Provides a download with the data used to generate the absolute mutation
+patterns plot in the SHM overview tab. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Frequency</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the frequency scatter plot:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows
+downloading the data used to generate the frequency scatter plot in the SHM
+frequency tab. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the frequency by class plot:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows
+downloading the data used to generate frequency by class plot included in the
+SHM frequency tab.           </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for
+frequency by subclass:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Provides information of the number and
+percentage of sequences that have 0%, 0-2%, 2-5%, 5-10%, 10-15%, 15-20%,
+&gt;20% SHM. Information is provided for each subclass.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Transition
+Tables</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'all' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGA' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGA sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGA1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGA1 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGA2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGA2 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG1 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG2 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG3' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG3 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGG4' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGG4 sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGM' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the information used to
+generate the transition table for all IGM sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+'IGE' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Contains the
+information used to generate the transition table for all IGE sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Antigen
+selection</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>AA mutation data
+per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'> Provides for each transcript information on whether
+there is a replacement mutation at each amino acid location (as defined by IMGT).
+For all amino acids outside of the analysed region the value 0 is given.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Presence of AA
+per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'> Provides for each transcript information on which
+amino acid location (as defined by IMGT) is present. </span><span lang=NL
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>0 is absent, 1
+is present. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the
+data used to generate the aa mutation frequency plot for all sequences in the
+antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot for IGA:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>  Provides the
+data used to generate the aa mutation frequency plot for all IGA sequences in
+the antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot for IGG:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the
+data used to generate the aa mutation frequency plot for all IGG sequences in
+the antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot for IGM:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the
+data used to generate the aa mutation frequency plot for all IGM sequences in
+the antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
+generate the aa mutation frequency plot for IGE:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>   Provides the
+data used to generate the aa mutation frequency plot for all IGE sequences in
+the antigen selection tab.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline PDF (</span></u><span
+lang=EN-GB><a href="http://selection.med.yale.edu/baseline/"><span
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>http://selection.med.yale.edu/baseline/</span></a></span><u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>):</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF
+containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline data:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculations of antigen selection as
+performed by BASELINe are shown for each individual sequence and the sum of all
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA
+PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGA
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA
+data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculations of antigen selection as
+performed by BASELINe are shown for each individual IGA sequence and the sum of
+all IGA sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG
+PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGG
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG
+data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculations of antigen selection as
+performed by BASELINe are shown for each individual IGG sequence and the sum of
+all IGG sequences.        </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM PDF:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF
+containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGM
+sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM
+data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculations of antigen selection as
+performed by BASELINe are shown for each individual IGM sequence and the sum of
+all IGM sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE
+PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
+"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGE
+sequences.</span><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE
+data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Table output of the BASELINe analysis. Calculations of antigen selection as
+performed by BASELINe are shown for each individual IGE sequence and the sum of
+all IGE sequences.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>CSR</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGA
+subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> </span><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Data used for
+the generation of the </span><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'>IGA subclass distribution plot provided
+in the CSR tab. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
+</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGG
+subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Data used for the generation of the </span><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGG
+subclass distribution plot provided in the CSR tab. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=NL
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Clonal relation</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Sequence overlap
+between subclasses:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Link to the overlap table as provided
+under the clonality overlap tab.         </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones and subclass annotation:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
+Downloads a table with the calculation of clonal relation between all
+sequences. For each individual transcript the results of the clonal assignment
+as provided by Change-O are provided. Sequences with the same number in the CLONE
+column are considered clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file:</span></u><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'> Gives a summary of the total number of
+clones in all sequences and their clone size.           </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones of IGA:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the
+calculation of clonal relation between all IGA sequences. For each individual
+transcript the results of the clonal assignment as provided by Change-O are
+provided. Sequences with the same number in the CLONE column are considered
+clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file of IGA:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
+of the total number of clones in all IGA sequences and their clone size.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones of IGG:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the
+calculation of clonal relation between all IGG sequences. For each individual
+transcript the results of the clonal assignment as provided by Change-O are
+provided. Sequences with the same number in the CLONE column are considered
+clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file of IGG:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
+of the total number of clones in all IGG sequences and their clone size.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones of IGM:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Downloads a table
+with the calculation of clonal relation between all IGM sequences. For each
+individual transcript the results of the clonal assignment as provided by
+Change-O are provided. Sequences with the same number in the CLONE column are
+considered clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file of IGM:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
+of the total number of clones in all IGM sequences and their clone size.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+file with defined clones of IGE:</span></u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the
+calculation of clonal relation between all IGE sequences. For each individual
+transcript the results of the clonal assignment as provided by Change-O are
+provided. Sequences with the same number in the CLONE column are considered
+clonally related. </span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
+defined clones summary file of IGE:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
+of the total number of clones in all IGE sequences and their clone size.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Filtered IMGT
+output files</span></b></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGA sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGA
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGA1 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGA1
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGA2 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
+file with the same format as downloaded IMGT files that contains all IGA2
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
+file with the same format as downloaded IMGT files that contains all IGG
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG1 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGG1
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG2 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGG2
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG3 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
+file with the same format as downloaded IMGT files that contains all IGG3
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGG4 sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGG4
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGM sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
+file with the same format as downloaded IMGT files that contains all IGM
+sequences that have passed the chosen filter settings.</span></p>
+
+<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
+with just the matched and filtered IGE sequences:</span></u><span lang=EN-GB
+style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
+.txz file with the same format as downloaded IMGT files that contains all IGE
+sequences that have passed the chosen filter settings.</span></p>
+
+</div>
+
+</body>
+
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_first.htm	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,127 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='margin-bottom:0in;margin-bottom:.0001pt;
+text-align:justify;line-height:normal'><span lang=EN-GB style='font-size:12.0pt;
+font-family:"Times New Roman","serif"'>Table showing the order of each
+filtering step and the number and percentage of sequences after each filtering
+step. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt;
+text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'>Input:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The
+number of sequences in the original IMGT file. This is always 100% of the
+sequences.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt;
+text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'>After &quot;no results&quot; filter: </span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IMGT
+classifies sequences either as &quot;productive&quot;, &quot;unproductive&quot;, &quot;unknown&quot;, or &quot;no
+results&quot;. Here, the number and percentages of sequences that are not classified
+as &quot;no results&quot; are reported.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt;
+text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size:
+12.0pt;font-family:"Times New Roman","serif"'>After functionality filter:</span></u><span
+lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The
+number and percentages of sequences that have passed the functionality filter. The
+filtering performed is dependent on the settings of the functionality filter.
+Details on the functionality filter <a name="OLE_LINK12"></a><a
+name="OLE_LINK11"></a><a name="OLE_LINK10">can be found on the start page of
+the SHM&amp;CSR pipeline</a>.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
+removal sequences that are missing a gene region:</span></u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+In this step all sequences that are missing a gene region (FR1, CDR1, FR2,
+CDR2, FR3) that should be present are removed from analysis. The sequence
+regions that should be present are dependent on the settings of the sequence
+starts at filter. <a name="OLE_LINK9"></a><a name="OLE_LINK8">The number and
+percentage of sequences that pass this filter step are reported.</a> </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
+N filter:</span></u><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> In this step all sequences that contain
+an ambiguous base (n) in the analysed region or the CDR3 are removed from the
+analysis. The analysed region is determined by the setting of the sequence
+starts at filter. The number and percentage of sequences that pass this filter
+step are reported.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
+filter unique sequences</span></u><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>: The number and
+percentage of sequences that pass the &quot;filter unique sequences&quot; filter. Details
+on this filter </span><span lang=EN-GB style='font-size:12.0pt;line-height:
+115%;font-family:"Times New Roman","serif"'>can be found on the start page of
+the SHM&amp;CSR pipeline.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
+remove duplicate based on filter:</span></u><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> The number and
+percentage of sequences that passed the remove duplicate filter. Details on the
+&quot;remove duplicate filter based on filter&quot; can be found on the start page of the
+SHM&amp;CSR pipeline.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK17"></a><a
+name="OLE_LINK16"><u><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Number of matched sequences:</span></u></a><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+The number and percentage of sequences that passed all the filters described
+above and have a (sub)class assigned.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number
+of unmatched sequences</span></u><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>: The number and percentage
+of sequences that passed all the filters described above and do not have a
+(sub)class assigned.</span></p>
+
+<p class=MsoNormal><span lang=EN-GB>&nbsp;</span></p>
+
+</div>
+
+</body>
+
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_frequency.htm	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,87 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><b><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>SHM
+frequency tab</span></u></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
+graphs give insight into the level of SHM. The data represented in these graphs
+can be downloaded in the download tab. <a name="OLE_LINK24"></a><a
+name="OLE_LINK23"></a><a name="OLE_LINK90"></a><a name="OLE_LINK89">More
+information on the values found in healthy individuals of different ages can be
+found in IJspeert and van Schouwenburg et al, PMID: 27799928. </a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Frequency
+scatter plot</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
+dot plot showing the percentage of SHM in each transcript divided into the
+different (sub)classes. </span><span lang=NL style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>In the graph each dot
+represents an individual transcript.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation
+frequency by class</span></u></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
+bar graph showing the percentage of transcripts that contain 0%, 0-2%, 2-5%,
+5-10%, 10-15%, 15-20% or more than 20% SHM for each subclass. </span></p>
+
+<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
+Adults. In <i>Frontiers in Immunology, 7, pp. e410-410. </i>[<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>Link</span></a>]</span></p>
+
+</div>
+
+</body>
+
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_overview.htm	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,332 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Info
+table</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>This
+table contains information on different characteristics of SHM. For all
+characteristics information can be found for all sequences or only sequences of
+a certain (sub)class. All results are based on the sequences that passed the filter
+settings chosen on the start page of the SHM &amp; CSR pipeline and only
+include details on the analysed region as determined by the setting of the
+sequence starts at filter. All data in this table can be downloaded via the
+&quot;downloads&quot; tab.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation
+frequency:</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK83"></a><a
+name="OLE_LINK82"></a><a name="OLE_LINK81"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values
+give information on the level of SHM. </span></a><a name="OLE_LINK22"></a><a
+name="OLE_LINK21"></a><a name="OLE_LINK20"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information
+on the values found in healthy individuals of different ages can be found in </span></a><a
+name="OLE_LINK15"></a><a name="OLE_LINK14"></a><a name="OLE_LINK13"><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IJspeert
+and van Schouwenburg et al, PMID: 27799928.</span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number
+of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:
+115%;font-family:"Times New Roman","serif"'> Shows the number of total
+mutations / the number of sequenced bases (the % of mutated bases).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Median
+number of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'> Shows the median % of
+SHM of all sequences.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Patterns
+of SHM:</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK72"></a><a
+name="OLE_LINK71"></a><a name="OLE_LINK70"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values
+give insights into the targeting and patterns of SHM. These values can give
+insight into the repair pathways used to repair the U:G mismatches introduced
+by AID. </span></a><a name="OLE_LINK40"></a><a name="OLE_LINK39"></a><a
+name="OLE_LINK38"></a><a name="OLE_LINK60"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information
+on the values found in healthy individuals of different ages can be found in
+IJspeert and van Schouwenburg et al, PMID: 27799928.</span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Shows the number of transition mutations / the number of total mutations (the
+percentage of mutations that are transitions). Transition mutations are C&gt;T,
+T&gt;C, A&gt;G, G&gt;A. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transversions:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Shows the number of transversion mutations / the number of total mutations (the
+percentage of mutations that are transversions). Transversion mutations are
+C&gt;A, C&gt;G, T&gt;A, T&gt;G, A&gt;T, A&gt;C, G&gt;T, G&gt;C.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions
+at GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> <a name="OLE_LINK2"></a><a
+name="OLE_LINK1">Shows the number of transitions at GC locations (C&gt;T,
+G&gt;A) / the total number of mutations at GC locations (the percentage of
+mutations at GC locations that are transitions).</a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting
+of GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> <a name="OLE_LINK7"></a><a
+name="OLE_LINK6"></a><a name="OLE_LINK3">Shows the number of mutations at GC
+locations / the total number of mutations (the percentage of total mutations
+that are at GC locations).</a> </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions
+at AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> Shows the number of transitions at AT
+locations (T&gt;C, A&gt;G) / the total number of mutations at AT locations (the
+percentage of mutations at AT locations that are transitions).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting
+of AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> Shows the number of mutations at AT
+locations / the total number of mutations (the percentage of total mutations
+that are at AT locations).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>RGYW:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+<a name="OLE_LINK28"></a><a name="OLE_LINK27"></a><a name="OLE_LINK26">Shows
+the number of mutations that are in a RGYW motive / The number of total mutations
+(the percentage of mutations that are in a RGYW motive). </a><a
+name="OLE_LINK62"></a><a name="OLE_LINK61">RGYW motives are known to be
+preferentially targeted by AID </a></span><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine,
+Y=pyrimidine, W = A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WRCY:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+<a name="OLE_LINK34"></a><a name="OLE_LINK33">Shows the number of mutations
+that are in a </a><a name="OLE_LINK32"></a><a name="OLE_LINK31"></a><a
+name="OLE_LINK30"></a><a name="OLE_LINK29">WRCY</a> motive / The number of
+total mutations (the percentage of mutations that are in a WRCY motive). WRCY
+motives are known to be preferentially targeted by AID </span><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine,
+Y=pyrimidine, W = A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WA:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+<a name="OLE_LINK37"></a><a name="OLE_LINK36"></a><a name="OLE_LINK35">Shows
+the number of mutations that are in a WA motive / The number of total mutations
+(the percentage of mutations that are in a WA motive). It is described that
+polymerase eta preferentially makes errors at WA motives </a></span><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W
+= A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>TW:</span></i><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+Shows the number of mutations that are in a TW motive / The number of total mutations
+(the percentage of mutations that are in a TW motive). It is described that
+polymerase eta preferentially makes errors at TW motives </span><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W
+= A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen
+selection:</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
+values give insight into antigen selection. It has been described that during
+antigen selection, there is selection against replacement mutations in the FR
+regions as these can cause instability of the B-cell receptor. In contrast,
+replacement mutations in the CDR regions are important for changing the
+affinity of the B-cell receptor and therefore there is selection for this type
+of mutation. Silent mutations do not alter the amino acid sequence and
+therefore do not play a role in selection. More information on the values found
+in healthy individuals of different ages can be found in IJspeert and van
+Schouwenburg et al, PMID: 27799928.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>FR
+R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> <a name="OLE_LINK43"></a><a
+name="OLE_LINK42"></a><a name="OLE_LINK41">Shows the number of replacement
+mutations in the FR regions / The number of silent mutations in the FR regions
+(the number of replacement mutations in the FR regions divided by the number of
+silent mutations in the FR regions)</a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>CDR
+R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> Shows the number of replacement
+mutations in the CDR regions / The number of silent mutations in the CDR
+regions (the number of replacement mutations in the CDR regions divided by the
+number of silent mutations in the CDR regions)</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number
+of sequenced nucleotides:</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
+values give information on the number of sequenced nucleotides.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt
+in FR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> <a name="OLE_LINK46"></a><a
+name="OLE_LINK45"></a><a name="OLE_LINK44">Shows the number of sequenced bases
+that are located in the FR regions / The total number of sequenced bases (the
+percentage of sequenced bases that are present in the FR regions).</a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt
+in CDR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'> Shows the number of sequenced bases
+that are located in the CDR regions / <a name="OLE_LINK48"></a><a
+name="OLE_LINK47">The total number of sequenced bases (the percentage of
+sequenced bases that are present in the CDR regions).</a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A:
+</span></i><a name="OLE_LINK51"></a><a name="OLE_LINK50"></a><a
+name="OLE_LINK49"><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Shows the total number of sequenced
+adenines / The total number of sequenced bases (the percentage of sequenced
+bases that were adenines).</span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>C:
+</span></i><a name="OLE_LINK53"></a><a name="OLE_LINK52"><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows
+the total number of sequenced cytosines / The total number of sequenced bases
+(the percentage of sequenced bases that were cytosines).</span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>T:
+</span></i><a name="OLE_LINK57"></a><a name="OLE_LINK56"><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows
+the total number of sequenced </span></a><a name="OLE_LINK55"></a><a
+name="OLE_LINK54"><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>thymines</span></a><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
+/ The total number of sequenced bases (the percentage of sequenced bases that
+were thymines).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>G:
+</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Shows the total number of sequenced <a
+name="OLE_LINK59"></a><a name="OLE_LINK58">guanine</a>s / The total number of
+sequenced bases (the percentage of sequenced bases that were guanines).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK69"><b><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK75"></a><a
+name="OLE_LINK74"></a><a name="OLE_LINK73"><span lang=EN-GB style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs visualize
+information on the patterns and targeting of SHM and thereby give insight
+into the repair pathways used to repair the U:G mismatches introduced by AID. The
+data represented in these graphs can be downloaded in the download tab. More
+information on the values found in healthy individuals of different ages can be
+found in IJspeert and van Schouwenburg et al, PMID: 27799928</span></a><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>.
+<a name="OLE_LINK85"></a><a name="OLE_LINK84"></a></span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Percentage
+of mutations in AID and pol eta motives</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes
+<a name="OLE_LINK80"></a><a name="OLE_LINK79"></a><a name="OLE_LINK78">for each
+(sub)class </a>the percentage of mutations that are present in AID (RGYW or
+WRCY) or polymerase eta motives (WA or TW) in the different subclasses </span><span
+lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine,
+Y=pyrimidine, W = A or T).</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Relative
+mutation patterns</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes
+for each (sub)class the distribution of mutations between mutations at AT
+locations and transitions or transversions at GC locations. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Absolute
+mutation patterns</span></u></p>
+
+<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes
+for each (sub)class the percentage of sequenced AT and GC bases that are
+mutated. The mutations at GC bases are divided into transition and transversion
+mutations<a name="OLE_LINK77"></a><a name="OLE_LINK76">. </a></span></p>
+
+<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
+Adults. In <i>Frontiers in Immunology, 7, pp. e410-410. </i>[<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>Link</span></a>]</span></p>
+
+</div>
+
+</body>
+
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_selection.htm	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,128 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+@font-face
+	{font-family:UICTFontTextStyleBody;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+span.apple-converted-space
+	{mso-style-name:apple-converted-space;}
+.MsoChpDefault
+	{font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>References</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
+color:black'>Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying
+selection in high-throughput Immunoglobulin sequencing data sets. In<span
+class=apple-converted-space>&nbsp;</span><em>Nucleic Acids Research, 40 (17),
+pp. e134–e134.</em><span class=apple-converted-space><i>&nbsp;</i></span>[</span><span
+lang=EN-GB><a href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
+color:#303030'>doi:10.1093/nar/gks457</span></a></span><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
+color:black'>][</span><span lang=EN-GB><a
+href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
+color:#303030'>Link</span></a></span><span lang=EN-GB style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif";color:black'>]</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>AA
+mutation frequency</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>For
+each class, the frequency of replacement mutations at each amino acid position
+is shown, which is calculated by dividing the number of replacement mutations
+at a particular amino acid position by the number of sequences that have an amino
+acid at that particular position. Since the length of the CDR1 and CDR2 region
+is not the same for every VH gene, some amino acid positions are absent.
+Therefore we calculate the frequency using the number of amino acids present at
+that particular location. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen
+selection (BASELINe)</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows
+the results of the analysis of antigen selection as performed using BASELINe.
+Details on the analysis performed by BASELINe can be found in Yaari et al,
+PMID: 22641856. The settings used for the analysis are</span><span lang=EN-GB
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>:
+focused, SHM targeting model: human Tri-nucleotide, custom boundaries. The
+custom boundaries are dependent on the ‘sequence starts at filter’. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL
+style='font-family:UICTFontTextStyleBody;color:black'>Leader:
+1:26:38:55:65:104:-</span></p>
+
+<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL
+style='font-family:UICTFontTextStyleBody;color:black'>FR1: 27:27:38:55:65:104:-</span></p>
+
+<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL
+style='font-family:UICTFontTextStyleBody;color:black'>CDR1:&nbsp;27:27:38:55:65:104:-</span></p>
+
+<p class=MsoNormalCxSpLast style='line-height:normal'><span lang=NL
+style='font-family:UICTFontTextStyleBody;color:black'>FR2:&nbsp;27:27:38:55:65:104:-</span></p>
+
+<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
+Adults. In <i>Frontiers in Immunology, 7, pp. e410-410. </i>[<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>Link</span></a>]</span></p>
+
+</div>
+
+</body>
+
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/shm_transition.htm	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,120 @@
+<html>
+
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
+<meta name=Generator content="Microsoft Word 14 (filtered)">
+<style>
+<!--
+ /* Font Definitions */
+ @font-face
+	{font-family:Calibri;
+	panose-1:2 15 5 2 2 2 4 3 2 4;}
+ /* Style Definitions */
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
+	{margin-top:0in;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:11.0pt;
+	font-family:"Calibri","sans-serif";}
+a:link, span.MsoHyperlink
+	{color:blue;
+	text-decoration:underline;}
+a:visited, span.MsoHyperlinkFollowed
+	{color:purple;
+	text-decoration:underline;}
+p.msochpdefault, li.msochpdefault, div.msochpdefault
+	{mso-style-name:msochpdefault;
+	margin-right:0in;
+	margin-left:0in;
+	font-size:12.0pt;
+	font-family:"Calibri","sans-serif";}
+p.msopapdefault, li.msopapdefault, div.msopapdefault
+	{mso-style-name:msopapdefault;
+	margin-right:0in;
+	margin-bottom:10.0pt;
+	margin-left:0in;
+	line-height:115%;
+	font-size:12.0pt;
+	font-family:"Times New Roman","serif";}
+span.apple-converted-space
+	{mso-style-name:apple-converted-space;}
+.MsoChpDefault
+	{font-size:10.0pt;
+	font-family:"Calibri","sans-serif";}
+.MsoPapDefault
+	{margin-bottom:10.0pt;
+	line-height:115%;}
+@page WordSection1
+	{size:8.5in 11.0in;
+	margin:1.0in 1.0in 1.0in 1.0in;}
+div.WordSection1
+	{page:WordSection1;}
+-->
+</style>
+
+</head>
+
+<body lang=EN-US link=blue vlink=purple>
+
+<div class=WordSection1>
+
+<p class=MsoNormalCxSpFirst style='text-align:justify'><span style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs and
+tables give insight into the targeting and patterns of SHM. This can give
+insight into the DNA repair pathways used to solve the U:G mismatches
+introduced by AID. More information on the values found in healthy individuals
+of different ages can be found in IJspeert and van Schouwenburg et al, PMID:
+27799928.</span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs
+</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK93"></a><a
+name="OLE_LINK92"></a><a name="OLE_LINK91"><u><span style='font-size:12.0pt;
+line-height:115%;font-family:"Times New Roman","serif"'>Heatmap transition
+information</span></u></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK98"></a><a
+name="OLE_LINK97"><span style='font-size:12.0pt;line-height:115%;font-family:
+"Times New Roman","serif"'>Heatmaps visualizing for each subclass the frequency
+of all possible substitutions. On the x-axis the original base is shown, while
+the y-axis shows the new base. The darker the shade of blue, the more frequent
+this type of substitution is occurring.  </span></a></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bargraph
+transition information</span></u></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bar graph
+visualizing for each original base the distribution of substitutions into the other
+bases. A graph is included for each (sub)class. </span></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Tables</span></b></p>
+
+<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size:
+12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transition
+tables are shown for each (sub)class. All the original bases are listed
+horizontally, while the new bases are listed vertically. </span></p>
+
+<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
+font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
+Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
+Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
+style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
+of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
+Adults. In <i>Frontiers in Immunology, 7, pp. e410-410. </i>[<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
+href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
+style='color:windowtext'>Link</span></a>]</span></p>
+
+</div>
+
+</body>
+
+</html>
Binary file shm_csr/style.tar.gz has changed
Binary file shm_csr/subclass_definition.db.nhr has changed
Binary file shm_csr/subclass_definition.db.nin has changed
Binary file shm_csr/subclass_definition.db.nsq has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/summary_to_fasta.py	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,42 @@
+"""Write every sequence listed in an IMGT 1_Summary file to a FASTA file."""
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file")
+parser.add_argument("--fasta", help="The output fasta file")
+args = parser.parse_args()
+
+infile = args.input
+fasta = args.fasta
+
+with open(infile, 'r') as i, open(fasta, 'w') as o:
+	first = True
+	id_col = 0
+	seq_col = 0
+	no_results = 0  # rows with too few columns to be used
+	no_seqs = 0  # rows whose "Sequence" column is empty
+	passed = 0  # records written to the fasta file
+	for line in i:
+		# Strip the line terminator so the last column (header or value) is clean.
+		splt = line.rstrip("\r\n").split("\t")
+		if first:
+			# Header row: locate the two columns by name.
+			id_col = splt.index("Sequence ID")
+			seq_col = splt.index("Sequence")
+			first = False
+			continue
+		# Also skip rows too short to hold both columns instead of raising IndexError.
+		if len(splt) < 5 or len(splt) <= max(id_col, seq_col):
+			no_results += 1
+			continue
+		ID = splt[id_col]
+		seq = splt[seq_col]
+		if not len(seq) > 0:
+			no_seqs += 1
+			continue
+		o.write(">" + ID + "\n" + seq + "\n")
+		passed += 1
+	# Single-argument print() yields the same text under Python 2 and Python 3.
+	print("No results: " + str(no_results))
+	print("No sequences: " + str(no_seqs))
+	print("Written to fasta file: " + str(passed))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shm_csr/wrapper.sh	Fri Feb 19 15:08:51 2021 +0000
@@ -0,0 +1,913 @@
+#!/bin/bash
+#set -e
+dir="$(cd "$(dirname "$0")" && pwd)"
+input=$1
+method=$2
+log=$3 #becomes the main html page at the end
+outdir=$4
+output="$outdir/index.html" #copied to $log location at the end
+title="$5"
+include_fr1=$6
+functionality=$7
+unique=$8
+naive_output=$9
+naive_output_ca=${10}
+naive_output_cg=${11}
+naive_output_cm=${12}
+naive_output_ce=${13}
+naive_output_all=${14}
+filter_unique=${15}
+filter_unique_count=${16}
+class_filter=${17}
+empty_region_filter=${18}
+fast=${19}
+
+mkdir $outdir
+
+tar -xzf $dir/style.tar.gz -C $outdir
+
+echo "---------------- read parameters ----------------"
+echo "---------------- read parameters ----------------<br />" > $log
+
+echo "unpacking IMGT file"
+# Detect the archive type with file(1) and unpack it below $PWD/files/; any other type aborts the run.
+type="$(file "$input")"
+if [[ "$type" == *"Zip archive"* ]] ; then
+	echo "Zip archive"
+	echo "unzip $input -d $PWD/files/"
+	unzip "$input" -d "$PWD/files/"
+elif [[ "$type" == *"XZ compressed data"* ]] ; then
+	echo "XZ archive"
+	echo "tar -xJf $input -C $PWD/files/$title"
+	mkdir -p "$PWD/files/$title"
+	tar -xJf "$input" -C "$PWD/files/$title"
+else
+	echo "Unrecognized format $type"
+	echo "Unrecognized format $type" > "$log"
+	exit 1
+fi
+
+cat "`find $PWD/files/ -name "1_*"`" > $PWD/summary.txt
+cat "`find $PWD/files/ -name "2_*"`" > $PWD/gapped_nt.txt
+cat "`find $PWD/files/ -name "3_*"`" > $PWD/sequences.txt
+cat "`find $PWD/files/ -name "4_*"`" > $PWD/gapped_aa.txt
+cat "`find $PWD/files/ -name "5_*"`" > $PWD/aa.txt
+cat "`find $PWD/files/ -name "6_*"`" > $PWD/junction.txt
+cat "`find $PWD/files/ -name "7_*"`" > $PWD/mutationanalysis.txt
+cat "`find $PWD/files/ -name "8_*"`" > $PWD/mutationstats.txt
+cat "`find $PWD/files/ -name "9_*"`" > $PWD/aa_change_stats.txt
+cat "`find $PWD/files/ -name "10_*"`" > $PWD/hotspots.txt
+
+echo "---------------- unique id check ----------------"
+
+Rscript $dir/check_unique_id.r $PWD/summary.txt $PWD/gapped_nt.txt $PWD/sequences.txt $PWD/gapped_aa.txt $PWD/aa.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/aa_change_stats.txt $PWD/hotspots.txt
+
+if [[ ${#BLASTN_DIR} -ge 5 ]] ; then
+	echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
+else
+	BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
+	echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
+fi
+
+echo "---------------- class identification ----------------"
+echo "---------------- class identification ----------------<br />" >> $log
+
+python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
+
+echo "---------------- merge_and_filter.r ----------------"
+echo "---------------- merge_and_filter.r ----------------<br />" >> $log
+
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${filter_unique_count} ${class_filter} ${empty_region_filter} 2>&1
+
+if [[ "${naive_output}" == "yes" ]] || [[ "$fast" == "no" ]] ; then
+
+	echo "---------------- creating new IMGT zips ----------------"
+	echo "---------------- creating new IMGT zips ----------------<br />" >> $log
+
+	mkdir $outdir/new_IMGT
+
+	cp $PWD/summary.txt "$outdir/new_IMGT/1_Summary.txt"
+	cp $PWD/gapped_nt.txt "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt"
+	cp $PWD/sequences.txt "$outdir/new_IMGT/3_Nt-sequences.txt"
+	cp $PWD/gapped_aa.txt "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt"
+	cp $PWD/aa.txt "$outdir/new_IMGT/5_AA-sequences.txt"
+	cp $PWD/junction.txt "$outdir/new_IMGT/6_Junction.txt"
+	cp $PWD/mutationanalysis.txt "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt"
+	cp $PWD/mutationstats.txt "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt"
+	cp $PWD/aa_change_stats.txt "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt"
+	cp $PWD/hotspots.txt "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt"
+
+	mkdir $outdir/new_IMGT_IGA
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA
+
+	mkdir $outdir/new_IMGT_IGA1
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1
+
+	mkdir $outdir/new_IMGT_IGA2
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2
+
+	mkdir $outdir/new_IMGT_IGG
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG
+
+	mkdir $outdir/new_IMGT_IGG1
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1
+
+	mkdir $outdir/new_IMGT_IGG2
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2
+
+	mkdir $outdir/new_IMGT_IGG3
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3
+
+	mkdir $outdir/new_IMGT_IGG4
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4
+
+	mkdir $outdir/new_IMGT_IGM
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM
+
+	mkdir $outdir/new_IMGT_IGE
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE
+
+	Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1
+
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1
+
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1
+
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1
+
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE/ $outdir/merged.txt "IGE" 2>&1
+
+
+	tmp="$PWD"
+	cd $outdir/new_IMGT/ #tar weirdness...
+	tar -cJf ../new_IMGT.txz *
+
+	cd $outdir/new_IMGT_IGA/
+	tar -cJf ../new_IMGT_IGA.txz *
+
+	cd $outdir/new_IMGT_IGA1/
+	tar -cJf ../new_IMGT_IGA1.txz *
+
+	cd $outdir/new_IMGT_IGA2/
+	tar -cJf ../new_IMGT_IGA2.txz *
+
+	cd $outdir/new_IMGT_IGG/
+	tar -cJf ../new_IMGT_IGG.txz *
+
+	cd $outdir/new_IMGT_IGG1/
+	tar -cJf ../new_IMGT_IGG1.txz *
+
+	cd $outdir/new_IMGT_IGG2/
+	tar -cJf ../new_IMGT_IGG2.txz *
+
+	cd $outdir/new_IMGT_IGG3/
+	tar -cJf ../new_IMGT_IGG3.txz *
+
+	cd $outdir/new_IMGT_IGG4/
+	tar -cJf ../new_IMGT_IGG4.txz *
+
+	cd $outdir/new_IMGT_IGM/
+	tar -cJf ../new_IMGT_IGM.txz *
+
+	cd $outdir/new_IMGT_IGE/
+	tar -cJf ../new_IMGT_IGE.txz *
+
+	cd $tmp
+fi
+
+echo "---------------- shm_csr.r ----------------"
+echo "---------------- shm_csr.r ----------------<br />" >> $log
+
+classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched"
+echo "R mutation analysis"
+Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1
+
+echo "---------------- plot_pdfs.r ----------------"
+echo "---------------- plot_pdfs.r ----------------<br />" >> $log
+
+echo "Rscript $dir/plot_pdf.r $outdir/pdfplots.RData $outdir 2>&1"
+# The line above echoes the command executed next; keep the two in sync.
+Rscript "$dir/plot_pdf.r" "$outdir/pdfplots.RData" "$outdir" 2>&1
+
+echo "---------------- shm_csr.py ----------------"
+echo "---------------- shm_csr.py ----------------<br />" >> $log
+
+python $dir/shm_csr.py --input $outdir/merged.txt --genes $classes --empty_region_filter "${empty_region_filter}" --output $outdir/hotspot_analysis.txt
+
+echo "---------------- aa_histogram.r ----------------"
+echo "---------------- aa_histogram.r ----------------<br />" >> $log
+
+Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" $outdir/ 2>&1
+if [ -e "$outdir/aa_histogram_.png" ]; then
+        mv $outdir/aa_histogram_.png $outdir/aa_histogram.png
+        mv $outdir/aa_histogram_.pdf $outdir/aa_histogram.pdf
+        mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt
+        mv $outdir/aa_histogram_absent_.txt $outdir/aa_histogram_absent.txt
+        mv $outdir/aa_histogram_count_.txt $outdir/aa_histogram_count.txt
+        mv $outdir/aa_histogram_sum_.txt $outdir/aa_histogram_sum.txt
+fi
+
+# (Sub)classes iterated over when building the per-class HTML tables below.
+genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE)
+# Only "sum" is active: the former funcs=(sum mean median) assignment was overwritten on the very next line.
+funcs=(sum)
+
+echo "---------------- sequence_overview.r ----------------"
+echo "---------------- sequence_overview.r ----------------<br />" >> $log
+
+mkdir $outdir/sequence_overview
+
+Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt ${empty_region_filter} 2>&1
+
+echo "<table border='1'>" > $outdir/base_overview.html
+
+while IFS=$'\t' read ID class seq A C G T
+do
+	echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html
+done < $outdir/sequence_overview/ntoverview.txt
+
+echo "<html><center><h1>$title</h1></center>" > $output
+echo "<meta name='viewport' content='width=device-width, initial-scale=1'>" >> $output
+echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $output
+echo "<script type='text/javascript' src='tabber.js'></script>" >> $output
+echo "<script type='text/javascript' src='script.js'></script>" >> $output
+echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> $output
+echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>" >> $output
+
+# Line counts of merged.txt (lines without 'unmatched') and unmatched.txt; tail -n +2 drops the first line of each.
+matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`"
+unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
+total_count=$((matched_count + unmatched_count))
+# Multiply before dividing so bc's scale=2 keeps two real decimals; skip the division when there are no rows at all.
+perc_count=0 ; if [[ "${total_count}" -gt 0 ]] ; then perc_count=`bc -l <<< "scale=2; ${unmatched_count} * 100 / ${total_count}"` ; fi
+
+echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
+echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
+echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
+
+echo "---------------- main tables ----------------"
+echo "---------------- main tables ----------------<br />" >> $log
+
+echo "<div class='tabber'>" >> $output
+echo "<div class='tabbertab' title='SHM Overview' style='width: 3000px;'>" >> $output
+
+for func in ${funcs[@]}
+do
+	
+	echo "---------------- $func table ----------------"
+	echo "---------------- $func table ----------------<br />" >> $log
+	
+	cat $outdir/mutations_${func}.txt $outdir/shm_overview_tandem_row.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt
+	
+	echo "---------------- pattern_plots.r ----------------"
+	echo "---------------- pattern_plots.r ----------------<br />" >> $log
+
+	Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/absolute_mutations $outdir/shm_overview.txt 2>&1
+	
+	echo "<table class='pure-table pure-table-striped'>" >> $output
+	echo "<thead><tr><th>info</th>" >> $output
+	
+	if [ "${class_filter}" != "101_101" ] ; then
+	
+		for gene in ${genes[@]}
+		do
+			tmp=`cat $outdir/${gene}_${func}_n.txt`
+			echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
+		done
+		
+		tmp=`cat $outdir/all_${func}_n.txt`
+		echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output
+		tmp=`cat $outdir/unmatched_${func}_n.txt` # NOTE(review): read but never displayed; the header below uses ${unmatched_count} - confirm which N is intended
+		echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr></thead>" >> $output
+
+		while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz 
+		do
+			if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] || [ "$name" == "Tandems/Expected (ratio)" ] ; then #meh
+				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${cex}/${cey} (${cez})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>" >> $output
+			elif [ "$name" == "Median of Number of Mutations (%)" ] ; then
+				echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${cez}%</td><td>${allz}%</td><td>${unz}%</td></tr>" >> $output
+			else
+				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${cex}/${cey} (${cez}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output
+			fi
+		done < $outdir/data_${func}.txt
+		
+	else
+		tmp=`cat $outdir/all_${func}_n.txt`
+		echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th></tr></thead>" >> $output # close the header row before the data rows follow
+		
+		while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz
+		do
+			if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh
+				echo "<tr><td>$name</td><td>${allx}/${ally}</td></tr>" >> $output
+			elif [ "$name" == "Median of Number of Mutations (%)" ] ; then
+				echo "<tr><td>$name</td><td>${allz}%</td></tr>" >> $output
+			else
+				echo "<tr><td>$name</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
+			fi
+		done < $outdir/data_${func}.txt
+		
+	fi
+	echo "</table>" >> $output
+	#echo "<a href='data_${func}.txt'>Download data</a>" >> $output
+done
+
+echo "<a href='aid_motives.pdf'><img src='aid_motives.png' /></a><br />" >> $output
+echo "<a href='relative_mutations.pdf'><img src='relative_mutations.png' /></a><br />" >> $output
+echo "<a href='absolute_mutations.pdf'><img src='absolute_mutations.png' /></a><br />" >> $output
+echo "<br />" >> $output
+cat $dir/shm_overview.htm >> $output
+echo "</div>" >> $output #SHM overview tab end
+
+echo "---------------- images ----------------"
+echo "---------------- images ----------------<br />" >> $log
+
+echo "<div class='tabbertab' title='SHM Frequency' style='width: 3000px;'>" >> $output
+# Only link the plots whose PNG file exists in the output directory.
+if [ -e "$outdir/scatter.png" ]
+then
+	echo "<a href='scatter.pdf'><img src='scatter.png'/></a><br />" >> $output
+fi
+if [ -e "$outdir/frequency_ranges.png" ]
+then
+	echo "<a href='frequency_ranges.pdf'><img src='frequency_ranges.png'/></a><br />" >> $output
+fi
+
+echo "<br />" >> $output
+cat $dir/shm_frequency.htm >> $output
+
+echo "</div>" >> $output #SHM frequency tab end
+
+echo "<div class='tabbertab' title='Transition tables' style='width: 3000px;'>" >> $output
+
+echo "<table border='0'>" >> $output
+
+for gene in ${genes[@]}
+do
+	echo "<tr>" >> $output
+	echo "<td><h1>${gene}</h1></td>" >> $output
+	
+	if [ -e $outdir/transitions_heatmap_${gene}.png ]
+	then
+		echo "<td><a href='transitions_heatmap_${gene}.pdf'><img src='transitions_heatmap_${gene}.png' /></a></td>" >> $output
+	else
+		echo "<td></td>" >> $output
+	fi
+	
+	if [ -e $outdir/transitions_stacked_${gene}.png ]
+	then
+		echo "<td><a href='transitions_stacked_${gene}.pdf'><img src='transitions_stacked_${gene}.png' /></a></td>" >> $output
+	else
+		echo "<td></td>" >> $output
+	fi
+	
+	echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output
+	echo "<tr><td></td><td colspan='5'><center>To</center></td></tr>" >> $output # single quotes: nested double quotes would end the shell string
+	first="true"
+	while IFS=, read from a c g t
+		do
+			if [ "$first" == "true" ] ; then
+				echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
+				first="false"
+			else
+				echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
+			fi
+	done < $outdir/transitions_${gene}_sum.txt
+	echo "</table></td>" >> $output
+	
+	echo "</tr>" >> $output
+done
+
+echo "<tr>" >> $output
+echo "<td><h1>All</h1></td>" >> $output
+echo "<td><a href='transitions_heatmap_all.pdf'><img src='transitions_heatmap_all.png' /></a></td>" >> $output
+echo "<td><a href='transitions_stacked_all.pdf'><img src='transitions_stacked_all.png' /></a></td>" >> $output
+echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output
+echo "<tr><td></td><td colspan='5'><center>To</center></td></tr>" >> $output # single quotes: nested double quotes would end the shell string
+first="true"
+while IFS=, read from a c g t
+	do
+		if [ "$first" == "true" ] ; then
+			echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
+			first="false"
+		else
+			echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
+		fi
+done < $outdir/transitions_all_sum.txt
+echo "</table></td>" >> $output
+
+echo "</tr>" >> $output
+
+echo "</table>" >> $output
+
+echo "<br />" >> $output
+cat $dir/shm_transition.htm >> $output
+
+echo "</div>" >> $output #transition tables tab end
+
+echo "<div class='tabbertab' title='Antigen Selection'>" >> $output
+
+if [ -e $outdir/aa_histogram.png ]
+then
+	echo "<a href='aa_histogram.pdf'><img src='aa_histogram.png'/></a><br />" >> $output
+fi
+
+if [ -e $outdir/aa_histogram_IGA.png ]
+then
+	echo "<a href='aa_histogram_IGA.pdf'><img src='aa_histogram_IGA.png'/></a><br />" >> $output
+fi
+
+if [ -e $outdir/aa_histogram_IGG.png ]
+then
+	echo "<a href='aa_histogram_IGG.pdf'><img src='aa_histogram_IGG.png'/></a><br />" >> $output
+fi
+
+if [ -e $outdir/aa_histogram_IGM.png ]
+then
+	echo "<a href='aa_histogram_IGM.pdf'><img src='aa_histogram_IGM.png'/></a><br />" >> $output
+fi
+
+if [ -e $outdir/aa_histogram_IGE.png ]
+then
+	echo "<a href='aa_histogram_IGE.pdf'><img src='aa_histogram_IGE.png'/></a><br />" >> $output
+fi
+
+
+
+if [[ "$fast" == "no" ]] ; then
+
+    
+
+	echo "---------------- baseline ----------------"
+	echo "---------------- baseline ----------------<br />" >> $log
+	tmp="$PWD"
+
+	mkdir $outdir/baseline
+	
+	echo "<center><h1>BASELINe</h1>" >> $output
+	# Region boundaries passed to the BASELINe wrapper below; the report header embeds the same string so the two cannot drift apart.
+	baseline_boundaries="27:27:38:55:65:104:-"
+	header_substring="Based on CDR1, FR2, CDR2, FR3 (${baseline_boundaries})"
+	# With the "leader" filter the header also lists FR1 and the boundaries start at position 1.
+	if [[ "${empty_region_filter}" == "leader" ]] ; then
+		baseline_boundaries="1:26:38:55:65:104:-"
+		header_substring="Based on FR1, CDR1, FR2, CDR2, FR3 (${baseline_boundaries})"
+	fi
+	
+	echo "<p>${header_substring}</p></center>" >> $output
+
+	mkdir $outdir/baseline/IGA_IGG_IGM
+	if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
+		cd $outdir/baseline/IGA_IGG_IGM
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
+	else
+		echo "No sequences" > "$outdir/baseline.txt"
+	fi
+
+	mkdir $outdir/baseline/IGA
+	if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then
+		cd $outdir/baseline/IGA
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt"
+	else
+		echo "No IGA sequences" > "$outdir/baseline_IGA.txt"
+	fi
+
+	mkdir $outdir/baseline/IGG
+	if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then
+		cd $outdir/baseline/IGG
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGG.txz "IGG" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt"
+	else
+		echo "No IGG sequences" > "$outdir/baseline_IGG.txt"
+	fi
+
+	mkdir $outdir/baseline/IGM
+	if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then
+		cd $outdir/baseline/IGM
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt"
+	else
+		echo "No IGM sequences" > "$outdir/baseline_IGM.txt"
+	fi
+
+	mkdir $outdir/baseline/IGE
+	if [[ $(wc -l < $outdir/new_IMGT_IGE/1_Summary.txt) -gt "1" ]]; then
+		cd $outdir/baseline/IGE
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGE.txz "IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGE.pdf" "Sequence.ID" "$outdir/baseline_IGE.txt"
+	else
+		echo "No IGE sequences" > "$outdir/baseline_IGE.txt"
+	fi
+
+	cd $tmp
+
+	echo "Cleaning up *.RData files"
+	find $outdir/baseline -name "*.RData" -type f -delete
+	
+	if [ -e $outdir/baseline.pdf ]
+	then
+		echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output
+	fi
+
+	if [ -e $outdir/baseline_IGA.pdf ]
+	then
+		echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output
+	fi
+
+	if [ -e $outdir/baseline_IGG.pdf ]
+	then
+		echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output
+	fi
+
+	if [ -e $outdir/baseline_IGM.pdf ]
+	then
+		echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output
+	fi
+
+	if [ -e $outdir/baseline_IGE.pdf ]
+	then
+		echo "<embed src='baseline_IGE.pdf' width='700px' height='1000px'>" >> $output
+	fi
+fi
+
+echo "<br />" >> $output
+cat $dir/shm_selection.htm >> $output
+
+echo "</div>" >> $output #antigen selection tab end
+
+echo "<div class='tabbertab' title='CSR'>" >> $output #CSR tab
+
+if [ -e $outdir/IGA.png ] 
+then
+	echo "<a href='IGA.pdf'><img src='IGA.png'/></a><br />" >> $output
+fi
+if [ -e $outdir/IGG.png ]
+then
+	echo "<a href='IGG.pdf'><img src='IGG.png'/></a><br />" >> $output
+fi
+
+echo "<br />" >> $output
+cat $dir/shm_csr.htm >> $output
+
+echo "</div>" >> $output #CSR tab end
+
+if [[ "$fast" == "no" ]] ; then
+
+	# Change-O step (skipped unless $fast is "no"): build a Change-O database,
+	# define clones and export an IMGT archive that holds the first sequence
+	# of every clone - once for all sequences and once per class.
+	echo "---------------- change-o MakeDB ----------------"
+
+	mkdir $outdir/change_o
+
+	# Remember where we started; the steps below cd into work directories.
+	tmp="$PWD"
+
+	cd $outdir/change_o
+
+	# --- all sequences ---
+	bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt
+	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt
+	Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-db-defined_first_clones.txt 2>&1
+
+	# Work on a scratch copy of the IMGT files, archive it, then discard it.
+	mkdir $outdir/new_IMGT_changeo
+	cp $outdir/new_IMGT/* $outdir/new_IMGT_changeo
+
+	Rscript $dir/new_imgt.r $outdir/new_IMGT_changeo $outdir/change_o/change-o-db-defined_first_clones.txt "-" 2>&1
+
+	cd $outdir/new_IMGT_changeo
+	tar -cJf ../new_IMGT_first_seq_of_clone.txz *
+	cd $outdir/change_o
+
+	rm -rf $outdir/new_IMGT_changeo
+
+	Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1
+	# Log the command exactly as it was run above.
+	echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt 'all' 'Sequence.ID,best_match' 'SEQUENCE_ID' 'Sequence.ID' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"
+
+	# --- per class ---
+	# A class is processed only when its 1_Summary.txt has more than one line;
+	# otherwise placeholder files are written so later steps find their inputs.
+	for changeo_class in IGA IGG IGM IGE; do
+		if [[ $(wc -l < $outdir/new_IMGT_${changeo_class}/1_Summary.txt) -gt "1" ]]; then
+			bash $dir/change_o/makedb.sh $outdir/new_IMGT_${changeo_class}.txz false false false $outdir/change_o/change-o-db-${changeo_class}.txt
+			bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-${changeo_class}.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-${changeo_class}.txt $outdir/change_o/change-o-defined_clones-summary-${changeo_class}.txt
+			Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-${changeo_class}.txt $outdir/change_o/change-o-db-defined_first_clones-${changeo_class}.txt 2>&1
+
+			# NOTE(review): the scratch copy is taken from the unfiltered
+			# new_IMGT directory, not new_IMGT_<class>; presumably new_imgt.r
+			# narrows it down to the IDs in the first-clones file - confirm.
+			mkdir $outdir/new_IMGT_${changeo_class}_changeo
+			cp $outdir/new_IMGT/* $outdir/new_IMGT_${changeo_class}_changeo
+
+			Rscript $dir/new_imgt.r $outdir/new_IMGT_${changeo_class}_changeo $outdir/change_o/change-o-db-defined_first_clones-${changeo_class}.txt "-" 2>&1
+
+			cd $outdir/new_IMGT_${changeo_class}_changeo
+			tar -cJf ../new_IMGT_${changeo_class}_first_seq_of_clone.txz *
+
+			rm -rf $outdir/new_IMGT_${changeo_class}_changeo
+
+			cd $outdir/change_o
+		else
+			echo "No ${changeo_class} sequences" > "$outdir/change_o/change-o-db-defined_clones-${changeo_class}.txt"
+			echo "No ${changeo_class} sequences" > "$outdir/change_o/change-o-defined_clones-summary-${changeo_class}.txt"
+		fi
+	done
+
+	cd "$tmp"
+
+	# Remove the unpacked IMGT directories; the .txz archives stay available.
+	rm -rf $outdir/new_IMGT
+	for imgt_subset in IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE; do
+		rm -rf $outdir/new_IMGT_${imgt_subset}/
+	done
+
+	echo "<div class='tabbertab' title='Clonal Relation' style='width: 7000px;'>" >> $output #clonality tab
+
+	# Append an HTML table built from a clone summary file to a report file.
+	#   $1 - summary file with whitespace-separated columns (clone size,
+	#        number of clones, number of sequences); its first line is
+	#        skipped as a header
+	#   $2 - HTML file the table is appended to
+	function clonality_table {
+		local infile=$1
+		local outfile=$2
+		# Keep the loop variables out of the script's global scope.
+		local size clones seqs
+
+		echo "<table class='pure-table pure-table-striped'>" >> "$outfile"
+		echo "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>" >> "$outfile"
+
+		{
+			# Consume and discard the header line.
+			read -r size clones seqs
+			# The "|| [[ -n ... ]]" part keeps a final line that lacks a
+			# trailing newline, which a plain "while read" would drop.
+			while read -r size clones seqs || [[ -n "$size" ]]
+			do
+				echo "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>" >> "$outfile"
+			done
+		} < "$infile"
+
+		echo "</table>" >> "$outfile"
+	}
+	echo "<div class='tabber'>" >> $output
+
+	echo "<div class='tabbertab' title='All'>" >> $output
+	clonality_table $outdir/change_o/change-o-defined_clones-summary.txt $output
+	echo "</div>" >> $output
+
+	echo "<div class='tabbertab' title='IGA'>" >> $output
+	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGA.txt $output
+	echo "</div>" >> $output
+
+	echo "<div class='tabbertab' title='IGG'>" >> $output
+	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGG.txt $output
+	echo "</div>" >> $output
+
+	echo "<div class='tabbertab' title='IGM'>" >> $output
+	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output
+	echo "</div>" >> $output
+
+	echo "<div class='tabbertab' title='IGE'>" >> $output
+	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output
+	echo "</div>" >> $output
+
+	echo "<div class='tabbertab' title='Overlap' style='width: 7000px;'>" >> $output
+	cat "$outdir/sequence_overview/index.html" | sed -e 's:</td>:</td>\n:g' | sed "s:href='\(.*\).html:href='sequence_overview/\1.html:g" >> $output # rewrite href to 'sequence_overview/..."
+	echo "</div>" >> $output
+	
+	echo "</div>" >> $output #clonality tabber end
+	
+	echo "<br />" >> $output
+	cat $dir/shm_clonality.htm >> $output
+	
+	echo "</div>" >> $output #clonality tab end
+
+fi
+
+echo "<div class='tabbertab' title='Downloads'>" >> $output
+
+echo "<table class='pure-table pure-table-striped'>" >> $output
+echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> $output
+echo "<tr><td>The complete dataset</td><td><a href='merged.txt' download='merged.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The filtered dataset</td><td><a href='filtered.txt' download='filtered.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt' download='unmatched.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Overview</td></tr>" >> $output
+echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='shm_overview.txt' download='shm_overview.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the percentage of mutations in AID and pol eta motives plot</td><td><a href='aid_motives.txt' download='aid_motives.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the relative mutation patterns plot</td><td><a href='relative_mutations.txt' download='relative_mutations.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the absolute mutation patterns plot</td><td><a href='absolute_mutations.txt' download='absolute_mutations.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Data about tandem mutations by ID</td><td><a href='tandems_by_id.txt' download='tandems_by_id.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Frequency</td></tr>" >> $output
+echo "<tr><td>The data  generate the frequency scatter plot</td><td><a href='scatter.txt' download='scatter.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the frequency by class plot</td><td><a href='frequency_ranges_classes.txt' download='frequency_ranges_classes.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for frequency by subclass</td><td><a href='frequency_ranges_subclasses.txt' download='frequency_ranges_subclasses.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Transition Tables</td></tr>" >> $output
+echo "<tr><td>The data for the 'all' transition plot</td><td><a href='transitions_all_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transitions_IGA_sum.txt' download='transitions_IGA_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA1_sum.txt' download='transitions_IGA1_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA2' transition plot</td><td><a href='transitions_IGA2_sum.txt' download='transitions_IGA2_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG' transition plot</td><td><a href='transitions_IGG_sum.txt' download='transitions_IGG_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG1' transition plot</td><td><a href='transitions_IGG1_sum.txt' download='transitions_IGG1_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG2' transition plot</td><td><a href='transitions_IGG2_sum.txt' download='transitions_IGG2_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG3' transition plot</td><td><a href='transitions_IGG3_sum.txt' download='transitions_IGG3_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG4' transition plot</td><td><a href='transitions_IGG4_sum.txt' download='transitions_IGG4_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGM' transition plot</td><td><a href='transitions_IGM_sum.txt' download='transitions_IGM_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGE' transition plot</td><td><a href='transitions_IGE_sum.txt' download='transitions_IGE_sum.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Antigen Selection</td></tr>" >> $output
+echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt' download='aa_id_mutations.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Presence of AA per sequence ID</td><td><a href='absent_aa_id.txt' download='absent_aa_id.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td>The data used to generate the aa mutation frequency plot</td><td><a href='aa_histogram_sum.txt' download='aa_histogram_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the aa mutation frequency plot for IGA</td><td><a href='aa_histogram_sum_IGA.txt' download='aa_histogram_sum_IGA.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the aa mutation frequency plot for IGG</td><td><a href='aa_histogram_sum_IGG.txt' download='aa_histogram_sum_IGG.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the aa mutation frequency plot for IGM</td><td><a href='aa_histogram_sum_IGM.txt' download='aa_histogram_sum_IGM.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the aa mutation frequency plot for IGE</td><td><a href='aa_histogram_sum_IGE.txt' download='aa_histogram_sum_IGE.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf' download='baseline.pdf' >Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline data</td><td><a href='baseline.txt' download='baseline.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline IGA PDF</td><td><a href='baseline_IGA.pdf' download='baseline_IGA.pdf' >Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline IGA data</td><td><a href='baseline_IGA.txt' download='baseline_IGA.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline IGG PDF</td><td><a href='baseline_IGG.pdf' download='baseline_IGG.pdf' >Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline IGG data</td><td><a href='baseline_IGG.txt' download='baseline_IGG.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline IGM PDF</td><td><a href='baseline_IGM.pdf' download='baseline_IGM.pdf' >Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline IGM data</td><td><a href='baseline_IGM.txt' download='baseline_IGM.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline IGE PDF</td><td><a href='baseline_IGE.pdf' download='baseline_IGE.pdf' >Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline IGE data</td><td><a href='baseline_IGE.txt' download='baseline_IGE.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>CSR</td></tr>" >> $output
+echo "<tr><td>The data for the IGA subclass distribution plot</td><td><a href='IGA_pie.txt' download='IGA_pie.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the IGG subclass distribution plot</td><td><a href='IGG_pie.txt' download='IGG_pie.txt' >Download</a></td></tr>" >> $output
+
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Clonal Relation</td></tr>" >> $output
+echo "<tr><td>Sequence overlap between subclasses</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone</td><td><a href='new_IMGT_first_seq_of_clone.txz' download='new_IMGT_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+
+echo "<tr><td>The Change-O DB file with defined clones of IGA</td><td><a href='change_o/change-o-db-defined_clones-IGA.txt' download='change_o/change-o-db-defined_clones-IGA.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of IGA</td><td><a href='change_o/change-o-defined_clones-summary-IGA.txt' download='change_o/change-o-defined_clones-summary-IGA.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGA)</td><td><a href='new_IMGT_IGA_first_seq_of_clone.txz' download='new_IMGT_IGA_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+
+echo "<tr><td>The Change-O DB file with defined clones of IGG</td><td><a href='change_o/change-o-db-defined_clones-IGG.txt' download='change_o/change-o-db-defined_clones-IGG.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of IGG</td><td><a href='change_o/change-o-defined_clones-summary-IGG.txt' download='change_o/change-o-defined_clones-summary-IGG.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGG)</td><td><a href='new_IMGT_IGG_first_seq_of_clone.txz' download='new_IMGT_IGG_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+
+echo "<tr><td>The Change-O DB file with defined clones of IGM</td><td><a href='change_o/change-o-db-defined_clones-IGM.txt' download='change_o/change-o-db-defined_clones-IGM.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of IGM</td><td><a href='change_o/change-o-defined_clones-summary-IGM.txt' download='change_o/change-o-defined_clones-summary-IGM.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGM)</td><td><a href='new_IMGT_IGM_first_seq_of_clone.txz' download='new_IMGT_IGM_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+
+echo "<tr><td>The Change-O DB file with defined clones of IGE</td><td><a href='change_o/change-o-db-defined_clones-IGE.txt' download='change_o/change-o-db-defined_clones-IGE.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of IGE</td><td><a href='change_o/change-o-defined_clones-summary-IGE.txt' download='change_o/change-o-defined_clones-summary-IGE.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGE)</td><td><a href='new_IMGT_IGE_first_seq_of_clone.txz' download='new_IMGT_IGE_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Filtered IMGT output files</td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz' download='new_IMGT.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGA sequences</td><td><a href='new_IMGT_IGA.txz' download='new_IMGT_IGA.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGA1 sequences</td><td><a href='new_IMGT_IGA1.txz' download='new_IMGT_IGA1.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGA2 sequences</td><td><a href='new_IMGT_IGA2.txz' download='new_IMGT_IGA2.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG sequences</td><td><a href='new_IMGT_IGG.txz' download='new_IMGT_IGG.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG1 sequences</td><td><a href='new_IMGT_IGG1.txz' download='new_IMGT_IGG1.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG2 sequences</td><td><a href='new_IMGT_IGG2.txz' download='new_IMGT_IGG2.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG3 sequences</td><td><a href='new_IMGT_IGG3.txz' download='new_IMGT_IGG3.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGG4 sequences</td><td><a href='new_IMGT_IGG4.txz' download='new_IMGT_IGG4.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGM sequences</td><td><a href='new_IMGT_IGM.txz' download='new_IMGT_IGM.txz' >Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered IGE sequences</td><td><a href='new_IMGT_IGE.txz' download='new_IMGT_IGE.txz' >Download</a></td></tr>" >> $output
+
+echo "</table>" >> $output
+
+echo "<br />" >> $output
+cat $dir/shm_downloads.htm >> $output
+
+echo "</div>" >> $output #downloads tab end
+
+echo "</div>" >> $output #tabs end 
+
+echo "</html>" >> $output
+
+
+echo "---------------- naive_output.r ----------------"
+echo "---------------- naive_output.r ----------------<br />" >> $log
+
+if [[ "$naive_output" == "yes" ]]
+then
+	echo "output naive output"
+	if [[ "${class_filter}" == "101_101" ]]
+	then
+		echo "copy new_IMGT.txz to ${naive_output_all}"
+		cp $outdir/new_IMGT.txz ${naive_output_all}
+	else
+		echo "copy for classes"
+		cp $outdir/new_IMGT_IGA.txz ${naive_output_ca}
+		cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}
+		cp $outdir/new_IMGT_IGM.txz ${naive_output_cm}
+		cp $outdir/new_IMGT_IGE.txz ${naive_output_ce}
+	fi
+fi
+
+echo "</table>" >> $outdir/base_overview.html
+
+mv $log $outdir/log.html
+
+echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $log
+echo "<table border = 1>" >> $log
+echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> $log
+tIFS="$TMP"
+IFS=$'\t'
+while read step seq perc
+	do
+		echo "<tr>" >> $log
+		echo "<td>$step</td>" >> $log
+		echo "<td>$seq</td>" >> $log
+		echo "<td>${perc}%</td>" >> $log
+		echo "</tr>" >> $log
+done < $outdir/filtering_steps.txt
+echo "</table>" >> $log
+echo "<br />" >> $log
+cat $dir/shm_first.htm >> $log
+echo "</center></html>" >> $log
+
+IFS="$tIFS"
+
+
+echo "---------------- Done! ----------------"
+echo "---------------- Done! ----------------<br />" >> $outdir/log.html
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/shm_downloads.htm	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,538 +0,0 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Font Definitions */
- @font-face
-	{font-family:Calibri;
-	panose-1:2 15 5 2 2 2 4 3 2 4;}
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
-	{margin-top:0in;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:11.0pt;
-	font-family:"Calibri","sans-serif";}
-a:link, span.MsoHyperlink
-	{color:blue;
-	text-decoration:underline;}
-a:visited, span.MsoHyperlinkFollowed
-	{color:purple;
-	text-decoration:underline;}
-p.MsoNoSpacing, li.MsoNoSpacing, div.MsoNoSpacing
-	{margin:0in;
-	margin-bottom:.0001pt;
-	font-size:11.0pt;
-	font-family:"Calibri","sans-serif";}
-.MsoChpDefault
-	{font-family:"Calibri","sans-serif";}
-.MsoPapDefault
-	{margin-bottom:10.0pt;
-	line-height:115%;}
-@page WordSection1
-	{size:8.5in 11.0in;
-	margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
-	{page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US link=blue vlink=purple>
-
-<div class=WordSection1>
-
-<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Info</span></b></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The complete
-dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-Allows downloading of the complete parsed data set.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The filtered
-dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-Allows downloading of all parsed IMGT information of all transcripts that
-passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The alignment
-info on the unmatched sequences:</span></u><span lang=EN-GB style='font-size:
-12.0pt;font-family:"Times New Roman","serif"'> Provides information of the subclass
-alignment of all unmatched sequences. For each sequence the chunk hit
-percentage and the nt hit percentage are shown together with the best matched
-subclass.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Overview</span></b></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The SHM Overview
-table as a dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Allows downloading of the SHM Overview
-table as a data set.  </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Motif data per
-sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
-"Times New Roman","serif"'> Provides a file that contains information for each
-transcript on the number of mutations present in WA/TW and RGYW/WRCY motives.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Mutation data
-per sequence ID: </span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'>Provides a file containing information
-on the number of sequenced bases, the number and location of mutations and the
-type of mutations found in each transcript. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Base count for
-every sequence:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
-"Times New Roman","serif"'> links to a page showing for each transcript the
-sequence of the analysed region (as dependent on the sequence starts at filter),
-the assigned subclass and the number of sequenced A,C,G and T’s.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
-generate the percentage of mutations in AID and pol eta motives plot:</span></u><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-Provides a file containing the values used to generate the percentage of
-mutations in AID and pol eta motives plot in the SHM overview tab.</span></p>
-
-<p class=MsoNormalCxSpFirst style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
-data used to generate the relative mutation patterns plot:</span></u><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-Provides a download with the data used to generate the relative mutation
-patterns plot in the SHM overview tab.</span></p>
-
-<p class=MsoNormalCxSpLast style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The
-data used to generate the absolute mutation patterns plot:</span></u><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-Provides a download with the data used to generate the absolute mutation
-patterns plot in the SHM overview tab. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Frequency</span></b></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data
-generate the frequency scatter plot:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows
-downloading the data used to generate the frequency scatter plot in the SHM
-frequency tab. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
-generate the frequency by class plot:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows
-downloading the data used to generate the frequency by class plot included in the
-SHM frequency tab.           </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for
-frequency by subclass:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Provides information on the number and
-percentage of sequences that have 0%, 0-2%, 2-5%, 5-10%, 10-15%, 15-20%,
-&gt;20% SHM. Information is provided for each subclass.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Transition
-Tables</span></b></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'all' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGA' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all IGA sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGA1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all IGA1 sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGA2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all IGA2 sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGG' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all IGG sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGG1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all IGG1 sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGG2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all IGG2 sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGG3' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all IGG3 sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGG4' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all IGG4 sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGM' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the information used to
-generate the transition table for all IGM sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-'IGE' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Contains the
-information used to generate the transition table for all IGE sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Antigen
-selection</span></b></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>AA mutation data
-per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
-"Times New Roman","serif"'> Provides for each transcript information on whether
-there is a replacement mutation at each amino acid location (as defined by IMGT).
-For all amino acids outside of the analysed region the value 0 is given.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Presence of AA
-per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:
-"Times New Roman","serif"'> Provides for each transcript information on which
-amino acid location (as defined by IMGT) is present. </span><span lang=NL
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>0 is absent, 1
-is present. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
-generate the aa mutation frequency plot:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the
-data used to generate the aa mutation frequency plot for all sequences in the
-antigen selection tab.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
-generate the aa mutation frequency plot for IGA:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>  Provides the
-data used to generate the aa mutation frequency plot for all IGA sequences in
-the antigen selection tab.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
-generate the aa mutation frequency plot for IGG:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the
-data used to generate the aa mutation frequency plot for all IGG sequences in
-the antigen selection tab.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
-generate the aa mutation frequency plot for IGM:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the
-data used to generate the aa mutation frequency plot for all IGM sequences in
-the antigen selection tab.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to
-generate the aa mutation frequency plot for IGE:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>   Provides the
-data used to generate the aa mutation frequency plot for all IGE sequences in
-the antigen selection tab.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline PDF (</span></u><span
-lang=EN-GB><a href="http://selection.med.yale.edu/baseline/"><span
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>http://selection.med.yale.edu/baseline/</span></a></span><u><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>):</span></u><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF
-containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
-"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all
-sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline data:</span></u><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-Table output of the BASELINe analysis. Calculations of antigen selection as
-performed by BASELINe are shown for each individual sequence and the sum of all
-sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA
-PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
-"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGA
-sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA
-data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-Table output of the BASELINe analysis. Calculations of antigen selection as
-performed by BASELINe are shown for each individual IGA sequence and the sum of
-all IGA sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG
-PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
-"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGG
-sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG
-data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-Table output of the BASELINe analysis. Calculations of antigen selection as
-performed by BASELINe are shown for each individual IGG sequence and the sum of
-all IGG sequences.        </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM PDF:</span></u><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF
-containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
-"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGM
-sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM
-data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-Table output of the BASELINe analysis. Calculations of antigen selection as
-performed by BASELINe are shown for each individual IGM sequence and the sum of
-all IGM sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE
-PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family:
-"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGE
-sequences.</span><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE
-data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-Table output of the BASELINe analysis. Calculations of antigen selection as
-performed by BASELINe are shown for each individual IGE sequence and the sum of
-all IGE sequences.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>CSR</span></b></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGA
-subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> </span><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Data used for
-the generation of the </span><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'>IGA subclass distribution plot provided
-in the CSR tab. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the
-</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGG
-subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Data used for the generation of the </span><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGG
-subclass distribution plot provided in the CSR tab. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><b><span lang=NL
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Clonal relation</span></b></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Sequence overlap
-between subclasses:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Link to the overlap table as provided
-under the clonality overlap tab.         </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-file with defined clones and subclass annotation:</span></u><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>
-Downloads a table with the calculation of clonal relation between all
-sequences. For each individual transcript the results of the clonal assignment
-as provided by Change-O are provided. Sequences with the same number in the CLONE
-column are considered clonally related. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-defined clones summary file:</span></u><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'> Gives a summary of the total number of
-clones in all sequences and their clone size.           </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-file with defined clones of IGA:</span></u><span lang=EN-GB style='font-size:
-12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the
-calculation of clonal relation between all IGA sequences. For each individual
-transcript the results of the clonal assignment as provided by Change-O are
-provided. Sequences with the same number in the CLONE column are considered
-clonally related. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-defined clones summary file of IGA:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
-of the total number of clones in all IGA sequences and their clone size.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-file with defined clones of IGG:</span></u><span lang=EN-GB style='font-size:
-12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the
-calculation of clonal relation between all IGG sequences. For each individual
-transcript the results of the clonal assignment as provided by Change-O are
-provided. Sequences with the same number in the CLONE column are considered
-clonally related. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-defined clones summary file of IGG:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
-of the total number of clones in all IGG sequences and their clone size.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-file with defined clones of IGM:</span></u><span lang=EN-GB style='font-size:
-12.0pt;font-family:"Times New Roman","serif"'> Downloads a table
-with the calculation of clonal relation between all IGM sequences. For each
-individual transcript the results of the clonal assignment as provided by
-Change-O are provided. Sequences with the same number in the CLONE column are
-considered clonally related. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-defined clones summary file of IGM:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
-of the total number of clones in all IGM sequences and their clone size.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-file with defined clones of IGE:</span></u><span lang=EN-GB style='font-size:
-12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the
-calculation of clonal relation between all IGE sequences. For each individual
-transcript the results of the clonal assignment as provided by Change-O are
-provided. Sequences with the same number in the CLONE column are considered
-clonally related. </span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB
-defined clones summary file of IGE:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary
-of the total number of clones in all IGE sequences and their clone size.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Filtered IMGT
-output files</span></b></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
-.txz file with the same format as downloaded IMGT files that contains all
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGA sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
-.txz file with the same format as downloaded IMGT files that contains all IGA
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGA1 sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
-.txz file with the same format as downloaded IMGT files that contains all IGA1
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGA2 sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
-file with the same format as downloaded IMGT files that contains all IGA2
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGG sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
-file with the same format as downloaded IMGT files that contains all IGG
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGG1 sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
-.txz file with the same format as downloaded IMGT files that contains all IGG1
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGG2 sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
-.txz file with the same format as downloaded IMGT files that contains all IGG2
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGG3 sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
-file with the same format as downloaded IMGT files that contains all IGG3
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGG4 sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
-.txz file with the same format as downloaded IMGT files that contains all IGG4
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGM sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz
-file with the same format as downloaded IMGT files that contains all IGM
-sequences that have passed the chosen filter settings.</span></p>
-
-<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive
-with just the matched and filtered IGE sequences:</span></u><span lang=EN-GB
-style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a
-.txz file with the same format as downloaded IMGT files that contains all IGE
-sequences that have passed the chosen filter settings.</span></p>
-
-</div>
-
-</body>
-
-</html>
--- a/shm_first.htm	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,127 +0,0 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Font Definitions */
- @font-face
-	{font-family:Calibri;
-	panose-1:2 15 5 2 2 2 4 3 2 4;}
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
-	{margin-top:0in;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:11.0pt;
-	font-family:"Calibri","sans-serif";}
-.MsoChpDefault
-	{font-family:"Calibri","sans-serif";}
-.MsoPapDefault
-	{margin-bottom:10.0pt;
-	line-height:115%;}
-@page WordSection1
-	{size:8.5in 11.0in;
-	margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
-	{page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US>
-
-<div class=WordSection1>
-
-<p class=MsoNormalCxSpFirst style='margin-bottom:0in;margin-bottom:.0001pt;
-text-align:justify;line-height:normal'><span lang=EN-GB style='font-size:12.0pt;
-font-family:"Times New Roman","serif"'>Table showing the order of each
-filtering step and the number and percentage of sequences after each filtering
-step. </span></p>
-
-<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt;
-text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size:
-12.0pt;font-family:"Times New Roman","serif"'>Input:</span></u><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The
-number of sequences in the original IMGT file. This is always 100% of the
-sequences.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt;
-text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size:
-12.0pt;font-family:"Times New Roman","serif"'>After &quot;no results&quot; filter: </span></u><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IMGT
-classifies sequences either as &quot;productive&quot;, &quot;unproductive&quot;, &quot;unknown&quot;, or &quot;no
-results&quot;. Here, the number and percentages of sequences that are not classified
-as &quot;no results&quot; are reported.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt;
-text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size:
-12.0pt;font-family:"Times New Roman","serif"'>After functionality filter:</span></u><span
-lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The
-number and percentages of sequences that have passed the functionality filter. The
-filtering performed is dependent on the settings of the functionality filter.
-Details on the functionality filter <a name="OLE_LINK12"></a><a
-name="OLE_LINK11"></a><a name="OLE_LINK10">can be found on the start page of
-the SHM&amp;CSR pipeline</a>.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
-removal sequences that are missing a gene region:</span></u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-In this step all sequences that are missing a gene region (FR1, CDR1, FR2,
-CDR2, FR3) that should be present are removed from analysis. The sequence
-regions that should be present are dependent on the settings of the sequence
-starts at filter. <a name="OLE_LINK9"></a><a name="OLE_LINK8">The number and
-percentage of sequences that pass this filter step are reported.</a> </span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
-N filter:</span></u><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'> In this step all sequences that contain
-an ambiguous base (n) in the analysed region or the CDR3 are removed from the
-analysis. The analysed region is determined by the setting of the sequence
-starts at filter. The number and percentage of sequences that pass this filter
-step are reported.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
-filter unique sequences</span></u><span lang=EN-GB style='font-size:12.0pt;
-line-height:115%;font-family:"Times New Roman","serif"'>: The number and
-percentage of sequences that pass the &quot;filter unique sequences&quot; filter. Details
-on this filter </span><span lang=EN-GB style='font-size:12.0pt;line-height:
-115%;font-family:"Times New Roman","serif"'>can be found on the start page of
-the SHM&amp;CSR pipeline.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After
-remove duplicate based on filter:</span></u><span lang=EN-GB style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> The number and
-percentage of sequences that passed the remove duplicate filter. Details on the
-&quot;remove duplicate based on filter&quot; filter can be found on the start page of the
-SHM&amp;CSR pipeline.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK17"></a><a
-name="OLE_LINK16"><u><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Number of matched sequences:</span></u></a><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-The number and percentage of sequences that passed all the filters described
-above and have a (sub)class assigned.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number
-of unmatched sequences</span></u><span lang=EN-GB style='font-size:12.0pt;
-line-height:115%;font-family:"Times New Roman","serif"'>: The number and percentage
-of sequences that passed all the filters described above and do not have
-a (sub)class assigned.</span></p>
-
-<p class=MsoNormal><span lang=EN-GB>&nbsp;</span></p>
-
-</div>
-
-</body>
-
-</html>
--- a/shm_frequency.htm	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
-	{margin-top:0in;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:11.0pt;
-	font-family:"Calibri","sans-serif";}
-.MsoChpDefault
-	{font-family:"Calibri","sans-serif";}
-.MsoPapDefault
-	{margin-bottom:10.0pt;
-	line-height:115%;}
-@page WordSection1
-	{size:8.5in 11.0in;
-	margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
-	{page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US>
-
-<div class=WordSection1>
-
-<p class=MsoNormalCxSpFirst style='text-align:justify'><b><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>SHM
-frequency tab</span></u></b></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
-graphs give insight into the level of SHM. The data represented in these graphs
-can be downloaded in the download tab. <a name="OLE_LINK24"></a><a
-name="OLE_LINK23"></a><a name="OLE_LINK90"></a><a name="OLE_LINK89">More
-information on the values found in healthy individuals of different ages can be
-found in IJspeert and van Schouwenburg et al, PMID: 27799928. </a></span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Frequency
-scatter plot</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
-dot plot showing the percentage of SHM in each transcript divided into the
-different (sub)classes. </span><span lang=NL style='font-size:12.0pt;
-line-height:115%;font-family:"Times New Roman","serif"'>In the graph each dot
-represents an individual transcript.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation
-frequency by class</span></u></p>
-
-<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A
-bar graph showing the percentage of transcripts that contain 0%, 0-2%, 2-5%,
-5-10% 10-15%, 15-20% or more than 20% SHM for each subclass. </span></p>
-
-<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
-Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
-Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
-of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
-Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>Link</span></a>]</span></p>
-
-</div>
-
-</body>
-
-</html>
--- a/shm_overview.htm	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,332 +0,0 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Font Definitions */
- @font-face
-	{font-family:Calibri;
-	panose-1:2 15 5 2 2 2 4 3 2 4;}
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
-	{margin-top:0in;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:11.0pt;
-	font-family:"Calibri","sans-serif";}
-.MsoChpDefault
-	{font-family:"Calibri","sans-serif";}
-.MsoPapDefault
-	{margin-bottom:10.0pt;
-	line-height:115%;}
-@page WordSection1
-	{size:8.5in 11.0in;
-	margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
-	{page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US>
-
-<div class=WordSection1>
-
-<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Info
-table</span></b></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>This
-table contains information on different characteristics of SHM. For all
-characteristics information can be found for all sequences or only sequences of
-a certain (sub)class. All results are based on the sequences that passed the filter
-settings chosen on the start page of the SHM &amp; CSR pipeline and only
-include details on the analysed region as determined by the setting of the
-sequence starts at filter. All data in this table can be downloaded via the
-“downloads” tab.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation
-frequency:</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK83"></a><a
-name="OLE_LINK82"></a><a name="OLE_LINK81"><span lang=EN-GB style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values
-give information on the level of SHM. </span></a><a name="OLE_LINK22"></a><a
-name="OLE_LINK21"></a><a name="OLE_LINK20"><span lang=EN-GB style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information
-on the values found in healthy individuals of different ages can be found in </span></a><a
-name="OLE_LINK15"></a><a name="OLE_LINK14"></a><a name="OLE_LINK13"><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IJspeert
-and van Schouwenburg et al, PMID: 27799928</span></a></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number
-of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:
-115%;font-family:"Times New Roman","serif"'> Shows the number of total
-mutations / the number of sequenced bases (the % of mutated bases).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Median
-number of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt;
-line-height:115%;font-family:"Times New Roman","serif"'> Shows the median % of
-SHM of all sequences.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Patterns
-of SHM:</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK72"></a><a
-name="OLE_LINK71"></a><a name="OLE_LINK70"><span lang=EN-GB style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values
-give insights into the targeting and patterns of SHM. These values can give
-insight into the repair pathways used to repair the U:G mismatches introduced
-by AID. </span></a><a name="OLE_LINK40"></a><a name="OLE_LINK39"></a><a
-name="OLE_LINK38"></a><a name="OLE_LINK60"><span lang=EN-GB style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information
-on the values found in healthy individuals of different ages can be found in
-IJspeert and van Schouwenburg et al, PMID: 27799928</span></a></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions:</span></i><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-Shows the number of transition mutations / the number of total mutations (the
-percentage of mutations that are transitions). Transition mutations are C&gt;T,
-T&gt;C, A&gt;G, G&gt;A. </span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transversions:</span></i><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-Shows the number of transversion mutations / the number of total mutations (the
-percentage of mutations that are transitions). Transversion mutations are
-C&gt;A, C&gt;G, T&gt;A, T&gt;G, A&gt;T, A&gt;C, G&gt;T, G&gt;C.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions
-at GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'> <a name="OLE_LINK2"></a><a
-name="OLE_LINK1">Shows the number of transitions at GC locations (C&gt;T,
-G&gt;A) / the total number of mutations at GC locations (the percentage of
-mutations at GC locations that are transitions).</a></span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting
-of GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'> <a name="OLE_LINK7"></a><a
-name="OLE_LINK6"></a><a name="OLE_LINK3">Shows the number of mutations at GC
-locations / the total number of mutations (the percentage of total mutations
-that are at GC locations).</a> </span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions
-at AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'> Shows the number of transitions at AT
-locations (T&gt;C, A&gt;G) / the total number of mutations at AT locations (the
-percentage of mutations at AT locations that are transitions).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting
-of AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'> Shows the number of mutations at AT
-locations / the total number of mutations (the percentage of total mutations
-that are at AT locations).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>RGYW:</span></i><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-<a name="OLE_LINK28"></a><a name="OLE_LINK27"></a><a name="OLE_LINK26">Shows
-the number of mutations that are in a RGYW motive / The number of total mutations
-(the percentage of mutations that are in a RGYW motive). </a><a
-name="OLE_LINK62"></a><a name="OLE_LINK61">RGYW motives are known to be
-preferentially targeted by AID </a></span><span lang=EN-GB style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine,
-Y=pyrimidine, W = A or T).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WRCY:</span></i><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-<a name="OLE_LINK34"></a><a name="OLE_LINK33">Shows the number of mutations
-that are in a </a><a name="OLE_LINK32"></a><a name="OLE_LINK31"></a><a
-name="OLE_LINK30"></a><a name="OLE_LINK29">WRCY</a> motive / The number of
-total mutations (the percentage of mutations that are in a WRCY motive). WRCY
-motives are known to be preferentially targeted by AID </span><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine,
-Y=pyrimidine, W = A or T).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WA:</span></i><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-<a name="OLE_LINK37"></a><a name="OLE_LINK36"></a><a name="OLE_LINK35">Shows
-the number of mutations that are in a WA motive / The number of total mutations
-(the percentage of mutations that are in a WA motive). It is described that
-polymerase eta preferentially makes errors at WA motives </a></span><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W
-= A or T).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>TW:</span></i><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-Shows the number of mutations that are in a TW motive / The number of total mutations
-(the percentage of mutations that are in a TW motive). It is described that
-polymerase eta preferentially makes errors at TW motives </span><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W
-= A or T).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen
-selection:</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
-values give insight into antigen selection. It has been described that during
-antigen selection, there is selection against replacement mutations in the FR
-regions as these can cause instability of the B-cell receptor. In contrast
-replacement mutations in the CDR regions are important for changing the
-affinity of the B-cell receptor and therefore there is selection for this type
-of mutations. Silent mutations do not alter the amino acid sequence and
-therefore do not play a role in selection. More information on the values found
-in healthy individuals of different ages can be found in IJspeert and van
-Schouwenburg et al, PMID: 27799928</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>FR
-R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'> <a name="OLE_LINK43"></a><a
-name="OLE_LINK42"></a><a name="OLE_LINK41">Shows the number of replacement
-mutations in the FR regions / The number of silent mutations in the FR regions
-(the number of replacement mutations in the FR regions divided by the number of
-silent mutations in the FR regions)</a></span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>CDR
-R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'> Shows the number of replacement
-mutations in the CDR regions / The number of silent mutations in the CDR
-regions (the number of replacement mutations in the CDR regions divided by the
-number of silent mutations in the CDR regions)</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number
-of sequences nucleotides:</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These
-values give information on the number of sequenced nucleotides.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt
-in FR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'> <a name="OLE_LINK46"></a><a
-name="OLE_LINK45"></a><a name="OLE_LINK44">Shows the number of sequences bases
-that are located in the FR regions / The total number of sequenced bases (the
-percentage of sequenced bases that are present in the FR regions).</a></span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt
-in CDR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'> Shows the number of sequenced bases
-that are located in the CDR regions / <a name="OLE_LINK48"></a><a
-name="OLE_LINK47">The total number of sequenced bases (the percentage of
-sequenced bases that are present in the CDR regions).</a></span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A:
-</span></i><a name="OLE_LINK51"></a><a name="OLE_LINK50"></a><a
-name="OLE_LINK49"><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Shows the total number of sequenced
-adenines / The total number of sequenced bases (the percentage of sequenced
-bases that were adenines).</span></a></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>C:
-</span></i><a name="OLE_LINK53"></a><a name="OLE_LINK52"><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows
-the total number of sequenced cytosines / The total number of sequenced bases
-(the percentage of sequenced bases that were cytosines).</span></a></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>T:
-</span></i><a name="OLE_LINK57"></a><a name="OLE_LINK56"><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows
-the total number of sequenced </span></a><a name="OLE_LINK55"></a><a
-name="OLE_LINK54"><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>thymines</span></a><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>
-/ The total number of sequenced bases (the percentage of sequenced bases that
-were thymines).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>G:
-</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Shows the total number of sequenced <a
-name="OLE_LINK59"></a><a name="OLE_LINK58">guanine</a>s / The total number of
-sequenced bases (the percentage of sequenced bases that were guanines).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK69"><b><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></a></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK75"></a><a
-name="OLE_LINK74"></a><a name="OLE_LINK73"><span lang=EN-GB style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs visualize
-information on the patterns and targeting of SHM and thereby give information
-into the repair pathways used to repair the U:G mismatches introduced by AID. The
-data represented in these graphs can be downloaded in the download tab. More
-information on the values found in healthy individuals of different ages can be
-found in IJspeert and van Schouwenburg et al, PMID: 27799928</span></a><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>.
-<a name="OLE_LINK85"></a><a name="OLE_LINK84"></a></span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Percentage
-of mutations in AID and pol eta motives</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes
-<a name="OLE_LINK80"></a><a name="OLE_LINK79"></a><a name="OLE_LINK78">for each
-(sub)class </a>the percentage of mutations that are present in AID (RGYW or
-WRCY) or polymerase eta motives (WA or TW) in the different subclasses </span><span
-lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine,
-Y=pyrimidine, W = A or T).</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Relative
-mutation patterns</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes
-for each (sub)class the distribution of mutations between mutations at AT
-locations and transitions or transversions at GC locations. </span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Absolute
-mutation patterns</span></u></p>
-
-<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualized
-for each (sub)class the percentage of sequenced AT and GC bases that are
-mutated. The mutations at GC bases are divided into transition and transversion
-mutations<a name="OLE_LINK77"></a><a name="OLE_LINK76">. </a></span></p>
-
-<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
-Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
-Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
-of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
-Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>Link</span></a>]</span></p>
-
-</div>
-
-</body>
-
-</html>
--- a/shm_selection.htm	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Font Definitions */
- @font-face
-	{font-family:Calibri;
-	panose-1:2 15 5 2 2 2 4 3 2 4;}
-@font-face
-	{font-family:UICTFontTextStyleBody;}
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
-	{margin-top:0in;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:11.0pt;
-	font-family:"Calibri","sans-serif";}
-a:link, span.MsoHyperlink
-	{color:blue;
-	text-decoration:underline;}
-a:visited, span.MsoHyperlinkFollowed
-	{color:purple;
-	text-decoration:underline;}
-span.apple-converted-space
-	{mso-style-name:apple-converted-space;}
-.MsoChpDefault
-	{font-family:"Calibri","sans-serif";}
-.MsoPapDefault
-	{margin-bottom:10.0pt;
-	line-height:115%;}
-@page WordSection1
-	{size:8.5in 11.0in;
-	margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
-	{page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US link=blue vlink=purple>
-
-<div class=WordSection1>
-
-<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>References</span></b></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
-color:black'>Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying
-selection in high-throughput Immunoglobulin sequencing data sets. In<span
-class=apple-converted-space>&nbsp;</span><em>Nucleic Acids Research, 40 (17),
-pp. e134–e134.</em><span class=apple-converted-space><i>&nbsp;</i></span>[</span><span
-lang=EN-GB><a href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
-color:#303030'>doi:10.1093/nar/gks457</span></a></span><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
-color:black'>][</span><span lang=EN-GB><a
-href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif";
-color:#303030'>Link</span></a></span><span lang=EN-GB style='font-size:12.0pt;
-line-height:115%;font-family:"Times New Roman","serif";color:black'>]</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>AA
-mutation frequency</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>For
-each class, the frequency of replacement mutations at each amino acid position
-is shown, which is calculated by dividing the number of replacement mutations
-at a particular amino acid position/the number sequences that have an amino
-acid at that particular position. Since the length of the CDR1 and CDR2 region
-is not the same for every VH gene, some amino acids positions are absent.
-Therefore we calculate the frequency using the number of amino acids present at
-that that particular location. </span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen
-selection (BASELINe)</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows
-the results of the analysis of antigen selection as performed using BASELINe.
-Details on the analysis performed by BASELINe can be found in Yaari et al,
-PMID: 22641856. The settings used for the analysis are</span><span lang=EN-GB
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>:
-focused, SHM targeting model: human Tri-nucleotide, custom bounderies. The
-custom boundries are dependent on the ‘sequence starts at filter’. </span></p>
-
-<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL
-style='font-family:UICTFontTextStyleBody;color:black'>Leader:
-1:26:38:55:65:104:-</span></p>
-
-<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL
-style='font-family:UICTFontTextStyleBody;color:black'>FR1: 27:27:38:55:65:104:-</span></p>
-
-<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL
-style='font-family:UICTFontTextStyleBody;color:black'>CDR1:&nbsp;27:27:38:55:65:104:-</span></p>
-
-<p class=MsoNormalCxSpLast style='line-height:normal'><span lang=NL
-style='font-family:UICTFontTextStyleBody;color:black'>FR2:&nbsp;27:27:38:55:65:104:-</span></p>
-
-<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
-Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
-Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
-of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
-Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>Link</span></a>]</span></p>
-
-</div>
-
-</body>
-
-</html>
--- a/shm_transition.htm	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,120 +0,0 @@
-<html>
-
-<head>
-<meta http-equiv=Content-Type content="text/html; charset=windows-1252">
-<meta name=Generator content="Microsoft Word 14 (filtered)">
-<style>
-<!--
- /* Font Definitions */
- @font-face
-	{font-family:Calibri;
-	panose-1:2 15 5 2 2 2 4 3 2 4;}
- /* Style Definitions */
- p.MsoNormal, li.MsoNormal, div.MsoNormal
-	{margin-top:0in;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:11.0pt;
-	font-family:"Calibri","sans-serif";}
-a:link, span.MsoHyperlink
-	{color:blue;
-	text-decoration:underline;}
-a:visited, span.MsoHyperlinkFollowed
-	{color:purple;
-	text-decoration:underline;}
-p.msochpdefault, li.msochpdefault, div.msochpdefault
-	{mso-style-name:msochpdefault;
-	margin-right:0in;
-	margin-left:0in;
-	font-size:12.0pt;
-	font-family:"Calibri","sans-serif";}
-p.msopapdefault, li.msopapdefault, div.msopapdefault
-	{mso-style-name:msopapdefault;
-	margin-right:0in;
-	margin-bottom:10.0pt;
-	margin-left:0in;
-	line-height:115%;
-	font-size:12.0pt;
-	font-family:"Times New Roman","serif";}
-span.apple-converted-space
-	{mso-style-name:apple-converted-space;}
-.MsoChpDefault
-	{font-size:10.0pt;
-	font-family:"Calibri","sans-serif";}
-.MsoPapDefault
-	{margin-bottom:10.0pt;
-	line-height:115%;}
-@page WordSection1
-	{size:8.5in 11.0in;
-	margin:1.0in 1.0in 1.0in 1.0in;}
-div.WordSection1
-	{page:WordSection1;}
--->
-</style>
-
-</head>
-
-<body lang=EN-US link=blue vlink=purple>
-
-<div class=WordSection1>
-
-<p class=MsoNormalCxSpFirst style='text-align:justify'><span style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs and
-tables give insight into the targeting and patterns of SHM. This can give
-insight into the DNA repair pathways used to solve the U:G mismatches
-introduced by AID. More information on the values found in healthy individuals
-of different ages can be found in IJspeert and van Schouwenburg et al, PMID:
-27799928.</span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs
-</span></b></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK93"></a><a
-name="OLE_LINK92"></a><a name="OLE_LINK91"><u><span style='font-size:12.0pt;
-line-height:115%;font-family:"Times New Roman","serif"'>Heatmap transition
-information</span></u></a></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK98"></a><a
-name="OLE_LINK97"><span style='font-size:12.0pt;line-height:115%;font-family:
-"Times New Roman","serif"'>Heatmaps visualizing for each subclass the frequency
-of all possible substitutions. On the x-axes the original base is shown, while
-the y-axes shows the new base. The darker the shade of blue, the more frequent
-this type of substitution is occurring.  </span></a></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bargraph
-transition information</span></u></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bar graph
-visualizing for each original base the distribution of substitutions into the other
-bases. A graph is included for each (sub)class. </span></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Tables</span></b></p>
-
-<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size:
-12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transition
-tables are shown for each (sub)class. All the original bases are listed
-horizontally, while the new bases are listed vertically. </span></p>
-
-<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%;
-font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van
-Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen,
-Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span
-style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation
-of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and
-Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a
-href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span
-style='color:windowtext'>Link</span></a>]</span></p>
-
-</div>
-
-</body>
-
-</html>
Binary file style.tar.gz has changed
Binary file subclass_definition.db.nhr has changed
Binary file subclass_definition.db.nin has changed
Binary file subclass_definition.db.nsq has changed
--- a/summary_to_fasta.py	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-"""Convert the 1_Summary file of an IMGT archive into a FASTA file."""
-import argparse
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file")
-parser.add_argument("--fasta", help="The output fasta file")
-
-args = parser.parse_args()
-
-infile = args.input
-fasta = args.fasta
-
-with open(infile, 'r') as i, open(fasta, 'w') as o:
-	first = True
-	id_col = 0
-	seq_col = 0
-	no_results = 0  # rows with fewer than 5 columns
-	no_seqs = 0  # rows whose "Sequence" field is empty
-	passed = 0  # records written to the fasta file
-	for line in i:
-		# Drop the line terminator so the last column compares/prints cleanly.
-		splt = line.rstrip("\r\n").split("\t")
-		if first:
-			# Header row: locate the two columns by name.
-			id_col = splt.index("Sequence ID")
-			seq_col = splt.index("Sequence")
-			first = False
-			continue
-		if len(splt) < 5:
-			no_results += 1
-			continue
-		ID = splt[id_col]
-		seq = splt[seq_col]
-		if not len(seq) > 0:
-			no_seqs += 1
-			continue
-		o.write(">" + ID + "\n" + seq + "\n")
-		passed += 1
-	# Single-argument print() with %-formatting behaves identically on Python 2 and 3.
-	print("No results: %d" % no_results)
-	print("No sequences: %d" % no_seqs)
-	print("Written to fasta file: %d" % passed)
--- a/wrapper.sh	Tue Sep 01 16:03:44 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,913 +0,0 @@
-#!/bin/bash
-#set -e
-dir="$(cd "$(dirname "$0")" && pwd)"
-input=$1
-method=$2
-log=$3 #becomes the main html page at the end
-outdir=$4
-output="$outdir/index.html" #copied to $log location at the end
-title="$5"
-include_fr1=$6
-functionality=$7
-unique=$8
-naive_output=$9
-naive_output_ca=${10}
-naive_output_cg=${11}
-naive_output_cm=${12}
-naive_output_ce=${13}
-naive_output_all=${14}
-filter_unique=${15}
-filter_unique_count=${16}
-class_filter=${17}
-empty_region_filter=${18}
-fast=${19}
-
-mkdir $outdir
-
-tar -xzf $dir/style.tar.gz -C $outdir
-
-echo "---------------- read parameters ----------------"
-echo "---------------- read parameters ----------------<br />" > $log
-
-echo "unpacking IMGT file"
-
-type="$(file "$input")" #file(1) description, used to tell zip from xz uploads
-if [[ "$type" == *"Zip archive"* ]] ; then
-	echo "Zip archive"
-	echo "unzip $input -d $PWD/files/"
-	unzip "$input" -d "$PWD/files/"
-elif [[ "$type" == *"XZ compressed data"* ]] ; then
-	echo "XZ archive"
-	echo "tar -xJf $input -C $PWD/files/$title" #log the command exactly as it is run below
-	mkdir -p "$PWD/files/$title"
-	tar -xJf "$input" -C "$PWD/files/$title"
-else
-	echo "Unrecognized format $type"
-	echo "Unrecognized format $type" > $log
-	exit 1
-fi
-
-cat "`find $PWD/files/ -name "1_*"`" > $PWD/summary.txt
-cat "`find $PWD/files/ -name "2_*"`" > $PWD/gapped_nt.txt
-cat "`find $PWD/files/ -name "3_*"`" > $PWD/sequences.txt
-cat "`find $PWD/files/ -name "4_*"`" > $PWD/gapped_aa.txt
-cat "`find $PWD/files/ -name "5_*"`" > $PWD/aa.txt
-cat "`find $PWD/files/ -name "6_*"`" > $PWD/junction.txt
-cat "`find $PWD/files/ -name "7_*"`" > $PWD/mutationanalysis.txt
-cat "`find $PWD/files/ -name "8_*"`" > $PWD/mutationstats.txt
-cat "`find $PWD/files/ -name "9_*"`" > $PWD/aa_change_stats.txt
-cat "`find $PWD/files/ -name "10_*"`" > $PWD/hotspots.txt
-
-echo "---------------- unique id check ----------------"
-
-Rscript $dir/check_unique_id.r $PWD/summary.txt $PWD/gapped_nt.txt $PWD/sequences.txt $PWD/gapped_aa.txt $PWD/aa.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/aa_change_stats.txt $PWD/hotspots.txt
-
-if [[ ${#BLASTN_DIR} -ge 5 ]] ; then
-	echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
-else
-	BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
-	echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
-fi
-
-echo "---------------- class identification ----------------"
-echo "---------------- class identification ----------------<br />" >> $log
-
-python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
-
-echo "---------------- merge_and_filter.r ----------------"
-echo "---------------- merge_and_filter.r ----------------<br />" >> $log
-
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${filter_unique_count} ${class_filter} ${empty_region_filter} 2>&1
-
-if [[ "${naive_output}" == "yes" ]] || [[ "$fast" == "no" ]] ; then
-
-	echo "---------------- creating new IMGT zips ----------------"
-	echo "---------------- creating new IMGT zips ----------------<br />" >> $log
-
-	mkdir $outdir/new_IMGT
-
-	cp $PWD/summary.txt "$outdir/new_IMGT/1_Summary.txt"
-	cp $PWD/gapped_nt.txt "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt"
-	cp $PWD/sequences.txt "$outdir/new_IMGT/3_Nt-sequences.txt"
-	cp $PWD/gapped_aa.txt "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt"
-	cp $PWD/aa.txt "$outdir/new_IMGT/5_AA-sequences.txt"
-	cp $PWD/junction.txt "$outdir/new_IMGT/6_Junction.txt"
-	cp $PWD/mutationanalysis.txt "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt"
-	cp $PWD/mutationstats.txt "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt"
-	cp $PWD/aa_change_stats.txt "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt"
-	cp $PWD/hotspots.txt "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt"
-
-	mkdir $outdir/new_IMGT_IGA
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA
-
-	mkdir $outdir/new_IMGT_IGA1
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1
-
-	mkdir $outdir/new_IMGT_IGA2
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2
-
-	mkdir $outdir/new_IMGT_IGG
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG
-
-	mkdir $outdir/new_IMGT_IGG1
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1
-
-	mkdir $outdir/new_IMGT_IGG2
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2
-
-	mkdir $outdir/new_IMGT_IGG3
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3
-
-	mkdir $outdir/new_IMGT_IGG4
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4
-
-	mkdir $outdir/new_IMGT_IGM
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM
-
-	mkdir $outdir/new_IMGT_IGE
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE
-
-	Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1
-
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1
-
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1
-
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1
-
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE/ $outdir/merged.txt "IGE" 2>&1
-
-
-	tmp="$PWD"
-	cd $outdir/new_IMGT/ #tar weirdness...
-	tar -cJf ../new_IMGT.txz *
-
-	cd $outdir/new_IMGT_IGA/
-	tar -cJf ../new_IMGT_IGA.txz *
-
-	cd $outdir/new_IMGT_IGA1/
-	tar -cJf ../new_IMGT_IGA1.txz *
-
-	cd $outdir/new_IMGT_IGA2/
-	tar -cJf ../new_IMGT_IGA2.txz *
-
-	cd $outdir/new_IMGT_IGG/
-	tar -cJf ../new_IMGT_IGG.txz *
-
-	cd $outdir/new_IMGT_IGG1/
-	tar -cJf ../new_IMGT_IGG1.txz *
-
-	cd $outdir/new_IMGT_IGG2/
-	tar -cJf ../new_IMGT_IGG2.txz *
-
-	cd $outdir/new_IMGT_IGG3/
-	tar -cJf ../new_IMGT_IGG3.txz *
-
-	cd $outdir/new_IMGT_IGG4/
-	tar -cJf ../new_IMGT_IGG4.txz *
-
-	cd $outdir/new_IMGT_IGM/
-	tar -cJf ../new_IMGT_IGM.txz *
-
-	cd $outdir/new_IMGT_IGE/
-	tar -cJf ../new_IMGT_IGE.txz *
-
-	cd $tmp
-fi
-
-echo "---------------- shm_csr.r ----------------"
-echo "---------------- shm_csr.r ----------------<br />" >> $log
-
-classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched"
-echo "R mutation analysis"
-Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1
-
-echo "---------------- plot_pdfs.r ----------------"
-echo "---------------- plot_pdfs.r ----------------<br />" >> $log
-
-#log the command that is run next (plot_pdf.r, fed with the RData file referenced below)
-echo "Rscript $dir/plot_pdf.r $outdir/pdfplots.RData $outdir 2>&1"
-Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir" 2>&1
-
-echo "---------------- shm_csr.py ----------------"
-echo "---------------- shm_csr.py ----------------<br />" >> $log
-
-python $dir/shm_csr.py --input $outdir/merged.txt --genes $classes --empty_region_filter "${empty_region_filter}" --output $outdir/hotspot_analysis.txt
-
-echo "---------------- aa_histogram.r ----------------"
-echo "---------------- aa_histogram.r ----------------<br />" >> $log
-
-Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" $outdir/ 2>&1
-if [ -e "$outdir/aa_histogram_.png" ]; then
-        mv $outdir/aa_histogram_.png $outdir/aa_histogram.png
-        mv $outdir/aa_histogram_.pdf $outdir/aa_histogram.pdf
-        mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt
-        mv $outdir/aa_histogram_absent_.txt $outdir/aa_histogram_absent.txt
-        mv $outdir/aa_histogram_count_.txt $outdir/aa_histogram_count.txt
-        mv $outdir/aa_histogram_sum_.txt $outdir/aa_histogram_sum.txt
-fi
-
-genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE)
-
-funcs=(sum mean median)
-funcs=(sum)
-
-echo "---------------- sequence_overview.r ----------------"
-echo "---------------- sequence_overview.r ----------------<br />" >> $log
-
-mkdir $outdir/sequence_overview
-
-Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt ${empty_region_filter} 2>&1
-
-echo "<table border='1'>" > $outdir/base_overview.html
-
-while IFS=$'\t' read ID class seq A C G T
-do
-	echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html
-done < $outdir/sequence_overview/ntoverview.txt
-
-echo "<html><center><h1>$title</h1></center>" > $output
-echo "<meta name='viewport' content='width=device-width, initial-scale=1'>" >> $output
-echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $output
-echo "<script type='text/javascript' src='tabber.js'></script>" >> $output
-echo "<script type='text/javascript' src='script.js'></script>" >> $output
-echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> $output
-echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>" >> $output
-
-matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`"
-unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
-total_count=$((matched_count + unmatched_count))
-#share of unmatched sequences as computed by bc with scale=2; stays 0 when there are no sequences at all
-perc_count=0
-if [[ ${total_count} -gt 0 ]] ; then perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"` ; fi
-
-echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
-echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
-echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
-
-echo "---------------- main tables ----------------"
-echo "---------------- main tables ----------------<br />" >> $log
-
-echo "<div class='tabber'>" >> $output
-echo "<div class='tabbertab' title='SHM Overview' style='width: 3000px;'>" >> $output
-
-for func in ${funcs[@]}
-do
-	
-	echo "---------------- $func table ----------------"
-	echo "---------------- $func table ----------------<br />" >> $log
-	
-	cat $outdir/mutations_${func}.txt $outdir/shm_overview_tandem_row.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt
-	
-	echo "---------------- pattern_plots.r ----------------"
-	echo "---------------- pattern_plots.r ----------------<br />" >> $log
-
-	Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/absolute_mutations $outdir/shm_overview.txt 2>&1
-	
-	echo "<table class='pure-table pure-table-striped'>" >> $output
-	echo "<thead><tr><th>info</th>" >> $output
-	
-	if [ "${class_filter}" != "101_101" ] ; then
-	
-		for gene in ${genes[@]}
-		do
-			tmp=`cat $outdir/${gene}_${func}_n.txt`
-			echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
-		done
-		
-		tmp=`cat $outdir/all_${func}_n.txt`
-		echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output
-		tmp=`cat $outdir/unmatched_${func}_n.txt`
-		echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr></thead>" >> $output #closes the header row opened with <thead><tr>
-
-		while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz 
-		do
-			if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] || [ "$name" == "Tandems/Expected (ratio)" ] ; then #meh
-				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${cex}/${cey} (${cez})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>" >> $output
-			elif [ "$name" == "Median of Number of Mutations (%)" ] ; then
-				echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${cez}%</td><td>${allz}%</td><td>${unz}%</td></tr>" >> $output
-			else
-				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${cex}/${cey} (${cez}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output
-			fi
-		done < $outdir/data_${func}.txt
-		
-	else
-		tmp=`cat $outdir/all_${func}_n.txt`
-		echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th></tr></thead>" >> $output #close the header row before the data rows are appended
-		
-		while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz
-		do
-			if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh
-				echo "<tr><td>$name</td><td>${allx}/${ally}</td></tr>" >> $output
-			elif [ "$name" == "Median of Number of Mutations (%)" ] ; then
-				echo "<tr><td>$name</td><td>${allz}%</td></tr>" >> $output
-			else
-				echo "<tr><td>$name</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
-			fi
-		done < $outdir/data_${func}.txt
-		
-	fi
-	echo "</table>" >> $output
-	#echo "<a href='data_${func}.txt'>Download data</a>" >> $output
-done
-
-echo "<a href='aid_motives.pdf'><img src='aid_motives.png' /></a><br />" >> $output
-echo "<a href='relative_mutations.pdf'><img src='relative_mutations.png' /></a><br />" >> $output
-echo "<a href='absolute_mutations.pdf'><img src='absolute_mutations.png' /></a><br />" >> $output
-echo "<br />" >> $output
-cat $dir/shm_overview.htm >> $output
-echo "</div>" >> $output #SHM overview tab end
-
-echo "---------------- images ----------------"
-echo "---------------- images ----------------<br />" >> $log
-
-echo "<div class='tabbertab' title='SHM Frequency' style='width: 3000px;'>" >> $output
-#each plot is only linked when its png exists in $outdir
-if [ -e $outdir/scatter.png ]
-then
-	echo "<a href='scatter.pdf'><img src='scatter.png'/></a><br />" >> $output
-fi
-if [ -e $outdir/frequency_ranges.png ]
-then
-	echo "<a href='frequency_ranges.pdf'><img src='frequency_ranges.png'/></a><br />" >> $output
-fi
-
-echo "<br />" >> $output
-cat $dir/shm_frequency.htm >> $output
-
-echo "</div>" >> $output #SHM frequency tab end
-
-echo "<div class='tabbertab' title='Transition tables' style='width: 3000px;'>" >> $output
-
-echo "<table border='0'>" >> $output
-
-for gene in ${genes[@]}
-do
-	echo "<tr>" >> $output
-	echo "<td><h1>${gene}</h1></td>" >> $output
-	
-	if [ -e $outdir/transitions_heatmap_${gene}.png ]
-	then
-		echo "<td><a href='transitions_heatmap_${gene}.pdf'><img src='transitions_heatmap_${gene}.png' /></a></td>" >> $output
-	else
-		echo "<td></td>" >> $output
-	fi
-	
-	if [ -e $outdir/transitions_stacked_${gene}.png ]
-	then
-		echo "<td><a href='transitions_stacked_${gene}.pdf'><img src='transitions_stacked_${gene}.png' /></a></td>" >> $output
-	else
-		echo "<td></td>" >> $output
-	fi
-	
-	echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output
-	echo "<tr><td></td><td colspan="5"><center>To</center></td></tr>" >> $output
-	first="true"
-	while IFS=, read from a c g t
-		do
-			if [ "$first" == "true" ] ; then
-				echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
-				first="false"
-			else
-				echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
-			fi
-	done < $outdir/transitions_${gene}_sum.txt
-	echo "</table></td>" >> $output
-	
-	echo "</tr>" >> $output
-done
-
-echo "<tr>" >> $output
-echo "<td><h1>All</h1></td>" >> $output
-echo "<td><a href='transitions_heatmap_all.pdf'><img src='transitions_heatmap_all.png' /></a></td>" >> $output
-echo "<td><a href='transitions_stacked_all.pdf'><img src='transitions_stacked_all.png' /></a></td>" >> $output
-echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output
-echo "<tr><td></td><td colspan="5"><center>To</center></td></tr>" >> $output
-first="true"
-while IFS=, read from a c g t
-	do
-		if [ "$first" == "true" ] ; then
-			echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
-			first="false"
-		else
-			echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
-		fi
-done < $outdir/transitions_all_sum.txt
-echo "</table></td>" >> $output
-
-echo "</tr>" >> $output
-
-echo "</table>" >> $output
-
-echo "<br />" >> $output
-cat $dir/shm_transition.htm >> $output
-
-echo "</div>" >> $output #transition tables tab end
-
-echo "<div class='tabbertab' title='Antigen Selection'>" >> $output
-
-if [ -e $outdir/aa_histogram.png ]
-then
-	echo "<a href='aa_histogram.pdf'><img src='aa_histogram.png'/></a><br />" >> $output
-fi
-
-if [ -e $outdir/aa_histogram_IGA.png ]
-then
-	echo "<a href='aa_histogram_IGA.pdf'><img src='aa_histogram_IGA.png'/></a><br />" >> $output
-fi
-
-if [ -e $outdir/aa_histogram_IGG.png ]
-then
-	echo "<a href='aa_histogram_IGG.pdf'><img src='aa_histogram_IGG.png'/></a><br />" >> $output
-fi
-
-if [ -e $outdir/aa_histogram_IGM.png ]
-then
-	echo "<a href='aa_histogram_IGM.pdf'><img src='aa_histogram_IGM.png'/></a><br />" >> $output
-fi
-
-if [ -e $outdir/aa_histogram_IGE.png ]
-then
-	echo "<a href='aa_histogram_IGE.pdf'><img src='aa_histogram_IGE.png'/></a><br />" >> $output
-fi
-
-
-
-if [[ "$fast" == "no" ]] ; then
-
-    
-
-	echo "---------------- baseline ----------------"
-	echo "---------------- baseline ----------------<br />" >> $log
-	tmp="$PWD"
-
-	mkdir $outdir/baseline
-	
-	echo "<center><h1>BASELINe</h1>" >> $output
-	#region boundaries passed to baseline/wrapper.sh; the same string is shown in the report header
-	baseline_boundaries="27:27:38:55:65:104:-"
-	header_substring="Based on CDR1, FR2, CDR2, FR3 (${baseline_boundaries})"
-	
-	if [[ "${empty_region_filter}" == "leader" ]] ; then
-		baseline_boundaries="1:26:38:55:65:104:-"
-		header_substring="Based on FR1, CDR1, FR2, CDR2, FR3 (${baseline_boundaries})"
-	fi
-	
-	echo "<p>${header_substring}</p></center>" >> $output
-
-	mkdir $outdir/baseline/IGA_IGG_IGM
-	if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
-		cd $outdir/baseline/IGA_IGG_IGM
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
-	else
-		echo "No sequences" > "$outdir/baseline.txt"
-	fi
-
-	mkdir $outdir/baseline/IGA
-	if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then
-		cd $outdir/baseline/IGA
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt"
-	else
-		echo "No IGA sequences" > "$outdir/baseline_IGA.txt"
-	fi
-
-	mkdir $outdir/baseline/IGG
-	if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then
-		cd $outdir/baseline/IGG
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGG.txz "IGG" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt"
-	else
-		echo "No IGG sequences" > "$outdir/baseline_IGG.txt"
-	fi
-
-	mkdir $outdir/baseline/IGM
-	if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then
-		cd $outdir/baseline/IGM
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt"
-	else
-		echo "No IGM sequences" > "$outdir/baseline_IGM.txt"
-	fi
-
-	mkdir $outdir/baseline/IGE
-	if [[ $(wc -l < $outdir/new_IMGT_IGE/1_Summary.txt) -gt "1" ]]; then
-		cd $outdir/baseline/IGE
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGE.txz "IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGE.pdf" "Sequence.ID" "$outdir/baseline_IGE.txt"
-	else
-		echo "No IGE sequences" > "$outdir/baseline_IGE.txt"
-	fi
-
-	cd $tmp
-
-	echo "Cleaning up *.RData files"
-	find $outdir/baseline -name "*.RData" -type f -delete
-	
-	if [ -e $outdir/baseline.pdf ]
-	then
-		echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output
-	fi
-
-	if [ -e $outdir/baseline_IGA.pdf ]
-	then
-		echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output
-	fi
-
-	if [ -e $outdir/baseline_IGG.pdf ]
-	then
-		echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output
-	fi
-
-	if [ -e $outdir/baseline_IGM.pdf ]
-	then
-		echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output
-	fi
-
-	if [ -e $outdir/baseline_IGE.pdf ]
-	then
-		echo "<embed src='baseline_IGE.pdf' width='700px' height='1000px'>" >> $output
-	fi
-fi
-
-echo "<br />" >> $output
-cat $dir/shm_selection.htm >> $output
-
-echo "</div>" >> $output #antigen selection tab end
-
-echo "<div class='tabbertab' title='CSR'>" >> $output #CSR tab
-
-if [ -e $outdir/IGA.png ] 
-then
-	echo "<a href='IGA.pdf'><img src='IGA.png'/></a><br />" >> $output
-fi
-if [ -e $outdir/IGG.png ]
-then
-	echo "<a href='IGG.pdf'><img src='IGG.png'/></a><br />" >> $output
-fi
-
-echo "<br />" >> $output
-cat $dir/shm_csr.htm >> $output
-
-echo "</div>" >> $output #CSR tab end
-
-if [[ "$fast" == "no" ]] ; then
-
-	echo "---------------- change-o MakeDB ----------------"
-
-	mkdir $outdir/change_o
-
-	tmp="$PWD"
-
-	cd $outdir/change_o
-
-	bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt
-	bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt
-	Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-db-defined_first_clones.txt 2>&1
-	
-	mkdir $outdir/new_IMGT_changeo
-	cp $outdir/new_IMGT/* $outdir/new_IMGT_changeo
-	
-	Rscript $dir/new_imgt.r $outdir/new_IMGT_changeo $outdir/change_o/change-o-db-defined_first_clones.txt "-" 2>&1
-	
-	cd $outdir/new_IMGT_changeo
-	tar -cJf ../new_IMGT_first_seq_of_clone.txz *
-	cd $outdir/change_o
-	
-	rm -rf $outdir/new_IMGT_changeo
-	
-	Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1
-	echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"
-	
-	if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then
-		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt
-		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt
-		Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-db-defined_first_clones-IGA.txt 2>&1
-		
-		mkdir $outdir/new_IMGT_IGA_changeo
-		cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA_changeo
-		
-		Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA_changeo $outdir/change_o/change-o-db-defined_first_clones-IGA.txt "-" 2>&1
-		
-		cd $outdir/new_IMGT_IGA_changeo
-		tar -cJf ../new_IMGT_IGA_first_seq_of_clone.txz *
-		
-		rm -rf $outdir/new_IMGT_IGA_changeo
-		
-		cd $outdir/change_o
-	else
-		echo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt"
-		echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt"
-	fi
-	
-	if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then
-		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt
-		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt
-		Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-db-defined_first_clones-IGG.txt 2>&1
-		
-		mkdir $outdir/new_IMGT_IGG_changeo
-		cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG_changeo
-		
-		Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG_changeo $outdir/change_o/change-o-db-defined_first_clones-IGG.txt "-" 2>&1
-		
-		cd $outdir/new_IMGT_IGG_changeo
-		tar -cJf ../new_IMGT_IGG_first_seq_of_clone.txz *
-		rm -rf $outdir/new_IMGT_IGG_changeo
-		
-		cd $outdir/change_o
-	else
-		echo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt"
-		echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt"
-	fi
-
-	if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then
-		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt
-		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt
-		Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-db-defined_first_clones-IGM.txt 2>&1
-		
-		mkdir $outdir/new_IMGT_IGM_changeo
-		cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM_changeo
-		
-		Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM_changeo $outdir/change_o/change-o-db-defined_first_clones-IGM.txt "-" 2>&1
-		
-		cd $outdir/new_IMGT_IGM_changeo
-		tar -cJf ../new_IMGT_IGM_first_seq_of_clone.txz *
-		
-		rm -rf $outdir/new_IMGT_IGM_changeo
-		
-		cd $outdir/change_o
-	else
-		echo "No IGM sequences" > "$outdir/change_o/change-o-db-defined_clones-IGM.txt"
-		echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt"
-	fi
-
-	if [[ $(wc -l < $outdir/new_IMGT_IGE/1_Summary.txt) -gt "1" ]]; then
-		bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGE.txz false false false $outdir/change_o/change-o-db-IGE.txt
-		bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGE.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-defined_clones-summary-IGE.txt
-		Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-db-defined_first_clones-IGE.txt 2>&1
-		
-		mkdir $outdir/new_IMGT_IGE_changeo
-		cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE_changeo
-		
-		Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE_changeo $outdir/change_o/change-o-db-defined_first_clones-IGE.txt "-" 2>&1
-		
-		cd $outdir/new_IMGT_IGE_changeo
-		tar -cJf ../new_IMGT_IGE_first_seq_of_clone.txz *
-		
-		rm -rf $outdir/new_IMGT_IGE_changeo
-		
-		cd $outdir/change_o
-	else
-		echo "No IGE sequences" > "$outdir/change_o/change-o-db-defined_clones-IGE.txt"
-		echo "No IGE sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGE.txt"
-	fi
-
-	cd "$tmp"
-	
-	rm -rf $outdir/new_IMGT
-	rm -rf $outdir/new_IMGT_IGA/
-	rm -rf $outdir/new_IMGT_IGA1/
-	rm -rf $outdir/new_IMGT_IGA2/
-	rm -rf $outdir/new_IMGT_IGG/
-	rm -rf $outdir/new_IMGT_IGG1/
-	rm -rf $outdir/new_IMGT_IGG2/
-	rm -rf $outdir/new_IMGT_IGG3/
-	rm -rf $outdir/new_IMGT_IGG4/
-	rm -rf $outdir/new_IMGT_IGM/
-	rm -rf $outdir/new_IMGT_IGE/
-
-	echo "<div class='tabbertab' title='Clonal Relation' style='width: 7000px;'>" >> $output #clonality tab
-
-	# Append an HTML table built from a clone-size summary file to an HTML file.
-	# $1: summary file; its first line is skipped, every other line is split by read into size/clones/seqs.
-	# $2: HTML file the table is appended to.
-	function clonality_table {
-		local infile="$1"
-		local outfile="$2"
-		local first='true' size clones seqs #keep the loop state out of the global scope
-		echo "<table class='pure-table pure-table-striped'>" >> "$outfile"
-		echo "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>" >> "$outfile"
-		while read -r size clones seqs
-		do
-			# the first line of the summary is not rendered as a data row
-			if [[ "$first" == "true" ]]; then
-				first="false"
-				continue
-			fi
-			echo "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>" >> "$outfile"
-		done < "$infile"
-		echo "</table>" >> "$outfile"
-	}
-	echo "<div class='tabber'>" >> $output
-
-	echo "<div class='tabbertab' title='All'>" >> $output
-	clonality_table $outdir/change_o/change-o-defined_clones-summary.txt $output
-	echo "</div>" >> $output
-
-	echo "<div class='tabbertab' title='IGA'>" >> $output
-	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGA.txt $output
-	echo "</div>" >> $output
-
-	echo "<div class='tabbertab' title='IGG'>" >> $output
-	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGG.txt $output
-	echo "</div>" >> $output
-
-	echo "<div class='tabbertab' title='IGM'>" >> $output
-	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output
-	echo "</div>" >> $output
-
-	echo "<div class='tabbertab' title='IGE'>" >> $output
-	clonality_table $outdir/change_o/change-o-defined_clones-summary-IGE.txt $output #IGE tab must read the IGE summary, not the IGM one
-	echo "</div>" >> $output
-
-	echo "<div class='tabbertab' title='Overlap' style='width: 7000px;'>" >> $output
-	cat "$outdir/sequence_overview/index.html" | sed -e 's:</td>:</td>\n:g' | sed "s:href='\(.*\).html:href='sequence_overview/\1.html:g" >> $output # rewrite href to 'sequence_overview/..."
-	echo "</div>" >> $output
-	
-	echo "</div>" >> $output #clonality tabber end
-	
-	echo "<br />" >> $output
-	cat $dir/shm_clonality.htm >> $output
-	
-	echo "</div>" >> $output #clonality tab end
-
-fi
-
-echo "<div class='tabbertab' title='Downloads'>" >> $output
-
-echo "<table class='pure-table pure-table-striped'>" >> $output
-echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> $output
-echo "<tr><td>The complete dataset</td><td><a href='merged.txt' download='merged.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The filtered dataset</td><td><a href='filtered.txt' download='filtered.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt' download='unmatched.txt' >Download</a></td></tr>" >> $output
-
-echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Overview</td></tr>" >> $output
-echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='shm_overview.txt' download='shm_overview.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the percentage of mutations in AID and pol eta motives plot</td><td><a href='aid_motives.txt' download='aid_motives.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the relative mutation patterns plot</td><td><a href='relative_mutations.txt' download='relative_mutations.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the absolute mutation patterns plot</td><td><a href='absolute_mutations.txt' download='absolute_mutations.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Data about tandem mutations by ID</td><td><a href='tandems_by_id.txt' download='tandems_by_id.txt' >Download</a></td></tr>" >> $output
-
-echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Frequency</td></tr>" >> $output
-echo "<tr><td>The data  generate the frequency scatter plot</td><td><a href='scatter.txt' download='scatter.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the frequency by class plot</td><td><a href='frequency_ranges_classes.txt' download='frequency_ranges_classes.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for frequency by subclass</td><td><a href='frequency_ranges_subclasses.txt' download='frequency_ranges_subclasses.txt' >Download</a></td></tr>" >> $output
-
-# "Transition Tables" section of the Downloads table.
-# Every row follows the same pattern, so the rows are generated from the list of
-# transition sets in the order they should appear.
-echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Transition Tables</td></tr>" >> $output
-for transition_set in all IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE
-do
-	transition_file="transitions_${transition_set}_sum.txt"
-	echo "<tr><td>The data for the '${transition_set}' transition plot</td><td><a href='${transition_file}' download='${transition_file}' >Download</a></td></tr>" >> $output
-done
-
-echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Antigen Selection</td></tr>" >> $output
-echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt' download='aa_id_mutations.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Presence of AA per sequence ID</td><td><a href='absent_aa_id.txt' download='absent_aa_id.txt' >Download</a></td></tr>" >> $output
-
-echo "<tr><td>The data used to generate the aa mutation frequency plot</td><td><a href='aa_histogram_sum.txt' download='aa_histogram_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the aa mutation frequency plot for IGA</td><td><a href='aa_histogram_sum_IGA.txt' download='aa_histogram_sum_IGA.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the aa mutation frequency plot for IGG</td><td><a href='aa_histogram_sum_IGG.txt' download='aa_histogram_sum_IGG.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the aa mutation frequency plot for IGM</td><td><a href='aa_histogram_sum_IGM.txt' download='aa_histogram_sum_IGM.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the aa mutation frequency plot for IGE</td><td><a href='aa_histogram_sum_IGE.txt' download='aa_histogram_sum_IGE.txt' >Download</a></td></tr>" >> $output
-
-echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf' download='baseline.pdf' >Download</a></td></tr>" >> $output
-echo "<tr><td>Baseline data</td><td><a href='baseline.txt' download='baseline.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Baseline IGA PDF</td><td><a href='baseline_IGA.pdf' download='baseline_IGA.pdf' >Download</a></td></tr>" >> $output
-echo "<tr><td>Baseline IGA data</td><td><a href='baseline_IGA.txt' download='baseline_IGA.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Baseline IGG PDF</td><td><a href='baseline_IGG.pdf' download='baseline_IGG.pdf' >Download</a></td></tr>" >> $output
-echo "<tr><td>Baseline IGG data</td><td><a href='baseline_IGG.txt' download='baseline_IGG.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Baseline IGM PDF</td><td><a href='baseline_IGM.pdf' download='baseline_IGM.pdf' >Download</a></td></tr>" >> $output
-echo "<tr><td>Baseline IGM data</td><td><a href='baseline_IGM.txt' download='baseline_IGM.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Baseline IGE PDF</td><td><a href='baseline_IGE.pdf' download='baseline_IGE.pdf' >Download</a></td></tr>" >> $output
-echo "<tr><td>Baseline IGE data</td><td><a href='baseline_IGE.txt' download='baseline_IGE.txt' >Download</a></td></tr>" >> $output
-
-# "CSR" section of the Downloads table: a header row plus one pie-chart data file
-# for each of the two classes listed.
-echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>CSR</td></tr>" >> $output
-for pie_class in IGA IGG
-do
-	echo "<tr><td>The data for the ${pie_class} subclass distribution plot</td><td><a href='${pie_class}_pie.txt' download='${pie_class}_pie.txt' >Download</a></td></tr>" >> $output
-done
-
-
-echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Clonal Relation</td></tr>" >> $output
-echo "<tr><td>Sequence overlap between subclasses</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone</td><td><a href='new_IMGT_first_seq_of_clone.txz' download='new_IMGT_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
-
-echo "<tr><td>The Change-O DB file with defined clones of IGA</td><td><a href='change_o/change-o-db-defined_clones-IGA.txt' download='change_o/change-o-db-defined_clones-IGA.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB defined clones summary file of IGA</td><td><a href='change_o/change-o-defined_clones-summary-IGA.txt' download='change_o/change-o-defined_clones-summary-IGA.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGA)</td><td><a href='new_IMGT_IGA_first_seq_of_clone.txz' download='new_IMGT_IGA_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
-
-echo "<tr><td>The Change-O DB file with defined clones of IGG</td><td><a href='change_o/change-o-db-defined_clones-IGG.txt' download='change_o/change-o-db-defined_clones-IGG.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB defined clones summary file of IGG</td><td><a href='change_o/change-o-defined_clones-summary-IGG.txt' download='change_o/change-o-defined_clones-summary-IGG.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGG)</td><td><a href='new_IMGT_IGG_first_seq_of_clone.txz' download='new_IMGT_IGG_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
-
-echo "<tr><td>The Change-O DB file with defined clones of IGM</td><td><a href='change_o/change-o-db-defined_clones-IGM.txt' download='change_o/change-o-db-defined_clones-IGM.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB defined clones summary file of IGM</td><td><a href='change_o/change-o-defined_clones-summary-IGM.txt' download='change_o/change-o-defined_clones-summary-IGM.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGM)</td><td><a href='new_IMGT_IGM_first_seq_of_clone.txz' download='new_IMGT_IGM_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
-
-echo "<tr><td>The Change-O DB file with defined clones of IGE</td><td><a href='change_o/change-o-db-defined_clones-IGE.txt' download='change_o/change-o-db-defined_clones-IGE.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB defined clones summary file of IGE</td><td><a href='change_o/change-o-defined_clones-summary-IGE.txt' download='change_o/change-o-defined_clones-summary-IGE.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGE)</td><td><a href='new_IMGT_IGE_first_seq_of_clone.txz' download='new_IMGT_IGE_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output
-
-# "Filtered IMGT output files" section of the Downloads table: the archive without a
-# class suffix first, then one archive per class/subclass in display order.
-echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Filtered IMGT output files</td></tr>" >> $output
-echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz' download='new_IMGT.txz' >Download</a></td></tr>" >> $output
-for imgt_class in IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE
-do
-	imgt_archive="new_IMGT_${imgt_class}.txz"
-	echo "<tr><td>An IMGT archive with just the matched and filtered ${imgt_class} sequences</td><td><a href='${imgt_archive}' download='${imgt_archive}' >Download</a></td></tr>" >> $output
-done
-
-# Finish the report page: close the Downloads table, append the static
-# shm_downloads.htm fragment from $dir, then close the Downloads tab, the tab
-# container and the HTML document.
-{
-	echo "</table>"
-	echo "<br />"
-	cat $dir/shm_downloads.htm
-	echo "</div>" # downloads tab end
-	echo "</div>" # tabs end
-	echo "</html>"
-} >> $output
-
-
-echo "---------------- naive_output.r ----------------"
-echo "---------------- naive_output.r ----------------<br />" >> $log
-
-if [[ "$naive_output" == "yes" ]]
-then
-	echo "output naive output"
-	if [[ "${class_filter}" == "101_101" ]]
-	then
-		echo "copy new_IMGT.txz to ${naive_output_all}"
-		cp $outdir/new_IMGT.txz ${naive_output_all}
-	else
-		echo "copy for classes"
-		cp $outdir/new_IMGT_IGA.txz ${naive_output_ca}
-		cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}
-		cp $outdir/new_IMGT_IGM.txz ${naive_output_cm}
-		cp $outdir/new_IMGT_IGE.txz ${naive_output_ce}
-	fi
-fi
-
-echo "</table>" >> $outdir/base_overview.html
-
-mv $log $outdir/log.html
-
-echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $log
-echo "<table border = 1>" >> $log
-echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> $log
-tIFS="$TMP"
-IFS=$'\t'
-while read step seq perc
-	do
-		echo "<tr>" >> $log
-		echo "<td>$step</td>" >> $log
-		echo "<td>$seq</td>" >> $log
-		echo "<td>${perc}%</td>" >> $log
-		echo "</tr>" >> $log
-done < $outdir/filtering_steps.txt
-echo "</table>" >> $log
-echo "<br />" >> $log
-cat $dir/shm_first.htm >> $log
-echo "</center></html>" >> $log
-
-IFS="$tIFS"
-
-
-echo "---------------- Done! ----------------"
-echo "---------------- Done! ----------------<br />" >> $outdir/log.html
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-