Mercurial > repos > davidvanzessen > shm_csr
changeset 80:a4617f1d1d89 draft
Uploaded
line wrap: on
line diff
--- a/.gitattributes Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -# Auto detect text files and perform LF normalization -* text=auto
--- a/.gitignore Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ - -shm_csr\.tar\.gz - -\.vscode/settings\.json
--- a/LICENSE Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2019 david - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file
--- a/README.md Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -# SHM CSR - -Somatic hypermutation and class switch recombination pipeline. -The docker version can be found [here](https://github.com/ErasmusMC-Bioinformatics/ARGalaxy-docker). - -# Dependencies --------------------- -[Python 2.7](https://www.python.org/) -[Change-O](https://changeo.readthedocs.io/en/version-0.4.4/) -[Baseline](http://selection.med.yale.edu/baseline/) -[R data.table](https://cran.r-project.org/web/packages/data.table/data.table.pdf) -[R ggplot2](https://cran.r-project.org/web/packages/ggplot2/ggplot2.pdf) -[R reshape2](https://cran.r-project.org/web/packages/reshape/reshape.pdf)
--- a/aa_histogram.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -library(ggplot2) - -args <- commandArgs(trailingOnly = TRUE) - -mutations.by.id.file = args[1] -absent.aa.by.id.file = args[2] -genes = strsplit(args[3], ",")[[1]] -genes = c(genes, "") -outdir = args[4] - - -print("---------------- read input ----------------") - -mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="") -absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") - -for(gene in genes){ - graph.title = paste(gene, "AA mutation frequency") - if(gene == ""){ - mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] - absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] - - graph.title = "AA mutation frequency all" - } else { - mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] - absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] - } - print(paste("nrow", gene, nrow(absent.aa.by.id.gene))) - if(nrow(mutations.by.id.gene) == 0){ - next - } - - mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) - aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) - - dat_freq = mutations.at.position / aa.at.position - dat_freq[is.na(dat_freq)] = 0 - dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) - - - print("---------------- plot ----------------") - - m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black")) - m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i) - m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") - m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") - m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") - m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") - m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") - m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(graph.title) - m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) - #m = m + scale_colour_manual(values=c("black")) - - print("---------------- write/print ----------------") - - - dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position) - - write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) - write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) - write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) - write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) - - png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) - print(m) - dev.off() - - ggsave(paste(outdir, "/aa_histogram_", gene, ".pdf", sep=""), m, width=14, height=7) -}
--- a/baseline/Baseline_Functions.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2287 +0,0 @@ -######################################################################################### -# License Agreement -# -# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE -# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER -# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE -# OR COPYRIGHT LAW IS PROHIBITED. -# -# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE -# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED -# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN -# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. -# -# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences -# Coded by: Mohamed Uduman & Gur Yaari -# Copyright 2012 Kleinstein Lab -# Version: 1.3 (01/23/2014) -######################################################################################### - -# Global variables - - FILTER_BY_MUTATIONS = 1000 - - # Nucleotides - NUCLEOTIDES = c("A","C","G","T") - - # Amino Acids - AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G") - names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG") - names(AMINO_ACIDS) <- names(AMINO_ACIDS) - - #Amino Acid Traits - #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y" - #B = "Hydrophobic/Burried" N = "Intermediate/Neutral" S="Hydrophilic/Surface") - TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N") - names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS)) - TRAITS_AMINO_ACIDS <- array(NA,21) - - # Codon Table - CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12)) - - # Substitution Model: Smith DS et al. 1996 - substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES)) - substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES)) - substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES)) - load("FiveS_Substitution.RData") - - # Mutability Models: Shapiro GS et al. 2002 - triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34, 0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1, NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14), nrow=64,byrow=T) - triMutability_Literature_Mouse <- matrix(c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34, 0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55, 0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33, 0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41), nrow=64,byrow=T) - triMutability_Names <- c("AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT") - load("FiveS_Mutability.RData") - -# Functions - - # Translate codon to amino acid - translateCodonToAminoAcid<-function(Codon){ - return(AMINO_ACIDS[Codon]) - } - - # Translate amino acid to trait change - translateAminoAcidToTraitChange<-function(AminoAcid){ - return(TRAITS_AMINO_ACIDS[AminoAcid]) - } - - # Initialize Amino Acid Trait Changes - initializeTraitChange <- function(traitChangeModel=1,species=1,traitChangeFileName=NULL){ - if(!is.null(traitChangeFileName)){ - tryCatch( - traitChange <- read.delim(traitChangeFileName,sep="\t",header=T) - , error = function(ex){ - cat("Error|Error reading trait changes. Please check file name/path and format.\n") - q() - } - ) - }else{ - traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98 - } - TRAITS_AMINO_ACIDS <<- traitChange - } - - # Read in formatted nucleotide substitution matrix - initializeSubstitutionMatrix <- function(substitutionModel,species,subsMatFileName=NULL){ - if(!is.null(subsMatFileName)){ - tryCatch( - subsMat <- read.delim(subsMatFileName,sep="\t",header=T) - , error = function(ex){ - cat("Error|Error reading substitution matrix. Please check file name/path and format.\n") - q() - } - ) - if(sum(apply(subsMat,1,sum)==1)!=4) subsMat = t(apply(subsMat,1,function(x)x/sum(x))) - }else{ - if(substitutionModel==1)subsMat <- substitution_Literature_Mouse - if(substitutionModel==2)subsMat <- substitution_Flu_Human - if(substitutionModel==3)subsMat <- substitution_Flu25_Human - - } - - if(substitutionModel==0){ - subsMat <- matrix(1,4,4) - subsMat[,] = 1/3 - subsMat[1,1] = 0 - subsMat[2,2] = 0 - subsMat[3,3] = 0 - subsMat[4,4] = 0 - } - - - NUCLEOTIDESN = c(NUCLEOTIDES,"N", "-") - if(substitutionModel==5){ - subsMat <- FiveS_Substitution - return(subsMat) - }else{ - subsMat <- rbind(subsMat,rep(NA,4),rep(NA,4)) - return( matrix(data.matrix(subsMat),6,4,dimnames=list(NUCLEOTIDESN,NUCLEOTIDES) ) ) - } - } - - - # Read in formatted Mutability file - initializeMutabilityMatrix <- function(mutabilityModel=1, species=1,mutabilityMatFileName=NULL){ - if(!is.null(mutabilityMatFileName)){ - tryCatch( - mutabilityMat <- read.delim(mutabilityMatFileName,sep="\t",header=T) - , error = function(ex){ - cat("Error|Error reading mutability matrix. Please check file name/path and format.\n") - q() - } - ) - }else{ - mutabilityMat <- triMutability_Literature_Human - if(species==2) mutabilityMat <- triMutability_Literature_Mouse - } - - if(mutabilityModel==0){ mutabilityMat <- matrix(1,64,3)} - - if(mutabilityModel==5){ - mutabilityMat <- FiveS_Mutability - return(mutabilityMat) - }else{ - return( matrix( data.matrix(mutabilityMat), 64, 3, dimnames=list(triMutability_Names,1:3)) ) - } - } - - # Read FASTA file formats - # Modified from read.fasta from the seqinR package - baseline.read.fasta <- - function (file = system.file("sequences/sample.fasta", package = "seqinr"), - seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE, - set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE, - strip.desc = FALSE, sizeof.longlong = .Machine$sizeof.longlong, - endian = .Platform$endian, apply.mask = TRUE) - { - seqtype <- match.arg(seqtype) - - lines <- readLines(file) - - if (legacy.mode) { - comments <- grep("^;", lines) - if (length(comments) > 0) - lines <- lines[-comments] - } - - - ind_groups<-which(substr(lines, 1L, 3L) == ">>>") - lines_mod<-lines - - if(!length(ind_groups)){ - lines_mod<-c(">>>All sequences combined",lines) - } - - ind_groups<-which(substr(lines_mod, 1L, 3L) == ">>>") - - lines <- array("BLA",dim=(length(ind_groups)+length(lines_mod))) - id<-sapply(1:length(ind_groups),function(i)ind_groups[i]+i-1)+1 - lines[id] <- "THIS IS A FAKE SEQUENCE" - lines[-id] <- lines_mod - rm(lines_mod) - - ind <- which(substr(lines, 1L, 1L) == ">") - nseq <- length(ind) - if (nseq == 0) { - stop("no line starting with a > character found") - } - start <- ind + 1 - end <- ind - 1 - - while( any(which(ind%in%end)) ){ - ind=ind[-which(ind%in%end)] - nseq <- length(ind) - if (nseq == 0) { - stop("no line starting with a > character found") - } - start <- ind + 1 - end <- ind - 1 - } - - end <- c(end[-1], length(lines)) - sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = "")) - if (seqonly) - return(sequences) - nomseq <- lapply(seq_len(nseq), function(i) { - - #firstword <- strsplit(lines[ind[i]], " ")[[1]][1] - substr(lines[ind[i]], 2, nchar(lines[ind[i]])) - - }) - if (seqtype == "DNA") { - if (forceDNAtolower) { - sequences <- as.list(tolower(chartr(".","-",sequences))) - }else{ - sequences <- as.list(toupper(chartr(".","-",sequences))) - } - } - if (as.string == FALSE) - sequences <- lapply(sequences, s2c) - if (set.attributes) { - for (i in seq_len(nseq)) { - Annot <- lines[ind[i]] - if (strip.desc) - Annot <- substr(Annot, 2L, nchar(Annot)) - attributes(sequences[[i]]) <- list(name = nomseq[[i]], - Annot = Annot, class = switch(seqtype, AA = "SeqFastaAA", - DNA = "SeqFastadna")) - } - } - names(sequences) <- nomseq - return(sequences) - } - - - # Replaces non FASTA characters in input files with N - replaceNonFASTAChars <-function(inSeq="ACGTN-AApA"){ - gsub('[^ACGTNacgt[:punct:]-[:punct:].]','N',inSeq,perl=TRUE) - } - - # Find the germlines in the FASTA list - germlinesInFile <- function(seqIDs){ - firstChar = sapply(seqIDs,function(x){substr(x,1,1)}) - secondChar = sapply(seqIDs,function(x){substr(x,2,2)}) - return(firstChar==">" & secondChar!=">") - } - - # Find the groups in the FASTA list - groupsInFile <- function(seqIDs){ - sapply(seqIDs,function(x){substr(x,1,2)})==">>" - } - - # In the process of finding germlines/groups, expand from the start to end of the group - expandTillNext <- function(vecPosToID){ - IDs = names(vecPosToID) - posOfInterests = which(vecPosToID) - - expandedID = rep(NA,length(IDs)) - expandedIDNames = gsub(">","",IDs[posOfInterests]) - startIndexes = c(1,posOfInterests[-1]) - stopIndexes = c(posOfInterests[-1]-1,length(IDs)) - expandedID = unlist(sapply(1:length(startIndexes),function(i){ - rep(i,stopIndexes[i]-startIndexes[i]+1) - })) - names(expandedID) = unlist(sapply(1:length(startIndexes),function(i){ - rep(expandedIDNames[i],stopIndexes[i]-startIndexes[i]+1) - })) - return(expandedID) - } - - # Process FASTA (list) to return a matrix[input, germline) - processInputAdvanced <- function(inputFASTA){ - - seqIDs = names(inputFASTA) - numbSeqs = length(seqIDs) - posGermlines1 = germlinesInFile(seqIDs) - numbGermlines = sum(posGermlines1) - posGroups1 = groupsInFile(seqIDs) - numbGroups = sum(posGroups1) - consDef = NA - - if(numbGermlines==0){ - posGermlines = 2 - numbGermlines = 1 - } - - glPositionsSum = cumsum(posGermlines1) - glPositions = table(glPositionsSum) - #Find the position of the conservation row - consDefPos = as.numeric(names(glPositions[names(glPositions)!=0 & glPositions==1]))+1 - if( length(consDefPos)> 0 ){ - consDefID = match(consDefPos, glPositionsSum) - #The coservation rows need to be pulled out and stores seperately - consDef = inputFASTA[consDefID] - inputFASTA = inputFASTA[-consDefID] - - seqIDs = names(inputFASTA) - numbSeqs = length(seqIDs) - posGermlines1 = germlinesInFile(seqIDs) - numbGermlines = sum(posGermlines1) - posGroups1 = groupsInFile(seqIDs) - numbGroups = sum(posGroups1) - if(numbGermlines==0){ - posGermlines = 2 - numbGermlines = 1 - } - } - - posGroups <- expandTillNext(posGroups1) - posGermlines <- expandTillNext(posGermlines1) - posGermlines[posGroups1] = 0 - names(posGermlines)[posGroups1] = names(posGroups)[posGroups1] - posInput = rep(TRUE,numbSeqs) - posInput[posGroups1 | posGermlines1] = FALSE - - matInput = matrix(NA, nrow=sum(posInput), ncol=2) - rownames(matInput) = seqIDs[posInput] - colnames(matInput) = c("Input","Germline") - - vecInputFASTA = unlist(inputFASTA) - matInput[,1] = vecInputFASTA[posInput] - matInput[,2] = vecInputFASTA[ which( names(inputFASTA)%in%paste(">",names(posGermlines)[posInput],sep="") )[ posGermlines[posInput]] ] - - germlines = posGermlines[posInput] - groups = posGroups[posInput] - - return( list("matInput"=matInput, "germlines"=germlines, "groups"=groups, "conservationDefinition"=consDef )) - } - - - # Replace leading and trailing dashes in the sequence - replaceLeadingTrailingDashes <- function(x,readEnd){ - iiGap = unlist(gregexpr("-",x[1])) - ggGap = unlist(gregexpr("-",x[2])) - #posToChange = intersect(iiGap,ggGap) - - - seqIn = replaceLeadingTrailingDashesHelper(x[1]) - seqGL = replaceLeadingTrailingDashesHelper(x[2]) - seqTemplate = rep('N',readEnd) - seqIn <- c(seqIn,seqTemplate[(length(seqIn)+1):readEnd]) - seqGL <- c(seqGL,seqTemplate[(length(seqGL)+1):readEnd]) -# if(posToChange!=-1){ -# seqIn[posToChange] = "-" -# seqGL[posToChange] = "-" -# } - - seqIn = c2s(seqIn[1:readEnd]) - seqGL = c2s(seqGL[1:readEnd]) - - lenGL = nchar(seqGL) - if(lenGL<readEnd){ - seqGL = paste(seqGL,c2s(rep("N",readEnd-lenGL)),sep="") - } - - lenInput = nchar(seqIn) - if(lenInput<readEnd){ - seqIn = paste(seqIn,c2s(rep("N",readEnd-lenInput)),sep="") - } - return( c(seqIn,seqGL) ) - } - - replaceLeadingTrailingDashesHelper <- function(x){ - grepResults = gregexpr("-*",x) - grepResultsPos = unlist(grepResults) - grepResultsLen = attr(grepResults[[1]],"match.length") - #print(paste("x = '", x, "'", sep="")) - x = s2c(x) - if(x[1]=="-"){ - x[1:grepResultsLen[1]] = "N" - } - if(x[length(x)]=="-"){ - x[(length(x)-grepResultsLen[length(grepResultsLen)]+1):length(x)] = "N" - } - return(x) - } - - - - - # Check sequences for indels - checkForInDels <- function(matInputP){ - insPos <- checkInsertion(matInputP) - delPos <- checkDeletions(matInputP) - return(list("Insertions"=insPos, "Deletions"=delPos)) - } - - # Check sequences for insertions - checkInsertion <- function(matInputP){ - insertionCheck = apply( matInputP,1, function(x){ - inputGaps <- as.vector( gregexpr("-",x[1])[[1]] ) - glGaps <- as.vector( gregexpr("-",x[2])[[1]] ) - return( is.finite( match(FALSE, glGaps%in%inputGaps ) ) ) - }) - return(as.vector(insertionCheck)) - } - # Fix inserstions - fixInsertions <- function(matInputP){ - insPos <- checkInsertion(matInputP) - sapply((1:nrow(matInputP))[insPos],function(rowIndex){ - x <- matInputP[rowIndex,] - inputGaps <- gregexpr("-",x[1])[[1]] - glGaps <- gregexpr("-",x[2])[[1]] - posInsertions <- glGaps[!(glGaps%in%inputGaps)] - inputInsertionToN <- s2c(x[2]) - inputInsertionToN[posInsertions]!="-" - inputInsertionToN[posInsertions] <- "N" - inputInsertionToN <- c2s(inputInsertionToN) - matInput[rowIndex,2] <<- inputInsertionToN - }) - return(insPos) - } - - # Check sequences for deletions - checkDeletions <-function(matInputP){ - deletionCheck = apply( matInputP,1, function(x){ - inputGaps <- as.vector( gregexpr("-",x[1])[[1]] ) - glGaps <- as.vector( gregexpr("-",x[2])[[1]] ) - return( is.finite( match(FALSE, inputGaps%in%glGaps ) ) ) - }) - return(as.vector(deletionCheck)) - } - # Fix sequences with deletions - fixDeletions <- function(matInputP){ - delPos <- checkDeletions(matInputP) - sapply((1:nrow(matInputP))[delPos],function(rowIndex){ - x <- matInputP[rowIndex,] - inputGaps <- gregexpr("-",x[1])[[1]] - glGaps <- gregexpr("-",x[2])[[1]] - posDeletions <- inputGaps[!(inputGaps%in%glGaps)] - inputDeletionToN <- s2c(x[1]) - inputDeletionToN[posDeletions] <- "N" - inputDeletionToN <- c2s(inputDeletionToN) - matInput[rowIndex,1] <<- inputDeletionToN - }) - return(delPos) - } - - - # Trim DNA sequence to the last codon - trimToLastCodon <- function(seqToTrim){ - seqLen = nchar(seqToTrim) - trimmedSeq = s2c(seqToTrim) - poi = seqLen - tailLen = 0 - - while(trimmedSeq[poi]=="-" || trimmedSeq[poi]=="."){ - tailLen = tailLen + 1 - poi = poi - 1 - } - - trimmedSeq = c2s(trimmedSeq[1:(seqLen-tailLen)]) - seqLen = nchar(trimmedSeq) - # Trim sequence to last codon - if( getCodonPos(seqLen)[3] > seqLen ) - trimmedSeq = substr(seqToTrim,1, ( (getCodonPos(seqLen)[1])-1 ) ) - - return(trimmedSeq) - } - - # Given a nuclotide position, returns the pos of the 3 nucs that made the codon - # e.g. nuc 86 is part of nucs 85,86,87 - getCodonPos <- function(nucPos){ - codonNum = (ceiling(nucPos/3))*3 - return( (codonNum-2):codonNum) - } - - # Given a nuclotide position, returns the codon number - # e.g. nuc 86 = codon 29 - getCodonNumb <- function(nucPos){ - return( ceiling(nucPos/3) ) - } - - # Given a codon, returns all the nuc positions that make the codon - getCodonNucs <- function(codonNumb){ - getCodonPos(codonNumb*3) - } - - computeCodonTable <- function(testID=1){ - - if(testID<=4){ - # Pre-compute every codons - intCounter = 1 - for(pOne in NUCLEOTIDES){ - for(pTwo in NUCLEOTIDES){ - for(pThree in NUCLEOTIDES){ - codon = paste(pOne,pTwo,pThree,sep="") - colnames(CODON_TABLE)[intCounter] = codon - intCounter = intCounter + 1 - CODON_TABLE[,codon] = mutationTypeOptimized(cbind(permutateAllCodon(codon),rep(codon,12))) - } - } - } - chars = c("N","A","C","G","T", "-") - for(a in chars){ - for(b in chars){ - for(c in chars){ - if(a=="N" | b=="N" | c=="N"){ - #cat(paste(a,b,c),sep="","\n") - CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12) - } - } - } - } - - chars = c("-","A","C","G","T") - for(a in chars){ - for(b in chars){ - for(c in chars){ - if(a=="-" | b=="-" | c=="-"){ - #cat(paste(a,b,c),sep="","\n") - CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12) - } - } - } - } - CODON_TABLE <<- as.matrix(CODON_TABLE) - } - } - - collapseClone <- function(vecInputSeqs,glSeq,readEnd,nonTerminalOnly=0){ - #print(length(vecInputSeqs)) - vecInputSeqs = unique(vecInputSeqs) - if(length(vecInputSeqs)==1){ - return( list( c(vecInputSeqs,glSeq), F) ) - }else{ - charInputSeqs <- sapply(vecInputSeqs, function(x){ - s2c(x)[1:readEnd] - }) - charGLSeq <- s2c(glSeq) - matClone <- sapply(1:readEnd, function(i){ - posNucs = unique(charInputSeqs[i,]) - posGL = charGLSeq[i] - error = FALSE - if(posGL=="-" & sum(!(posNucs%in%c("-","N")))==0 ){ - return(c("-",error)) - } - if(length(posNucs)==1) - return(c(posNucs[1],error)) - else{ - if("N"%in%posNucs){ - error=TRUE - } - if(sum(!posNucs[posNucs!="N"]%in%posGL)==0){ - return( c(posGL,error) ) - }else{ - #return( c(sample(posNucs[posNucs!="N"],1),error) ) - if(nonTerminalOnly==0){ - return( c(sample(charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL],1),error) ) - }else{ - posNucs = charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL] - posNucsTable = table(posNucs) - if(sum(posNucsTable>1)==0){ - return( c(posGL,error) ) - }else{ - return( c(sample( posNucs[posNucs%in%names(posNucsTable)[posNucsTable>1]],1),error) ) - } - } - - } - } - }) - - - #print(length(vecInputSeqs)) - return(list(c(c2s(matClone[1,]),glSeq),"TRUE"%in%matClone[2,])) - } - } - - # Compute the expected for each sequence-germline pair - getExpectedIndividual <- function(matInput){ - if( any(grep("multicore",search())) ){ - facGL <- factor(matInput[,2]) - facLevels = levels(facGL) - LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){ - computeMutabilities(facLevels[x]) - }) - facIndex = match(facGL,facLevels) - - LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){ - cInput = rep(NA,nchar(matInput[x,1])) - cInput[s2c(matInput[x,1])!="N"] = 1 - LisGLs_MutabilityU[[facIndex[x]]] * cInput - }) - - LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){ - computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) - }) - - LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){ - #print(x) - computeMutationTypes(matInput[x,2]) - }) - - LisGLs_Exp = mclapply(1:dim(matInput)[1], function(x){ - computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]]) - }) - - ul_LisGLs_Exp = unlist(LisGLs_Exp) - return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T)) - }else{ - facGL <- factor(matInput[,2]) - facLevels = levels(facGL) - LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){ - computeMutabilities(facLevels[x]) - }) - facIndex = match(facGL,facLevels) - - LisGLs_Mutability = lapply(1:nrow(matInput), function(x){ - cInput = rep(NA,nchar(matInput[x,1])) - cInput[s2c(matInput[x,1])!="N"] = 1 - LisGLs_MutabilityU[[facIndex[x]]] * cInput - }) - - LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){ - computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) - }) - - LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){ - #print(x) - computeMutationTypes(matInput[x,2]) - }) - - LisGLs_Exp = lapply(1:dim(matInput)[1], function(x){ - computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]]) - }) - - ul_LisGLs_Exp = unlist(LisGLs_Exp) - return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T)) - - } - } - - # Compute mutabilities of sequence based on the tri-nucleotide model - computeMutabilities <- function(paramSeq){ - seqLen = nchar(paramSeq) - seqMutabilites = rep(NA,seqLen) - - gaplessSeq = gsub("-", "", paramSeq) - gaplessSeqLen = nchar(gaplessSeq) - gaplessSeqMutabilites = rep(NA,gaplessSeqLen) - - if(mutabilityModel!=5){ - pos<- 3:(gaplessSeqLen) - subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2)) - gaplessSeqMutabilites[pos] = - tapply( c( - getMutability( substr(subSeq,1,3), 3) , - getMutability( substr(subSeq,2,4), 2), - getMutability( substr(subSeq,3,5), 1) - ),rep(1:(gaplessSeqLen-2),3),mean,na.rm=TRUE - ) - #Pos 1 - subSeq = substr(gaplessSeq,1,3) - gaplessSeqMutabilites[1] = getMutability(subSeq , 1) - #Pos 2 - subSeq = substr(gaplessSeq,1,4) - gaplessSeqMutabilites[2] = mean( c( - getMutability( substr(subSeq,1,3), 2) , - getMutability( substr(subSeq,2,4), 1) - ),na.rm=T - ) - seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites - return(seqMutabilites) - }else{ - - pos<- 3:(gaplessSeqLen) - subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2)) - gaplessSeqMutabilites[pos] = sapply(subSeq,function(x){ getMutability5(x) }, simplify=T) - seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites - return(seqMutabilites) - } - - } - - # Returns the mutability of a triplet at a given position - getMutability <- function(codon, pos=1:3){ - triplets <- rownames(mutability) - mutability[ match(codon,triplets) ,pos] - } - - getMutability5 <- function(fivemer){ - return(mutability[fivemer]) - } - - # Returns the substitution probabilty - getTransistionProb <- function(nuc){ - substitution[nuc,] - } - - getTransistionProb5 <- function(fivemer){ - if(any(which(fivemer==colnames(substitution)))){ - return(substitution[,fivemer]) - }else{ - return(array(NA,4)) - } - } - - # Given a nuc, returns the other 3 nucs it can mutate to - canMutateTo <- function(nuc){ - NUCLEOTIDES[- which(NUCLEOTIDES==nuc)] - } - - # Given a nucleotide, returns the probabilty of other nucleotide it can mutate to - canMutateToProb <- function(nuc){ - substitution[nuc,canMutateTo(nuc)] - } - - # Compute targeting, based on precomputed mutatbility & substitution - computeTargeting <- function(param_strSeq,param_vecMutabilities){ - - if(substitutionModel!=5){ - vecSeq = s2c(param_strSeq) - matTargeting = sapply( 1:length(vecSeq), function(x) { param_vecMutabilities[x] * getTransistionProb(vecSeq[x]) } ) - #matTargeting = apply( rbind(vecSeq,param_vecMutabilities),2, function(x) { as.vector(as.numeric(x[2]) * getTransistionProb(x[1])) } ) - dimnames( matTargeting ) = list(NUCLEOTIDES,1:(length(vecSeq))) - return (matTargeting) - }else{ - - seqLen = nchar(param_strSeq) - seqsubstitution = matrix(NA,ncol=seqLen,nrow=4) - paramSeq <- param_strSeq - gaplessSeq = gsub("-", "", paramSeq) - gaplessSeqLen = nchar(gaplessSeq) - gaplessSeqSubstitution = matrix(NA,ncol=gaplessSeqLen,nrow=4) - - pos<- 3:(gaplessSeqLen) - subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2)) - gaplessSeqSubstitution[,pos] = sapply(subSeq,function(x){ getTransistionProb5(x) }, simplify=T) - seqsubstitution[,which(s2c(paramSeq)!="-")]<- gaplessSeqSubstitution - #matTargeting <- param_vecMutabilities %*% seqsubstitution - matTargeting <- sweep(seqsubstitution,2,param_vecMutabilities,`*`) - dimnames( matTargeting ) = list(NUCLEOTIDES,1:(seqLen)) - return (matTargeting) - } - } - - # Compute the mutations types - computeMutationTypes <- function(param_strSeq){ - #cat(param_strSeq,"\n") - #vecSeq = trimToLastCodon(param_strSeq) - lenSeq = nchar(param_strSeq) - vecCodons = sapply({1:(lenSeq/3)}*3-2,function(x){substr(param_strSeq,x,x+2)}) - matMutationTypes = matrix( unlist(CODON_TABLE[,vecCodons]) ,ncol=lenSeq,nrow=4, byrow=F) - dimnames( matMutationTypes ) = list(NUCLEOTIDES,1:(ncol(matMutationTypes))) - return(matMutationTypes) - } - computeMutationTypesFast <- function(param_strSeq){ - matMutationTypes = matrix( CODON_TABLE[,param_strSeq] ,ncol=3,nrow=4, byrow=F) - #dimnames( matMutationTypes ) = list(NUCLEOTIDES,1:(length(vecSeq))) - return(matMutationTypes) - } - mutationTypeOptimized <- function( matOfCodons ){ - apply( matOfCodons,1,function(x){ mutationType(x[2],x[1]) } ) - } - - # Returns a vector of codons 1 mutation away from the given codon - permutateAllCodon <- function(codon){ - cCodon = s2c(codon) - matCodons = t(array(cCodon,dim=c(3,12))) - matCodons[1:4,1] = NUCLEOTIDES - matCodons[5:8,2] = NUCLEOTIDES - matCodons[9:12,3] = NUCLEOTIDES - apply(matCodons,1,c2s) - } - - # Given two codons, tells you if the mutation is R or S (based on your definition) - mutationType <- function(codonFrom,codonTo){ - if(testID==4){ - if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){ - return(NA) - }else{ - mutationType = "S" - if( translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonFrom)) != translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonTo)) ){ - mutationType = "R" - } - if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){ - mutationType = "Stop" - } - return(mutationType) - } - }else if(testID==5){ - if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){ - return(NA) - }else{ - if(codonFrom==codonTo){ - mutationType = "S" - }else{ - codonFrom = s2c(codonFrom) - codonTo = s2c(codonTo) - mutationType = "Stop" - nucOfI = codonFrom[which(codonTo!=codonFrom)] - if(nucOfI=="C"){ - mutationType = "R" - }else if(nucOfI=="G"){ - mutationType = "S" - } - } - return(mutationType) - } - }else{ - if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){ - return(NA) - }else{ - mutationType = "S" - if( translateCodonToAminoAcid(codonFrom) != translateCodonToAminoAcid(codonTo) ){ - mutationType = "R" - } - if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){ - mutationType = "Stop" - } - return(mutationType) - } - } - } - - - #given a mat of targeting & it's corresponding mutationtypes returns - #a vector of Exp_RCDR,Exp_SCDR,Exp_RFWR,Exp_RFWR - computeExpected <- function(paramTargeting,paramMutationTypes){ - # Replacements - RPos = which(paramMutationTypes=="R") - #FWR - Exp_R_FWR = sum(paramTargeting[ RPos[which(FWR_Nuc_Mat[RPos]==T)] ],na.rm=T) - #CDR - Exp_R_CDR = sum(paramTargeting[ RPos[which(CDR_Nuc_Mat[RPos]==T)] ],na.rm=T) - # Silents - SPos = which(paramMutationTypes=="S") - #FWR - Exp_S_FWR = sum(paramTargeting[ SPos[which(FWR_Nuc_Mat[SPos]==T)] ],na.rm=T) - #CDR - Exp_S_CDR = sum(paramTargeting[ SPos[which(CDR_Nuc_Mat[SPos]==T)] ],na.rm=T) - - return(c(Exp_R_CDR,Exp_S_CDR,Exp_R_FWR,Exp_S_FWR)) - } - - # Count the mutations in a sequence - # each mutation is treated independently - analyzeMutations2NucUri_website <- function( rev_in_matrix ){ - paramGL = rev_in_matrix[2,] - paramSeq = rev_in_matrix[1,] - - #Fill seq with GL seq if gapped - #if( any(paramSeq=="-") ){ - # gapPos_Seq = which(paramSeq=="-") - # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "-"] - # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] - #} - - - #if( any(paramSeq=="N") ){ - # gapPos_Seq = which(paramSeq=="N") - # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"] - # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] - #} - - analyzeMutations2NucUri( matrix(c( paramGL, paramSeq ),2,length(paramGL),byrow=T) ) - - } - - #1 = GL - #2 = Seq - analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=T) ){ - paramGL = in_matrix[2,] - paramSeq = in_matrix[1,] - paramSeqUri = paramGL - #mutations = apply(rbind(paramGL,paramSeq), 2, function(x){!x[1]==x[2]}) - mutations_val = paramGL != paramSeq - if(any(mutations_val)){ - mutationPos = {1:length(mutations_val)}[mutations_val] - mutationPos = mutationPos[sapply(mutationPos, function(x){!any(paramSeq[getCodonPos(x)]=="N")})] - length_mutations =length(mutationPos) - mutationInfo = rep(NA,length_mutations) - if(any(mutationPos)){ - - pos<- mutationPos - pos_array<-array(sapply(pos,getCodonPos)) - codonGL = paramGL[pos_array] - - codonSeq = sapply(pos,function(x){ - seqP = paramGL[getCodonPos(x)] - muCodonPos = {x-1}%%3+1 - seqP[muCodonPos] = paramSeq[x] - return(seqP) - }) - GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s) - Seqcodons = apply(codonSeq,2,c2s) - mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) - names(mutationInfo) = mutationPos - } - if(any(!is.na(mutationInfo))){ - return(mutationInfo[!is.na(mutationInfo)]) - }else{ - return(NA) - } - - - }else{ - return (NA) - } - } - - processNucMutations2 <- function(mu){ - if(!is.na(mu)){ - #R - if(any(mu=="R")){ - Rs = mu[mu=="R"] - nucNumbs = as.numeric(names(Rs)) - R_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T) - R_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T) - }else{ - R_CDR = 0 - R_FWR = 0 - } - - #S - if(any(mu=="S")){ - Ss = mu[mu=="S"] - nucNumbs = as.numeric(names(Ss)) - S_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T) - S_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T) - }else{ - S_CDR = 0 - S_FWR = 0 - } - - - retVec = c(R_CDR,S_CDR,R_FWR,S_FWR) - retVec[is.na(retVec)]=0 - return(retVec) - }else{ - return(rep(0,4)) - } - } - - - ## Z-score Test - computeZScore <- function(mat, test="Focused"){ - matRes <- matrix(NA,ncol=2,nrow=(nrow(mat))) - if(test=="Focused"){ - #Z_Focused_CDR - #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T ) - P = apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}) - R_mean = apply(cbind(mat[,c(1,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))}) - R_sd=sqrt(R_mean*(1-P)) - matRes[,1] = (mat[,1]-R_mean)/R_sd - - #Z_Focused_FWR - #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T ) - P = apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}) - R_mean = apply(cbind(mat[,c(3,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))}) - R_sd=sqrt(R_mean*(1-P)) - matRes[,2] = (mat[,3]-R_mean)/R_sd - } - - if(test=="Local"){ - #Z_Focused_CDR - #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T ) - P = apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}) - R_mean = apply(cbind(mat[,c(1,2)],P),1,function(x){x[3]*(sum(x[1:2]))}) - R_sd=sqrt(R_mean*(1-P)) - matRes[,1] = (mat[,1]-R_mean)/R_sd - - #Z_Focused_FWR - #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T ) - P = apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}) - R_mean = apply(cbind(mat[,c(3,4)],P),1,function(x){x[3]*(sum(x[1:2]))}) - R_sd=sqrt(R_mean*(1-P)) - matRes[,2] = (mat[,3]-R_mean)/R_sd - } - - if(test=="Imbalanced"){ - #Z_Focused_CDR - #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T ) - P = apply(mat[,5:8],1,function(x){((x[1]+x[2])/sum(x))}) - R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))}) - R_sd=sqrt(R_mean*(1-P)) - matRes[,1] = (mat[,1]-R_mean)/R_sd - - #Z_Focused_FWR - #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T ) - P = apply(mat[,5:8],1,function(x){((x[3]+x[4])/sum(x))}) - R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))}) - R_sd=sqrt(R_mean*(1-P)) - matRes[,2] = (mat[,3]-R_mean)/R_sd - } - - matRes[is.nan(matRes)] = NA - return(matRes) - } - - # Return a p-value for a z-score - z2p <- function(z){ - p=NA - if( !is.nan(z) && !is.na(z)){ - if(z>0){ - p = (1 - pnorm(z,0,1)) - } else if(z<0){ - p = (-1 * pnorm(z,0,1)) - } else{ - p = 0.5 - } - }else{ - p = NA - } - return(p) - } - - - ## Bayesian Test - - # Fitted parameter for the bayesian framework -BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986) - CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2)) - - # Given x, M & p, returns a pdf - calculate_bayes <- function ( x=3, N=10, p=0.33, - i=CONST_i, - max_sigma=20,length_sigma=4001 - ){ - if(!0%in%N){ - G <- max(length(x),length(N),length(p)) - x=array(x,dim=G) - N=array(N,dim=G) - p=array(p,dim=G) - sigma_s<-seq(-max_sigma,max_sigma,length.out=length_sigma) - sigma_1<-log({i/{1-i}}/{p/{1-p}}) - index<-min(N,60) - y<-dbeta(i,x+BAYESIAN_FITTED[index],N+BAYESIAN_FITTED[index]-x)*(1-p)*p*exp(sigma_1)/({1-p}^2+2*p*{1-p}*exp(sigma_1)+{p^2}*exp(2*sigma_1)) - if(!sum(is.na(y))){ - tmp<-approx(sigma_1,y,sigma_s)$y - tmp/sum(tmp)/{2*max_sigma/{length_sigma-1}} - }else{ - return(NA) - } - }else{ - return(NA) - } - } - # Given a mat of observed & expected, return a list of CDR & FWR pdf for selection - computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){ - flagOneSeq = F - if(nrow(mat)==1){ - mat=rbind(mat,mat) - flagOneSeq = T - } - if(test=="Focused"){ - #CDR - P = c(apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}),0.5) - N = c(apply(mat[,c(1,2,4)],1,function(x){(sum(x))}),0) - X = c(mat[,1],0) - bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) - bayesCDR = bayesCDR[-length(bayesCDR)] - - #FWR - P = c(apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}),0.5) - N = c(apply(mat[,c(3,2,4)],1,function(x){(sum(x))}),0) - X = c(mat[,3],0) - bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) - bayesFWR = bayesFWR[-length(bayesFWR)] - } - - if(test=="Local"){ - #CDR - P = c(apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}),0.5) - N = c(apply(mat[,c(1,2)],1,function(x){(sum(x))}),0) - X = c(mat[,1],0) - bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) - bayesCDR = bayesCDR[-length(bayesCDR)] - - #FWR - P = c(apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}),0.5) - N = c(apply(mat[,c(3,4)],1,function(x){(sum(x))}),0) - X = c(mat[,3],0) - bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) - bayesFWR = bayesFWR[-length(bayesFWR)] - } - - if(test=="Imbalanced"){ - #CDR - P = c(apply(mat[,c(5:8)],1,function(x){((x[1]+x[2])/sum(x))}),0.5) - N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0) - X = c(apply(mat[,c(1:2)],1,function(x){(sum(x))}),0) - bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) - bayesCDR = bayesCDR[-length(bayesCDR)] - - #FWR - P = c(apply(mat[,c(5:8)],1,function(x){((x[3]+x[4])/sum(x))}),0.5) - N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0) - X = c(apply(mat[,c(3:4)],1,function(x){(sum(x))}),0) - bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) - bayesFWR = bayesFWR[-length(bayesFWR)] - } - - if(test=="ImbalancedSilent"){ - #CDR - P = c(apply(mat[,c(6,8)],1,function(x){((x[1])/sum(x))}),0.5) - N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0) - X = c(apply(mat[,c(2,4)],1,function(x){(x[1])}),0) - bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) - bayesCDR = bayesCDR[-length(bayesCDR)] - - #FWR - P = c(apply(mat[,c(6,8)],1,function(x){((x[2])/sum(x))}),0.5) - N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0) - X = c(apply(mat[,c(2,4)],1,function(x){(x[2])}),0) - bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) - bayesFWR = bayesFWR[-length(bayesFWR)] - } - - if(flagOneSeq==T){ - bayesCDR = bayesCDR[1] - bayesFWR = bayesFWR[1] - } - return( list("CDR"=bayesCDR, "FWR"=bayesFWR) ) - } - - ##Covolution - break2chunks<-function(G=1000){ - base<-2^round(log(sqrt(G),2),0) - return(c(rep(base,floor(G/base)-1),base+G-(floor(G/base)*base))) - } - - PowersOfTwo <- function(G=100){ - exponents <- array() - i = 0 - while(G > 0){ - i=i+1 - exponents[i] <- floor( log2(G) ) - G <- G-2^exponents[i] - } - return(exponents) - } - - convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){ - G = ncol(cons) - if(G>1){ - for(gen in log(G,2):1){ - ll<-seq(from=2,to=2^gen,by=2) - sapply(ll,function(l){cons[,l/2]<<-weighted_conv(cons[,l],cons[,l-1],length_sigma=length_sigma)}) - } - } - return( cons[,1] ) - } - - convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){ - if(length(ncol(cons))) G<-ncol(cons) - groups <- PowersOfTwo(G) - matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G ) - startIndex = 1 - for( i in 1:length(groups) ){ - stopIndex <- 2^groups[i] + startIndex - 1 - if(stopIndex!=startIndex){ - matG[,i] <- convolutionPowersOfTwo( cons[,startIndex:stopIndex], length_sigma=length_sigma ) - startIndex = stopIndex + 1 - } - else { - if(G>1) matG[,i] <- cons[,startIndex:stopIndex] - else matG[,i] <- cons - #startIndex = stopIndex + 1 - } - } - return( list( matG, groups ) ) - } - - weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){ - lx<-length(x) - ly<-length(y) - if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){ - if(w<1){ - y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y - x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y - lx<-length(x1) - ly<-length(y1) - } - else { - y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y - x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y - lx<-length(x1) - ly<-length(y1) - } - } - else{ - x1<-x - y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y - ly<-length(y1) - } - tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y - tmp[tmp<=0] = 0 - return(tmp/sum(tmp)) - } - - calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){ - matG <- listMatG[[1]] - groups <- listMatG[[2]] - i = 1 - resConv <- matG[,i] - denom <- 2^groups[i] - if(length(groups)>1){ - while( i<length(groups) ){ - i = i + 1 - resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma) - #cat({{2^groups[i]}/denom},"\n") - denom <- denom + 2^groups[i] - } - } - return(resConv) - } - - # Given a list of PDFs, returns a convoluted PDF - groupPosteriors <- function( listPosteriors, max_sigma=20, length_sigma=4001 ,Threshold=2 ){ - listPosteriors = listPosteriors[ !is.na(listPosteriors) ] - Length_Postrior<-length(listPosteriors) - if(Length_Postrior>1 & Length_Postrior<=Threshold){ - cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors)) - listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma) - y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma) - return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) - }else if(Length_Postrior==1) return(listPosteriors[[1]]) - else if(Length_Postrior==0) return(NA) - else { - cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors)) - y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma ) - return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) - } - } - - fastConv<-function(cons, max_sigma=20, length_sigma=4001){ - chunks<-break2chunks(G=ncol(cons)) - if(ncol(cons)==3) chunks<-2:1 - index_chunks_end <- cumsum(chunks) - index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1) - index_chunks <- cbind(index_chunks_start,index_chunks_end) - - case <- sum(chunks!=chunks[1]) - if(case==1) End <- max(1,((length(index_chunks)/2)-1)) - else End <- max(1,((length(index_chunks)/2))) - - firsts <- sapply(1:End,function(i){ - indexes<-index_chunks[i,1]:index_chunks[i,2] - convolutionPowersOfTwoByTwos(cons[ ,indexes])[[1]] - }) - if(case==0){ - result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts) ) - }else if(case==1){ - last<-list(calculate_bayesGHelper( - convolutionPowersOfTwoByTwos( cons[ ,index_chunks[length(index_chunks)/2,1]:index_chunks[length(index_chunks)/2,2]] ) - ),0) - result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts)) - result<-calculate_bayesGHelper( - list( - cbind( - result_first,last[[1]]), - c(log(index_chunks_end[length(index_chunks)/2-1],2),log(index_chunks[length(index_chunks)/2,2]-index_chunks[length(index_chunks)/2,1]+1,2)) - ) - ) - } - return(as.vector(result)) - } - - # Computes the 95% CI for a pdf - calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){ - if(length(Pdf)!=length_sigma) return(NA) - sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma) - cdf = cumsum(Pdf) - cdf = cdf/cdf[length(cdf)] - return( c(sigma_s[findInterval(low,cdf)-1] , sigma_s[findInterval(up,cdf)]) ) - } - - # Computes a mean for a pdf - calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){ - if(length(Pdf)!=length_sigma) return(NA) - sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma) - norm = {length_sigma-1}/2/max_sigma - return( (Pdf%*%sigma_s/norm) ) - } - - # Returns the mean, and the 95% CI for a pdf - calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){ - if(is.na(Pdf)) - return(rep(NA,3)) - bCI = calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma) - bMean = calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma) - return(c(bMean, bCI)) - } - - # Computes the p-value of a pdf - computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){ - if(length(Pdf)>1){ - norm = {length_sigma-1}/2/max_sigma - pVal = {sum(Pdf[1:{{length_sigma-1}/2}]) + Pdf[{{length_sigma+1}/2}]/2}/norm - if(pVal>0.5){ - pVal = pVal-1 - } - return(pVal) - }else{ - return(NA) - } - } - - # Compute p-value of two distributions - compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){ - #print(c(length(dens1),length(dens2))) - if(length(dens1)>1 & length(dens2)>1 ){ - dens1<-dens1/sum(dens1) - dens2<-dens2/sum(dens2) - cum2 <- cumsum(dens2)-dens2/2 - tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i]))) - #print(tmp) - if(tmp>0.5)tmp<-tmp-1 - return( tmp ) - } - else { - return(NA) - } - #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N) - } - - # get number of seqeunces contributing to the sigma (i.e. seqeunces with mutations) - numberOfSeqsWithMutations <- function(matMutations,test=1){ - if(test==4)test=2 - cdrSeqs <- 0 - fwrSeqs <- 0 - if(test==1){#focused - cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2,4)]) }) - fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4,2)]) }) - if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0) - if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0) - } - if(test==2){#local - cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2)]) }) - fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4)]) }) - if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0) - if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0) - } - return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs)) -} - - - -shadeColor <- function(sigmaVal=NA,pVal=NA){ - if(is.na(sigmaVal) & is.na(pVal)) return(NA) - if(is.na(sigmaVal) & !is.na(pVal)) sigmaVal=sign(pVal) - if(is.na(pVal) || pVal==1 || pVal==0){ - returnColor = "#FFFFFF"; - }else{ - colVal=abs(pVal); - - if(sigmaVal<0){ - if(colVal>0.1) - returnColor = "#CCFFCC"; - if(colVal<=0.1) - returnColor = "#99FF99"; - if(colVal<=0.050) - returnColor = "#66FF66"; - if(colVal<=0.010) - returnColor = "#33FF33"; - if(colVal<=0.005) - returnColor = "#00FF00"; - - }else{ - if(colVal>0.1) - returnColor = "#FFCCCC"; - if(colVal<=0.1) - returnColor = "#FF9999"; - if(colVal<=0.05) - returnColor = "#FF6666"; - if(colVal<=0.01) - returnColor = "#FF3333"; - if(colVal<0.005) - returnColor = "#FF0000"; - } - } - - return(returnColor) -} - - - -plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){ - if(!log){ - x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac - y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac - }else { - if(log==2){ - x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac - y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac) - } - if(log==1){ - x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac) - y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac - } - if(log==3){ - x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac) - y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac) - } - } - return(c("x"=x,"y"=y)) -} - -# SHMulation - - # Based on targeting, introduce a single mutation & then update the targeting - oneMutation <- function(){ - # Pick a postion + mutation - posMutation = sample(1:(seqGermlineLen*4),1,replace=F,prob=as.vector(seqTargeting)) - posNucNumb = ceiling(posMutation/4) # Nucleotide number - posNucKind = 4 - ( (posNucNumb*4) - posMutation ) # Nuc the position mutates to - - #mutate the simulation sequence - seqSimVec <- s2c(seqSim) - seqSimVec[posNucNumb] <- NUCLEOTIDES[posNucKind] - seqSim <<- c2s(seqSimVec) - - #update Mutability, Targeting & MutationsTypes - updateMutabilityNTargeting(posNucNumb) - - #return(c(posNucNumb,NUCLEOTIDES[posNucKind])) - return(posNucNumb) - } - - updateMutabilityNTargeting <- function(position){ - min_i<-max((position-2),1) - max_i<-min((position+2),nchar(seqSim)) - min_ii<-min(min_i,3) - - #mutability - update locally - seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)] - - - #targeting - compute locally - seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i]) - seqTargeting[is.na(seqTargeting)] <<- 0 - #mutCodonPos = getCodonPos(position) - mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3]) - #cat(mutCodonPos,"\n") - mutTypeCodon = getCodonPos(position) - seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) ) - # Stop = 0 - if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){ - seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0 - } - - - #Selection - selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R")) - # CDR - selectedCDR = selectedPos[which(matCDR[selectedPos]==T)] - seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR] * exp(selCDR) - seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K - - # FWR - selectedFWR = selectedPos[which(matFWR[selectedPos]==T)] - seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR] * exp(selFWR) - seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K - - } - - - - # Validate the mutation: if the mutation has not been sampled before validate it, else discard it. - validateMutation <- function(){ - if( !(mutatedPos%in%mutatedPositions) ){ # if it's a new mutation - uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1 - mutatedPositions[uniqueMutationsIntroduced] <<- mutatedPos - }else{ - if(substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos)){ # back to germline mutation - mutatedPositions <<- mutatedPositions[-which(mutatedPositions==mutatedPos)] - uniqueMutationsIntroduced <<- uniqueMutationsIntroduced - 1 - } - } - } - - - - # Places text (labels) at normalized coordinates - myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){ - par(xpd=TRUE) - if(!log) - text(par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac,par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac,w,cex=cex,adj=adj,col=thecol) - else { - if(log==2) - text( - par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac, - 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac), - w,cex=cex,adj=adj,col=thecol) - if(log==1) - text( - 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac), - par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac, - w,cex=cex,adj=adj,col=thecol) - if(log==3) - text( - 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac), - 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac), - w,cex=cex,adj=adj,col=thecol) - } - par(xpd=FALSE) - } - - - - # Count the mutations in a sequence - analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){ - - paramGL = s2c(matInput[inputMatrixIndex,2]) - paramSeq = s2c(matInput[inputMatrixIndex,1]) - - #if( any(paramSeq=="N") ){ - # gapPos_Seq = which(paramSeq=="N") - # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"] - # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] - #} - mutations_val = paramGL != paramSeq - - if(any(mutations_val)){ - mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val] - length_mutations =length(mutationPos) - mutationInfo = rep(NA,length_mutations) - - pos<- mutationPos - pos_array<-array(sapply(pos,getCodonPos)) - codonGL = paramGL[pos_array] - codonSeqWhole = paramSeq[pos_array] - codonSeq = sapply(pos,function(x){ - seqP = paramGL[getCodonPos(x)] - muCodonPos = {x-1}%%3+1 - seqP[muCodonPos] = paramSeq[x] - return(seqP) - }) - GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s) - SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s) - Seqcodons = apply(codonSeq,2,c2s) - - mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) - names(mutationInfo) = mutationPos - - mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) - names(mutationInfoWhole) = mutationPos - - mutationInfo <- mutationInfo[!is.na(mutationInfo)] - mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)] - - if(any(!is.na(mutationInfo))){ - - #Filter based on Stop (at the codon level) - if(seqWithStops==1){ - nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"]) - mutationInfo = mutationInfo[nucleotidesAtStopCodons] - mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons] - }else{ - countStops = sum(mutationInfoWhole=="Stop") - if(seqWithStops==2 & countStops==0) mutationInfo = NA - if(seqWithStops==3 & countStops>0) mutationInfo = NA - } - - if(any(!is.na(mutationInfo))){ - #Filter mutations based on multipleMutation - if(multipleMutation==1 & !is.na(mutationInfo)){ - mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole))) - tableMutationCodons <- table(mutationCodons) - codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1])) - if(any(codonsWithMultipleMutations)){ - #remove the nucleotide mutations in the codons with multiple mutations - mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)] - #replace those codons with Ns in the input sequence - paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N" - matInput[inputMatrixIndex,1] <<- c2s(paramSeq) - } - } - - #Filter mutations based on the model - if(any(mutationInfo)==T | is.na(any(mutationInfo))){ - - if(model==1 & !is.na(mutationInfo)){ - mutationInfo <- mutationInfo[mutationInfo=="S"] - } - if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(mutationInfo) - else return(NA) - }else{ - return(NA) - } - }else{ - return(NA) - } - - - }else{ - return(NA) - } - - - }else{ - return (NA) - } - } - - analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){ - - paramGL = s2c(inputArray[2]) - paramSeq = s2c(inputArray[1]) - inputSeq <- inputArray[1] - #if( any(paramSeq=="N") ){ - # gapPos_Seq = which(paramSeq=="N") - # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"] - # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] - #} - mutations_val = paramGL != paramSeq - - if(any(mutations_val)){ - mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val] - length_mutations =length(mutationPos) - mutationInfo = rep(NA,length_mutations) - - pos<- mutationPos - pos_array<-array(sapply(pos,getCodonPos)) - codonGL = paramGL[pos_array] - codonSeqWhole = paramSeq[pos_array] - codonSeq = sapply(pos,function(x){ - seqP = paramGL[getCodonPos(x)] - muCodonPos = {x-1}%%3+1 - seqP[muCodonPos] = paramSeq[x] - return(seqP) - }) - GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s) - SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s) - Seqcodons = apply(codonSeq,2,c2s) - - mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) - names(mutationInfo) = mutationPos - - mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) - names(mutationInfoWhole) = mutationPos - - mutationInfo <- mutationInfo[!is.na(mutationInfo)] - mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)] - - if(any(!is.na(mutationInfo))){ - - #Filter based on Stop (at the codon level) - if(seqWithStops==1){ - nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"]) - mutationInfo = mutationInfo[nucleotidesAtStopCodons] - mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons] - }else{ - countStops = sum(mutationInfoWhole=="Stop") - if(seqWithStops==2 & countStops==0) mutationInfo = NA - if(seqWithStops==3 & countStops>0) mutationInfo = NA - } - - if(any(!is.na(mutationInfo))){ - #Filter mutations based on multipleMutation - if(multipleMutation==1 & !is.na(mutationInfo)){ - mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole))) - tableMutationCodons <- table(mutationCodons) - codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1])) - if(any(codonsWithMultipleMutations)){ - #remove the nucleotide mutations in the codons with multiple mutations - mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)] - #replace those codons with Ns in the input sequence - paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N" - #matInput[inputMatrixIndex,1] <<- c2s(paramSeq) - inputSeq <- c2s(paramSeq) - } - } - - #Filter mutations based on the model - if(any(mutationInfo)==T | is.na(any(mutationInfo))){ - - if(model==1 & !is.na(mutationInfo)){ - mutationInfo <- mutationInfo[mutationInfo=="S"] - } - if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq)) - else return(list(NA,inputSeq)) - }else{ - return(list(NA,inputSeq)) - } - }else{ - return(list(NA,inputSeq)) - } - - - }else{ - return(list(NA,inputSeq)) - } - - - }else{ - return (list(NA,inputSeq)) - } - } - - # triMutability Background Count - buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){ - - #rowOrigMatInput = matInput[inputMatrixIndex,] - seqGL = gsub("-", "", matInput[inputMatrixIndex,2]) - seqInput = gsub("-", "", matInput[inputMatrixIndex,1]) - #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL) - tempInput <- cbind(seqInput,seqGL) - seqLength = nchar(seqGL) - list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops) - mutationCount <- list_analyzeMutationsFixed[[1]] - seqInput <- list_analyzeMutationsFixed[[2]] - BackgroundMatrix = mutabilityMatrix - MutationMatrix = mutabilityMatrix - MutationCountMatrix = mutabilityMatrix - if(!is.na(mutationCount)){ - if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ - - fivermerStartPos = 1:(seqLength-4) - fivemerLength <- length(fivermerStartPos) - fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4)) - fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4)) - - #Background - for(fivemerIndex in 1:fivemerLength){ - fivemer = fivemerGL[fivemerIndex] - if(!any(grep("N",fivemer))){ - fivemerCodonPos = fivemerCodon(fivemerIndex) - fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) - fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3]) - - # All mutations model - #if(!any(grep("N",fivemerReadingFrameCodon))){ - if(model==0){ - if(stopMutations==0){ - if(!any(grep("N",fivemerReadingFrameCodonInputSeq))) - BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1) - }else{ - if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){ - positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1] - BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon]) - } - } - }else{ # Only silent mutations - if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){ - positionWithinCodon = which(fivemerCodonPos==3) - BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon]) - } - } - #} - } - } - - #Mutations - if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"] - if(model==1) mutationCount = mutationCount[mutationCount=="S"] - mutationPositions = as.numeric(names(mutationCount)) - mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)] - mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)] - countMutations = 0 - for(mutationPosition in mutationPositions){ - fivemerIndex = mutationPosition-2 - fivemer = fivemerSeq[fivemerIndex] - GLfivemer = fivemerGL[fivemerIndex] - fivemerCodonPos = fivemerCodon(fivemerIndex) - fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) - fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3]) - if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){ - if(model==0){ - countMutations = countMutations + 1 - MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1) - MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1) - }else{ - if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){ - countMutations = countMutations + 1 - positionWithinCodon = which(fivemerCodonPos==3) - glNuc = substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon) - inputNuc = substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon) - MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc]) - MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1) - } - } - } - } - - seqMutability = MutationMatrix/BackgroundMatrix - seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE) - #cat(inputMatrixIndex,"\t",countMutations,"\n") - return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix)) - - } - } - - } - - #Returns the codon position containing the middle nucleotide - fivemerCodon <- function(fivemerIndex){ - codonPos = list(2:4,1:3,3:5) - fivemerType = fivemerIndex%%3 - return(codonPos[[fivemerType+1]]) - } - - #returns probability values for one mutation in codons resulting in R, S or Stop - probMutations <- function(typeOfMutation){ - matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(words(alphabet = c(NUCLEOTIDES,"N"), length=3),c(1:3))) - for(codon in rownames(matMutationProb)){ - if( !any(grep("N",codon)) ){ - for(muPos in 1:3){ - matCodon = matrix(rep(s2c(codon),3),nrow=3,ncol=3,byrow=T) - glNuc = matCodon[1,muPos] - matCodon[,muPos] = canMutateTo(glNuc) - substitutionRate = substitution[glNuc,matCodon[,muPos]] - typeOfMutations = apply(rbind(rep(codon,3),apply(matCodon,1,c2s)),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) - matMutationProb[codon,muPos] <- sum(substitutionRate[typeOfMutations==typeOfMutation]) - } - } - } - - return(matMutationProb) - } - - - - -#Mapping Trinucleotides to fivemers -mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){ - rownames(triMutability) <- triMutability_Names - Fivemer<-rep(NA,1024) - names(Fivemer)<-words(alphabet=NUCLEOTIDES,length=5) - Fivemer<-sapply(names(Fivemer),function(Word)return(sum( c(triMutability[substring(Word,3,5),1],triMutability[substring(Word,2,4),2],triMutability[substring(Word,1,3),3]),na.rm=TRUE))) - Fivemer<-Fivemer/sum(Fivemer) - return(Fivemer) -} - -collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){ - Indices<-substring(names(Fivemer),3,3)==NUC - Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position)) - tapply(which(Indices),Factors,function(i)weighted.mean(Fivemer[i],Weights[i],na.rm=TRUE)) -} - - - -CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){ - Indices<-substring(names(Fivemer),3,3)==NUC - Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position)) - tapply(which(Indices),Factors,function(i)sum(Weights[i],na.rm=TRUE)) -} - -#Uses the real counts of the mutated fivemers -CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){ - Indices<-substring(names(Fivemer),3,3)==NUC - Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position)) - tapply(which(Indices),Factors,function(i)sum(Counts[i],na.rm=TRUE)) -} - -bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){ -N<-sum(x) -if(N){ -p<-x/N -k<-length(x)-1 -tmp<-rmultinom(M, size = N, prob=p) -tmp_p<-apply(tmp,2,function(y)y/N) -(apply(tmp_p,1,function(y)quantile(y,c(alpha/2/k,1-alpha/2/k)))) -} -else return(matrix(0,2,length(x))) -} - - - - -bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){ - -N<-sum(x) -k<-length(x) -y<-rep(1:k,x) -tmp<-sapply(1:M,function(i)sample(y,n)) -if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))/n -if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))/n -(apply(tmp_p,1,function(z)quantile(z,c(alpha/2/(k-1),1-alpha/2/(k-1))))) -} - - - -p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){ -n=sum(x_obs) -N<-sum(x) -k<-length(x) -y<-rep(1:k,x) -tmp<-sapply(1:M,function(i)sample(y,n)) -if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i))) -if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i))) -tmp<-rbind(sapply(1:3,function(i)sum(tmp_p[i,]>=x_obs[i])/M), -sapply(1:3,function(i)sum(tmp_p[i,]<=x_obs[i])/M)) -sapply(1:3,function(i){if(tmp[1,i]>=tmp[2,i])return(-tmp[2,i])else return(tmp[1,i])}) -} - -#"D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA" -# Remove SNPs from IMGT germline segment alleles -generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){ - repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F) - alleleNames <- sapply(names(repertoireIn),function(x)strsplit(x,"|",fixed=TRUE)[[1]][2]) - SNPs <- tapply(repertoireIn,sapply(alleleNames,function(x)strsplit(x,"*",fixed=TRUE)[[1]][1]),function(x){ - Indices<-NULL - for(i in 1:length(x)){ - firstSeq = s2c(x[[1]]) - iSeq = s2c(x[[i]]) - Indices<-c(Indices,which(firstSeq[1:320]!=iSeq[1:320] & firstSeq[1:320]!="." & iSeq[1:320]!="." )) - } - return(sort(unique(Indices))) - }) - repertoireOut <- repertoireIn - repertoireOut <- lapply(names(repertoireOut), function(repertoireName){ - alleleName <- strsplit(repertoireName,"|",fixed=TRUE)[[1]][2] - geneSegmentName <- strsplit(alleleName,"*",fixed=TRUE)[[1]][1] - alleleSeq <- s2c(repertoireOut[[repertoireName]]) - alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N" - alleleSeq <- c2s(alleleSeq) - repertoireOut[[repertoireName]] <- alleleSeq - }) - names(repertoireOut) <- names(repertoireIn) - write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile) - -} - - - - - - -############ -groupBayes2 = function(indexes, param_resultMat){ - - BayesGDist_Focused_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[4])})) - BayesGDist_Focused_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[2]+x[4])})) - #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])})) - #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])})) - #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])})) - #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])})) - return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR, - "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) ) - #"BayesGDist_Local_CDR"=BayesGDist_Local_CDR, - #"BayesGDist_Local_FWR" = BayesGDist_Local_FWR)) -# "BayesGDist_Global_CDR" = BayesGDist_Global_CDR, -# "BayesGDist_Global_FWR" = BayesGDist_Global_FWR) ) - - -} - - -calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){ - G <- max(length(x),length(N),length(p)) - x=array(x,dim=G) - N=array(N,dim=G) - p=array(p,dim=G) - - indexOfZero = N>0 & p>0 - N = N[indexOfZero] - x = x[indexOfZero] - p = p[indexOfZero] - G <- length(x) - - if(G){ - - cons<-array( dim=c(length_sigma,G) ) - if(G==1) { - return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma)) - } - else { - for(g in 1:G) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma) - listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma) - y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma) - return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) - } - }else{ - return(NA) - } -} - - -calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){ - matG <- listMatG[[1]] - groups <- listMatG[[2]] - i = 1 - resConv <- matG[,i] - denom <- 2^groups[i] - if(length(groups)>1){ - while( i<length(groups) ){ - i = i + 1 - resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma) - #cat({{2^groups[i]}/denom},"\n") - denom <- denom + 2^groups[i] - } - } - return(resConv) -} - -weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){ -lx<-length(x) -ly<-length(y) -if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){ -if(w<1){ -y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y -x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y -lx<-length(x1) -ly<-length(y1) -} -else { -y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y -x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y -lx<-length(x1) -ly<-length(y1) -} -} -else{ -x1<-x -y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y -ly<-length(y1) -} -tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y -tmp[tmp<=0] = 0 -return(tmp/sum(tmp)) -} - -######################## - - - - -mutabilityMatrixONE<-rep(0,4) -names(mutabilityMatrixONE)<-NUCLEOTIDES - - # triMutability Background Count - buildMutabilityModelONE <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){ - - #rowOrigMatInput = matInput[inputMatrixIndex,] - seqGL = gsub("-", "", matInput[inputMatrixIndex,2]) - seqInput = gsub("-", "", matInput[inputMatrixIndex,1]) - matInput[inputMatrixIndex,] <<- c(seqInput,seqGL) - seqLength = nchar(seqGL) - mutationCount <- analyzeMutations(inputMatrixIndex, model, multipleMutation, seqWithStops) - BackgroundMatrix = mutabilityMatrixONE - MutationMatrix = mutabilityMatrixONE - MutationCountMatrix = mutabilityMatrixONE - if(!is.na(mutationCount)){ - if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ - -# ONEmerStartPos = 1:(seqLength) -# ONEmerLength <- length(ONEmerStartPos) - ONEmerGL <- s2c(seqGL) - ONEmerSeq <- s2c(seqInput) - - #Background - for(ONEmerIndex in 1:seqLength){ - ONEmer = ONEmerGL[ONEmerIndex] - if(ONEmer!="N"){ - ONEmerCodonPos = getCodonPos(ONEmerIndex) - ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos]) - ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] ) - - # All mutations model - #if(!any(grep("N",ONEmerReadingFrameCodon))){ - if(model==0){ - if(stopMutations==0){ - if(!any(grep("N",ONEmerReadingFrameCodonInputSeq))) - BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1) - }else{ - if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){ - positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1] - BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon]) - } - } - }else{ # Only silent mutations - if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){ - positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex) - BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon]) - } - } - } - } - } - - #Mutations - if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"] - if(model==1) mutationCount = mutationCount[mutationCount=="S"] - mutationPositions = as.numeric(names(mutationCount)) - mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)] - mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)] - countMutations = 0 - for(mutationPosition in mutationPositions){ - ONEmerIndex = mutationPosition - ONEmer = ONEmerSeq[ONEmerIndex] - GLONEmer = ONEmerGL[ONEmerIndex] - ONEmerCodonPos = getCodonPos(ONEmerIndex) - ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos]) - ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos]) - if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){ - if(model==0){ - countMutations = countMutations + 1 - MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1) - MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1) - }else{ - if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){ - countMutations = countMutations + 1 - positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex) - glNuc = substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon) - inputNuc = substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon) - MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc]) - MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1) - } - } - } - } - - seqMutability = MutationMatrix/BackgroundMatrix - seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE) - #cat(inputMatrixIndex,"\t",countMutations,"\n") - return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix)) -# tmp<-list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix) - } - } - -################ -# $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $ - -trim <- function(s, recode.factor=TRUE, ...) - UseMethod("trim", s) - -trim.default <- function(s, recode.factor=TRUE, ...) - s - -trim.character <- function(s, recode.factor=TRUE, ...) -{ - s <- sub(pattern="^ +", replacement="", x=s) - s <- sub(pattern=" +$", replacement="", x=s) - s -} - -trim.factor <- function(s, recode.factor=TRUE, ...) -{ - levels(s) <- trim(levels(s)) - if(recode.factor) { - dots <- list(x=s, ...) - if(is.null(dots$sort)) dots$sort <- sort - s <- do.call(what=reorder.factor, args=dots) - } - s -} - -trim.list <- function(s, recode.factor=TRUE, ...) - lapply(s, trim, recode.factor=recode.factor, ...) - -trim.data.frame <- function(s, recode.factor=TRUE, ...) -{ - s[] <- trim.list(s, recode.factor=recode.factor, ...) - s -} -####################################### -# Compute the expected for each sequence-germline pair by codon -getExpectedIndividualByCodon <- function(matInput){ -if( any(grep("multicore",search())) ){ - facGL <- factor(matInput[,2]) - facLevels = levels(facGL) - LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){ - computeMutabilities(facLevels[x]) - }) - facIndex = match(facGL,facLevels) - - LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){ - cInput = rep(NA,nchar(matInput[x,1])) - cInput[s2c(matInput[x,1])!="N"] = 1 - LisGLs_MutabilityU[[facIndex[x]]] * cInput - }) - - LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){ - computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) - }) - - LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){ - #print(x) - computeMutationTypes(matInput[x,2]) - }) - - LisGLs_R_Exp = mclapply(1:nrow(matInput), function(x){ - Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3, - function(codonNucs){ - RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") - sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) - } - ) - }) - - LisGLs_S_Exp = mclapply(1:nrow(matInput), function(x){ - Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3, - function(codonNucs){ - SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S") - sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T ) - } - ) - }) - - Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) - Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) - return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) ) - }else{ - facGL <- factor(matInput[,2]) - facLevels = levels(facGL) - LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){ - computeMutabilities(facLevels[x]) - }) - facIndex = match(facGL,facLevels) - - LisGLs_Mutability = lapply(1:nrow(matInput), function(x){ - cInput = rep(NA,nchar(matInput[x,1])) - cInput[s2c(matInput[x,1])!="N"] = 1 - LisGLs_MutabilityU[[facIndex[x]]] * cInput - }) - - LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){ - computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) - }) - - LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){ - #print(x) - computeMutationTypes(matInput[x,2]) - }) - - LisGLs_R_Exp = lapply(1:nrow(matInput), function(x){ - Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3, - function(codonNucs){ - RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") - sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) - } - ) - }) - - LisGLs_S_Exp = lapply(1:nrow(matInput), function(x){ - Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3, - function(codonNucs){ - SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S") - sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T ) - } - ) - }) - - Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) - Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) - return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) ) - } -} - -# getObservedMutationsByCodon <- function(listMutations){ -# numbSeqs <- length(listMutations) -# obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3)))) -# obsMu_S <- obsMu_R -# temp <- mclapply(1:length(listMutations), function(i){ -# arrMutations = listMutations[[i]] -# RPos = as.numeric(names(arrMutations)[arrMutations=="R"]) -# RPos <- sapply(RPos,getCodonNumb) -# if(any(RPos)){ -# tabR <- table(RPos) -# obsMu_R[i,as.numeric(names(tabR))] <<- tabR -# } -# -# SPos = as.numeric(names(arrMutations)[arrMutations=="S"]) -# SPos <- sapply(SPos,getCodonNumb) -# if(any(SPos)){ -# tabS <- table(SPos) -# obsMu_S[i,names(tabS)] <<- tabS -# } -# } -# ) -# return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) -# } - -getObservedMutationsByCodon <- function(listMutations){ - numbSeqs <- length(listMutations) - obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3)))) - obsMu_S <- obsMu_R - temp <- lapply(1:length(listMutations), function(i){ - arrMutations = listMutations[[i]] - RPos = as.numeric(names(arrMutations)[arrMutations=="R"]) - RPos <- sapply(RPos,getCodonNumb) - if(any(RPos)){ - tabR <- table(RPos) - obsMu_R[i,as.numeric(names(tabR))] <<- tabR - } - - SPos = as.numeric(names(arrMutations)[arrMutations=="S"]) - SPos <- sapply(SPos,getCodonNumb) - if(any(SPos)){ - tabS <- table(SPos) - obsMu_S[i,names(tabS)] <<- tabS - } - } - ) - return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) -} -
--- a/baseline/Baseline_Main.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,388 +0,0 @@ -######################################################################################### -# License Agreement -# -# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE -# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER -# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE -# OR COPYRIGHT LAW IS PROHIBITED. -# -# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE -# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED -# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN -# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. -# -# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences -# Coded by: Mohamed Uduman & Gur Yaari -# Copyright 2012 Kleinstein Lab -# Version: 1.3 (01/23/2014) -######################################################################################### - -op <- options(); -options(showWarnCalls=FALSE, showErrorCalls=FALSE, warn=-1) -library('seqinr') -if( F & Sys.info()[1]=="Linux"){ - library("multicore") -} - -# Load functions and initialize global variables -source("Baseline_Functions.r") - -# Initialize parameters with user provided arguments - arg <- commandArgs(TRUE) - #arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample") - #arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample") - #arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu") - testID <- as.numeric(arg[1]) # 1 = Focused, 2 = Local - species <- as.numeric(arg[2]) # 1 = Human. 2 = Mouse - substitutionModel <- as.numeric(arg[3]) # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS - mutabilityModel <- as.numeric(arg[4]) # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 2002) , 5 = FiveS - clonal <- as.numeric(arg[5]) # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations - fixIndels <- as.numeric(arg[6]) # 0 = Do nothing, 1 = Try and fix Indels - region <- as.numeric(strsplit(arg[7],":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3 - inputFilePath <- arg[8] # Full path to input file - outputPath <- arg[9] # Full path to location of output files - outputID <- arg[10] # ID for session output - - - if(testID==5){ - traitChangeModel <- 1 - if( !is.na(any(arg[11])) ) traitChangeModel <- as.numeric(arg[11]) # 1 <- Chothia 1998 - initializeTraitChange(traitChangeModel) - } - -# Initialize other parameters/variables - - # Initialzie the codon table ( definitions of R/S ) - computeCodonTable(testID) - - # Initialize - # Test Name - testName<-"Focused" - if(testID==2) testName<-"Local" - if(testID==3) testName<-"Imbalanced" - if(testID==4) testName<-"ImbalancedSilent" - - # Indel placeholders initialization - indelPos <- NULL - delPos <- NULL - insPos <- NULL - - # Initialize in Tranistion & Mutability matrixes - substitution <- initializeSubstitutionMatrix(substitutionModel,species) - mutability <- initializeMutabilityMatrix(mutabilityModel,species) - - # FWR/CDR boundaries - flagTrim <- F - if( is.na(region[7])){ - flagTrim <- T - region[7]<-region[6] - } - readStart = min(region,na.rm=T) - readEnd = max(region,na.rm=T) - if(readStart>1){ - region = region - (readStart - 1) - } - region_Nuc = c( (region[1]*3-2) , (region[2:7]*3) ) - region_Cod = region - - readStart = (readStart*3)-2 - readEnd = (readEnd*3) - - FWR_Nuc <- c( rep(TRUE,(region_Nuc[2])), - rep(FALSE,(region_Nuc[3]-region_Nuc[2])), - rep(TRUE,(region_Nuc[4]-region_Nuc[3])), - rep(FALSE,(region_Nuc[5]-region_Nuc[4])), - rep(TRUE,(region_Nuc[6]-region_Nuc[5])), - rep(FALSE,(region_Nuc[7]-region_Nuc[6])) - ) - CDR_Nuc <- (1-FWR_Nuc) - CDR_Nuc <- as.logical(CDR_Nuc) - FWR_Nuc_Mat <- matrix( rep(FWR_Nuc,4), ncol=length(FWR_Nuc), nrow=4, byrow=T) - CDR_Nuc_Mat <- matrix( rep(CDR_Nuc,4), ncol=length(CDR_Nuc), nrow=4, byrow=T) - - FWR_Codon <- c( rep(TRUE,(region[2])), - rep(FALSE,(region[3]-region[2])), - rep(TRUE,(region[4]-region[3])), - rep(FALSE,(region[5]-region[4])), - rep(TRUE,(region[6]-region[5])), - rep(FALSE,(region[7]-region[6])) - ) - CDR_Codon <- (1-FWR_Codon) - CDR_Codon <- as.logical(CDR_Codon) - - -# Read input FASTA file - tryCatch( - inputFASTA <- baseline.read.fasta(inputFilePath, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F) - , error = function(ex){ - cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n") - q() - } - ) - - if (length(inputFASTA)==1) { - cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n") - q() - } - - # Process sequence IDs/names - names(inputFASTA) <- sapply(names(inputFASTA),function(x){trim(x)}) - - # Convert non nucleotide characters to N - inputFASTA[length(inputFASTA)] = gsub("\t","",inputFASTA[length(inputFASTA)]) - inputFASTA <- lapply(inputFASTA,replaceNonFASTAChars) - - # Process the FASTA file and conver to Matrix[inputSequence, germlineSequence] - processedInput <- processInputAdvanced(inputFASTA) - matInput <- processedInput[[1]] - germlines <- processedInput[[2]] - lenGermlines = length(unique(germlines)) - groups <- processedInput[[3]] - lenGroups = length(unique(groups)) - rm(processedInput) - rm(inputFASTA) - -# # remove clones with less than 2 seqeunces -# tableGL <- table(germlines) -# singletons <- which(tableGL<8) -# rowsToRemove <- match(singletons,germlines) -# if(any(rowsToRemove)){ -# matInput <- matInput[-rowsToRemove,] -# germlines <- germlines[-rowsToRemove] -# groups <- groups[-rowsToRemove] -# } -# -# # remove unproductive seqs -# nonFuctionalSeqs <- sapply(rownames(matInput),function(x){any(grep("unproductive",x))}) -# if(any(nonFuctionalSeqs)){ -# if(sum(nonFuctionalSeqs)==length(germlines)){ -# write.table("Unproductive",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) -# q() -# } -# matInput <- matInput[-which(nonFuctionalSeqs),] -# germlines <- germlines[-which(nonFuctionalSeqs)] -# germlines[1:length(germlines)] <- 1:length(germlines) -# groups <- groups[-which(nonFuctionalSeqs)] -# } -# -# if(class(matInput)=="character"){ -# write.table("All unproductive seqs",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) -# q() -# } -# -# if(nrow(matInput)<10 | is.null(nrow(matInput))){ -# write.table(paste(nrow(matInput), "seqs only",sep=""),file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) -# q() -# } - -# replace leading & trailing "-" with "N: - matInput <- t(apply(matInput,1,replaceLeadingTrailingDashes,readEnd)) - - # Trim (nucleotide) input sequences to the last codon - #matInput[,1] <- apply(matrix(matInput[,1]),1,trimToLastCodon) - -# # Check for Indels -# if(fixIndels){ -# delPos <- fixDeletions(matInput) -# insPos <- fixInsertions(matInput) -# }else{ -# # Check for indels -# indelPos <- checkForInDels(matInput) -# indelPos <- apply(cbind(indelPos[[1]],indelPos[[2]]),1,function(x){(x[1]==T & x[2]==T)}) -# } - - # If indels are present, remove mutations in the seqeunce & throw warning at end - #matInput[indelPos,] <- apply(matrix(matInput[indelPos,],nrow=sum(indelPos),ncol=2),1,function(x){x[1]=x[2]; return(x) }) - - colnames(matInput)=c("Input","Germline") - - # If seqeunces are clonal, create effective sequence for each clone & modify germline/group definitions - germlinesOriginal = NULL - if(clonal){ - germlinesOriginal <- germlines - collapseCloneResults <- tapply(1:nrow(matInput),germlines,function(i){ - collapseClone(matInput[i,1],matInput[i[1],2],readEnd,nonTerminalOnly=(clonal-1)) - }) - matInput = t(sapply(collapseCloneResults,function(x){return(x[[1]])})) - names_groups = tapply(groups,germlines,function(x){names(x[1])}) - groups = tapply(groups,germlines,function(x){array(x[1],dimnames=names(x[1]))}) - names(groups) = names_groups - - names_germlines = tapply(germlines,germlines,function(x){names(x[1])}) - germlines = tapply( germlines,germlines,function(x){array(x[1],dimnames=names(x[1]))} ) - names(germlines) = names_germlines - matInputErrors = sapply(collapseCloneResults,function(x){return(x[[2]])}) - } - - -# Selection Analysis - - -# if (length(germlines)>sequenceLimit) { -# # Code to parallelize processing goes here -# stop( paste("Error: Cannot process more than ", Upper_limit," sequences",sep="") ) -# } - -# if (length(germlines)<sequenceLimit) {} - - # Compute expected mutation frequencies - matExpected <- getExpectedIndividual(matInput) - - # Count observed number of mutations in the different regions - mutations <- lapply( 1:nrow(matInput), function(i){ - #cat(i,"\n") - seqI = s2c(matInput[i,1]) - seqG = s2c(matInput[i,2]) - matIGL = matrix(c(seqI,seqG),ncol=length(seqI),nrow=2,byrow=T) - retVal <- NA - tryCatch( - retVal <- analyzeMutations2NucUri(matIGL) - , error = function(ex){ - retVal <- NA - } - ) - - - return( retVal ) - }) - - matObserved <- t(sapply( mutations, processNucMutations2 )) - numberOfSeqsWithMutations <- numberOfSeqsWithMutations(matObserved, testID) - - #if(sum(numberOfSeqsWithMutations)==0){ - # write.table("No mutated sequences",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) - # q() - #} - - matMutationInfo <- cbind(matObserved,matExpected) - rm(matObserved,matExpected) - - - #Bayesian PDFs - bayes_pdf = computeBayesianScore(matMutationInfo, test=testName, max_sigma=20,length_sigma=4001) - bayesPDF_cdr = bayes_pdf[[1]] - bayesPDF_fwr = bayes_pdf[[2]] - rm(bayes_pdf) - - bayesPDF_germlines_cdr = tapply(bayesPDF_cdr,germlines,function(x) groupPosteriors(x,length_sigma=4001)) - bayesPDF_germlines_fwr = tapply(bayesPDF_fwr,germlines,function(x) groupPosteriors(x,length_sigma=4001)) - - bayesPDF_groups_cdr = tapply(bayesPDF_cdr,groups,function(x) groupPosteriors(x,length_sigma=4001)) - bayesPDF_groups_fwr = tapply(bayesPDF_fwr,groups,function(x) groupPosteriors(x,length_sigma=4001)) - - if(lenGroups>1){ - groups <- c(groups,lenGroups+1) - names(groups)[length(groups)] = "All sequences combined" - bayesPDF_groups_cdr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_cdr,length_sigma=4001) - bayesPDF_groups_fwr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_fwr,length_sigma=4001) - } - - #Bayesian Outputs - bayes_cdr = t(sapply(bayesPDF_cdr,calcBayesOutputInfo)) - bayes_fwr = t(sapply(bayesPDF_fwr,calcBayesOutputInfo)) - bayes_germlines_cdr = t(sapply(bayesPDF_germlines_cdr,calcBayesOutputInfo)) - bayes_germlines_fwr = t(sapply(bayesPDF_germlines_fwr,calcBayesOutputInfo)) - bayes_groups_cdr = t(sapply(bayesPDF_groups_cdr,calcBayesOutputInfo)) - bayes_groups_fwr = t(sapply(bayesPDF_groups_fwr,calcBayesOutputInfo)) - - #P-values - simgaP_cdr = sapply(bayesPDF_cdr,computeSigmaP) - simgaP_fwr = sapply(bayesPDF_fwr,computeSigmaP) - - simgaP_germlines_cdr = sapply(bayesPDF_germlines_cdr,computeSigmaP) - simgaP_germlines_fwr = sapply(bayesPDF_germlines_fwr,computeSigmaP) - - simgaP_groups_cdr = sapply(bayesPDF_groups_cdr,computeSigmaP) - simgaP_groups_fwr = sapply(bayesPDF_groups_fwr,computeSigmaP) - - - #Format output - - # Round expected mutation frequencies to 3 decimal places - matMutationInfo[germlinesOriginal[indelPos],] = NA - if(nrow(matMutationInfo)==1){ - matMutationInfo[5:8] = round(matMutationInfo[,5:8]/sum(matMutationInfo[,5:8],na.rm=T),3) - }else{ - matMutationInfo[,5:8] = t(round(apply(matMutationInfo[,5:8],1,function(x){ return(x/sum(x,na.rm=T)) }),3)) - } - - listPDFs = list() - nRows = length(unique(groups)) + length(unique(germlines)) + length(groups) - - matOutput = matrix(NA,ncol=18,nrow=nRows) - rowNumb = 1 - for(G in unique(groups)){ - #print(G) - matOutput[rowNumb,c(1,2,11:18)] = c("Group",names(groups)[groups==G][1],bayes_groups_cdr[G,],bayes_groups_fwr[G,],simgaP_groups_cdr[G],simgaP_groups_fwr[G]) - listPDFs[[rowNumb]] = list("CDR"=bayesPDF_groups_cdr[[G]],"FWR"=bayesPDF_groups_fwr[[G]]) - names(listPDFs)[rowNumb] = names(groups[groups==paste(G)])[1] - #if(names(groups)[which(groups==G)[1]]!="All sequences combined"){ - gs = unique(germlines[groups==G]) - rowNumb = rowNumb+1 - if( !is.na(gs) ){ - for( g in gs ){ - matOutput[rowNumb,c(1,2,11:18)] = c("Germline",names(germlines)[germlines==g][1],bayes_germlines_cdr[g,],bayes_germlines_fwr[g,],simgaP_germlines_cdr[g],simgaP_germlines_fwr[g]) - listPDFs[[rowNumb]] = list("CDR"=bayesPDF_germlines_cdr[[g]],"FWR"=bayesPDF_germlines_fwr[[g]]) - names(listPDFs)[rowNumb] = names(germlines[germlines==paste(g)])[1] - rowNumb = rowNumb+1 - indexesOfInterest = which(germlines==g) - numbSeqsOfInterest = length(indexesOfInterest) - rowNumb = seq(rowNumb,rowNumb+(numbSeqsOfInterest-1)) - matOutput[rowNumb,] = matrix( c( rep("Sequence",numbSeqsOfInterest), - rownames(matInput)[indexesOfInterest], - c(matMutationInfo[indexesOfInterest,1:4]), - c(matMutationInfo[indexesOfInterest,5:8]), - c(bayes_cdr[indexesOfInterest,]), - c(bayes_fwr[indexesOfInterest,]), - c(simgaP_cdr[indexesOfInterest]), - c(simgaP_fwr[indexesOfInterest]) - ), ncol=18, nrow=numbSeqsOfInterest,byrow=F) - increment=0 - for( ioi in indexesOfInterest){ - listPDFs[[min(rowNumb)+increment]] = list("CDR"=bayesPDF_cdr[[ioi]] , "FWR"=bayesPDF_fwr[[ioi]]) - names(listPDFs)[min(rowNumb)+increment] = rownames(matInput)[ioi] - increment = increment + 1 - } - rowNumb=max(rowNumb)+1 - - } - } - } - colsToFormat = 11:18 - matOutput[,colsToFormat] = formatC( matrix(as.numeric(matOutput[,colsToFormat]), nrow=nrow(matOutput), ncol=length(colsToFormat)) , digits=3) - matOutput[matOutput== " NaN"] = NA - - - - colnames(matOutput) = c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S", - "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S", - paste( rep(testName,6), rep(c("Sigma","CIlower","CIupper"),2),rep(c("CDR","FWR"),each=3), sep="_"), - paste( rep(testName,2), rep("P",2),c("CDR","FWR"), sep="_") - ) - fileName = paste(outputPath,outputID,".txt",sep="") - write.table(matOutput,file=fileName,quote=F,sep="\t",row.names=T,col.names=NA) - fileName = paste(outputPath,outputID,".RData",sep="") - save(listPDFs,file=fileName) - -indelWarning = FALSE -if(sum(indelPos)>0){ - indelWarning = "<P>Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis."; - indelWarning = paste( indelWarning , "<UL>", sep="" ) - for(indels in names(indelPos)[indelPos]){ - indelWarning = paste( indelWarning , "<LI>", indels, "</LI>", sep="" ) - } - indelWarning = paste( indelWarning , "</UL></P>", sep="" ) -} - -cloneWarning = FALSE -if(clonal==1){ - if(sum(matInputErrors)>0){ - cloneWarning = "<P>Warning: The following clones have sequences of unequal length."; - cloneWarning = paste( cloneWarning , "<UL>", sep="" ) - for(clone in names(matInputErrors)[matInputErrors]){ - cloneWarning = paste( cloneWarning , "<LI>", names(germlines)[as.numeric(clone)], "</LI>", sep="" ) - } - cloneWarning = paste( cloneWarning , "</UL></P>", sep="" ) - } -} -cat(paste("Success",outputID,indelWarning,cloneWarning,sep="|"))
--- a/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,703 +0,0 @@ ->IGHV1-18*01 -caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga ->IGHV1-18*02 -caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc ->IGHV1-18*03 -caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga ->IGHV1-18*04 -caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga ->IGHV1-2*01 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga ->IGHV1-2*02 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga ->IGHV1-2*03 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga ->IGHV1-2*04 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga ->IGHV1-2*05 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga ->IGHV1-24*01 -caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga ->IGHV1-3*01 -caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga ->IGHV1-3*02 -caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga ->IGHV1-38-4*01 -caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca ->IGHV1-45*01 -cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana ->IGHV1-45*02 -cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata ->IGHV1-45*03 -.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga ->IGHV1-46*01 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-46*02 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-46*03 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga ->IGHV1-58*01 -caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga ->IGHV1-58*02 -caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga ->IGHV1-68*01 -caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata ->IGHV1-69*01 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69*02 -caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga ->IGHV1-69*03 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc ->IGHV1-69*04 -caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69*05 -caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga ->IGHV1-69*06 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69*07 -.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag ->IGHV1-69*08 -caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69*09 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69*10 -caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69*11 -caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69*12 -caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69*13 -caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69*14 -caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-69-2*01 -gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga ->IGHV1-69-2*02 -.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag ->IGHV1-69D*01 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1-8*01 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg ->IGHV1-8*02 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg ->IGHV1-NL1*01 -caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga ->IGHV1/OR15-1*01 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga ->IGHV1/OR15-1*02 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga ->IGHV1/OR15-1*03 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga ->IGHV1/OR15-1*04 -caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga ->IGHV1/OR15-2*01 -caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga ->IGHV1/OR15-2*02 -caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga ->IGHV1/OR15-2*03 -caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga ->IGHV1/OR15-3*01 -caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga ->IGHV1/OR15-3*02 -caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga ->IGHV1/OR15-3*03 -caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga ->IGHV1/OR15-4*01 -caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga ->IGHV1/OR15-5*01 -.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga ->IGHV1/OR15-5*02 -caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga ->IGHV1/OR15-9*01 -caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga ->IGHV1/OR21-1*01 -caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga ->IGHV2-10*01 -caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac ->IGHV2-26*01 -caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac ->IGHV2-5*01 -cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac ->IGHV2-5*02 -cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac ->IGHV2-5*03 -................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt ->IGHV2-5*04| -cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac ->IGHV2-5*05 -cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac ->IGHV2-5*06 -cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga ->IGHV2-5*08 -caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac ->IGHV2-5*09 -caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac ->IGHV2-70*01 -caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac ->IGHV2-70*02 -caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg ->IGHV2-70*03 -caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg ->IGHV2-70*04 -caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac ->IGHV2-70*05 -..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga ->IGHV2-70*06 -caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg ->IGHV2-70*07 -caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg ->IGHV2-70*08 -caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg ->IGHV2-70*09 -cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg ->IGHV2-70*10 -caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac ->IGHV2-70*11 -cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac ->IGHV2-70*12 -cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac ->IGHV2-70*13 -caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac ->IGHV2-70D*04 -caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac ->IGHV2-70D*14 -caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac ->IGHV2/OR16-5*01 -caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag ->IGHV3-11*01 -caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga ->IGHV3-11*03 -caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga ->IGHV3-11*04 -caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-11*05 -caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga ->IGHV3-11*06 -caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-13*01 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga ->IGHV3-13*02 -gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga ->IGHV3-13*03 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga ->IGHV3-13*04 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga ->IGHV3-13*05 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga ->IGHV3-15*01 -gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga ->IGHV3-15*02 -gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga ->IGHV3-15*03 -gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga ->IGHV3-15*04 -gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga ->IGHV3-15*05 -gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga ->IGHV3-15*06 -gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga ->IGHV3-15*07 -gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga ->IGHV3-15*08 -gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg ->IGHV3-16*01 -gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa ->IGHV3-16*02 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa ->IGHV3-19*01 -acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa ->IGHV3-20*01 -gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga ->IGHV3-20*02 -gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga ->IGHV3-21*01 -gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-21*02 -gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-21*03 -gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga ->IGHV3-21*04 -gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga ->IGHV3-22*01 -gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga ->IGHV3-22*02 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga ->IGHV3-23*01 -gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga ->IGHV3-23*02 -gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga ->IGHV3-23*03 -gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga ->IGHV3-23*04 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga ->IGHV3-23*05 -gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa ->IGHV3-23D*01 -gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga ->IGHV3-23D*02 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga ->IGHV3-25*01 -gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga ->IGHV3-25*02 -gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga ->IGHV3-25*03 -gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga ->IGHV3-25*04 -gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga ->IGHV3-25*05 -gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga ->IGHV3-29*01 -gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt ->IGHV3-30*01 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*02 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga ->IGHV3-30*03 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*04 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*05 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga ->IGHV3-30*06 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*07 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*08 -caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga ->IGHV3-30*09 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*10 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*11 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*12 -caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*13 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*14 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*15 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*16 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*17 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30*18 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga ->IGHV3-30*19 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30-2*01 -gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca ->IGHV3-30-22*01 -gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc ->IGHV3-30-3*01 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30-3*02 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga ->IGHV3-30-3*03 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-30-33*01 -gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg ->IGHV3-30-42*01 -gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt ->IGHV3-30-5*01 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga ->IGHV3-30-5*02 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga ->IGHV3-30-52*01 -gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg ->IGHV3-32*01 -gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc ->AIGHV3-33*01 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-33*02 -caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-33*03 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga ->IGHV3-33*04 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-33*05 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-33*06 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga ->IGHV3-33-2*01 -gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca ->IGHV3-35*01 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa ->IGHV3-38*01| -gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata ->IGHV3-38*02 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata ->IGHV3-38*03 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata ->IGHV3-38-3*01 -gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga ->IGHV3-43*01 -gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata ->IGHV3-43*02 -gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata ->IGHV3-43D*01 -gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata ->IGHV3-47*01 -gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga ->IGHV3-47*02 -gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga ->IGHV3-48*01 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-48*02 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga ->IGHV3-48*03 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga ->IGHV3-48*04 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-49*01 -gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga ->IGHV3-49*02 -gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga ->IGHV3-49*03 -gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga ->IGHV3-49*04 -gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga ->IGHV3-49*05 -gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga ->IGHV3-52*01 -gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg ->IGHV3-52*02 -gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga ->IGHV3-52*03 -gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga ->IGHV3-53*01 -gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga ->IGHV3-53*02 -gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga ->IGHV3-53*03 -gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga ->IGHV3-53*04 -gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga ->IGHV3-54*01 -gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt ->IGHV3-54*02 -gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg ->IGHV3-54*04 -gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt ->IGHV3-62*01 -gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga ->IGHV3-63*01 -gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt ->IGHV3-63*02 -gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa ->IGHV3-64*01 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga ->IGHV3-64*02 -gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga ->IGHV3-64*03 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga ->IGHV3-64*04 -caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-64*05 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga ->IGHV3-64D*06 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga ->IGHV3-66*01 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-66*02 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga ->IGHV3-66*03 -gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga ->IGHV3-66*04 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca ->IGHV3-69-1*01 -gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-69-1*02 -gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga ->IGHV3-7*01 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-7*02 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga ->IGHV3-7*03 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga ->IGHV3-71*01 -gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga ->IGHV3-71*02 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga ->IGHV3-71*03 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga ->IGHV3-72*01 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga ->IGHV3-72*02 -....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat ->IGHV3-73*01 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca ->IGHV3-73*02 -gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca ->IGHV3-74*01 -gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga ->IGHV3-74*02 -gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga ->IGHV3-74*03 -gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga ->IGHV3-9*01 -gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata ->IGHV3-9*02 -gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata ->IGHV3-9*03 -gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata ->IGHV3-NL1*01 -caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga ->IGHV3/OR15-7*01 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga ->IGHV3/OR15-7*02 -gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga ->IGHV3/OR15-7*03 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga ->IGHV3/OR15-7*05 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga ->IGHV3/OR16-10*01 -gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga ->IGHV3/OR16-10*02 -gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga ->IGHV3/OR16-10*03 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga ->IGHV3/OR16-12*01 -gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga ->IGHV3/OR16-13*01 -gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga ->IGHV3/OR16-14*01 -gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga ->IGHV3/OR16-15*01 -gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa ->IGHV3/OR16-15*02 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga ->IGHV3/OR16-16*01 -gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga ->IGHV3/OR16-6*02 -gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg ->IGHV3/OR16-8*01 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa ->IGHV3/OR16-8*02 -gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca ->IGHV3/OR16-9*01 -gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa ->IGHV4-28*01 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa ->IGHV4-28*02 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa ->IGHV4-28*03 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga ->IGHV4-28*04 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga ->IGHV4-28*05 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa ->IGHV4-28*06 -caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa ->IGHV4-28*07 -caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa ->IGHV4-30-2*01 -cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga ->IGHV4-30-2*02 -cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg ->IGHV4-30-2*03 -cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca ->IGHV4-30-2*04 -...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga ->IGHV4-30-2*05 -cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga ->IGHV4-30-2*06 -cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga ->IGHV4-30-4*01 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga ->IGHV4-30-4*02 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga ->IGHV4-30-4*03 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg ->XIGHV4-30-4*04 -caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg ->IGHV4-30-4*05 -..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga ->IGHV4-30-4*06 -...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga ->IGHV4-30-4*07 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga ->IGHV4-31*01 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-31*02 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-31*03 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-31*04 -caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg ->IGHV4-31*05 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg ->IGHV4-31*06 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg ->IGHV4-31*07 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg ->IGHV4-31*08 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg ->IGHV4-31*09 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-31*10 -caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga ->IGHV4-34*01 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg ->IGHV4-34*02 -caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg ->IGHV4-34*03 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-34*04 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg ->IGHV4-34*05 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg ->IGHV4-34*06 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-34*07 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-34*08 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg ->IGHV4-34*09 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-34*10 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata ->IGHV4-34*11 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga ->IGHV4-34*12 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga ->IGHV4-34*13 -...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg ->IGHV4-38-2*01 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga ->IGHV4-38-2*02 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga ->IGHV4-39*01 -cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca ->IGHV4-39*02 -cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga ->IGHV4-39*03 -cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg ->IGHV4-39*04 -..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac ->IGHV4-39*05 -cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg ->IGHV4-39*06 -cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-39*07 -cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-4*01 -caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga ->IGHV4-4*02 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-4*03 -caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-4*04 -caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-4*05 -caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-4*06 -............................................................ -...............tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-4*07 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-4*08 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga ->IGHV4-55*01 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata ->IGHV4-55*02 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata ->IGHV4-55*03 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-55*04 -caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-55*05 -caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg ->IGHV4-55*06 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg ->IGHV4-55*07 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg ->IGHV4-55*08 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4-55*09 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa ->IGHV4-59*01 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga ->IGHV4-59*02 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga ->IGHV4-59*03 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg ->IGHV4-59*04 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg ->IGHV4-59*05 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg ->IGHV4-59*06 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg ->IGHV4-59*07 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga ->IGHV4-59*08 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca ->IGHV4-59*09 -...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg ->IGHV4-59*10 -caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata ->IGHV4-61*01 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga ->IGHV4-61*02 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga ->IGHV4-61*03 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga ->IGHV4-61*04 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg ->IGHV4-61*05 -cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga ->IGHV4-61*06 -...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga ->IGHV4-61*07 -...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca ->IGHV4-61*08 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga ->IGHV4/OR15-8*01 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4/OR15-8*02 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga ->IGHV4/OR15-8*03 -caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga ->IGHV5-10-1*01 -gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga ->IGHV5-10-1*02 -gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca ->IGHV5-10-1*03 -gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga ->IGHV5-10-1*04 -gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga ->IGHV5-51*01 -gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca ->IGHV5-51*02 -gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca ->IGHV5-51*03 -gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga ->IGHV5-51*04 -gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga ->IGHV5-51*05 -.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg ->IGHV5-78*01 -gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga ->IGHV6-1*01 -caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga ->IGHV6-1*02 -caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga ->IGHV7-34-1*01 -...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta ->IGHV7-34-1*02 -...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta ->IGHV7-4-1*01 -caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga ->IGHV7-4-1*02 -caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga ->IGHV7-4-1*03 -caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg ->IGHV7-4-1*04 -caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga ->IGHV7-4-1*05 -caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga ->AIGHV7-40*03| -ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga ->IGHV7-81*01 -caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- a/baseline/IMGTVHreferencedataset20161215.fa Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ ->IGHV1-18*01 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-18*02 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc >IGHV1-18*03 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1-18*04 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga >IGHV1-2*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*04 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*05 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga >IGHV1-24*01 caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga >IGHV1-3*01 caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga >IGHV1-3*02 caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga >IGHV1-38-4*01 caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca >IGHV1-45*01 cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana >IGHV1-45*02 cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata >IGHV1-45*03 .....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga >IGHV1-46*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-46*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-46*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga >IGHV1-58*01 caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga >IGHV1-58*02 caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga >IGHV1-68*01 caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata >IGHV1-69*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*02 caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1-69*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc >IGHV1-69*04 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*05 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1-69*06 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*07 .....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag >IGHV1-69*08 caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*09 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*10 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*11 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*12 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*13 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*14 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69-2*01 gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga >IGHV1-69-2*02 .....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag >IGHV1-69D*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-8*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg >IGHV1-8*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg >IGHV1-NL1*01 caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga >IGHV1/OR15-1*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga >IGHV1/OR15-1*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga >IGHV1/OR15-1*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga >IGHV1/OR15-1*04 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga >IGHV1/OR15-2*01 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-2*02 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-2*03 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-3*01 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-3*02 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1/OR15-3*03 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-4*01 caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-5*01 .....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga >IGHV1/OR15-5*02 caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga >IGHV1/OR15-9*01 caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga >IGHV1/OR21-1*01 caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga >IGHV2-10*01 caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac >IGHV2-26*01 caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac >IGHV2-5*01 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*02 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*03 ................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt >IGHV2-5*04 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac >IGHV2-5*05 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*06 cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga >IGHV2-5*08 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*09 caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-70*01 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*02 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*03 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*04 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac >IGHV2-70*05 ..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga >IGHV2-70*06 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*07 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*08 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*09 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg >IGHV2-70*10 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*11 cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*12 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-70*13 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac >IGHV2-70D*04 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70D*14 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2/OR16-5*01 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag >IGHV3-11*01 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-11*03 caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga >IGHV3-11*04 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-11*05 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-11*06 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-13*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*02 gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga >IGHV3-13*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*05 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-15*01 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*02 gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*03 gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*04 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*05 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*06 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*07 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*08 gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg >IGHV3-16*01 gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-16*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-19*01 acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-20*01 gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga >IGHV3-20*02 gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga >IGHV3-21*01 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-21*02 gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-21*03 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga >IGHV3-21*04 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-22*01 gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga >IGHV3-22*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga >IGHV3-23*01 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*02 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*03 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*05 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa >IGHV3-23D*01 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-25*01 gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-25*02 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-25*03 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga >IGHV3-25*04 gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga >IGHV3-25*05 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-29*01 gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt >IGHV3-30*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*04 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*05 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga >IGHV3-30*06 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*07 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*08 caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga >IGHV3-30*09 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*10 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*11 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*12 caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*13 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*14 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*15 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*16 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*17 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*18 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30*19 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-2*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca >IGHV3-30-22*01 gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc >IGHV3-30-3*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-3*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-3*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-33*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg >IGHV3-30-42*01 gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt >IGHV3-30-5*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-5*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-52*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg >IGHV3-32*01 gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc >IGHV3-33*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*02 caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga >IGHV3-33*04 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*05 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*06 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga >IGHV3-33-2*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca >IGHV3-35*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa >IGHV3-38*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata >IGHV3-38*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata >IGHV3-38*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata >IGHV3-38-3*01 gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga >IGHV3-43*01 gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata >IGHV3-43*02 gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata >IGHV3-43D*01 gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata >IGHV3-47*01 gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga >IGHV3-47*02 gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga >IGHV3-48*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-48*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga >IGHV3-48*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga >IGHV3-48*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-49*01 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*02 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*03 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*04 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*05 gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-52*01 gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg >IGHV3-52*02 gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga >IGHV3-52*03 gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga >IGHV3-53*01 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-53*02 gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-53*03 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga >IGHV3-53*04 gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga >IGHV3-54*01 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt >IGHV3-54*02 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg >IGHV3-54*04 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt >IGHV3-62*01 gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga >IGHV3-63*01 gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt >IGHV3-63*02 gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa >IGHV3-64*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga >IGHV3-64*02 gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga >IGHV3-64*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-64*04 caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-64*05 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-64D*06 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-66*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-66*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga >IGHV3-66*03 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-66*04 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca >IGHV3-69-1*01 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-69-1*02 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga >IGHV3-7*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-7*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga >IGHV3-7*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-71*01 gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-71*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga >IGHV3-71*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-72*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga >IGHV3-72*02 ....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat >IGHV3-73*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca >IGHV3-73*02 gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca >IGHV3-74*01 gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga >IGHV3-74*02 gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga >IGHV3-74*03 gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga >IGHV3-9*01 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata >IGHV3-9*02 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata >IGHV3-9*03 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata >IGHV3-NL1*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3/OR15-7*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*02 gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*05 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga >IGHV3/OR16-10*01 gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga >IGHV3/OR16-10*02 gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga >IGHV3/OR16-10*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga >IGHV3/OR16-12*01 gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga >IGHV3/OR16-13*01 gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga >IGHV3/OR16-14*01 gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga >IGHV3/OR16-15*01 gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa >IGHV3/OR16-15*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga >IGHV3/OR16-16*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga >IGHV3/OR16-6*02 gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg >IGHV3/OR16-8*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa >IGHV3/OR16-8*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca >IGHV3/OR16-9*01 gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa >IGHV4-28*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga >IGHV4-28*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga >IGHV4-28*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*06 caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa >IGHV4-28*07 caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-30-2*01 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-30-2*02 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg >IGHV4-30-2*03 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca >IGHV4-30-2*04 ...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-30-2*05 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-2*06 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-30-4*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-30-4*04 caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg >IGHV4-30-4*05 ..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*06 ...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-31*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*04 caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg >IGHV4-31*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg >IGHV4-31*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-31*10 caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga >IGHV4-34*01 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*02 caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*03 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*04 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*05 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*06 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*07 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*08 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg >IGHV4-34*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-34*10 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-34*11 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga >IGHV4-34*12 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga >IGHV4-34*13 ...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-38-2*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga >IGHV4-38-2*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-39*01 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca >IGHV4-39*02 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga >IGHV4-39*03 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg >IGHV4-39*04 ..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac >IGHV4-39*05 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-39*06 cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-39*07 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga >IGHV4-4*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*05 caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*06 ...........................................................................tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-55*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-55*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-55*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg >IGHV4-55*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg >IGHV4-55*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-55*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-59*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-59*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-59*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg >IGHV4-59*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-59*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-59*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg >IGHV4-59*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga >IGHV4-59*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca >IGHV4-59*09 ...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg >IGHV4-59*10 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-61*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-61*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-61*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-61*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg >IGHV4-61*05 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga >IGHV4-61*06 ...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-61*07 ...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca >IGHV4-61*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV5-10-1*01 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-10-1*02 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca >IGHV5-10-1*03 gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-10-1*04 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*01 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca >IGHV5-51*02 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca >IGHV5-51*03 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*04 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*05 .....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg >IGHV5-78*01 gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga >IGHV6-1*01 caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga >IGHV6-1*02 caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga >IGHV7-34-1*01 ...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta >IGHV7-34-1*02 ...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta >IGHV7-4-1*01 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga >IGHV7-4-1*02 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga >IGHV7-4-1*03 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg >IGHV7-4-1*04 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga >IGHV7-4-1*05 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga >IGHV7-40*03 ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga >IGHV7-81*01 caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- a/baseline/IMGTVHreferencedataset20161215.fasta Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ ->IGHV1-18*01 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-18*02 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc >IGHV1-18*03 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1-18*04 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga >IGHV1-2*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*04 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*05 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga >IGHV1-24*01 caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga >IGHV1-3*01 caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga >IGHV1-3*02 caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga >IGHV1-38-4*01 caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca >IGHV1-45*01 cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana >IGHV1-45*02 cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata >IGHV1-45*03 .....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga >IGHV1-46*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-46*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-46*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga >IGHV1-58*01 caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga >IGHV1-58*02 caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga >IGHV1-68*01 caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata >IGHV1-69*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*02 caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1-69*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc >IGHV1-69*04 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*05 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1-69*06 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*07 .....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag >IGHV1-69*08 caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*09 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*10 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*11 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*12 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*13 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*14 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69-2*01 gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga >IGHV1-69-2*02 .....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag >IGHV1-69D*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-8*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg >IGHV1-8*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg >IGHV1-NL1*01 caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga >IGHV1/OR15-1*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga >IGHV1/OR15-1*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga >IGHV1/OR15-1*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga >IGHV1/OR15-1*04 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga >IGHV1/OR15-2*01 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-2*02 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-2*03 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-3*01 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-3*02 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1/OR15-3*03 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-4*01 caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-5*01 .....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga >IGHV1/OR15-5*02 caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga >IGHV1/OR15-9*01 caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga >IGHV1/OR21-1*01 caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga >IGHV2-10*01 caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac >IGHV2-26*01 caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac >IGHV2-5*01 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*02 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*03 ................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt >IGHV2-5*04 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac >IGHV2-5*05 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*06 cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga >IGHV2-5*08 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*09 caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-70*01 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*02 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*03 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*04 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac >IGHV2-70*05 ..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga >IGHV2-70*06 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*07 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*08 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*09 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg >IGHV2-70*10 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*11 cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*12 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-70*13 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac >IGHV2-70D*04 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70D*14 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2/OR16-5*01 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag >IGHV3-11*01 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-11*03 caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga >IGHV3-11*04 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-11*05 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-11*06 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-13*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*02 gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga >IGHV3-13*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*05 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-15*01 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*02 gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*03 gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*04 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*05 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*06 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*07 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*08 gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg >IGHV3-16*01 gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-16*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-19*01 acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-20*01 gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga >IGHV3-20*02 gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga >IGHV3-21*01 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-21*02 gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-21*03 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga >IGHV3-21*04 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-22*01 gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga >IGHV3-22*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga >IGHV3-23*01 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*02 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*03 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*05 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa >IGHV3-23D*01 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-25*01 gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-25*02 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-25*03 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga >IGHV3-25*04 gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga >IGHV3-25*05 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-29*01 gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt >IGHV3-30*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*04 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*05 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga >IGHV3-30*06 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*07 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*08 caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga >IGHV3-30*09 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*10 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*11 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*12 caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*13 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*14 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*15 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*16 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*17 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*18 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30*19 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-2*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca >IGHV3-30-22*01 gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc >IGHV3-30-3*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-3*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-3*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-33*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg >IGHV3-30-42*01 gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt >IGHV3-30-5*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-5*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-52*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg >IGHV3-32*01 gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc >IGHV3-33*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*02 caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga >IGHV3-33*04 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*05 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*06 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga >IGHV3-33-2*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca >IGHV3-35*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa >IGHV3-38*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata >IGHV3-38*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata >IGHV3-38*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata >IGHV3-38-3*01 gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga >IGHV3-43*01 gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata >IGHV3-43*02 gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata >IGHV3-43D*01 gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata >IGHV3-47*01 gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga >IGHV3-47*02 gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga >IGHV3-48*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-48*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga >IGHV3-48*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga >IGHV3-48*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-49*01 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*02 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*03 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*04 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*05 gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-52*01 gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg >IGHV3-52*02 gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga >IGHV3-52*03 gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga >IGHV3-53*01 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-53*02 gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-53*03 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga >IGHV3-53*04 gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga >IGHV3-54*01 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt >IGHV3-54*02 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg >IGHV3-54*04 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt >IGHV3-62*01 gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga >IGHV3-63*01 gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt >IGHV3-63*02 gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa >IGHV3-64*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga >IGHV3-64*02 gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga >IGHV3-64*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-64*04 caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-64*05 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-64D*06 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-66*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-66*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga >IGHV3-66*03 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-66*04 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca >IGHV3-69-1*01 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-69-1*02 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga >IGHV3-7*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-7*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga >IGHV3-7*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-71*01 gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-71*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga >IGHV3-71*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-72*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga >IGHV3-72*02 ....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat >IGHV3-73*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca >IGHV3-73*02 gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca >IGHV3-74*01 gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga >IGHV3-74*02 gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga >IGHV3-74*03 gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga >IGHV3-9*01 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata >IGHV3-9*02 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata >IGHV3-9*03 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata >IGHV3-NL1*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3/OR15-7*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*02 gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*05 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga >IGHV3/OR16-10*01 gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga >IGHV3/OR16-10*02 gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga >IGHV3/OR16-10*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga >IGHV3/OR16-12*01 gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga >IGHV3/OR16-13*01 gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga >IGHV3/OR16-14*01 gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga >IGHV3/OR16-15*01 gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa >IGHV3/OR16-15*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga >IGHV3/OR16-16*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga >IGHV3/OR16-6*02 gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg >IGHV3/OR16-8*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa >IGHV3/OR16-8*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca >IGHV3/OR16-9*01 gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa >IGHV4-28*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga >IGHV4-28*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga >IGHV4-28*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*06 caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa >IGHV4-28*07 caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-30-2*01 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-30-2*02 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg >IGHV4-30-2*03 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca >IGHV4-30-2*04 ...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-30-2*05 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-2*06 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-30-4*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-30-4*04 caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg >IGHV4-30-4*05 ..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*06 ...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-31*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*04 caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg >IGHV4-31*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg >IGHV4-31*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-31*10 caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga >IGHV4-34*01 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*02 caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*03 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*04 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*05 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*06 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*07 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*08 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg >IGHV4-34*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-34*10 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-34*11 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga >IGHV4-34*12 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga >IGHV4-34*13 ...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-38-2*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga >IGHV4-38-2*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-39*01 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca >IGHV4-39*02 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga >IGHV4-39*03 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg >IGHV4-39*04 ..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac >IGHV4-39*05 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-39*06 cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-39*07 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga >IGHV4-4*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*05 caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*06 ...........................................................................tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-55*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-55*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-55*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg >IGHV4-55*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg >IGHV4-55*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-55*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-59*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-59*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-59*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg >IGHV4-59*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-59*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-59*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg >IGHV4-59*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga >IGHV4-59*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca >IGHV4-59*09 ...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg >IGHV4-59*10 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-61*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-61*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-61*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-61*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg >IGHV4-61*05 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga >IGHV4-61*06 ...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-61*07 ...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca >IGHV4-61*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV5-10-1*01 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-10-1*02 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca >IGHV5-10-1*03 gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-10-1*04 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*01 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca >IGHV5-51*02 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca >IGHV5-51*03 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*04 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*05 .....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg >IGHV5-78*01 gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga >IGHV6-1*01 caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga >IGHV6-1*02 caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga >IGHV7-34-1*01 ...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta >IGHV7-34-1*02 ...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta >IGHV7-4-1*01 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga >IGHV7-4-1*02 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga >IGHV7-4-1*03 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg >IGHV7-4-1*04 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga >IGHV7-4-1*05 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga >IGHV7-40*03 ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga >IGHV7-81*01 caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- a/baseline/baseline_url.txt Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -http://selection.med.yale.edu/baseline/ \ No newline at end of file
--- a/baseline/comparePDFs.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,225 +0,0 @@ -options("warn"=-1) - -#from http://selection.med.yale.edu/baseline/Archive/Baseline%20Version%201.3/Baseline_Functions_Version1.3.r -# Compute p-value of two distributions -compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){ -#print(c(length(dens1),length(dens2))) -if(length(dens1)>1 & length(dens2)>1 ){ - dens1<-dens1/sum(dens1) - dens2<-dens2/sum(dens2) - cum2 <- cumsum(dens2)-dens2/2 - tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i]))) - #print(tmp) - if(tmp>0.5)tmp<-tmp-1 - return( tmp ) - } - else { - return(NA) - } - #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N) -} - - -require("grid") -arg <- commandArgs(TRUE) -#arg <- c("300143","4","5") -arg[!arg=="clonal"] -input <- arg[1] -output <- arg[2] -rowIDs <- as.numeric( sapply(arg[3:(max(3,length(arg)))],function(x){ gsub("chkbx","",x) } ) ) - -numbSeqs = length(rowIDs) - -if ( is.na(rowIDs[1]) | numbSeqs>10 ) { - stop( paste("Error: Please select between one and 10 seqeunces to compare.") ) -} - -#load( paste("output/",sessionID,".RData",sep="") ) -load( input ) -#input - -xMarks = seq(-20,20,length.out=4001) - -plot_grid_s<-function(pdf1,pdf2,Sample=100,cex=1,xlim=NULL,xMarks = seq(-20,20,length.out=4001)){ - yMax = max(c(abs(as.numeric(unlist(listPDFs[pdf1]))),abs(as.numeric(unlist(listPDFs[pdf2]))),0),na.rm=T) * 1.1 - - if(length(xlim==2)){ - xMin=xlim[1] - xMax=xlim[2] - } else { - xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1] - xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1] - xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])] - xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])] - - xMin_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][1] - xMin_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][1] - xMax_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001])] - xMax_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001])] - - xMin=min(c(xMin_CDR,xMin_FWR,xMin_CDR2,xMin_FWR2,0),na.rm=TRUE) - xMax=max(c(xMax_CDR,xMax_FWR,xMax_CDR2,xMax_FWR2,0),na.rm=TRUE) - } - - sigma<-approx(xMarks,xout=seq(xMin,xMax,length.out=Sample))$x - grid.rect(gp = gpar(col=gray(0.6),fill="white",cex=cex)) - x <- sigma - pushViewport(viewport(x=0.175,y=0.175,width=0.825,height=0.825,just=c("left","bottom"),default.units="npc")) - #pushViewport(plotViewport(c(1.8, 1.8, 0.25, 0.25)*cex)) - pushViewport(dataViewport(x, c(yMax,-yMax),gp = gpar(cex=cex),extension=c(0.05))) - grid.polygon(c(0,0,1,1),c(0,0.5,0.5,0),gp=gpar(col=grey(0.95),fill=grey(0.95)),default.units="npc") - grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.9),fill=grey(0.9)),default.units="npc") - grid.rect() - grid.xaxis(gp = gpar(cex=cex/1.1)) - yticks = pretty(c(-yMax,yMax),8) - yticks = yticks[yticks>(-yMax) & yticks<(yMax)] - grid.yaxis(at=yticks,label=abs(yticks),gp = gpar(cex=cex/1.1)) - if(length(listPDFs[pdf1][[1]][["CDR"]])>1){ - ycdr<-approx(xMarks,listPDFs[pdf1][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y - grid.lines(unit(x,"native"), unit(ycdr,"native"),gp=gpar(col=2,lwd=2)) - } - if(length(listPDFs[pdf1][[1]][["FWR"]])>1){ - yfwr<-approx(xMarks,listPDFs[pdf1][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y - grid.lines(unit(x,"native"), unit(-yfwr,"native"),gp=gpar(col=4,lwd=2)) - } - - if(length(listPDFs[pdf2][[1]][["CDR"]])>1){ - ycdr2<-approx(xMarks,listPDFs[pdf2][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y - grid.lines(unit(x,"native"), unit(ycdr2,"native"),gp=gpar(col=2,lwd=2,lty=2)) - } - if(length(listPDFs[pdf2][[1]][["FWR"]])>1){ - yfwr2<-approx(xMarks,listPDFs[pdf2][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y - grid.lines(unit(x,"native"), unit(-yfwr2,"native"),gp=gpar(col=4,lwd=2,lty=2)) - } - - grid.lines(unit(c(0,1),"npc"), unit(c(0.5,0.5),"npc"),gp=gpar(col=1)) - grid.lines(unit(c(0,0),"native"), unit(c(0,1),"npc"),gp=gpar(col=1,lwd=1,lty=3)) - - grid.text("All", x = unit(-2.5, "lines"), rot = 90,gp = gpar(cex=cex)) - grid.text( expression(paste("Selection Strength (", Sigma, ")", sep="")) , y = unit(-2.5, "lines"),gp = gpar(cex=cex)) - - if(pdf1==pdf2 & length(listPDFs[pdf2][[1]][["FWR"]])>1 & length(listPDFs[pdf2][[1]][["CDR"]])>1 ){ - pCDRFWR = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf1]][["FWR"]]) - pval = formatC(as.numeric(pCDRFWR),digits=3) - grid.text( substitute(expression(paste(P[CDR/FWR], "=", x, sep="")),list(x=pval))[[2]] , x = unit(0.02, "npc"),y = unit(0.98, "npc"),just=c("left", "top"),gp = gpar(cex=cex*1.2)) - } - grid.text(paste("CDR"), x = unit(0.98, "npc"),y = unit(0.98, "npc"),just=c("right", "top"),gp = gpar(cex=cex*1.5)) - grid.text(paste("FWR"), x = unit(0.98, "npc"),y = unit(0.02, "npc"),just=c("right", "bottom"),gp = gpar(cex=cex*1.5)) - popViewport(2) -} -#plot_grid_s(1) - - -p2col<-function(p=0.01){ - breaks=c(-.51,-0.1,-.05,-0.01,-0.005,0,0.005,0.01,0.05,0.1,0.51) - i<-findInterval(p,breaks) - cols = c( rgb(0.8,1,0.8), rgb(0.6,1,0.6), rgb(0.4,1,0.4), rgb(0.2,1,0.2) , rgb(0,1,0), - rgb(1,0,0), rgb(1,.2,.2), rgb(1,.4,.4), rgb(1,.6,.6) , rgb(1,.8,.8) ) - return(cols[i]) -} - - -plot_pvals<-function(pdf1,pdf2,cex=1,upper=TRUE){ - if(upper){ - pCDR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf2]][["FWR"]]) - pFWR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["FWR"]], dens2=listPDFs[[pdf2]][["FWR"]]) - pFWR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["FWR"]]) - pCDR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["CDR"]]) - grid.polygon(c(0.5,0.5,1,1),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1FWR2),fill=p2col(pFWR1FWR2)),default.units="npc") - grid.polygon(c(0.5,0.5,1,1),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1FWR2),fill=p2col(pCDR1FWR2)),default.units="npc") - grid.polygon(c(0.5,0.5,0,0),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1CDR2),fill=p2col(pCDR1CDR2)),default.units="npc") - grid.polygon(c(0.5,0.5,0,0),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1CDR2),fill=p2col(pFWR1CDR2)),default.units="npc") - - grid.lines(c(0,1),0.5,gp=gpar(lty=2,col=gray(0.925))) - grid.lines(0.5,c(0,1),gp=gpar(lty=2,col=gray(0.925))) - - grid.text(formatC(as.numeric(pFWR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) - grid.text(formatC(as.numeric(pCDR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) - grid.text(formatC(as.numeric(pCDR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) - grid.text(formatC(as.numeric(pFWR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) - - - # grid.text(paste("P = ",formatC(pCDRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.98, "npc"),just=c("center", "top"),gp = gpar(cex=cex)) - # grid.text(paste("P = ",formatC(pFWRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.02, "npc"),just=c("center", "bottom"),gp = gpar(cex=cex)) - } - else{ - } -} - - -################################################################################## -################## The whole OCD's matrix ######################################## -################################################################################## - -#pdf(width=4*numbSeqs+1/3,height=4*numbSeqs+1/3) -pdf( output ,width=4*numbSeqs+1/3,height=4*numbSeqs+1/3) - -pushViewport(viewport(x=0.02,y=0.02,just = c("left", "bottom"),w =0.96,height=0.96,layout = grid.layout(numbSeqs+1,numbSeqs+1,widths=unit.c(unit(rep(1,numbSeqs),"null"),unit(4,"lines")),heights=unit.c(unit(4,"lines"),unit(rep(1,numbSeqs),"null"))))) - -for( seqOne in 1:numbSeqs+1){ - pushViewport(viewport(layout.pos.col = seqOne-1, layout.pos.row = 1)) - if(seqOne>2){ - grid.polygon(c(0,0,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc") - grid.polygon(c(1,1,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc") - grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.5)),default.units="npc") - - grid.text(y=.25,x=0.75,"FWR",gp = gpar(cex=1.5),just="center") - grid.text(y=.25,x=0.25,"CDR",gp = gpar(cex=1.5),just="center") - } - grid.rect(gp = gpar(col=grey(0.9))) - grid.text(y=.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),just="center") - popViewport(1) -} - -for( seqOne in 1:numbSeqs+1){ - pushViewport(viewport(layout.pos.row = seqOne, layout.pos.col = numbSeqs+1)) - if(seqOne<=numbSeqs){ - grid.polygon(c(0,0.5,0.5,0),c(0,0,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc") - grid.polygon(c(0,0.5,0.5,0),c(1,1,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc") - grid.polygon(c(1,0.5,0.5,1),c(0,0,1,1),gp=gpar(col=grey(0.5)),default.units="npc") - grid.text(x=.25,y=0.75,"CDR",gp = gpar(cex=1.5),just="center",rot=270) - grid.text(x=.25,y=0.25,"FWR",gp = gpar(cex=1.5),just="center",rot=270) - } - grid.rect(gp = gpar(col=grey(0.9))) - grid.text(x=0.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),rot=270,just="center") - popViewport(1) -} - -for( seqOne in 1:numbSeqs+1){ - for(seqTwo in 1:numbSeqs+1){ - pushViewport(viewport(layout.pos.col = seqTwo-1, layout.pos.row = seqOne)) - if(seqTwo>seqOne){ - plot_pvals(rowIDs[seqOne-1],rowIDs[seqTwo-1],cex=2) - grid.rect() - } - popViewport(1) - } -} - - -xMin=0 -xMax=0.01 -for(pdf1 in rowIDs){ - xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1] - xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1] - xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])] - xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])] - xMin=min(c(xMin_CDR,xMin_FWR,xMin),na.rm=TRUE) - xMax=max(c(xMax_CDR,xMax_FWR,xMax),na.rm=TRUE) -} - - - -for(i in 1:numbSeqs+1){ - for(j in (i-1):numbSeqs){ - pushViewport(viewport(layout.pos.col = i-1, layout.pos.row = j+1)) - grid.rect() - plot_grid_s(rowIDs[i-1],rowIDs[j],cex=1) - popViewport(1) - } -} - -dev.off() - -cat("Success", paste(rowIDs,collapse="_"),sep=":") -
--- a/baseline/filter.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -arg = commandArgs(TRUE) -summaryfile = arg[1] -gappedfile = arg[2] -selection = arg[3] -output = arg[4] -print(paste("selection = ", selection)) - - -summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote = "") -gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote = "") - -fix_column_names = function(df){ - if("V.DOMAIN.Functionality" %in% names(df)){ - names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality" - print("found V.DOMAIN.Functionality, changed") - } - if("V.DOMAIN.Functionality.comment" %in% names(df)){ - names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment" - print("found V.DOMAIN.Functionality.comment, changed") - } - return(df) -} - -gappeddat = fix_column_names(gappeddat) - -#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T)) - -dat = cbind(gappeddat, summarydat$AA.JUNCTION) - -colnames(dat)[length(dat)] = "AA.JUNCTION" - -dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) -dat$VGene = gsub("[*].*", "", dat$VGene) - -dat$DGene = gsub("^Homsap ", "", dat$D.GENE.and.allele) -dat$DGene = gsub("[*].*", "", dat$DGene) - -dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) -dat$JGene = gsub("[*].*", "", dat$JGene) - -print(str(dat)) - -dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":")) - -dat = dat[!duplicated(dat$past), ] - -print(paste("Sequences remaining after duplicate filter:", nrow(dat))) - -dat = dat[dat$Functionality != "No results" & dat$Functionality != "unproductive",] - -print(paste("Sequences remaining after functionality filter:", nrow(dat))) - -print(paste("Sequences remaining:", nrow(dat))) - -write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T)
--- a/baseline/script_imgt.py Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ -#import xlrd #avoid dep -import argparse -import re - -parser = argparse.ArgumentParser() -parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence") -parser.add_argument("--ref", help="Reference file") -parser.add_argument("--output", help="Output file") -parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") - -args = parser.parse_args() - -print "script_imgt.py" -print "input:", args.input -print "ref:", args.ref -print "output:", args.output -print "id:", args.id - -refdic = dict() -with open(args.ref, 'rU') as ref: - currentSeq = "" - currentId = "" - for line in ref: - if line.startswith(">"): - if currentSeq is not "" and currentId is not "": - refdic[currentId[1:]] = currentSeq - currentId = line.rstrip() - currentSeq = "" - else: - currentSeq += line.rstrip() - refdic[currentId[1:]] = currentSeq - -print "Have", str(len(refdic)), "reference sequences" - -vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, -# r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", -# r"(IGKV[0-3]D?-[0-9]{1,2})", -# r"(IGLV[0-9]-[0-9]{1,2})", -# r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)", -# r"(TRGV[234589])", -# r"(TRDV[1-3])"] - -#vPattern = re.compile(r"|".join(vPattern)) -vPattern = re.compile("|".join(vPattern)) - -def filterGene(s, pattern): - if type(s) is not str: - return None - res = pattern.search(s) - if res: - return res.group(0) - return None - - - -currentSeq = "" -currentId = "" -first=True -with open(args.input, 'r') as i: - with open(args.output, 'a') as o: - o.write(">>>" + args.id + "\n") - outputdic = dict() - for line in i: - if first: - first = False - continue - linesplt = line.split("\t") - ref = filterGene(linesplt[1], vPattern) - if not ref or not linesplt[2].rstrip(): - continue - if ref in outputdic: - outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] - else: - outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] - #print outputdic - - for k in outputdic.keys(): - if k in refdic: - o.write(">>" + k + "\n") - o.write(refdic[k] + "\n") - for seq in outputdic[k]: - #print seq - o.write(">" + seq[0] + "\n") - o.write(seq[1] + "\n") - else: - print k + " not in reference, skipping " + k
--- a/baseline/script_xlsx.py Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -import xlrd -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence") -parser.add_argument("--ref", help="Reference file") -parser.add_argument("--output", help="Output file") - -args = parser.parse_args() - -gene_column = 6 -id_column = 7 -seq_column = 8 -LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"] - - -refdic = dict() -with open(args.ref, 'r') as ref: - currentSeq = "" - currentId = "" - for line in ref.readlines(): - if line[0] is ">": - if currentSeq is not "" and currentId is not "": - refdic[currentId[1:]] = currentSeq - currentId = line.rstrip() - currentSeq = "" - else: - currentSeq += line.rstrip() - refdic[currentId[1:]] = currentSeq - -currentSeq = "" -currentId = "" -with xlrd.open_workbook(args.input, 'r') as wb: - with open(args.output, 'a') as o: - for sheet in wb.sheets(): - if sheet.cell(1,gene_column).value.find("IGHV") < 0: - print "Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name - continue - o.write(">>>" + sheet.name + "\n") - outputdic = dict() - for rowindex in range(1, sheet.nrows): - ref = sheet.cell(rowindex, gene_column).value.replace(">", "") - if ref in outputdic: - outputdic[ref] += [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)] - else: - outputdic[ref] = [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)] - #print outputdic - - for k in outputdic.keys(): - if k in refdic: - o.write(">>" + k + "\n") - o.write(refdic[k] + "\n") - for seq in outputdic[k]: - #print seq - o.write(">" + seq[0] + "\n") - o.write(seq[1] + "\n") - else: - print k + " not in reference, skipping " + k
--- a/baseline/wrapper.sh Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,92 +0,0 @@ -#!/bin/bash -dir="$(cd "$(dirname "$0")" && pwd)" - -testID=$1 -species=$2 -substitutionModel=$3 -mutabilityModel=$4 -clonal=$5 -fixIndels=$6 -region=$7 -inputs=$8 -inputs=($inputs) -IDs=$9 -IDs=($IDs) -ref=${10} -output=${11} -selection=${12} -output_table=${13} -outID="result" - -echo "$PWD" - -echo "testID = $testID" -echo "species = $species" -echo "substitutionModel = $substitutionModel" -echo "mutabilityModel = $mutabilityModel" -echo "clonal = $clonal" -echo "fixIndels = $fixIndels" -echo "region = $region" -echo "inputs = ${inputs[@]}" -echo "IDs = ${IDs[@]}" -echo "ref = $ref" -echo "output = $output" -echo "outID = $outID" - -fasta="$PWD/baseline.fasta" - - -count=0 -for current in ${inputs[@]} -do - f=$(file $current) - zipType="Zip archive" - if [[ "$f" == *"Zip archive"* ]] || [[ "$f" == *"XZ compressed data"* ]] - then - id=${IDs[$count]} - echo "id=$id" - if [[ "$f" == *"Zip archive"* ]] ; then - echo "Zip archive" - echo "unzip $input -d $PWD/files/" - unzip $current -d "$PWD/$id/" - elif [[ "$f" == *"XZ compressed data"* ]] ; then - echo "ZX archive" - echo "tar -xJf $input -C $PWD/files/" - mkdir -p "$PWD/$id/files" - tar -xJf $current -C "$PWD/$id/files/" - fi - filtered="$PWD/filtered_${id}.txt" - imgt_1_file="`find $PWD/$id -name '1_*.txt'`" - imgt_2_file="`find $PWD/$id -name '2_*.txt'`" - echo "1_Summary file: ${imgt_1_file}" - echo "2_IMGT-gapped file: ${imgt_2_file}" - echo "filter.r for $id" - Rscript $dir/filter.r ${imgt_1_file} ${imgt_2_file} "$selection" $filtered 2>&1 - - final="$PWD/final_${id}.txt" - cat $filtered | cut -f2,4,7 > $final - python $dir/script_imgt.py --input $final --ref $ref --output $fasta --id $id - else - python $dir/script_xlsx.py --input $current --ref $ref --output $fasta - fi - count=$((count+1)) -done -workdir="$PWD" -cd $dir -echo "file: ${inputs[0]}" -#Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region ${inputs[0]} $workdir/ $outID 2>&1 -Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region $fasta $workdir/ $outID 2>&1 - -echo "$workdir/${outID}.txt" - -rows=`tail -n +2 $workdir/${outID}.txt | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '` -rows=($rows) -#unset rows[${#rows[@]}-1] - -cd $dir -Rscript --verbose $dir/comparePDFs.r $workdir/${outID}.RData $output ${rows[@]} 2>&1 -cp $workdir/result.txt ${output_table} - - - -
--- a/change_o/change_o_url.txt Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -https://changeo.readthedocs.io/en/version-0.4.4/ \ No newline at end of file
--- a/change_o/define_clones.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - -input=args[1] -output=args[2] - -change.o = read.table(input, header=T, sep="\t", quote="", stringsAsFactors=F) - -freq = data.frame(table(change.o$CLONE)) -freq2 = data.frame(table(freq$Freq)) - -freq2$final = as.numeric(freq2$Freq) * as.numeric(as.character(freq2$Var1)) - -names(freq2) = c("Clone size", "Nr of clones", "Nr of sequences") - -write.table(x=freq2, file=output, sep="\t",quote=F,row.names=F,col.names=T)
--- a/change_o/define_clones.sh Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -#!/bin/bash -dir="$(cd "$(dirname "$0")" && pwd)" - -#define_clones.sh $input $noparse $scores $regions $out_file - -type=$1 -input=$2 - -mkdir -p $PWD/outdir - -cp $input $PWD/input.tab #file has to have a ".tab" extension - -if [ "bygroup" == "$type" ] ; then - mode=$3 - act=$4 - model=$5 - norm=$6 - sym=$7 - link=$8 - dist=$9 - output=${10} - output2=${11} - - DefineClones.py -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link - - Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1 -else - method=$3 - output=$4 - output2=$5 - - DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method - - Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1 -fi - -cp $PWD/outdir/output_clone-pass.tab $output - -rm -rf $PWD/outdir/
--- a/change_o/makedb.sh Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -#!/bin/bash -dir="$(cd "$(dirname "$0")" && pwd)" - -input=$1 -noparse=$2 -scores=$3 -regions=$4 -output=$5 - -if [ "true" == "$noparse" ] ; then - noparse="--noparse" -else - noparse="" -fi - -if [ "true" == "$scores" ] ; then - scores="--scores" -else - scores="" -fi - -if [ "true" == "$regions" ] ; then - regions="--regions" -else - regions="" -fi - -mkdir $PWD/outdir - -echo "makedb: $PWD/outdir" - -MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions - -mv $PWD/outdir/output_db-pass.tab $output - -rm -rf $PWD/outdir/
--- a/change_o/select_first_in_clone.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - -input.file = args[1] -output.file = args[2] - -print("select_in_first_clone.r") -print(input.file) -print(output.file) - -input = read.table(input.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") - -input = input[!duplicated(input$CLONE),] - -names(input)[1] = "Sequence.ID" - -write.table(input, output.file, quote=F, sep="\t", row.names=F, col.names=T, na="")
--- a/check_unique_id.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) #first argument must be the summary file so it can grab the - -current_file = args[1] - -current = read.table(current_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="", check.names=F) - -if(!("Sequence number" %in% names(current))){ - stop("First argument doesn't contain the 'Sequence number' column") -} - -tbl = table(current[,"Sequence ID"]) -l_tbl = length(tbl) -check = any(tbl > 1) - -#if(l_tbl != nrow(current)){ # non unique IDs? -if(check){ - print("Sequence.ID is not unique for every sequence, adding sequence number to IDs") - for(i in 1:length(args)){ - current_file = args[i] - print(paste("Appending 'Sequence number' column to 'Sequence ID' column in", current_file)) - current = read.table(current_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="", check.names=F) - current[,"Sequence ID"] = paste(current[,"Sequence ID"], current[,"Sequence number"], sep="_") - write.table(x = current, file = current_file, quote = F, sep = "\t", na = "", row.names = F, col.names = T) - } -}
--- a/datatypes_conf.xml Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<datatypes> - <registration> - <datatype extension="imgt_archive" type="galaxy.datatypes.binary:CompressedArchive" display_in_upload="True" subclass="True"/> - </registration> -</datatypes>
--- a/gene_identification.py Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,226 +0,0 @@ -import re -import argparse -import time -starttime= int(time.time() * 1000) - -parser = argparse.ArgumentParser() -parser.add_argument("--input", help="The 1_Summary file from an IMGT zip file") -parser.add_argument("--output", help="The annotated output file to be merged back with the summary file") - -args = parser.parse_args() - -infile = args.input -#infile = "test_VH-Ca_Cg_25nt/1_Summary_test_VH-Ca_Cg_25nt_241013.txt" -output = args.output -#outfile = "identified.txt" - -dic = dict() -total = 0 - - -first = True -IDIndex = 0 -seqIndex = 0 - -with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence - for line in f: - total += 1 - linesplt = line.split("\t") - if first: - print "linesplt", linesplt - IDIndex = linesplt.index("Sequence ID") - seqIndex = linesplt.index("Sequence") - first = False - continue - - ID = linesplt[IDIndex] - if len(linesplt) < 28: #weird rows without a sequence - dic[ID] = "" - else: - dic[ID] = linesplt[seqIndex] - -print "Number of input sequences:", len(dic) - -#old cm sequence: gggagtgcatccgccccaacccttttccccctcgtctcctgtgagaattccc -#old cg sequence: ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggccctgggctgcctggtcaaggactacttccccgaaccggtgacggtgtcgtggaactcaggcgccctgaccag - -#lambda/kappa reference sequence -searchstrings = {"ca": "catccccgaccagccccaaggtcttcccgctgagcctctgcagcacccagccagatgggaacgtggtcatcgcctgcctgg", - "cg": "ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggcc", - "ce": "gcctccacacagagcccatccgtcttccccttgacccgctgctgcaaaaacattccctcc", - "cm": "gggagtgcatccgccccaacc"} #new (shorter) cm sequence - -compiledregex = {"ca": [], - "cg": [], - "ce": [], - "cm": []} - -#lambda/kappa reference sequence variable nucleotides -ca1 = {38: 't', 39: 'g', 48: 'a', 49: 'g', 51: 'c', 68: 'a', 73: 'c'} -ca2 = {38: 'g', 39: 'a', 48: 'c', 49: 'c', 51: 'a', 68: 'g', 73: 'a'} -cg1 = {0: 'c', 33: 'a', 38: 'c', 44: 'a', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'} -cg2 = {0: 'c', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'g', 132: 't'} -cg3 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'} -cg4 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'c', 132: 'c'} - -#remove last snp for shorter cg sequence --- note, also change varsInCG -del cg1[132] -del cg2[132] -del cg3[132] -del cg4[132] - -#reference sequences are cut into smaller parts of 'chunklength' length, and with 'chunklength' / 2 overlap -chunklength = 8 - -#create the chunks of the reference sequence with regular expressions for the variable nucleotides -for i in range(0, len(searchstrings["ca"]) - chunklength, chunklength / 2): - pos = i - chunk = searchstrings["ca"][i:i+chunklength] - result = "" - varsInResult = 0 - for c in chunk: - if pos in ca1.keys(): - varsInResult += 1 - result += "[" + ca1[pos] + ca2[pos] + "]" - else: - result += c - pos += 1 - compiledregex["ca"].append((re.compile(result), varsInResult)) - -for i in range(0, len(searchstrings["cg"]) - chunklength, chunklength / 2): - pos = i - chunk = searchstrings["cg"][i:i+chunklength] - result = "" - varsInResult = 0 - for c in chunk: - if pos in cg1.keys(): - varsInResult += 1 - result += "[" + "".join(set([cg1[pos], cg2[pos], cg3[pos], cg4[pos]])) + "]" - else: - result += c - pos += 1 - compiledregex["cg"].append((re.compile(result), varsInResult)) - -for i in range(0, len(searchstrings["cm"]) - chunklength, chunklength / 2): - compiledregex["cm"].append((re.compile(searchstrings["cm"][i:i+chunklength]), False)) - -for i in range(0, len(searchstrings["ce"]) - chunklength + 1, chunklength / 2): - compiledregex["ce"].append((re.compile(searchstrings["ce"][i:i+chunklength]), False)) - -def removeAndReturnMaxIndex(x): #simplifies a list comprehension - m = max(x) - index = x.index(m) - x[index] = 0 - return index - - -start_location = dict() -hits = dict() -alltotal = 0 -for key in compiledregex.keys(): #for ca/cg/cm/ce - regularexpressions = compiledregex[key] #get the compiled regular expressions - for ID in dic.keys()[0:]: #for every ID - if ID not in hits.keys(): #ensure that the dictionairy that keeps track of the hits for every gene exists - hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ce_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0} - currentIDHits = hits[ID] - seq = dic[ID] - lastindex = 0 - start_zero = len(searchstrings[key]) #allows the reference sequence to start before search sequence (start_locations of < 0) - start = [0] * (len(seq) + start_zero) - for i, regexp in enumerate(regularexpressions): #for every regular expression - relativeStartLocation = lastindex - (chunklength / 2) * i - if relativeStartLocation >= len(seq): - break - regex, hasVar = regexp - matches = regex.finditer(seq[lastindex:]) - for match in matches: #for every match with the current regex, only uses the first hit because of the break at the end of this loop - lastindex += match.start() - start[relativeStartLocation + start_zero] += 1 - if hasVar: #if the regex has a variable nt in it - chunkstart = chunklength / 2 * i #where in the reference does this chunk start - chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end - if key == "ca": #just calculate the variable nt score for 'ca', cheaper - currentIDHits["ca1"] += len([1 for x in ca1 if chunkstart <= x < chunkend and ca1[x] == seq[lastindex + x - chunkstart]]) - currentIDHits["ca2"] += len([1 for x in ca2 if chunkstart <= x < chunkend and ca2[x] == seq[lastindex + x - chunkstart]]) - elif key == "cg": #just calculate the variable nt score for 'cg', cheaper - currentIDHits["cg1"] += len([1 for x in cg1 if chunkstart <= x < chunkend and cg1[x] == seq[lastindex + x - chunkstart]]) - currentIDHits["cg2"] += len([1 for x in cg2 if chunkstart <= x < chunkend and cg2[x] == seq[lastindex + x - chunkstart]]) - currentIDHits["cg3"] += len([1 for x in cg3 if chunkstart <= x < chunkend and cg3[x] == seq[lastindex + x - chunkstart]]) - currentIDHits["cg4"] += len([1 for x in cg4 if chunkstart <= x < chunkend and cg4[x] == seq[lastindex + x - chunkstart]]) - else: #key == "cm" #no variable regions in 'cm' or 'ce' - pass - break #this only breaks when there was a match with the regex, breaking means the 'else:' clause is skipped - else: #only runs if there were no hits - continue - #print "found ", regex.pattern , "at", lastindex, "adding one to", (lastindex - chunklength / 2 * i), "to the start array of", ID, "gene", key, "it's now:", start[lastindex - chunklength / 2 * i] - currentIDHits[key + "_hits"] += 1 - start_location[ID + "_" + key] = str([(removeAndReturnMaxIndex(start) + 1 - start_zero) for x in range(5) if len(start) > 0 and max(start) > 1]) - #start_location[ID + "_" + key] = str(start.index(max(start))) - - -varsInCA = float(len(ca1.keys()) * 2) -varsInCG = float(len(cg1.keys()) * 2) - 2 # -2 because the sliding window doesn't hit the first and last nt twice -varsInCM = 0 -varsInCE = 0 - -def round_int(val): - return int(round(val)) - -first = True -seq_write_count=0 -with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence - with open(output, 'w') as o: - for line in f: - total += 1 - if first: - o.write("Sequence ID\tbest_match\tnt_hit_percentage\tchunk_hit_percentage\tstart_locations\n") - first = False - continue - linesplt = line.split("\t") - if linesplt[2] == "No results": - pass - ID = linesplt[1] - currentIDHits = hits[ID] - possibleca = float(len(compiledregex["ca"])) - possiblecg = float(len(compiledregex["cg"])) - possiblecm = float(len(compiledregex["cm"])) - possiblece = float(len(compiledregex["ce"])) - cahits = currentIDHits["ca_hits"] - cghits = currentIDHits["cg_hits"] - cmhits = currentIDHits["cm_hits"] - cehits = currentIDHits["ce_hits"] - if cahits >= cghits and cahits >= cmhits and cahits >= cehits: #its a ca gene - ca1hits = currentIDHits["ca1"] - ca2hits = currentIDHits["ca2"] - if ca1hits >= ca2hits: - o.write(ID + "\tIGA1\t" + str(round_int(ca1hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") - else: - o.write(ID + "\tIGA2\t" + str(round_int(ca2hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") - elif cghits >= cahits and cghits >= cmhits and cghits >= cehits: #its a cg gene - cg1hits = currentIDHits["cg1"] - cg2hits = currentIDHits["cg2"] - cg3hits = currentIDHits["cg3"] - cg4hits = currentIDHits["cg4"] - if cg1hits >= cg2hits and cg1hits >= cg3hits and cg1hits >= cg4hits: #cg1 gene - o.write(ID + "\tIGG1\t" + str(round_int(cg1hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") - elif cg2hits >= cg1hits and cg2hits >= cg3hits and cg2hits >= cg4hits: #cg2 gene - o.write(ID + "\tIGG2\t" + str(round_int(cg2hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") - elif cg3hits >= cg1hits and cg3hits >= cg2hits and cg3hits >= cg4hits: #cg3 gene - o.write(ID + "\tIGG3\t" + str(round_int(cg3hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") - else: #cg4 gene - o.write(ID + "\tIGG4\t" + str(round_int(cg4hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") - else: #its a cm or ce gene - if cmhits >= cehits: - o.write(ID + "\tIGM\t100\t" + str(round_int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cm"] + "\n") - else: - o.write(ID + "\tIGE\t100\t" + str(round_int(cehits / possiblece * 100)) + "\t" + start_location[ID + "_ce"] + "\n") - seq_write_count += 1 - -print "Time: %i" % (int(time.time() * 1000) - starttime) - -print "Number of sequences written to file:", seq_write_count - - - - -
--- a/imgt_loader.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - -summ.file = args[1] -aa.file = args[2] -junction.file = args[3] -out.file = args[4] - -summ = read.table(summ.file, sep="\t", header=T, quote="", fill=T) -aa = read.table(aa.file, sep="\t", header=T, quote="", fill=T) -junction = read.table(junction.file, sep="\t", header=T, quote="", fill=T) - -fix_column_names = function(df){ - if("V.DOMAIN.Functionality" %in% names(df)){ - names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality" - print("found V.DOMAIN.Functionality, changed") - } - if("V.DOMAIN.Functionality.comment" %in% names(df)){ - names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment" - print("found V.DOMAIN.Functionality.comment, changed") - } - return(df) -} - -summ = fix_column_names(summ) -aa = fix_column_names(aa) -junction = fix_column_names(junction) - -old_summary_columns=c('Sequence.ID','JUNCTION.frame','V.GENE.and.allele','D.GENE.and.allele','J.GENE.and.allele','CDR1.IMGT.length','CDR2.IMGT.length','CDR3.IMGT.length','Orientation') -old_sequence_columns=c('CDR1.IMGT','CDR2.IMGT','CDR3.IMGT') -old_junction_columns=c('JUNCTION') - -added_summary_columns=c('Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence') -added_sequence_columns=c('FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT') - -added_junction_columns=c('P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION') -added_junction_columns=c(added_junction_columns, 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb') - -out=summ[,c("Sequence.ID","JUNCTION.frame","V.GENE.and.allele","D.GENE.and.allele","J.GENE.and.allele")] - -out[,"CDR1.Seq"] = aa[,"CDR1.IMGT"] -out[,"CDR1.Length"] = summ[,"CDR1.IMGT.length"] - -out[,"CDR2.Seq"] = aa[,"CDR2.IMGT"] -out[,"CDR2.Length"] = summ[,"CDR2.IMGT.length"] - -out[,"CDR3.Seq"] = aa[,"CDR3.IMGT"] -out[,"CDR3.Length"] = summ[,"CDR3.IMGT.length"] - -out[,"CDR3.Seq.DNA"] = junction[,"JUNCTION"] -out[,"CDR3.Length.DNA"] = nchar(as.character(junction[,"JUNCTION"])) -out[,"Strand"] = summ[,"Orientation"] -out[,"CDR3.Found.How"] = "a" - -out[,added_summary_columns] = summ[,added_summary_columns] - -out[,added_sequence_columns] = aa[,added_sequence_columns] - -out[,added_junction_columns] = junction[,added_junction_columns] - -out[,"Top V Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"V.GENE.and.allele"])) -out[,"Top D Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"D.GENE.and.allele"])) -out[,"Top J Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"J.GENE.and.allele"])) - -out = out[,c('Sequence.ID','JUNCTION.frame','Top V Gene','Top D Gene','Top J Gene','CDR1.Seq','CDR1.Length','CDR2.Seq','CDR2.Length','CDR3.Seq','CDR3.Length','CDR3.Seq.DNA','CDR3.Length.DNA','Strand','CDR3.Found.How','Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence','FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT','P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb')] - -names(out) = c('ID','VDJ Frame','Top V Gene','Top D Gene','Top J Gene','CDR1 Seq','CDR1 Length','CDR2 Seq','CDR2 Length','CDR3 Seq','CDR3 Length','CDR3 Seq DNA','CDR3 Length DNA','Strand','CDR3 Found How','Functionality','V-REGION identity %','V-REGION identity nt','D-REGION reading frame','AA JUNCTION','Functionality comment','Sequence','FR1-IMGT','FR2-IMGT','FR3-IMGT','CDR3-IMGT','JUNCTION','J-REGION','FR4-IMGT','P3V-nt nb','N-REGION-nt nb','N1-REGION-nt nb','P5D-nt nb','P3D-nt nb','N2-REGION-nt nb','P5J-nt nb','3V-REGION trimmed-nt nb','5D-REGION trimmed-nt nb','3D-REGION trimmed-nt nb','5J-REGION trimmed-nt nb','N-REGION','N1-REGION','N2-REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb') - -out[,"VDJ Frame"] = as.character(out[,"VDJ Frame"]) - -fltr = out[,"VDJ Frame"] == "in-frame" -if(any(fltr, na.rm = T)){ - out[fltr, "VDJ Frame"] = "In-frame" -} - -fltr = out[,"VDJ Frame"] == "null" -if(any(fltr, na.rm = T)){ - out[fltr, "VDJ Frame"] = "Out-of-frame" -} - -fltr = out[,"VDJ Frame"] == "out-of-frame" -if(any(fltr, na.rm = T)){ - out[fltr, "VDJ Frame"] = "Out-of-frame" -} - -fltr = out[,"VDJ Frame"] == "" -if(any(fltr, na.rm = T)){ - out[fltr, "VDJ Frame"] = "Out-of-frame" -} - -for(col in c('Top V Gene','Top D Gene','Top J Gene')){ - out[,col] = as.character(out[,col]) - fltr = out[,col] == "" - if(any(fltr, na.rm = T)){ - out[fltr,col] = "NA" - } -} - -write.table(out, out.file, sep="\t", quote=F, row.names=F, col.names=T)
--- a/merge.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - -input.1 = args[1] -input.2 = args[2] - -fields.1 = args[3] -fields.2 = args[4] - -field.1 = args[5] -field.2 = args[6] - -output = args[7] - -dat1 = read.table(input.1, header=T, sep="\t", quote="", stringsAsFactors=F, fill=T, row.names=NULL) -if(fields.1 != "all"){ - fields.1 = unlist(strsplit(fields.1, ",")) - dat1 = dat1[,fields.1] -} -dat2 = read.table(input.2, header=T, sep="\t", quote="", stringsAsFactors=F, fill=T, row.names=NULL) -if(fields.2 != "all"){ - fields.2 = unlist(strsplit(fields.2, ",")) - dat2 = dat2[,fields.2] -} - -dat3 = merge(dat1, dat2, by.x=field.1, by.y=field.2) - -write.table(dat3, output, sep="\t",quote=F,row.names=F,col.names=T)
--- a/merge_and_filter.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,304 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - - -summaryfile = args[1] -sequencesfile = args[2] -mutationanalysisfile = args[3] -mutationstatsfile = args[4] -hotspotsfile = args[5] -aafile = args[6] -gene_identification_file= args[7] -output = args[8] -before.unique.file = args[9] -unmatchedfile = args[10] -method=args[11] -functionality=args[12] -unique.type=args[13] -filter.unique=args[14] -filter.unique.count=as.numeric(args[15]) -class.filter=args[16] -empty.region.filter=args[17] - -print(paste("filter.unique.count:", filter.unique.count)) - -summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") -sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") -mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") -mutationstats = read.table(mutationstatsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") -hotspots = read.table(hotspotsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") -AAs = read.table(aafile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") -gene_identification = read.table(gene_identification_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") - -fix_column_names = function(df){ - if("V.DOMAIN.Functionality" %in% names(df)){ - names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality" - print("found V.DOMAIN.Functionality, changed") - } - if("V.DOMAIN.Functionality.comment" %in% names(df)){ - names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment" - print("found V.DOMAIN.Functionality.comment, changed") - } - return(df) -} - -fix_non_unique_ids = function(df){ - df$Sequence.ID = paste(df$Sequence.ID, 1:nrow(df)) - return(df) -} - -summ = fix_column_names(summ) -sequences = fix_column_names(sequences) -mutationanalysis = fix_column_names(mutationanalysis) -mutationstats = fix_column_names(mutationstats) -hotspots = fix_column_names(hotspots) -AAs = fix_column_names(AAs) - -if(method == "blastn"){ - #"qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" - gene_identification = gene_identification[!duplicated(gene_identification$qseqid),] - ref_length = data.frame(sseqid=c("ca1", "ca2", "cg1", "cg2", "cg3", "cg4", "cm"), ref.length=c(81,81,141,141,141,141,52)) - gene_identification = merge(gene_identification, ref_length, by="sseqid", all.x=T) - gene_identification$chunk_hit_percentage = (gene_identification$length / gene_identification$ref.length) * 100 - gene_identification = gene_identification[,c("qseqid", "chunk_hit_percentage", "pident", "qstart", "sseqid")] - colnames(gene_identification) = c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match") -} - -#print("Summary analysis files columns") -#print(names(summ)) - - - -input.sequence.count = nrow(summ) -print(paste("Number of sequences in summary file:", input.sequence.count)) - -filtering.steps = data.frame(character(0), numeric(0)) - -filtering.steps = rbind(filtering.steps, c("Input", input.sequence.count)) - -filtering.steps[,1] = as.character(filtering.steps[,1]) -filtering.steps[,2] = as.character(filtering.steps[,2]) -#filtering.steps[,3] = as.numeric(filtering.steps[,3]) - -#print("summary files columns") -#print(names(summ)) - -summ = merge(summ, gene_identification, by="Sequence.ID") - -print(paste("Number of sequences after merging with gene identification:", nrow(summ))) - -summ = summ[summ$Functionality != "No results",] - -print(paste("Number of sequences after 'No results' filter:", nrow(summ))) - -filtering.steps = rbind(filtering.steps, c("After 'No results' filter", nrow(summ))) - -if(functionality == "productive"){ - summ = summ[summ$Functionality == "productive (see comment)" | summ$Functionality == "productive",] -} else if (functionality == "unproductive"){ - summ = summ[summ$Functionality == "unproductive (see comment)" | summ$Functionality == "unproductive",] -} else if (functionality == "remove_unknown"){ - summ = summ[summ$Functionality != "No results" & summ$Functionality != "unknown (see comment)" & summ$Functionality != "unknown",] -} - -print(paste("Number of sequences after functionality filter:", nrow(summ))) - -filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ))) - -if(F){ #to speed up debugging - set.seed(1) - summ = summ[sample(nrow(summ), floor(nrow(summ) * 0.03)),] - print(paste("Number of sequences after sampling 3%:", nrow(summ))) - - filtering.steps = rbind(filtering.steps, c("Number of sequences after sampling 3%", nrow(summ))) -} - -print("mutation analysis files columns") -print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])])) - -result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID") - -print(paste("Number of sequences after merging with mutation analysis file:", nrow(result))) - -#print("mutation stats files columns") -#print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])])) - -result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID") - -print(paste("Number of sequences after merging with mutation stats file:", nrow(result))) - -print("hotspots files columns") -print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])])) - -result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID") - -print(paste("Number of sequences after merging with hotspots file:", nrow(result))) - -print("sequences files columns") -print(c("FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")) - -sequences = sequences[,c("Sequence.ID", "FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")] -names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq") -result = merge(result, sequences, by="Sequence.ID", all.x=T) - -AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")] -names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA") -result = merge(result, AAs, by="Sequence.ID", all.x=T) - -print(paste("Number of sequences in result after merging with sequences:", nrow(result))) - -result$VGene = gsub("^Homsap ", "", result$V.GENE.and.allele) -result$VGene = gsub("[*].*", "", result$VGene) -result$DGene = gsub("^Homsap ", "", result$D.GENE.and.allele) -result$DGene = gsub("[*].*", "", result$DGene) -result$JGene = gsub("^Homsap ", "", result$J.GENE.and.allele) -result$JGene = gsub("[*].*", "", result$JGene) - -splt = strsplit(class.filter, "_")[[1]] -chunk_hit_threshold = as.numeric(splt[1]) -nt_hit_threshold = as.numeric(splt[2]) - -higher_than=(result$chunk_hit_percentage >= chunk_hit_threshold & result$nt_hit_percentage >= nt_hit_threshold) - -if(!all(higher_than, na.rm=T)){ #check for no unmatched - result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"]) -} - -if(class.filter == "101_101"){ - result$best_match = "all" -} - -write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T) - -print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == "", na.rm=T))) -print(paste("Number of empty FR2 sequences:", sum(result$FR2.IMGT.seq == "", na.rm=T))) -print(paste("Number of empty CDR2 sequences:", sum(result$CDR2.IMGT.seq == "", na.rm=T))) -print(paste("Number of empty FR3 sequences:", sum(result$FR3.IMGT.seq == "", na.rm=T))) - -if(empty.region.filter == "leader"){ - result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] -} else if(empty.region.filter == "FR1"){ - result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] -} else if(empty.region.filter == "CDR1"){ - result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] -} else if(empty.region.filter == "FR2"){ - result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] -} - -print(paste("After removal sequences that are missing a gene region:", nrow(result))) -filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result))) - -if(empty.region.filter == "leader"){ - result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] -} else if(empty.region.filter == "FR1"){ - result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] -} else if(empty.region.filter == "CDR1"){ - result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] -} else if(empty.region.filter == "FR2"){ - result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] -} - -print(paste("Number of sequences in result after n filtering:", nrow(result))) -filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result))) - -cleanup_columns = c("FR1.IMGT.Nb.of.mutations", - "CDR1.IMGT.Nb.of.mutations", - "FR2.IMGT.Nb.of.mutations", - "CDR2.IMGT.Nb.of.mutations", - "FR3.IMGT.Nb.of.mutations") - -for(col in cleanup_columns){ - result[,col] = gsub("\\(.*\\)", "", result[,col]) - result[,col] = as.numeric(result[,col]) - result[is.na(result[,col]),] = 0 -} - -write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T) - - -if(filter.unique != "no"){ - clmns = names(result) - if(filter.unique == "remove_vjaa"){ - result$unique.def = paste(result$VGene, result$JGene, result$CDR3.IMGT.AA) - } else if(empty.region.filter == "leader"){ - result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } else if(empty.region.filter == "FR1"){ - result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } else if(empty.region.filter == "CDR1"){ - result$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } else if(empty.region.filter == "FR2"){ - result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } - - if(grepl("remove", filter.unique)){ - result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] - unique.defs = data.frame(table(result$unique.def)) - unique.defs = unique.defs[unique.defs$Freq >= filter.unique.count,] - result = result[result$unique.def %in% unique.defs$Var1,] - } - - if(filter.unique != "remove_vjaa"){ - result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it - } - - result = result[!duplicated(result$unique.def),] -} - -write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T) - -filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) - -print(paste("Number of sequences in result after unique filtering:", nrow(result))) - -if(nrow(summ) == 0){ - stop("No data remaining after filter") -} - -result$best_match_class = gsub(",.*", "", result$best_match) #gsub so the unmatched don't have a class after it - -#result$past = "" -#cls = unlist(strsplit(unique.type, ",")) -#for (i in 1:nrow(result)){ -# result[i,"past"] = paste(result[i,cls], collapse=":") -#} - - - -result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":")) - -result.matched = result[!grepl("unmatched", result$best_match),] -result.unmatched = result[grepl("unmatched", result$best_match),] - -result = rbind(result.matched, result.unmatched) - -result = result[!(duplicated(result$past)), ] - -result = result[,!(names(result) %in% c("past", "best_match_class"))] - -print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result))) - -filtering.steps = rbind(filtering.steps, c("After remove duplicates based on filter", nrow(result))) - -unmatched = result[grepl("^unmatched", result$best_match),c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")] - -print(paste("Number of rows in result:", nrow(result))) -print(paste("Number of rows in unmatched:", nrow(unmatched))) - -matched.sequences = result[!grepl("^unmatched", result$best_match),] - -write.table(x=matched.sequences, file=gsub("merged.txt$", "filtered.txt", output), sep="\t",quote=F,row.names=F,col.names=T) - -matched.sequences.count = nrow(matched.sequences) -unmatched.sequences.count = sum(grepl("^unmatched", result$best_match)) -if(matched.sequences.count <= unmatched.sequences.count){ - print("WARNING NO MATCHED (SUB)CLASS SEQUENCES!!") -} - -filtering.steps = rbind(filtering.steps, c("Number of matched sequences", matched.sequences.count)) -filtering.steps = rbind(filtering.steps, c("Number of unmatched sequences", unmatched.sequences.count)) -filtering.steps[,2] = as.numeric(filtering.steps[,2]) -filtering.steps$perc = round(filtering.steps[,2] / input.sequence.count * 100, 2) - -write.table(x=filtering.steps, file=gsub("unmatched", "filtering_steps", unmatchedfile), sep="\t",quote=F,row.names=F,col.names=F) - -write.table(x=result, file=output, sep="\t",quote=F,row.names=F,col.names=T) -write.table(x=unmatched, file=unmatchedfile, sep="\t",quote=F,row.names=F,col.names=T)
--- a/mutation_column_checker.py Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -import re - -mutationMatcher = re.compile("^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?") - -with open("7_V-REGION-mutation-and-AA-change-table.txt", 'r') as file_handle: - first = True - fr3_index = -1 - for i, line in enumerate(file_handle): - line_split = line.split("\t") - if first: - fr3_index = line_split.index("FR3-IMGT") - first = False - continue - - if len(line_split) < fr3_index: - continue - - fr3_data = line_split[fr3_index] - if len(fr3_data) > 5: - try: - test = [mutationMatcher.match(x).groups() for x in fr3_data.split("|") if x] - except: - print(line_split[1]) - print("Something went wrong at line {line} with:".format(line=line_split[0])) - #print([x for x in fr3_data.split("|") if not mutationMatcher.match(x)]) - if i % 100000 == 0: - print(i)
--- a/naive_output.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - -naive.file = args[1] -shm.file = args[2] -output.file.ca = args[3] -output.file.cg = args[4] -output.file.cm = args[5] - -naive = read.table(naive.file, sep="\t", header=T, quote="", fill=T) -shm.merge = read.table(shm.file, sep="\t", header=T, quote="", fill=T) - - -final = merge(naive, shm.merge[,c("Sequence.ID", "best_match")], by.x="ID", by.y="Sequence.ID") -print(paste("nrow final:", nrow(final))) -names(final)[names(final) == "best_match"] = "Sample" -final.numeric = final[,sapply(final, is.numeric)] -final.numeric[is.na(final.numeric)] = 0 -final[,sapply(final, is.numeric)] = final.numeric - -final.ca = final[grepl("^ca", final$Sample),] -final.cg = final[grepl("^cg", final$Sample),] -final.cm = final[grepl("^cm", final$Sample),] - -if(nrow(final.ca) > 0){ - final.ca$Replicate = 1 -} - -if(nrow(final.cg) > 0){ - final.cg$Replicate = 1 -} - -if(nrow(final.cm) > 0){ - final.cm$Replicate = 1 -} - -#print(paste("nrow final:", nrow(final))) -#final2 = final -#final2$Sample = gsub("[0-9]", "", final2$Sample) -#final = rbind(final, final2) -#final$Replicate = 1 - -write.table(final.ca, output.file.ca, quote=F, sep="\t", row.names=F, col.names=T) -write.table(final.cg, output.file.cg, quote=F, sep="\t", row.names=F, col.names=T) -write.table(final.cm, output.file.cm, quote=F, sep="\t", row.names=F, col.names=T) -
--- a/new_imgt.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - -imgt.dir = args[1] -merged.file = args[2] -gene = args[3] - -merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, comment.char="", quote="") - -if(!("Sequence.ID" %in% names(merged))){ #change-o db - print("Change-O DB changing 'SEQUENCE_ID' to 'Sequence.ID'") - names(merged)[which(names[merged] == "SEQUENCE_ID")] = "Sequence.ID" -} - -if(gene != "-"){ - merged = merged[grepl(paste("^", gene, sep=""), merged$best_match),] -} - -if("best_match" %in% names(merged)){ - merged = merged[!grepl("unmatched", merged$best_match),] -} - -nrow_dat = 0 - -for(f in list.files(imgt.dir, pattern="*.txt$")){ - #print(paste("filtering", f)) - path = file.path(imgt.dir, f) - dat = read.table(path, header=T, sep="\t", fill=T, quote="", stringsAsFactors=F, check.names=FALSE, comment.char="") - - dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,] - - nrow_dat = nrow(dat) - - if(nrow(dat) > 0 & grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." file - dat[,grepl("^FR1", names(dat))] = 0 - } - - write.table(dat, path, quote=F, sep="\t", row.names=F, col.names=T, na="") -} - -print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences"))
--- a/pattern_plots.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,178 +0,0 @@ -library(ggplot2) -library(reshape2) -library(scales) - -args <- commandArgs(trailingOnly = TRUE) - -input.file = args[1] #the data that's get turned into the "SHM overview" table in the html report "data_sum.txt" - -plot1.path = args[2] -plot1.png = paste(plot1.path, ".png", sep="") -plot1.txt = paste(plot1.path, ".txt", sep="") -plot1.pdf = paste(plot1.path, ".pdf", sep="") - -plot2.path = args[3] -plot2.png = paste(plot2.path, ".png", sep="") -plot2.txt = paste(plot2.path, ".txt", sep="") -plot2.pdf = paste(plot2.path, ".pdf", sep="") - -plot3.path = args[4] -plot3.png = paste(plot3.path, ".png", sep="") -plot3.txt = paste(plot3.path, ".txt", sep="") -plot3.pdf = paste(plot3.path, ".pdf", sep="") - -clean.output = args[5] - -dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1) - -classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE") -xyz = c("x", "y", "z") -new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep=".")) - -names(dat) = new.names - -clean.dat = dat -clean.dat = clean.dat[,c(paste(rep(classes, each=3), xyz, sep="."), paste("all", xyz, sep="."), paste("un", xyz, sep="."))] - -write.table(clean.dat, clean.output, quote=F, sep="\t", na="", row.names=T, col.names=NA) - -dat["RGYW.WRCY",] = colSums(dat[c(13,14),], na.rm=T) -dat["TW.WA",] = colSums(dat[c(15,16),], na.rm=T) - -data1 = dat[c("RGYW.WRCY", "TW.WA"),] - -data1 = data1[,names(data1)[grepl(".z", names(data1))]] -names(data1) = gsub("\\..*", "", names(data1)) - -data1 = melt(t(data1)) - -names(data1) = c("Class", "Type", "value") - -chk = is.na(data1$value) -if(any(chk)){ - data1[chk, "value"] = 0 -} - -data1 = data1[order(data1$Type),] - -write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) - -p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) + ggtitle("Percentage of mutations in AID and pol eta motives") -p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4")) -#p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4")) -png(filename=plot1.png, width=510, height=300) -print(p) -dev.off() - -ggsave(plot1.pdf, p) - -data2 = dat[c(1, 5:8),] - -data2 = data2[,names(data2)[grepl("\\.x", names(data2))]] -names(data2) = gsub(".x", "", names(data2)) - -data2["A/T",] = dat["Targeting of A T (%)",names(dat)[grepl("\\.z", names(dat))]] - -data2["G/C transitions",] = round(data2["Transitions at G C (%)",] / data2["Number of Mutations (%)",] * 100, 1) - -data2["mutation.at.gc",] = dat["Transitions at G C (%)",names(dat)[grepl("\\.y", names(dat))]] -data2["G/C transversions",] = round((data2["mutation.at.gc",] - data2["Transitions at G C (%)",]) / data2["Number of Mutations (%)",] * 100, 1) - -data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0 -data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0 -data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0 -data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0 - -data2 = melt(t(data2[c("A/T","G/C transitions","G/C transversions"),])) - -names(data2) = c("Class", "Type", "value") - -chk = is.na(data2$value) -if(any(chk)){ - data2[chk, "value"] = 0 -} - -data2 = data2[order(data2$Type),] - -write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) - -p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") + ggtitle("Relative mutation patterns") -p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) -#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) -png(filename=plot2.png, width=480, height=300) -print(p) -dev.off() - -ggsave(plot2.pdf, p) - -data3 = dat[c(5, 6, 8, 17:20),] -data3 = data3[,names(data3)[grepl("\\.x", names(data3))]] -names(data3) = gsub(".x", "", names(data3)) - -data3["G/C transitions",] = round(data3["Transitions at G C (%)",] / (data3["C",] + data3["G",]) * 100, 1) - -data3["G/C transversions",] = round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / (data3["C",] + data3["G",]) * 100, 1) - -data3["A/T",] = round(data3["Targeting of A T (%)",] / (data3["A",] + data3["T",]) * 100, 1) - -data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0 -data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0 - -data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0 -data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0 - -data3["A/T",is.nan(unlist(data3["A/T",]))] = 0 -data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0 - -data3 = melt(t(data3[8:10,])) -names(data3) = c("Class", "Type", "value") - -chk = is.na(data3$value) -if(any(chk)){ - data3[chk, "value"] = 0 -} - -data3 = data3[order(data3$Type),] - -write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) - -p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) + ggtitle("Absolute mutation patterns") -p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) -#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) -png(filename=plot3.png, width=480, height=300) -print(p) -dev.off() - -ggsave(plot3.pdf, p) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
--- a/plot_pdf.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -library(ggplot2) - -args <- commandArgs(trailingOnly = TRUE) -print(args) - -input = args[1] -outputdir = args[2] -setwd(outputdir) - -load(input) - -print(names(pdfplots)) - -for(n in names(pdfplots)){ - print(paste("n:", n)) - ggsave(pdfplots[[n]], file=n) -}
--- a/sequence_overview.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,363 +0,0 @@ -library(reshape2) - -args <- commandArgs(trailingOnly = TRUE) - -before.unique.file = args[1] -merged.file = args[2] -outputdir = args[3] -gene.classes = unlist(strsplit(args[4], ",")) -hotspot.analysis.sum.file = args[5] -NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") -NTsum.file = paste(outputdir, "ntsum.txt", sep="/") -main.html = "index.html" -empty.region.filter = args[6] - - -setwd(outputdir) - -before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") -merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") -hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") - -#before.unique = before.unique[!grepl("unmatched", before.unique$best_match),] - -if(empty.region.filter == "leader"){ - before.unique$seq_conc = paste(before.unique$FR1.IMGT.seq, before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) -} else if(empty.region.filter == "FR1"){ - before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) -} else if(empty.region.filter == "CDR1"){ - before.unique$seq_conc = paste(before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) -} else if(empty.region.filter == "FR2"){ - before.unique$seq_conc = paste(before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) -} - -IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] -IDs$best_match = as.character(IDs$best_match) - -dat = data.frame(table(before.unique$seq_conc)) - -names(dat) = c("seq_conc", "Freq") - -dat$seq_conc = factor(dat$seq_conc) - -dat = dat[order(as.character(dat$seq_conc)),] - -#writing html from R... -get.bg.color = function(val){ - if(val %in% c("TRUE", "FALSE", "T", "F")){ #if its a logical value, give the background a green/red color - return(ifelse(val,"#eafaf1","#f9ebea")) - } else if (!is.na(as.numeric(val))) { #if its a numerical value, give it a grey tint if its >0 - return(ifelse(val > 0,"#eaecee","white")) - } else { - return("white") - } -} -td = function(val) { - return(paste("<td bgcolor='", get.bg.color(val), "'>", val, "</td>", sep="")) -} -tr = function(val) { - return(paste(c("<tr>", sapply(val, td), "</tr>"), collapse="")) -} - -make.link = function(id, clss, val) { - paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") -} -tbl = function(df) { - res = "<table border='1'>" - for(i in 1:nrow(df)){ - res = paste(res, tr(df[i,]), sep="") - } - res = paste(res, "</table>") -} - -cat("<center><img src=''> Please note that this tab is based on all sequences before filter unique sequences and the remove duplicates based on filters are applied. In this table only sequences occuring more than once are included. </center>", file=main.html, append=F) -cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=T) - -if(empty.region.filter == "leader"){ - cat("<caption>FR1+CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) -} else if(empty.region.filter == "FR1"){ - cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) -} else if(empty.region.filter == "CDR1"){ - cat("<caption>FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) -} else if(empty.region.filter == "FR2"){ - cat("<caption>CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) -} - -cat("<tr>", file=main.html, append=T) -cat("<th>Sequence</th><th>Functionality</th><th>IGA1</th><th>IGA2</th><th>IGG1</th><th>IGG2</th><th>IGG3</th><th>IGG4</th><th>IGM</th><th>IGE</th><th>UN</th>", file=main.html, append=T) -cat("<th>total IGA</th><th>total IGG</th><th>total IGM</th><th>total IGE</th><th>number of subclasses</th><th>present in both IGA and IGG</th><th>present in IGA, IGG and IGM</th><th>present in IGA, IGG and IGE</th><th>present in IGA, IGG, IGM and IGE</th><th>IGA1+IGA2</th>", file=main.html, append=T) -cat("<th>IGG1+IGG2</th><th>IGG1+IGG3</th><th>IGG1+IGG4</th><th>IGG2+IGG3</th><th>IGG2+IGG4</th><th>IGG3+IGG4</th>", file=main.html, append=T) -cat("<th>IGG1+IGG2+IGG3</th><th>IGG2+IGG3+IGG4</th><th>IGG1+IGG2+IGG4</th><th>IGG1+IGG3+IGG4</th><th>IGG1+IGG2+IGG3+IGG4</th>", file=main.html, append=T) -cat("</tr>", file=main.html, append=T) - - - -single.sequences=0 #sequence only found once, skipped -in.multiple=0 #same sequence across multiple subclasses -multiple.in.one=0 #same sequence multiple times in one subclass -unmatched=0 #all of the sequences are unmatched -some.unmatched=0 #one or more sequences in a clone are unmatched -matched=0 #should be the same als matched sequences - -sequence.id.page="by_id.html" - -for(i in 1:nrow(dat)){ - - ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGA1", IDs$best_match),] - ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGA2", IDs$best_match),] - - cg1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG1", IDs$best_match),] - cg2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG2", IDs$best_match),] - cg3 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG3", IDs$best_match),] - cg4 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG4", IDs$best_match),] - - cm = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGM", IDs$best_match),] - - ce = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGE", IDs$best_match),] - - un = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^unmatched", IDs$best_match),] - - allc = rbind(ca1, ca2, cg1, cg2, cg3, cg4, cm, ce, un) - - ca1.n = nrow(ca1) - ca2.n = nrow(ca2) - - cg1.n = nrow(cg1) - cg2.n = nrow(cg2) - cg3.n = nrow(cg3) - cg4.n = nrow(cg4) - - cm.n = nrow(cm) - - ce.n = nrow(ce) - - un.n = nrow(un) - - classes = c(ca1.n, ca2.n, cg1.n, cg2.n, cg3.n, cg4.n, cm.n, ce.n, un.n) - - classes.sum = sum(classes) - - if(classes.sum == 1){ - single.sequences = single.sequences + 1 - next - } - - if(un.n == classes.sum){ - unmatched = unmatched + 1 - next - } - - classes.no.un = classes[-length(classes)] - - in.classes = sum(classes.no.un > 0) - - matched = matched + in.classes #count in how many subclasses the sequence occurs. - - if(any(classes == classes.sum)){ - multiple.in.one = multiple.in.one + 1 - } else if (un.n > 0) { - some.unmatched = some.unmatched + 1 - } else { - in.multiple = in.multiple + 1 - } - - id = as.numeric(dat[i,"seq_conc"]) - - functionality = paste(unique(allc[,"Functionality"]), collapse=",") - - by.id.row = c() - - if(ca1.n > 0){ - cat(tbl(ca1), file=paste("IGA1_", id, ".html", sep="")) - } - - if(ca2.n > 0){ - cat(tbl(ca2), file=paste("IGA2_", id, ".html", sep="")) - } - - if(cg1.n > 0){ - cat(tbl(cg1), file=paste("IGG1_", id, ".html", sep="")) - } - - if(cg2.n > 0){ - cat(tbl(cg2), file=paste("IGG2_", id, ".html", sep="")) - } - - if(cg3.n > 0){ - cat(tbl(cg3), file=paste("IGG3_", id, ".html", sep="")) - } - - if(cg4.n > 0){ - cat(tbl(cg4), file=paste("IGG4_", id, ".html", sep="")) - } - - if(cm.n > 0){ - cat(tbl(cm), file=paste("IGM_", id, ".html", sep="")) - } - - if(ce.n > 0){ - cat(tbl(ce), file=paste("IGE_", id, ".html", sep="")) - } - - if(un.n > 0){ - cat(tbl(un), file=paste("un_", id, ".html", sep="")) - } - - ca1.html = make.link(id, "IGA1", ca1.n) - ca2.html = make.link(id, "IGA2", ca2.n) - - cg1.html = make.link(id, "IGG1", cg1.n) - cg2.html = make.link(id, "IGG2", cg2.n) - cg3.html = make.link(id, "IGG3", cg3.n) - cg4.html = make.link(id, "IGG4", cg4.n) - - cm.html = make.link(id, "IGM", cm.n) - - ce.html = make.link(id, "IGE", ce.n) - - un.html = make.link(id, "un", un.n) - - #extra columns - ca.n = ca1.n + ca2.n - - cg.n = cg1.n + cg2.n + cg3.n + cg4.n - - #in.classes - - in.ca.cg = (ca.n > 0 & cg.n > 0) - - in.ca.cg.cm = (ca.n > 0 & cg.n > 0 & cm.n > 0) - - in.ca.cg.ce = (ca.n > 0 & cg.n > 0 & ce.n > 0) - - in.ca.cg.cm.ce = (ca.n > 0 & cg.n > 0 & cm.n > 0 & ce.n > 0) - - in.ca1.ca2 = (ca1.n > 0 & ca2.n > 0) - - in.cg1.cg2 = (cg1.n > 0 & cg2.n > 0) - in.cg1.cg3 = (cg1.n > 0 & cg3.n > 0) - in.cg1.cg4 = (cg1.n > 0 & cg4.n > 0) - in.cg2.cg3 = (cg2.n > 0 & cg3.n > 0) - in.cg2.cg4 = (cg2.n > 0 & cg4.n > 0) - in.cg3.cg4 = (cg3.n > 0 & cg4.n > 0) - - in.cg1.cg2.cg3 = (cg1.n > 0 & cg2.n > 0 & cg3.n > 0) - in.cg2.cg3.cg4 = (cg2.n > 0 & cg3.n > 0 & cg4.n > 0) - in.cg1.cg2.cg4 = (cg1.n > 0 & cg2.n > 0 & cg4.n > 0) - in.cg1.cg3.cg4 = (cg1.n > 0 & cg3.n > 0 & cg4.n > 0) - - in.cg.all = (cg1.n > 0 & cg2.n > 0 & cg3.n > 0 & cg4.n > 0) - - #rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, un.html) - rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, ce.html, un.html) - rw = c(rw, ca.n, cg.n, cm.n, ce.n, in.classes, in.ca.cg, in.ca.cg.cm, in.ca.cg.ce, in.ca.cg.cm.ce, in.ca1.ca2, in.cg1.cg2, in.cg1.cg3, in.cg1.cg4, in.cg2.cg3, in.cg2.cg4, in.cg3.cg4, in.cg1.cg2.cg3, in.cg2.cg3.cg4, in.cg1.cg2.cg4, in.cg1.cg3.cg4, in.cg.all) - - - - cat(tr(rw), file=main.html, append=T) - - - for(i in 1:nrow(allc)){ #generate html by id - html = make.link(id, allc[i,"best_match"], allc[i,"Sequence.ID"]) - cat(paste(html, "<br />"), file=sequence.id.page, append=T) - } -} - -cat("</table>", file=main.html, append=T) - -print(paste("Single sequences:", single.sequences)) -print(paste("Sequences in multiple subclasses:", in.multiple)) -print(paste("Multiple sequences in one subclass:", multiple.in.one)) -print(paste("Matched with unmatched:", some.unmatched)) -print(paste("Count that should match 'matched' sequences:", matched)) - -#ACGT overview - -#NToverview = merged[!grepl("^unmatched", merged$best_match),] -NToverview = merged - -if(empty.region.filter == "leader"){ - NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) -} else if(empty.region.filter == "FR1"){ - NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) -} else if(empty.region.filter == "CDR1"){ - NToverview$seq = paste(NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) -} else if(empty.region.filter == "FR2"){ - NToverview$seq = paste(NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) -} - -NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) -NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) -NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) -NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) - -#Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) - -#NToverview = rbind(NToverview, NTsum) - -NTresult = data.frame(nt=c("A", "C", "T", "G")) - -for(clazz in gene.classes){ - print(paste("class:", clazz)) - NToverview.sub = NToverview[grepl(paste("^", clazz, sep=""), NToverview$best_match),] - print(paste("nrow:", nrow(NToverview.sub))) - new.col.x = c(sum(NToverview.sub$A), sum(NToverview.sub$C), sum(NToverview.sub$T), sum(NToverview.sub$G)) - new.col.y = sum(new.col.x) - new.col.z = round(new.col.x / new.col.y * 100, 2) - - tmp = names(NTresult) - NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z)) - names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep="")) -} - -NToverview.tmp = NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")] - -names(NToverview.tmp) = c("Sequence.ID", "best_match", "Sequence of the analysed region", "A", "C", "G", "T") - -write.table(NToverview.tmp, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T) - -NToverview = NToverview[!grepl("unmatched", NToverview$best_match),] - -new.col.x = c(sum(NToverview$A), sum(NToverview$C), sum(NToverview$T), sum(NToverview$G)) -new.col.y = sum(new.col.x) -new.col.z = round(new.col.x / new.col.y * 100, 2) - -tmp = names(NTresult) -NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z)) -names(NTresult) = c(tmp, paste("all", c("x", "y", "z"), sep="")) - -names(hotspot.analysis.sum) = names(NTresult) - -hotspot.analysis.sum = rbind(hotspot.analysis.sum, NTresult) - -write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0") - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
--- a/shm_clonality.htm Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,144 +0,0 @@ -<html> - -<head> -<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> -<meta name=Generator content="Microsoft Word 14 (filtered)"> -<style> -<!-- - /* Font Definitions */ - @font-face - {font-family:Calibri; - panose-1:2 15 5 2 2 2 4 3 2 4;} -@font-face - {font-family:Tahoma; - panose-1:2 11 6 4 3 5 4 4 2 4;} - /* Style Definitions */ - p.MsoNormal, li.MsoNormal, div.MsoNormal - {margin-top:0in; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:11.0pt; - font-family:"Calibri","sans-serif";} -a:link, span.MsoHyperlink - {color:blue; - text-decoration:underline;} -a:visited, span.MsoHyperlinkFollowed - {color:purple; - text-decoration:underline;} -p - {margin-right:0in; - margin-left:0in; - font-size:12.0pt; - font-family:"Times New Roman","serif";} -p.MsoAcetate, li.MsoAcetate, div.MsoAcetate - {mso-style-link:"Balloon Text Char"; - margin:0in; - margin-bottom:.0001pt; - font-size:8.0pt; - font-family:"Tahoma","sans-serif";} -p.msochpdefault, li.msochpdefault, div.msochpdefault - {mso-style-name:msochpdefault; - margin-right:0in; - margin-left:0in; - font-size:12.0pt; - font-family:"Calibri","sans-serif";} -p.msopapdefault, li.msopapdefault, div.msopapdefault - {mso-style-name:msopapdefault; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:12.0pt; - font-family:"Times New Roman","serif";} -span.apple-converted-space - {mso-style-name:apple-converted-space;} -span.BalloonTextChar - {mso-style-name:"Balloon Text Char"; - mso-style-link:"Balloon Text"; - font-family:"Tahoma","sans-serif";} -.MsoChpDefault - {font-size:10.0pt; - font-family:"Calibri","sans-serif";} -.MsoPapDefault - {margin-bottom:10.0pt; - line-height:115%;} -@page WordSection1 - {size:8.5in 11.0in; - margin:1.0in 1.0in 1.0in 1.0in;} -div.WordSection1 - {page:WordSection1;} ---> -</style> - -</head> - -<body lang=EN-US link=blue vlink=purple> - -<div class=WordSection1> - -<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; -text-align:justify;background:white'><b><span lang=EN-GB style='color:black'>References</span></b></p> - -<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; -text-align:justify;background:white'><span lang=EN-GB style='color:black'>Gupta, -Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria, -Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). <a name="OLE_LINK106"></a><a -name="OLE_LINK107"></a>Change-O: a toolkit for analyzing large-scale B cell -immunoglobulin repertoire sequencing data: Table 1. In<span -class=apple-converted-space> </span><em>Bioinformatics, 31 (20), pp. -3356–3358.</em><span class=apple-converted-space><i> </i></span>[</span><a -href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span -lang=EN-GB style='color:#303030'>doi:10.1093/bioinformatics/btv359</span></a><span -lang=EN-GB style='color:black'>][</span><a -href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span -lang=EN-GB style='color:#303030'>Link</span></a><span lang=EN-GB -style='color:black'>]</span></p> - -<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; -text-align:justify;background:white'><span lang=EN-GB style='color:black'> </span></p> - -<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; -text-align:justify;background:white'><a name="OLE_LINK110"><u><span lang=EN-GB -style='color:black'>All, IGA, IGG, IGM and IGE tabs</span></u></a></p> - -<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; -text-align:justify;background:white'><span lang=EN-GB style='color:black'>In -these tabs information on the clonal relation of transcripts can be found. To -calculate clonal relation Change-O is used (Gupta et al, PMID: 26069265). -Transcripts are considered clonally related if they have maximal three nucleotides -difference in their CDR3 sequence and the same first V segment (as assigned by -IMGT). Results are represented in a table format showing the clone size and the -number of clones or sequences with this clone size. Change-O settings used are -the </span><span lang=EN-GB>nucleotide hamming distance substitution model with -a complete distance of maximal three. For clonal assignment the first gene -segments were used, and the distances were not normalized. In case of -asymmetric distances, the minimal distance was used.<span style='color:black'> </span></span></p> - -<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; -text-align:justify;background:white'><span lang=EN-GB style='color:black'> </span></p> - -<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; -text-align:justify;background:white'><u><span lang=EN-GB style='color:black'>Overlap -tab</span></u><span lang=EN-GB style='color:black'> </span></p> - -<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; -text-align:justify;background:white'><span lang=EN-GB style='color:black'>This -tab gives information on with which (sub)classe(s) each unique analyzed region -(based on the exact nucleotide sequence of the analyzes region and the CDR3 -nucleotide sequence) is found with. This gives information if the combination -of the exact same nucleotide sequence of the analyzed region and the CDR3 -sequence can be found in multiple (sub)classes.</span></p> - -<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; -text-align:justify;background:white'><span style='color:black'><img src=""> Please note that this tab is based on all -sequences before filter unique sequences and the remove duplicates based on -filters are applied. In this table only sequences occuring more than once are -included. </span></p> - -</div> - -</body> - -</html>
--- a/shm_csr.htm Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,95 +0,0 @@ -<html> - -<head> -<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> -<meta name=Generator content="Microsoft Word 14 (filtered)"> -<style> -<!-- - /* Font Definitions */ - @font-face - {font-family:Calibri; - panose-1:2 15 5 2 2 2 4 3 2 4;} - /* Style Definitions */ - p.MsoNormal, li.MsoNormal, div.MsoNormal - {margin-top:0in; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:11.0pt; - font-family:"Calibri","sans-serif";} -a:link, span.MsoHyperlink - {color:blue; - text-decoration:underline;} -a:visited, span.MsoHyperlinkFollowed - {color:purple; - text-decoration:underline;} -span.apple-converted-space - {mso-style-name:apple-converted-space;} -.MsoChpDefault - {font-family:"Calibri","sans-serif";} -.MsoPapDefault - {margin-bottom:10.0pt; - line-height:115%;} -@page WordSection1 - {size:8.5in 11.0in; - margin:1.0in 1.0in 1.0in 1.0in;} -div.WordSection1 - {page:WordSection1;} ---> -</style> - -</head> - -<body lang=EN-US link=blue vlink=purple> - -<div class=WordSection1> - -<p class=MsoNormalCxSpFirst style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The -graphs in this tab give insight into the subclass distribution of IGG and IGA -transcripts. </span><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'>Human Cµ, Cα, Cγ and Cε -constant genes are assigned using a </span><span lang=EN-GB style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>custom script -specifically designed for human (sub)class assignment in repertoire data as -described in van Schouwenburg and IJspeert et al, submitted for publication. In -this script the reference sequences for the subclasses are divided in 8 -nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are -then individually aligned in the right order to each input sequence. The -percentage of the chunks identified in each rearrangement is calculated in the -‘chunk hit percentage’. </span><span lang=EN-GB style='font-size:12.0pt; -line-height:115%;font-family:"Times New Roman","serif"'>Cα and Cγ -subclasses are very homologous and only differ in a few nucleotides. To assign -subclasses the </span><span lang=EN-GB style='font-size:12.0pt;line-height: -115%;font-family:"Times New Roman","serif"'>‘nt hit percentage’ is calculated. -This percentage indicates how well the chunks covering the subclass specific -nucleotide match with the different subclasses. </span><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Information -on normal distribution of subclasses in healthy individuals of different ages -can be found in IJspeert and van Schouwenburg et al, PMID: 27799928.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK100"></a><a -name="OLE_LINK99"></a><a name="OLE_LINK25"><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGA -subclass distribution</span></u></a></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie -chart showing the relative distribution of IGA1 and IGA2 transcripts in the -sample.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGG -subclass distribution</span></u></p> - -<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie -chart showing the relative distribution of IGG1, IGG2, IGG3 and IGG4 -transcripts in the sample.</span></p> - -</div> - -</body> - -</html>
--- a/shm_csr.py Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,501 +0,0 @@ -import argparse -import logging -import sys -import os -import re - -from collections import defaultdict - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--input", help="The '7_V-REGION-mutation-and-AA-change-table' and '10_V-REGION-mutation-hotspots' merged together, with an added 'best_match' annotation") - parser.add_argument("--genes", help="The genes available in the 'best_match' column") - parser.add_argument("--empty_region_filter", help="Where does the sequence start?", choices=['leader', 'FR1', 'CDR1', 'FR2']) - parser.add_argument("--output", help="Output file") - - args = parser.parse_args() - - infile = args.input - genes = str(args.genes).split(",") - empty_region_filter = args.empty_region_filter - outfile = args.output - - genedic = dict() - - mutationdic = dict() - mutationMatcher = re.compile("^(.)(\d+).(.),?[ ]?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?") - mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?.?([A-Z])?(.*)?") - mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?") - mutationMatcher = re.compile("^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?") - NAMatchResult = (None, None, None, None, None, None, '') - geneMatchers = {gene: re.compile("^" + gene + ".*") for gene in genes} - linecount = 0 - - IDIndex = 0 - best_matchIndex = 0 - fr1Index = 0 - cdr1Index = 0 - fr2Index = 0 - cdr2Index = 0 - fr3Index = 0 - first = True - IDlist = [] - mutationList = [] - mutationListByID = {} - cdr1LengthDic = {} - cdr2LengthDic = {} - - fr1LengthDict = {} - fr2LengthDict = {} - fr3LengthDict = {} - - cdr1LengthIndex = 0 - cdr2LengthIndex = 0 - - fr1SeqIndex = 0 - fr2SeqIndex = 0 - fr3SeqIndex = 0 - - tandem_sum_by_class = defaultdict(int) - expected_tandem_sum_by_class = defaultdict(float) - - with open(infile, 'ru') as i: - for line in i: - if first: - linesplt = line.split("\t") - IDIndex = linesplt.index("Sequence.ID") - best_matchIndex = linesplt.index("best_match") - fr1Index = linesplt.index("FR1.IMGT") - cdr1Index = linesplt.index("CDR1.IMGT") - fr2Index = linesplt.index("FR2.IMGT") - cdr2Index = linesplt.index("CDR2.IMGT") - fr3Index = linesplt.index("FR3.IMGT") - cdr1LengthIndex = linesplt.index("CDR1.IMGT.seq") - cdr2LengthIndex = linesplt.index("CDR2.IMGT.seq") - fr1SeqIndex = linesplt.index("FR1.IMGT.seq") - fr2SeqIndex = linesplt.index("FR2.IMGT.seq") - fr3SeqIndex = linesplt.index("FR3.IMGT.seq") - first = False - continue - linecount += 1 - linesplt = line.split("\t") - ID = linesplt[IDIndex] - genedic[ID] = linesplt[best_matchIndex] - - mutationdic[ID + "_FR1"] = [] - if len(linesplt[fr1Index]) > 5 and empty_region_filter == "leader": - mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] - - mutationdic[ID + "_CDR1"] = [] - if len(linesplt[cdr1Index]) > 5 and empty_region_filter in ["leader", "FR1"]: - mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] - - mutationdic[ID + "_FR2"] = [] - if len(linesplt[fr2Index]) > 5 and empty_region_filter in ["leader", "FR1", "CDR1"]: - mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] - - mutationdic[ID + "_CDR2"] = [] - if len(linesplt[cdr2Index]) > 5: - mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] - - mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] - - mutationdic[ID + "_FR3"] = [] - if len(linesplt[fr3Index]) > 5: - mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] - - mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] - mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] - - cdr1Length = len(linesplt[cdr1LengthIndex]) - cdr2Length = len(linesplt[cdr2LengthIndex]) - - #print linesplt[fr2SeqIndex] - fr1Length = len(linesplt[fr1SeqIndex]) if empty_region_filter == "leader" else 0 - fr2Length = len(linesplt[fr2SeqIndex]) if empty_region_filter in ["leader", "FR1", "CDR1"] else 0 - fr3Length = len(linesplt[fr3SeqIndex]) - - cdr1LengthDic[ID] = cdr1Length - cdr2LengthDic[ID] = cdr2Length - - fr1LengthDict[ID] = fr1Length - fr2LengthDict[ID] = fr2Length - fr3LengthDict[ID] = fr3Length - - IDlist += [ID] - print "len(mutationdic) =", len(mutationdic) - - with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle: - for ID, lst in mutationdic.iteritems(): - for mut in lst: - out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in mut]))) - - #tandem mutation stuff - tandem_frequency = defaultdict(int) - mutation_frequency = defaultdict(int) - - mutations_by_id_dic = {} - first = True - mutation_by_id_file = os.path.join(os.path.dirname(outfile), "mutation_by_id.txt") - with open(mutation_by_id_file, 'r') as mutation_by_id: - for l in mutation_by_id: - if first: - first = False - continue - splt = l.split("\t") - mutations_by_id_dic[splt[0]] = int(splt[1]) - - tandem_file = os.path.join(os.path.dirname(outfile), "tandems_by_id.txt") - with open(tandem_file, 'w') as o: - highest_tandem_length = 0 - - o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n") - for ID in IDlist: - mutations = mutationListByID[ID] - if len(mutations) == 0: - continue - last_mut = max(mutations, key=lambda x: int(x[1])) - - last_mut_pos = int(last_mut[1]) - - mut_positions = [False] * (last_mut_pos + 1) - - for mutation in mutations: - frm, where, to, frmAA, whereAA, toAA, thing = mutation - where = int(where) - mut_positions[where] = True - - tandem_muts = [] - tandem_start = -1 - tandem_length = 0 - for i in range(len(mut_positions)): - if mut_positions[i]: - if tandem_start == -1: - tandem_start = i - tandem_length += 1 - #print "".join(["1" if x else "0" for x in mut_positions[:i+1]]) - else: - if tandem_length > 1: - tandem_muts.append((tandem_start, tandem_length)) - #print "{0}{1} {2}:{3}".format(" " * (i - tandem_length), "^" * tandem_length, tandem_start, tandem_length) - tandem_start = -1 - tandem_length = 0 - if tandem_length > 1: # if the sequence ends with a tandem mutation - tandem_muts.append((tandem_start, tandem_length)) - - if len(tandem_muts) > 0: - if highest_tandem_length < len(tandem_muts): - highest_tandem_length = len(tandem_muts) - - region_length = fr1LengthDict[ID] + cdr1LengthDic[ID] + fr2LengthDict[ID] + cdr2LengthDic[ID] + fr3LengthDict[ID] - longest_tandem = max(tandem_muts, key=lambda x: x[1]) if len(tandem_muts) else (0, 0) - num_mutations = mutations_by_id_dic[ID] # len(mutations) - f_num_mutations = float(num_mutations) - num_tandem_muts = len(tandem_muts) - expected_tandem_muts = f_num_mutations * (f_num_mutations - 1.0) / float(region_length) - o.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n".format(ID, - str(num_mutations), - str(num_tandem_muts), - str(region_length), - str(round(expected_tandem_muts, 2)), - str(longest_tandem[1]), - str(tandem_muts))) - gene = genedic[ID] - if gene.find("unmatched") == -1: - tandem_sum_by_class[gene] += num_tandem_muts - expected_tandem_sum_by_class[gene] += expected_tandem_muts - - tandem_sum_by_class["all"] += num_tandem_muts - expected_tandem_sum_by_class["all"] += expected_tandem_muts - - gene = gene[:3] - if gene in ["IGA", "IGG"]: - tandem_sum_by_class[gene] += num_tandem_muts - expected_tandem_sum_by_class[gene] += expected_tandem_muts - else: - tandem_sum_by_class["unmatched"] += num_tandem_muts - expected_tandem_sum_by_class["unmatched"] += expected_tandem_muts - - - for tandem_mut in tandem_muts: - tandem_frequency[str(tandem_mut[1])] += 1 - #print "\t".join([ID, str(len(tandem_muts)), str(longest_tandem[1]) , str(tandem_muts)]) - - tandem_freq_file = os.path.join(os.path.dirname(outfile), "tandem_frequency.txt") - with open(tandem_freq_file, 'w') as o: - for frq in sorted([int(x) for x in tandem_frequency.keys()]): - o.write("{0}\t{1}\n".format(frq, tandem_frequency[str(frq)])) - - tandem_row = [] - genes_extra = list(genes) - genes_extra.append("all") - for x, y, in zip([tandem_sum_by_class[x] for x in genes_extra], [expected_tandem_sum_by_class[x] for x in genes_extra]): - if y != 0: - tandem_row += [x, round(y, 2), round(x / y, 2)] - else: - tandem_row += [x, round(y, 2), 0] - - tandem_freq_file = os.path.join(os.path.dirname(outfile), "shm_overview_tandem_row.txt") - with open(tandem_freq_file, 'w') as o: - o.write("Tandems/Expected (ratio),{0}\n".format(",".join([str(x) for x in tandem_row]))) - - #print mutationList, linecount - - AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] and i[5] != ";" else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent - if AALength < 60: - AALength = 64 - - AA_mutation = [0] * AALength - AA_mutation_dic = {"IGA": AA_mutation[:], "IGG": AA_mutation[:], "IGM": AA_mutation[:], "IGE": AA_mutation[:], "unm": AA_mutation[:], "all": AA_mutation[:]} - AA_mutation_empty = AA_mutation[:] - - print "AALength:", AALength - aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt" - with open(aa_mutations_by_id_file, 'w') as o: - o.write("ID\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n") - for ID in mutationListByID.keys(): - AA_mutation_for_ID = AA_mutation_empty[:] - for mutation in mutationListByID[ID]: - if mutation[4] and mutation[5] != ";": - AA_mutation_position = int(mutation[4]) - try: - AA_mutation[AA_mutation_position] += 1 - AA_mutation_for_ID[AA_mutation_position] += 1 - except Exception as e: - print e - print mutation - sys.exit() - clss = genedic[ID][:3] - AA_mutation_dic[clss][AA_mutation_position] += 1 - o.write(ID + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in AA_mutation_for_ID[1:]]) + "\n") - - - - #absent AA stuff - absentAACDR1Dic = defaultdict(list) - absentAACDR1Dic[5] = range(29,36) - absentAACDR1Dic[6] = range(29,35) - absentAACDR1Dic[7] = range(30,35) - absentAACDR1Dic[8] = range(30,34) - absentAACDR1Dic[9] = range(31,34) - absentAACDR1Dic[10] = range(31,33) - absentAACDR1Dic[11] = [32] - - absentAACDR2Dic = defaultdict(list) - absentAACDR2Dic[0] = range(55,65) - absentAACDR2Dic[1] = range(56,65) - absentAACDR2Dic[2] = range(56,64) - absentAACDR2Dic[3] = range(57,64) - absentAACDR2Dic[4] = range(57,63) - absentAACDR2Dic[5] = range(58,63) - absentAACDR2Dic[6] = range(58,62) - absentAACDR2Dic[7] = range(59,62) - absentAACDR2Dic[8] = range(59,61) - absentAACDR2Dic[9] = [60] - - absentAA = [len(IDlist)] * (AALength-1) - for k, cdr1Length in cdr1LengthDic.iteritems(): - for c in absentAACDR1Dic[cdr1Length]: - absentAA[c] -= 1 - - for k, cdr2Length in cdr2LengthDic.iteritems(): - for c in absentAACDR2Dic[cdr2Length]: - absentAA[c] -= 1 - - - aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/absent_aa_id.txt" - with open(aa_mutations_by_id_file, 'w') as o: - o.write("ID\tcdr1length\tcdr2length\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n") - for ID in IDlist: - absentAAbyID = [1] * (AALength-1) - cdr1Length = cdr1LengthDic[ID] - for c in absentAACDR1Dic[cdr1Length]: - absentAAbyID[c] -= 1 - - cdr2Length = cdr2LengthDic[ID] - for c in absentAACDR2Dic[cdr2Length]: - absentAAbyID[c] -= 1 - o.write(ID + "\t" + str(cdr1Length) + "\t" + str(cdr2Length) + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in absentAAbyID]) + "\n") - - if linecount == 0: - print "No data, exiting" - with open(outfile, 'w') as o: - o.write("RGYW (%)," + ("0,0,0\n" * len(genes))) - o.write("WRCY (%)," + ("0,0,0\n" * len(genes))) - o.write("WA (%)," + ("0,0,0\n" * len(genes))) - o.write("TW (%)," + ("0,0,0\n" * len(genes))) - import sys - - sys.exit() - - hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)") - RGYWCount = {} - WRCYCount = {} - WACount = {} - TWCount = {} - - #IDIndex = 0 - ataIndex = 0 - tatIndex = 0 - aggctatIndex = 0 - atagcctIndex = 0 - first = True - with open(infile, 'ru') as i: - for line in i: - if first: - linesplt = line.split("\t") - ataIndex = linesplt.index("X.a.t.a") - tatIndex = linesplt.index("t.a.t.") - aggctatIndex = linesplt.index("X.a.g.g.c.t..a.t.") - atagcctIndex = linesplt.index("X.a.t..a.g.c.c.t.") - first = False - continue - linesplt = line.split("\t") - gene = linesplt[best_matchIndex] - ID = linesplt[IDIndex] - RGYW = [(int(x), int(y), z) for (x, y, z) in - [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] - WRCY = [(int(x), int(y), z) for (x, y, z) in - [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] - WA = [(int(x), int(y), z) for (x, y, z) in - [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]] - TW = [(int(x), int(y), z) for (x, y, z) in - [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] - RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0, 0, 0, 0 - - with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "RGYW.txt"), 'a') as out_handle: - for hotspot in RGYW: - out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in hotspot]))) - - mutationList = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] - for mutation in mutationList: - frm, where, to, AAfrm, AAwhere, AAto, junk = mutation - mutation_in_RGYW = any(((start <= int(where) <= end) for (start, end, region) in RGYW)) - mutation_in_WRCY = any(((start <= int(where) <= end) for (start, end, region) in WRCY)) - mutation_in_WA = any(((start <= int(where) <= end) for (start, end, region) in WA)) - mutation_in_TW = any(((start <= int(where) <= end) for (start, end, region) in TW)) - - in_how_many_motifs = sum([mutation_in_RGYW, mutation_in_WRCY, mutation_in_WA, mutation_in_TW]) - - if in_how_many_motifs > 0: - RGYWCount[ID] += (1.0 * int(mutation_in_RGYW)) / in_how_many_motifs - WRCYCount[ID] += (1.0 * int(mutation_in_WRCY)) / in_how_many_motifs - WACount[ID] += (1.0 * int(mutation_in_WA)) / in_how_many_motifs - TWCount[ID] += (1.0 * int(mutation_in_TW)) / in_how_many_motifs - - mutations_in_motifs_file = os.path.join(os.path.dirname(os.path.abspath(infile)), "mutation_in_motifs.txt") - if not os.path.exists(mutation_by_id_file): - with open(mutations_in_motifs_file, 'w') as out_handle: - out_handle.write("{0}\n".format("\t".join([ - "Sequence.ID", - "mutation_position", - "region", - "from_nt", - "to_nt", - "mutation_position_AA", - "from_AA", - "to_AA", - "motif", - "motif_start_nt", - "motif_end_nt", - "rest" - ]))) - - with open(mutations_in_motifs_file, 'a') as out_handle: - motif_dic = {"RGYW": RGYW, "WRCY": WRCY, "WA": WA, "TW": TW} - for mutation in mutationList: - frm, where, to, AAfrm, AAwhere, AAto, junk = mutation - for motif in motif_dic.keys(): - - for start, end, region in motif_dic[motif]: - if start <= int(where) <= end: - out_handle.write("{0}\n".format( - "\t".join([ - ID, - where, - region, - frm, - to, - str(AAwhere), - str(AAfrm), - str(AAto), - motif, - str(start), - str(end), - str(junk) - ]) - )) - - - - def mean(lst): - return (float(sum(lst)) / len(lst)) if len(lst) > 0 else 0.0 - - - def median(lst): - lst = sorted(lst) - l = len(lst) - if l == 0: - return 0 - if l == 1: - return lst[0] - - l = int(l / 2) - - if len(lst) % 2 == 0: - return float(lst[l] + lst[(l - 1)]) / 2.0 - else: - return lst[l] - - funcs = {"mean": mean, "median": median, "sum": sum} - - directory = outfile[:outfile.rfind("/") + 1] - value = 0 - valuedic = dict() - - for fname in funcs.keys(): - for gene in genes: - with open(directory + gene + "_" + fname + "_value.txt", 'r') as v: - valuedic[gene + "_" + fname] = float(v.readlines()[0].rstrip()) - with open(directory + "all_" + fname + "_value.txt", 'r') as v: - valuedic["total_" + fname] = float(v.readlines()[0].rstrip()) - - - def get_xyz(lst, gene, f, fname): - x = round(round(f(lst), 1)) - y = valuedic[gene + "_" + fname] - z = str(round(x / float(y) * 100, 1)) if y != 0 else "0" - return (str(x), str(y), z) - - dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount} - arr = ["RGYW", "WRCY", "WA", "TW"] - - for fname in funcs.keys(): - func = funcs[fname] - foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt" - with open(foutfile, 'w') as o: - for typ in arr: - o.write(typ + " (%)") - curr = dic[typ] - for gene in genes: - geneMatcher = geneMatchers[gene] - if valuedic[gene + "_" + fname] is 0: - o.write(",0,0,0") - else: - x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname) - o.write("," + x + "," + y + "," + z) - x, y, z = get_xyz([y for x, y in curr.iteritems() if not genedic[x].startswith("unmatched")], "total", func, fname) - #x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname) - o.write("," + x + "," + y + "," + z + "\n") - - - # for testing - seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt" - with open(seq_motif_file, 'w') as o: - o.write("ID\tRGYW\tWRCY\tWA\tTW\n") - for ID in IDlist: - #o.write(ID + "\t" + str(round(RGYWCount[ID], 2)) + "\t" + str(round(WRCYCount[ID], 2)) + "\t" + str(round(WACount[ID], 2)) + "\t" + str(round(TWCount[ID], 2)) + "\n") - o.write(ID + "\t" + str(RGYWCount[ID]) + "\t" + str(WRCYCount[ID]) + "\t" + str(WACount[ID]) + "\t" + str(TWCount[ID]) + "\n") - -if __name__ == "__main__": - main()
--- a/shm_csr.r Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,561 +0,0 @@ -library(data.table) -library(ggplot2) -library(reshape2) - -args <- commandArgs(trailingOnly = TRUE) - -input = args[1] -genes = unlist(strsplit(args[2], ",")) -outputdir = args[3] -empty.region.filter = args[4] -setwd(outputdir) - -#dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) - -dat = data.frame(fread(input, sep="\t", header=T, stringsAsFactors=F)) #fread because read.table suddenly skips certain rows... - -if(length(dat$Sequence.ID) == 0){ - setwd(outputdir) - result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) - row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)") - write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) - transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) - row.names(transitionTable) = c("A", "C", "G", "T") - transitionTable["A","A"] = NA - transitionTable["C","C"] = NA - transitionTable["G","G"] = NA - transitionTable["T","T"] = NA - - write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) - cat("0", file="n.txt") - stop("No data") -} - -cleanup_columns = c("FR1.IMGT.c.a", - "FR2.IMGT.g.t", - "CDR1.IMGT.Nb.of.nucleotides", - "CDR2.IMGT.t.a", - "FR1.IMGT.c.g", - "CDR1.IMGT.c.t", - "FR2.IMGT.a.c", - "FR2.IMGT.Nb.of.mutations", - "FR2.IMGT.g.c", - "FR2.IMGT.a.g", - "FR3.IMGT.t.a", - "FR3.IMGT.t.c", - "FR2.IMGT.g.a", - "FR3.IMGT.c.g", - "FR1.IMGT.Nb.of.mutations", - "CDR1.IMGT.g.a", - "CDR1.IMGT.t.g", - "CDR1.IMGT.g.c", - "CDR2.IMGT.Nb.of.nucleotides", - "FR2.IMGT.a.t", - "CDR1.IMGT.Nb.of.mutations", - "CDR3.IMGT.Nb.of.nucleotides", - "CDR1.IMGT.a.g", - "FR3.IMGT.a.c", - "FR1.IMGT.g.a", - "FR3.IMGT.a.g", - "FR1.IMGT.a.t", - "CDR2.IMGT.a.g", - "CDR2.IMGT.Nb.of.mutations", - "CDR2.IMGT.g.t", - "CDR2.IMGT.a.c", - "CDR1.IMGT.t.c", - "FR3.IMGT.g.c", - "FR1.IMGT.g.t", - "FR3.IMGT.g.t", - "CDR1.IMGT.a.t", - "FR1.IMGT.a.g", - "FR3.IMGT.a.t", - "FR3.IMGT.Nb.of.nucleotides", - "FR2.IMGT.t.c", - "CDR2.IMGT.g.a", - "FR2.IMGT.t.a", - "CDR1.IMGT.t.a", - "FR2.IMGT.t.g", - "FR3.IMGT.t.g", - "FR2.IMGT.Nb.of.nucleotides", - "FR1.IMGT.t.a", - "FR1.IMGT.t.g", - "FR3.IMGT.c.t", - "FR1.IMGT.t.c", - "CDR2.IMGT.a.t", - "FR2.IMGT.c.t", - "CDR1.IMGT.g.t", - "CDR2.IMGT.t.g", - "FR1.IMGT.Nb.of.nucleotides", - "CDR1.IMGT.c.g", - "CDR2.IMGT.t.c", - "FR3.IMGT.g.a", - "CDR1.IMGT.a.c", - "FR2.IMGT.c.a", - "FR3.IMGT.Nb.of.mutations", - "FR2.IMGT.c.g", - "CDR2.IMGT.g.c", - "FR1.IMGT.g.c", - "CDR2.IMGT.c.t", - "FR3.IMGT.c.a", - "CDR1.IMGT.c.a", - "CDR2.IMGT.c.g", - "CDR2.IMGT.c.a", - "FR1.IMGT.c.t", - "FR1.IMGT.Nb.of.silent.mutations", - "FR2.IMGT.Nb.of.silent.mutations", - "FR3.IMGT.Nb.of.silent.mutations", - "FR1.IMGT.Nb.of.nonsilent.mutations", - "FR2.IMGT.Nb.of.nonsilent.mutations", - "FR3.IMGT.Nb.of.nonsilent.mutations") - -print("Cleaning up columns") - -for(col in cleanup_columns){ - dat[,col] = gsub("\\(.*\\)", "", dat[,col]) - #dat[dat[,col] == "",] = "0" - dat[,col] = as.numeric(dat[,col]) - dat[is.na(dat[,col]),col] = 0 -} - -regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3") -if(empty.region.filter == "FR1") { - regions = c("CDR1", "FR2", "CDR2", "FR3") -} else if (empty.region.filter == "CDR1") { - regions = c("FR2", "CDR2", "FR3") -} else if (empty.region.filter == "FR2") { - regions = c("CDR2", "FR3") -} - -pdfplots = list() #save() this later to create the pdf plots in another script (maybe avoids the "address (nil), cause memory not mapped") - -sum_by_row = function(x, columns) { sum(as.numeric(x[columns]), na.rm=T) } - -print("aggregating data into new columns") - -VRegionMutations_columns = paste(regions, ".IMGT.Nb.of.mutations", sep="") -dat$VRegionMutations = apply(dat, FUN=sum_by_row, 1, columns=VRegionMutations_columns) - -VRegionNucleotides_columns = paste(regions, ".IMGT.Nb.of.nucleotides", sep="") -dat$FR3.IMGT.Nb.of.nucleotides = nchar(dat$FR3.IMGT.seq) -dat$VRegionNucleotides = apply(dat, FUN=sum_by_row, 1, columns=VRegionNucleotides_columns) - -transitionMutations_columns = paste(rep(regions, each=4), c(".IMGT.a.g", ".IMGT.g.a", ".IMGT.c.t", ".IMGT.t.c"), sep="") -dat$transitionMutations = apply(dat, FUN=sum_by_row, 1, columns=transitionMutations_columns) - -transversionMutations_columns = paste(rep(regions, each=8), c(".IMGT.a.c",".IMGT.c.a",".IMGT.a.t",".IMGT.t.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t",".IMGT.t.g"), sep="") -dat$transversionMutations = apply(dat, FUN=sum_by_row, 1, columns=transversionMutations_columns) - -transitionMutationsAtGC_columns = paste(rep(regions, each=2), c(".IMGT.g.a",".IMGT.c.t"), sep="") -dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) - -totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.c.g",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.g.a",".IMGT.g.t"), sep="") -#totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.c.g",".IMGT.g.t"), sep="") -dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) - -transitionMutationsAtAT_columns = paste(rep(regions, each=2), c(".IMGT.a.g",".IMGT.t.c"), sep="") -dat$transitionMutationsAtAT = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtAT_columns) - -totalMutationsAtAT_columns = paste(rep(regions, each=6), c(".IMGT.a.g",".IMGT.a.c",".IMGT.a.t",".IMGT.t.g",".IMGT.t.c",".IMGT.t.a"), sep="") -#totalMutationsAtAT_columns = paste(rep(regions, each=5), c(".IMGT.a.g",".IMGT.t.c",".IMGT.a.c",".IMGT.g.c",".IMGT.t.g"), sep="") -dat$totalMutationsAtAT = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtAT_columns) - -FRRegions = regions[grepl("FR", regions)] -CDRRegions = regions[grepl("CDR", regions)] - -FR_silentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.silent.mutations", sep="") -dat$silentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_silentMutations_columns) - -CDR_silentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.silent.mutations", sep="") -dat$silentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_silentMutations_columns) - -FR_nonSilentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="") -dat$nonSilentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_nonSilentMutations_columns) - -CDR_nonSilentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="") -dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns) - -mutation.sum.columns = c("Sequence.ID", "VRegionMutations", "VRegionNucleotides", "transitionMutations", "transversionMutations", "transitionMutationsAtGC", "transitionMutationsAtAT", "silentMutationsFR", "nonSilentMutationsFR", "silentMutationsCDR", "nonSilentMutationsCDR") -write.table(dat[,mutation.sum.columns], "mutation_by_id.txt", sep="\t",quote=F,row.names=F,col.names=T) - -setwd(outputdir) - -write.table(dat, input, sep="\t",quote=F,row.names=F,col.names=T) - -base.order.x = data.frame(base=c("A", "C", "G", "T"), order.x=1:4) -base.order.y = data.frame(base=c("T", "G", "C", "A"), order.y=1:4) - -calculate_result = function(i, gene, dat, matrx, f, fname, name){ - tmp = dat[grepl(paste("^", gene, ".*", sep=""), dat$best_match),] - - j = i - 1 - x = (j * 3) + 1 - y = (j * 3) + 2 - z = (j * 3) + 3 - - if(nrow(tmp) > 0){ - if(fname == "sum"){ - matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) - matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1) - matrx[1,z] = round(f(matrx[1,x] / matrx[1,y]) * 100, digits=1) - } else { - matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) - matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1) - matrx[1,z] = round(f(tmp$VRegionMutations / tmp$VRegionNucleotides) * 100, digits=1) - } - - matrx[2,x] = round(f(tmp$transitionMutations, na.rm=T), digits=1) - matrx[2,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) - matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1) - - matrx[3,x] = round(f(tmp$transversionMutations, na.rm=T), digits=1) - matrx[3,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) - matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1) - - matrx[4,x] = round(f(tmp$transitionMutationsAtGC, na.rm=T), digits=1) - matrx[4,y] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1) - matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) - - matrx[5,x] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1) - matrx[5,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) - matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) - - matrx[6,x] = round(f(tmp$transitionMutationsAtAT, na.rm=T), digits=1) - matrx[6,y] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1) - matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) - - matrx[7,x] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1) - matrx[7,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) - matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1) - - matrx[8,x] = round(f(tmp$nonSilentMutationsFR, na.rm=T), digits=1) - matrx[8,y] = round(f(tmp$silentMutationsFR, na.rm=T), digits=1) - matrx[8,z] = round(matrx[8,x] / matrx[8,y], digits=1) - - matrx[9,x] = round(f(tmp$nonSilentMutationsCDR, na.rm=T), digits=1) - matrx[9,y] = round(f(tmp$silentMutationsCDR, na.rm=T), digits=1) - matrx[9,z] = round(matrx[9,x] / matrx[9,y], digits=1) - - if(fname == "sum"){ - - regions.fr = regions[grepl("FR", regions)] - regions.fr = paste(regions.fr, ".IMGT.Nb.of.nucleotides", sep="") - regions.cdr = regions[grepl("CDR", regions)] - regions.cdr = paste(regions.cdr, ".IMGT.Nb.of.nucleotides", sep="") - - if(length(regions.fr) > 1){ #in case there is only on FR region (rowSums needs >1 column) - matrx[10,x] = round(f(rowSums(tmp[,regions.fr], na.rm=T)), digits=1) - } else { - matrx[10,x] = round(f(tmp[,regions.fr], na.rm=T), digits=1) - } - matrx[10,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1) - matrx[10,z] = round(matrx[10,x] / matrx[10,y] * 100, digits=1) - - if(length(regions.cdr) > 1){ #in case there is only on CDR region - matrx[11,x] = round(f(rowSums(tmp[,regions.cdr], na.rm=T)), digits=1) - } else { - matrx[11,x] = round(f(tmp[,regions.cdr], na.rm=T), digits=1) - } - matrx[11,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1) - matrx[11,z] = round(matrx[11,x] / matrx[11,y] * 100, digits=1) - } - } - - transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) - row.names(transitionTable) = c("A", "C", "G", "T") - transitionTable["A","A"] = NA - transitionTable["C","C"] = NA - transitionTable["G","G"] = NA - transitionTable["T","T"] = NA - - if(nrow(tmp) > 0){ - for(nt1 in nts){ - for(nt2 in nts){ - if(nt1 == nt2){ - next - } - NT1 = LETTERS[letters == nt1] - NT2 = LETTERS[letters == nt2] - FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") - CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") - FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") - CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") - FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") - if (empty.region.filter == "leader"){ - transitionTable[NT1,NT2] = sum(tmp[,c(FR1, CDR1, FR2, CDR2, FR3)]) - } else if (empty.region.filter == "FR1") { - transitionTable[NT1,NT2] = sum(tmp[,c(CDR1, FR2, CDR2, FR3)]) - } else if (empty.region.filter == "CDR1") { - transitionTable[NT1,NT2] = sum(tmp[,c(FR2, CDR2, FR3)]) - } else if (empty.region.filter == "FR2") { - transitionTable[NT1,NT2] = sum(tmp[,c(CDR2, FR3)]) - } - } - } - transition = transitionTable - transition$id = names(transition) - - transition2 = melt(transition, id.vars="id") - - transition2 = merge(transition2, base.order.x, by.x="id", by.y="base") - - transition2 = merge(transition2, base.order.y, by.x="variable", by.y="base") - - transition2[is.na(transition2$value),]$value = 0 - - if(any(transition2$value != 0)){ #having a transition table filled with 0 is bad - print("Plotting heatmap and transition") - png(filename=paste("transitions_stacked_", name, ".png", sep="")) - p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar - p = p + xlab("From base") + ylab("") + ggtitle("Bargraph transition information") + guides(fill=guide_legend(title=NULL)) - p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) - #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black")) - print(p) - dev.off() - - pdfplots[[paste("transitions_stacked_", name, ".pdf", sep="")]] <<- p - - png(filename=paste("transitions_heatmap_", name, ".png", sep="")) - p = ggplot(transition2, aes(factor(reorder(variable, -order.y)), factor(reorder(id, -order.x)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap - p = p + xlab("To base") + ylab("From Base") + ggtitle("Heatmap transition information") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) - print(p) - dev.off() - - pdfplots[[paste("transitions_heatmap_", name, ".pdf", sep="")]] <<- p - } else { - #print("No data to plot") - } - } - - #print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep="")) - write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) - write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", name , "_", fname, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) - cat(matrx[1,x], file=paste(name, "_", fname, "_value.txt" ,sep="")) - cat(nrow(tmp), file=paste(name, "_", fname, "_n.txt" ,sep="")) - #print(paste(fname, name, nrow(tmp))) - matrx -} -nts = c("a", "c", "g", "t") -zeros=rep(0, 4) -funcs = c(median, sum, mean) -fnames = c("median", "sum", "mean") - -print("Creating result tables") - -for(i in 1:length(funcs)){ - func = funcs[[i]] - fname = fnames[[i]] - - print(paste("Creating table for", fname)) - - rows = 9 - if(fname == "sum"){ - rows = 11 - } - matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows) - for(i in 1:length(genes)){ - matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i]) - } - matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all") - - result = data.frame(matrx) - if(fname == "sum"){ - row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR") - } else { - row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)") - } - write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F) -} - -print("Adding median number of mutations to sum table") -sum.table = read.table("mutations_sum.txt", sep=",", header=F) -median.table = read.table("mutations_median.txt", sep=",", header=F) - -new.table = sum.table[1,] -new.table[2,] = median.table[1,] -new.table[3:12,] = sum.table[2:11,] -new.table[,1] = as.character(new.table[,1]) -new.table[2,1] = "Median of Number of Mutations (%)" - -#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),] - -write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F) - -print("Plotting IGA piechart") - -dat = dat[!grepl("^unmatched", dat$best_match),] - -#blegh - -genesForPlot = dat[grepl("IGA", dat$best_match),]$best_match - -if(length(genesForPlot) > 0){ - genesForPlot = data.frame(table(genesForPlot)) - colnames(genesForPlot) = c("Gene","Freq") - genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) - - pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) - pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGA1" = "lightblue1", "IGA2" = "blue4")) - pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) - pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) - pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclass distribution", "( n =", sum(genesForPlot$Freq), ")")) - write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) - - png(filename="IGA.png") - print(pc) - dev.off() - - pdfplots[["IGA.pdf"]] <- pc -} - -print("Plotting IGG piechart") - -genesForPlot = dat[grepl("IGG", dat$best_match),]$best_match - -if(length(genesForPlot) > 0){ - genesForPlot = data.frame(table(genesForPlot)) - colnames(genesForPlot) = c("Gene","Freq") - genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) - - pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) - pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred")) - pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) - pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) - pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclass distribution", "( n =", sum(genesForPlot$Freq), ")")) - write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) - - png(filename="IGG.png") - print(pc) - dev.off() - - pdfplots[["IGG.pdf"]] <- pc -} - -print("Plotting scatterplot") - -dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) -dat.clss = dat - -dat.clss$best_match = substr(dat.clss$best_match, 0, 3) - -dat.clss = rbind(dat, dat.clss) - -p = ggplot(dat.clss, aes(best_match, percentage_mutations)) -p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) -p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) -p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) -p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) - -png(filename="scatter.png") -print(p) -dev.off() - -pdfplots[["scatter.pdf"]] <- p - -write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) - -print("Plotting frequency ranges plot") - -dat$best_match_class = substr(dat$best_match, 0, 3) -freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20") -dat$frequency_bins = cut(dat$percentage_mutations, breaks=c(-Inf, 0, 2,5,10,15,20, Inf), labels=freq_labels) - -frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match_class")]) - -frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match_class", "frequency_bins")]) - -frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class") - -frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) - -p = ggplot(frequency_bins_data, aes(frequency_bins, frequency)) -p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) -p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) - -png(filename="frequency_ranges.png") -print(p) -dev.off() - -pdfplots[["frequency_ranges.pdf"]] <- p - -save(pdfplots, file="pdfplots.RData") - -frequency_bins_data_by_class = frequency_bins_data - -frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),] - -frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins) -frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher" -frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower" - -write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T) - -frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) - -frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")]) - -frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match") - -frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) - -frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),] -frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins) -frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher" -frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower" - -write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
--- a/shm_csr.xml Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,240 +0,0 @@ -<tool id="shm_csr" name="SHM & CSR pipeline" version="1.0"> - <description></description> - <requirements> - <requirement type="package" version="2.7">python</requirement> - <requirement type="package" version="1.16.0">numpy</requirement> - <requirement type="package" version="1.2.0">xlrd</requirement> - <requirement type="package" version="3.0.0">r-ggplot2</requirement> - <requirement type="package" version="1.4.3">r-reshape2</requirement> - <requirement type="package" version="0.5.0">r-scales</requirement> - <requirement type="package" version="3.4_5">r-seqinr</requirement> - <requirement type="package" version="1.11.4">r-data.table</requirement> - </requirements> - <command interpreter="bash"> - #if str ( $filter_unique.filter_unique_select ) == "remove": - wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select $filter_unique.filter_unique_clone_count $class_filter_cond.class_filter $empty_region_filter $fast - #else: - wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select 2 $class_filter_cond.class_filter $empty_region_filter $fast - #end if - </command> - <inputs> - <param name="in_file" type="data" format="data" label="IMGT zip file to be analysed" /> - <param name="empty_region_filter" type="select" label="Sequence starts at" help="" > - <option value="leader" selected="true">Leader: include FR1, CDR1, FR2, CDR2, FR3 in filters</option> - <option value="FR1" selected="true">FR1: include CDR1,FR2,CDR2,FR3 in filters</option> - <option value="CDR1">CDR1: include FR2,CDR2,FR3 in filters</option> - <option value="FR2">FR2: include CDR2,FR3 in filters</option> - </param> - <param name="functionality" type="select" label="Functionality filter" help="" > - <option value="productive" selected="true">Productive (Productive and Productive see comment)</option> - <option value="unproductive">Unproductive (Unproductive and Unproductive see comment)</option> - <option value="remove_unknown">Productive and Unproductive (Productive, Productive see comment, Unproductive, Unproductive and Unproductive see comment)</option> - </param> - <conditional name="filter_unique"> - <param name="filter_unique_select" type="select" label="Filter unique sequences" help="See below for an example."> - <option value="remove" selected="true">Remove uniques (Based on nucleotide sequence + C)</option> - <option value="remove_vjaa">Remove uniques (Based on V+J+CDR3 (AA))</option> - <option value="keep">Keep uniques (Based on nucleotide sequence + C)</option> - <option value="no">No</option> - </param> - <when value="remove"> - <param name="filter_unique_clone_count" size="4" type="integer" label="How many sequences should be in a group to keep 1 of them" value="2" min="2"/> - </when> - <when value="keep"></when> - <when value="no"></when> - </conditional> - <param name="unique" type="select" label="Remove duplicates based on" help="" > - <option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option> - <option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option> - <option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option> - <option value="CDR3.IMGT.AA">CDR3 (AA)</option> - - <option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3 (nt), C region</option> - <option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option> - <option value="CDR3.IMGT.seq,best_match_class">CDR3 (nt), C region</option> - <option value="CDR3.IMGT.seq">CDR3 (nt)</option> - <option value="Sequence.ID" selected="true">Don't remove duplicates</option> - </param> - <conditional name="class_filter_cond"> - <param name="class_filter" type="select" label="Human Class/Subclass filter" help="" > - <option value="70_70" selected="true">>70% class and >70% subclass</option> - <option value="60_55">>60% class and >55% subclass</option> - <option value="70_0">>70% class</option> - <option value="60_0">>60% class</option> - <option value="19_0">>19% class</option> - <option value="101_101">Do not assign (sub)class</option> - </param> - <when value="70_70"></when> - <when value="60_55"></when> - <when value="70_0"></when> - <when value="60_0"></when> - <when value="19_0"></when> - <when value="101_101"></when> - </conditional> - <conditional name="naive_output_cond"> - <param name="naive_output" type="select" label="Output new IMGT archives per class into your history?"> - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> - </param> - <when value="yes"></when> - <when value="no"></when> - </conditional> - <param name="fast" type="select" label="Fast" help="Skips generating the new ZIP files and Change-O/Baseline" > - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> - </param> - </inputs> - <outputs> - <data format="html" name="out_file" label = "SHM & CSR on ${in_file.name}"/> - <data format="imgt_archive" name="naive_output_ca" label = "Filtered IMGT IGA: ${in_file.name}" > - <filter>naive_output_cond['naive_output'] == "yes"</filter> - <filter>class_filter_cond['class_filter'] != "101_101"</filter> - </data> - <data format="imgt_archive" name="naive_output_cg" label = "Filtered IMGT IGG: ${in_file.name}" > - <filter>naive_output_cond['naive_output'] == "yes"</filter> - <filter>class_filter_cond['class_filter'] != "101_101"</filter> - </data> - <data format="imgt_archive" name="naive_output_cm" label = "Filtered IMGT IGM: ${in_file.name}" > - <filter>naive_output_cond['naive_output'] == "yes"</filter> - <filter>class_filter_cond['class_filter'] != "101_101"</filter> - </data> - <data format="imgt_archive" name="naive_output_ce" label = "Filtered IMGT IGE: ${in_file.name}" > - <filter>naive_output_cond['naive_output'] == "yes"</filter> - <filter>class_filter_cond['class_filter'] != "101_101"</filter> - </data> - <data format="imgt_archive" name="naive_output_all" label = "Filtered IMGT all: ${in_file.name}" > - <filter>naive_output_cond['naive_output'] == "yes"</filter> - <filter>class_filter_cond['class_filter'] == "101_101"</filter> - </data> - </outputs> - <tests> - <test> - <param name="fast" value="yes"/> - <output name="out_file" file="test1.html"/> - </test> - </tests> - <help> -<![CDATA[ -**References** - -Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying selection in high-throughput Immunoglobulin sequencing data sets. In *Nucleic Acids Research, 40 (17), pp. e134–e134.* [`doi:10.1093/nar/gks457`_] - -.. _doi:10.1093/nar/gks457: http://dx.doi.org/10.1093/nar/gks457 - -Gupta, Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria, Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. *In Bioinformatics, 31 (20), pp. 3356–3358.* [`doi:10.1093/bioinformatics/btv359`_] - -.. _doi:10.1093/bioinformatics/btv359: http://dx.doi.org/10.1093/bioinformatics/btv359 - ------ - -**Input files** - -IMGT/HighV-QUEST .zip and .txz are accepted as input files. The file to be analysed can be selected using the dropdown menu. - -.. class:: infomark - -Note: Files can be uploaded by using “get data†and “upload file†and selecting “IMGT archive“ as a file type. Special characters should be prevented in the file names of the uploaded samples as these can give errors when running the immune repertoire pipeline. Underscores are allowed in the file names. - ------ - -**Sequence starts at** - -Identifies the region which will be included in the analysis (analysed region) - -- Sequences which are missing a gene region (FR1/CDR1 etc) in the analysed region are excluded. -- Sequences containing an ambiguous base in the analysed region or the CDR3 are excluded. -- All other filtering/analysis is based on the analysed region. - ------ - -**Functionality filter** - -Allows filtering on productive rearrangements, unproductive rearrangements or both based on the assignment provided by IMGT. - -**Filter unique sequences** - -*Remove unique:* - - -This filter consists of two different steps. - -Step 1: removes all sequences of which the nucleotide sequence in the “analysed region†and the CDR3 (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step. - -Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region, the CDR3 and the same (sub)class). - -.. class:: infomark - -This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes. - -*Keep unique:* - -Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). - -Example of the sequences that are included using either the “remove unique filter†or the “keep unique filter†- -+--------------------------+ -| unique filter | -+--------+--------+--------+ -| values | remove | keep | -+--------+--------+--------+ -| A | A | A | -+--------+--------+--------+ -| A | B | B | -+--------+--------+--------+ -| B | D | C | -+--------+--------+--------+ -| B | | D | -+--------+--------+--------+ -| C | | | -+--------+--------+--------+ -| D | | | -+--------+--------+--------+ -| D | | | -+--------+--------+--------+ - ------ - -**Remove duplicates based on** - -Allows the selection of a single sequence per clone. Different definitions of a clone can be chosen. - -.. class:: infomark - -Note: The first sequence (in the data set) of each clone is always included in the analysis. When the first matched sequence is unmatched (no subclass assigned) the first matched sequence will be included. This means that altering the data order (by for instance sorting) can change the sequence which is included in the analysis and therefore slightly influences the results. - ------ - -**Human Class/Subclass filter** - -.. class:: warningmark - -Note: This filter should only be applied when analysing human IGH data in which a (sub)class specific sequence is present. Otherwise please select the do not assign (sub)class option to prevent errors when running the pipeline. - -The class percentage is based on the ‘chunk hit percentage’ (see below). The subclass percentage is based on the ‘nt hit percentage’ (see below). - -The SHM & CSR pipeline identifies human Cµ, Cα, Cγ and Cε constant genes by dividing the reference sequences for the subclasses (NG_001019) in 8 nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are then individually aligned in the right order to each input sequence. This alignment is used to calculate the chunck hit percentage and the nt hit percentage. - -*Chunk hit percentage*: The percentage of the chunks that is aligned - -*Nt hit percentage*: The percentage of chunks covering the subclass specific nucleotide match with the different subclasses. The most stringent filter for the subclass is 70% ‘nt hit percentage’ which means that 5 out of 7 subclass specific nucleotides for Cα or 6 out of 8 subclass specific nucleotides of Cγ should match with the specific subclass. -The option “>25% class†can be chosen when you only are interested in the class (Cα/Cγ/Cµ/Cɛ) of your sequences and the length of your sequence is not long enough to assign the subclasses. - ------ - -**Output new IMGT archives per class into your history?** - -If yes is selected, additional output files (one for each class) will be added to the history which contain information of the sequences that passed the selected filtering criteria. These files are in the same format as the IMGT/HighV-QUEST output files and therefore are also compatible with many other analysis programs, such as the Immune repertoire pipeline. - ------ - -**Execute** - -Upon pressing execute a new analysis is added to your history (right side of the page). Initially this analysis will be grey, after initiating the analysis colour of the analysis in the history will change to yellow. When the analysis is finished it will turn green in the history. Now the analysis can be opened by clicking on the eye icon on the analysis of interest. When an analysis turns red an error has occurred when running the analysis. If you click on the analysis title additional information can be found on the analysis. In addition a bug icon appears. Here more information on the error can be found. - -]]> - </help> - <citations> - <citation type="doi">10.1093/nar/gks457</citation> - <citation type="doi">10.1093/bioinformatics/btv359</citation> - </citations> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/.gitattributes Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,2 @@ +# Auto detect text files and perform LF normalization +* text=auto
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/.gitignore Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,4 @@ + +shm_csr\.tar\.gz + +\.vscode/settings\.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/LICENSE Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 david + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/README.md Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,13 @@ +# SHM CSR + +Somatic hypermutation and class switch recombination pipeline. +The docker version can be found [here](https://github.com/ErasmusMC-Bioinformatics/ARGalaxy-docker). + +# Dependencies +-------------------- +[Python 2.7](https://www.python.org/) +[Change-O](https://changeo.readthedocs.io/en/version-0.4.4/) +[Baseline](http://selection.med.yale.edu/baseline/) +[R data.table](https://cran.r-project.org/web/packages/data.table/data.table.pdf) +[R ggplot2](https://cran.r-project.org/web/packages/ggplot2/ggplot2.pdf) +[R reshape2](https://cran.r-project.org/web/packages/reshape/reshape.pdf)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/aa_histogram.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,69 @@ +library(ggplot2) + +args <- commandArgs(trailingOnly = TRUE) + +mutations.by.id.file = args[1] +absent.aa.by.id.file = args[2] +genes = strsplit(args[3], ",")[[1]] +genes = c(genes, "") +outdir = args[4] + + +print("---------------- read input ----------------") + +mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="") +absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") + +for(gene in genes){ + graph.title = paste(gene, "AA mutation frequency") + if(gene == ""){ + mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] + absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] + + graph.title = "AA mutation frequency all" + } else { + mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] + absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] + } + print(paste("nrow", gene, nrow(absent.aa.by.id.gene))) + if(nrow(mutations.by.id.gene) == 0){ + next + } + + mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) + aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) + + dat_freq = mutations.at.position / aa.at.position + dat_freq[is.na(dat_freq)] = 0 + dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) + + + print("---------------- plot ----------------") + + m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black")) + m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i) + m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") + m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") + m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") + m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") + m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") + m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(graph.title) + m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) + #m = m + scale_colour_manual(values=c("black")) + + print("---------------- write/print ----------------") + + + dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position) + + write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + + png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) + print(m) + dev.off() + + ggsave(paste(outdir, "/aa_histogram_", gene, ".pdf", sep=""), m, width=14, height=7) +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/Baseline_Functions.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,2287 @@ +######################################################################################### +# License Agreement +# +# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE +# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER +# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE +# OR COPYRIGHT LAW IS PROHIBITED. +# +# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE +# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED +# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN +# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. +# +# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences +# Coded by: Mohamed Uduman & Gur Yaari +# Copyright 2012 Kleinstein Lab +# Version: 1.3 (01/23/2014) +######################################################################################### + +# Global variables + + FILTER_BY_MUTATIONS = 1000 + + # Nucleotides + NUCLEOTIDES = c("A","C","G","T") + + # Amino Acids + AMINO_ACIDS <- c("F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W", "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R", "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R", "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G") + names(AMINO_ACIDS) <- c("TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG", "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG", "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG", "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG") + names(AMINO_ACIDS) <- names(AMINO_ACIDS) + + #Amino Acid Traits + #"*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y" + #B = "Hydrophobic/Burried" N = "Intermediate/Neutral" S="Hydrophilic/Surface") + TRAITS_AMINO_ACIDS_CHOTHIA98 <- c("*","N","B","S","S","B","N","N","B","S","B","B","S","N","S","S","N","N","B","B","N") + names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS)) + TRAITS_AMINO_ACIDS <- array(NA,21) + + # Codon Table + CODON_TABLE <- as.data.frame(matrix(NA,ncol=64,nrow=12)) + + # Substitution Model: Smith DS et al. 1996 + substitution_Literature_Mouse <- matrix(c(0, 0.156222928, 0.601501588, 0.242275484, 0.172506739, 0, 0.241239892, 0.586253369, 0.54636291, 0.255795364, 0, 0.197841727, 0.290240811, 0.467680608, 0.24207858, 0),nrow=4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES)) + substitution_Flu_Human <- matrix(c(0,0.2795596,0.5026927,0.2177477,0.1693210,0,0.3264723,0.5042067,0.4983549,0.3328321,0,0.1688130,0.2021079,0.4696077,0.3282844,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES)) + substitution_Flu25_Human <- matrix(c(0,0.2580641,0.5163685,0.2255674,0.1541125,0,0.3210224,0.5248651,0.5239281,0.3101292,0,0.1659427,0.1997207,0.4579444,0.3423350,0),4,4,byrow=T,dimnames=list(NUCLEOTIDES,NUCLEOTIDES)) + load("FiveS_Substitution.RData") + + # Mutability Models: Shapiro GS et al. 2002 + triMutability_Literature_Human <- matrix(c(0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31, 0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57, 0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68, 0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58, 0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11, 1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57, 0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6, 2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34, 0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85, 2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71, 0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45, 2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1, NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1, 0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43, 0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85, 0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14), nrow=64,byrow=T) + triMutability_Literature_Mouse <- matrix(c(1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03, 2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59, 1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63, 1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34, 0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94, 0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39, 0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0, 1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55, 0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48, 2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79, 0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19, 2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33, 0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63, 0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54, 0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24, 0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41), nrow=64,byrow=T) + triMutability_Names <- c("AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT") + load("FiveS_Mutability.RData") + +# Functions + + # Translate codon to amino acid + translateCodonToAminoAcid<-function(Codon){ + return(AMINO_ACIDS[Codon]) + } + + # Translate amino acid to trait change + translateAminoAcidToTraitChange<-function(AminoAcid){ + return(TRAITS_AMINO_ACIDS[AminoAcid]) + } + + # Initialize Amino Acid Trait Changes + initializeTraitChange <- function(traitChangeModel=1,species=1,traitChangeFileName=NULL){ + if(!is.null(traitChangeFileName)){ + tryCatch( + traitChange <- read.delim(traitChangeFileName,sep="\t",header=T) + , error = function(ex){ + cat("Error|Error reading trait changes. Please check file name/path and format.\n") + q() + } + ) + }else{ + traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98 + } + TRAITS_AMINO_ACIDS <<- traitChange + } + + # Read in formatted nucleotide substitution matrix + initializeSubstitutionMatrix <- function(substitutionModel,species,subsMatFileName=NULL){ + if(!is.null(subsMatFileName)){ + tryCatch( + subsMat <- read.delim(subsMatFileName,sep="\t",header=T) + , error = function(ex){ + cat("Error|Error reading substitution matrix. Please check file name/path and format.\n") + q() + } + ) + if(sum(apply(subsMat,1,sum)==1)!=4) subsMat = t(apply(subsMat,1,function(x)x/sum(x))) + }else{ + if(substitutionModel==1)subsMat <- substitution_Literature_Mouse + if(substitutionModel==2)subsMat <- substitution_Flu_Human + if(substitutionModel==3)subsMat <- substitution_Flu25_Human + + } + + if(substitutionModel==0){ + subsMat <- matrix(1,4,4) + subsMat[,] = 1/3 + subsMat[1,1] = 0 + subsMat[2,2] = 0 + subsMat[3,3] = 0 + subsMat[4,4] = 0 + } + + + NUCLEOTIDESN = c(NUCLEOTIDES,"N", "-") + if(substitutionModel==5){ + subsMat <- FiveS_Substitution + return(subsMat) + }else{ + subsMat <- rbind(subsMat,rep(NA,4),rep(NA,4)) + return( matrix(data.matrix(subsMat),6,4,dimnames=list(NUCLEOTIDESN,NUCLEOTIDES) ) ) + } + } + + + # Read in formatted Mutability file + initializeMutabilityMatrix <- function(mutabilityModel=1, species=1,mutabilityMatFileName=NULL){ + if(!is.null(mutabilityMatFileName)){ + tryCatch( + mutabilityMat <- read.delim(mutabilityMatFileName,sep="\t",header=T) + , error = function(ex){ + cat("Error|Error reading mutability matrix. Please check file name/path and format.\n") + q() + } + ) + }else{ + mutabilityMat <- triMutability_Literature_Human + if(species==2) mutabilityMat <- triMutability_Literature_Mouse + } + + if(mutabilityModel==0){ mutabilityMat <- matrix(1,64,3)} + + if(mutabilityModel==5){ + mutabilityMat <- FiveS_Mutability + return(mutabilityMat) + }else{ + return( matrix( data.matrix(mutabilityMat), 64, 3, dimnames=list(triMutability_Names,1:3)) ) + } + } + + # Read FASTA file formats + # Modified from read.fasta from the seqinR package + baseline.read.fasta <- + function (file = system.file("sequences/sample.fasta", package = "seqinr"), + seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE, + set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE, + strip.desc = FALSE, sizeof.longlong = .Machine$sizeof.longlong, + endian = .Platform$endian, apply.mask = TRUE) + { + seqtype <- match.arg(seqtype) + + lines <- readLines(file) + + if (legacy.mode) { + comments <- grep("^;", lines) + if (length(comments) > 0) + lines <- lines[-comments] + } + + + ind_groups<-which(substr(lines, 1L, 3L) == ">>>") + lines_mod<-lines + + if(!length(ind_groups)){ + lines_mod<-c(">>>All sequences combined",lines) + } + + ind_groups<-which(substr(lines_mod, 1L, 3L) == ">>>") + + lines <- array("BLA",dim=(length(ind_groups)+length(lines_mod))) + id<-sapply(1:length(ind_groups),function(i)ind_groups[i]+i-1)+1 + lines[id] <- "THIS IS A FAKE SEQUENCE" + lines[-id] <- lines_mod + rm(lines_mod) + + ind <- which(substr(lines, 1L, 1L) == ">") + nseq <- length(ind) + if (nseq == 0) { + stop("no line starting with a > character found") + } + start <- ind + 1 + end <- ind - 1 + + while( any(which(ind%in%end)) ){ + ind=ind[-which(ind%in%end)] + nseq <- length(ind) + if (nseq == 0) { + stop("no line starting with a > character found") + } + start <- ind + 1 + end <- ind - 1 + } + + end <- c(end[-1], length(lines)) + sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = "")) + if (seqonly) + return(sequences) + nomseq <- lapply(seq_len(nseq), function(i) { + + #firstword <- strsplit(lines[ind[i]], " ")[[1]][1] + substr(lines[ind[i]], 2, nchar(lines[ind[i]])) + + }) + if (seqtype == "DNA") { + if (forceDNAtolower) { + sequences <- as.list(tolower(chartr(".","-",sequences))) + }else{ + sequences <- as.list(toupper(chartr(".","-",sequences))) + } + } + if (as.string == FALSE) + sequences <- lapply(sequences, s2c) + if (set.attributes) { + for (i in seq_len(nseq)) { + Annot <- lines[ind[i]] + if (strip.desc) + Annot <- substr(Annot, 2L, nchar(Annot)) + attributes(sequences[[i]]) <- list(name = nomseq[[i]], + Annot = Annot, class = switch(seqtype, AA = "SeqFastaAA", + DNA = "SeqFastadna")) + } + } + names(sequences) <- nomseq + return(sequences) + } + + + # Replaces non FASTA characters in input files with N + replaceNonFASTAChars <-function(inSeq="ACGTN-AApA"){ + gsub('[^ACGTNacgt[:punct:]-[:punct:].]','N',inSeq,perl=TRUE) + } + + # Find the germlines in the FASTA list + germlinesInFile <- function(seqIDs){ + firstChar = sapply(seqIDs,function(x){substr(x,1,1)}) + secondChar = sapply(seqIDs,function(x){substr(x,2,2)}) + return(firstChar==">" & secondChar!=">") + } + + # Find the groups in the FASTA list + groupsInFile <- function(seqIDs){ + sapply(seqIDs,function(x){substr(x,1,2)})==">>" + } + + # In the process of finding germlines/groups, expand from the start to end of the group + expandTillNext <- function(vecPosToID){ + IDs = names(vecPosToID) + posOfInterests = which(vecPosToID) + + expandedID = rep(NA,length(IDs)) + expandedIDNames = gsub(">","",IDs[posOfInterests]) + startIndexes = c(1,posOfInterests[-1]) + stopIndexes = c(posOfInterests[-1]-1,length(IDs)) + expandedID = unlist(sapply(1:length(startIndexes),function(i){ + rep(i,stopIndexes[i]-startIndexes[i]+1) + })) + names(expandedID) = unlist(sapply(1:length(startIndexes),function(i){ + rep(expandedIDNames[i],stopIndexes[i]-startIndexes[i]+1) + })) + return(expandedID) + } + + # Process FASTA (list) to return a matrix[input, germline) + processInputAdvanced <- function(inputFASTA){ + + seqIDs = names(inputFASTA) + numbSeqs = length(seqIDs) + posGermlines1 = germlinesInFile(seqIDs) + numbGermlines = sum(posGermlines1) + posGroups1 = groupsInFile(seqIDs) + numbGroups = sum(posGroups1) + consDef = NA + + if(numbGermlines==0){ + posGermlines = 2 + numbGermlines = 1 + } + + glPositionsSum = cumsum(posGermlines1) + glPositions = table(glPositionsSum) + #Find the position of the conservation row + consDefPos = as.numeric(names(glPositions[names(glPositions)!=0 & glPositions==1]))+1 + if( length(consDefPos)> 0 ){ + consDefID = match(consDefPos, glPositionsSum) + #The coservation rows need to be pulled out and stores seperately + consDef = inputFASTA[consDefID] + inputFASTA = inputFASTA[-consDefID] + + seqIDs = names(inputFASTA) + numbSeqs = length(seqIDs) + posGermlines1 = germlinesInFile(seqIDs) + numbGermlines = sum(posGermlines1) + posGroups1 = groupsInFile(seqIDs) + numbGroups = sum(posGroups1) + if(numbGermlines==0){ + posGermlines = 2 + numbGermlines = 1 + } + } + + posGroups <- expandTillNext(posGroups1) + posGermlines <- expandTillNext(posGermlines1) + posGermlines[posGroups1] = 0 + names(posGermlines)[posGroups1] = names(posGroups)[posGroups1] + posInput = rep(TRUE,numbSeqs) + posInput[posGroups1 | posGermlines1] = FALSE + + matInput = matrix(NA, nrow=sum(posInput), ncol=2) + rownames(matInput) = seqIDs[posInput] + colnames(matInput) = c("Input","Germline") + + vecInputFASTA = unlist(inputFASTA) + matInput[,1] = vecInputFASTA[posInput] + matInput[,2] = vecInputFASTA[ which( names(inputFASTA)%in%paste(">",names(posGermlines)[posInput],sep="") )[ posGermlines[posInput]] ] + + germlines = posGermlines[posInput] + groups = posGroups[posInput] + + return( list("matInput"=matInput, "germlines"=germlines, "groups"=groups, "conservationDefinition"=consDef )) + } + + + # Replace leading and trailing dashes in the sequence + replaceLeadingTrailingDashes <- function(x,readEnd){ + iiGap = unlist(gregexpr("-",x[1])) + ggGap = unlist(gregexpr("-",x[2])) + #posToChange = intersect(iiGap,ggGap) + + + seqIn = replaceLeadingTrailingDashesHelper(x[1]) + seqGL = replaceLeadingTrailingDashesHelper(x[2]) + seqTemplate = rep('N',readEnd) + seqIn <- c(seqIn,seqTemplate[(length(seqIn)+1):readEnd]) + seqGL <- c(seqGL,seqTemplate[(length(seqGL)+1):readEnd]) +# if(posToChange!=-1){ +# seqIn[posToChange] = "-" +# seqGL[posToChange] = "-" +# } + + seqIn = c2s(seqIn[1:readEnd]) + seqGL = c2s(seqGL[1:readEnd]) + + lenGL = nchar(seqGL) + if(lenGL<readEnd){ + seqGL = paste(seqGL,c2s(rep("N",readEnd-lenGL)),sep="") + } + + lenInput = nchar(seqIn) + if(lenInput<readEnd){ + seqIn = paste(seqIn,c2s(rep("N",readEnd-lenInput)),sep="") + } + return( c(seqIn,seqGL) ) + } + + replaceLeadingTrailingDashesHelper <- function(x){ + grepResults = gregexpr("-*",x) + grepResultsPos = unlist(grepResults) + grepResultsLen = attr(grepResults[[1]],"match.length") + #print(paste("x = '", x, "'", sep="")) + x = s2c(x) + if(x[1]=="-"){ + x[1:grepResultsLen[1]] = "N" + } + if(x[length(x)]=="-"){ + x[(length(x)-grepResultsLen[length(grepResultsLen)]+1):length(x)] = "N" + } + return(x) + } + + + + + # Check sequences for indels + checkForInDels <- function(matInputP){ + insPos <- checkInsertion(matInputP) + delPos <- checkDeletions(matInputP) + return(list("Insertions"=insPos, "Deletions"=delPos)) + } + + # Check sequences for insertions + checkInsertion <- function(matInputP){ + insertionCheck = apply( matInputP,1, function(x){ + inputGaps <- as.vector( gregexpr("-",x[1])[[1]] ) + glGaps <- as.vector( gregexpr("-",x[2])[[1]] ) + return( is.finite( match(FALSE, glGaps%in%inputGaps ) ) ) + }) + return(as.vector(insertionCheck)) + } + # Fix inserstions + fixInsertions <- function(matInputP){ + insPos <- checkInsertion(matInputP) + sapply((1:nrow(matInputP))[insPos],function(rowIndex){ + x <- matInputP[rowIndex,] + inputGaps <- gregexpr("-",x[1])[[1]] + glGaps <- gregexpr("-",x[2])[[1]] + posInsertions <- glGaps[!(glGaps%in%inputGaps)] + inputInsertionToN <- s2c(x[2]) + inputInsertionToN[posInsertions]!="-" + inputInsertionToN[posInsertions] <- "N" + inputInsertionToN <- c2s(inputInsertionToN) + matInput[rowIndex,2] <<- inputInsertionToN + }) + return(insPos) + } + + # Check sequences for deletions + checkDeletions <-function(matInputP){ + deletionCheck = apply( matInputP,1, function(x){ + inputGaps <- as.vector( gregexpr("-",x[1])[[1]] ) + glGaps <- as.vector( gregexpr("-",x[2])[[1]] ) + return( is.finite( match(FALSE, inputGaps%in%glGaps ) ) ) + }) + return(as.vector(deletionCheck)) + } + # Fix sequences with deletions + fixDeletions <- function(matInputP){ + delPos <- checkDeletions(matInputP) + sapply((1:nrow(matInputP))[delPos],function(rowIndex){ + x <- matInputP[rowIndex,] + inputGaps <- gregexpr("-",x[1])[[1]] + glGaps <- gregexpr("-",x[2])[[1]] + posDeletions <- inputGaps[!(inputGaps%in%glGaps)] + inputDeletionToN <- s2c(x[1]) + inputDeletionToN[posDeletions] <- "N" + inputDeletionToN <- c2s(inputDeletionToN) + matInput[rowIndex,1] <<- inputDeletionToN + }) + return(delPos) + } + + + # Trim DNA sequence to the last codon + trimToLastCodon <- function(seqToTrim){ + seqLen = nchar(seqToTrim) + trimmedSeq = s2c(seqToTrim) + poi = seqLen + tailLen = 0 + + while(trimmedSeq[poi]=="-" || trimmedSeq[poi]=="."){ + tailLen = tailLen + 1 + poi = poi - 1 + } + + trimmedSeq = c2s(trimmedSeq[1:(seqLen-tailLen)]) + seqLen = nchar(trimmedSeq) + # Trim sequence to last codon + if( getCodonPos(seqLen)[3] > seqLen ) + trimmedSeq = substr(seqToTrim,1, ( (getCodonPos(seqLen)[1])-1 ) ) + + return(trimmedSeq) + } + + # Given a nuclotide position, returns the pos of the 3 nucs that made the codon + # e.g. nuc 86 is part of nucs 85,86,87 + getCodonPos <- function(nucPos){ + codonNum = (ceiling(nucPos/3))*3 + return( (codonNum-2):codonNum) + } + + # Given a nuclotide position, returns the codon number + # e.g. nuc 86 = codon 29 + getCodonNumb <- function(nucPos){ + return( ceiling(nucPos/3) ) + } + + # Given a codon, returns all the nuc positions that make the codon + getCodonNucs <- function(codonNumb){ + getCodonPos(codonNumb*3) + } + + computeCodonTable <- function(testID=1){ + + if(testID<=4){ + # Pre-compute every codons + intCounter = 1 + for(pOne in NUCLEOTIDES){ + for(pTwo in NUCLEOTIDES){ + for(pThree in NUCLEOTIDES){ + codon = paste(pOne,pTwo,pThree,sep="") + colnames(CODON_TABLE)[intCounter] = codon + intCounter = intCounter + 1 + CODON_TABLE[,codon] = mutationTypeOptimized(cbind(permutateAllCodon(codon),rep(codon,12))) + } + } + } + chars = c("N","A","C","G","T", "-") + for(a in chars){ + for(b in chars){ + for(c in chars){ + if(a=="N" | b=="N" | c=="N"){ + #cat(paste(a,b,c),sep="","\n") + CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12) + } + } + } + } + + chars = c("-","A","C","G","T") + for(a in chars){ + for(b in chars){ + for(c in chars){ + if(a=="-" | b=="-" | c=="-"){ + #cat(paste(a,b,c),sep="","\n") + CODON_TABLE[,paste(a,b,c,sep="")] = rep(NA,12) + } + } + } + } + CODON_TABLE <<- as.matrix(CODON_TABLE) + } + } + + collapseClone <- function(vecInputSeqs,glSeq,readEnd,nonTerminalOnly=0){ + #print(length(vecInputSeqs)) + vecInputSeqs = unique(vecInputSeqs) + if(length(vecInputSeqs)==1){ + return( list( c(vecInputSeqs,glSeq), F) ) + }else{ + charInputSeqs <- sapply(vecInputSeqs, function(x){ + s2c(x)[1:readEnd] + }) + charGLSeq <- s2c(glSeq) + matClone <- sapply(1:readEnd, function(i){ + posNucs = unique(charInputSeqs[i,]) + posGL = charGLSeq[i] + error = FALSE + if(posGL=="-" & sum(!(posNucs%in%c("-","N")))==0 ){ + return(c("-",error)) + } + if(length(posNucs)==1) + return(c(posNucs[1],error)) + else{ + if("N"%in%posNucs){ + error=TRUE + } + if(sum(!posNucs[posNucs!="N"]%in%posGL)==0){ + return( c(posGL,error) ) + }else{ + #return( c(sample(posNucs[posNucs!="N"],1),error) ) + if(nonTerminalOnly==0){ + return( c(sample(charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL],1),error) ) + }else{ + posNucs = charInputSeqs[i,charInputSeqs[i,]!="N" & charInputSeqs[i,]!=posGL] + posNucsTable = table(posNucs) + if(sum(posNucsTable>1)==0){ + return( c(posGL,error) ) + }else{ + return( c(sample( posNucs[posNucs%in%names(posNucsTable)[posNucsTable>1]],1),error) ) + } + } + + } + } + }) + + + #print(length(vecInputSeqs)) + return(list(c(c2s(matClone[1,]),glSeq),"TRUE"%in%matClone[2,])) + } + } + + # Compute the expected for each sequence-germline pair + getExpectedIndividual <- function(matInput){ + if( any(grep("multicore",search())) ){ + facGL <- factor(matInput[,2]) + facLevels = levels(facGL) + LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){ + computeMutabilities(facLevels[x]) + }) + facIndex = match(facGL,facLevels) + + LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){ + cInput = rep(NA,nchar(matInput[x,1])) + cInput[s2c(matInput[x,1])!="N"] = 1 + LisGLs_MutabilityU[[facIndex[x]]] * cInput + }) + + LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){ + computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) + }) + + LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){ + #print(x) + computeMutationTypes(matInput[x,2]) + }) + + LisGLs_Exp = mclapply(1:dim(matInput)[1], function(x){ + computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]]) + }) + + ul_LisGLs_Exp = unlist(LisGLs_Exp) + return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T)) + }else{ + facGL <- factor(matInput[,2]) + facLevels = levels(facGL) + LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){ + computeMutabilities(facLevels[x]) + }) + facIndex = match(facGL,facLevels) + + LisGLs_Mutability = lapply(1:nrow(matInput), function(x){ + cInput = rep(NA,nchar(matInput[x,1])) + cInput[s2c(matInput[x,1])!="N"] = 1 + LisGLs_MutabilityU[[facIndex[x]]] * cInput + }) + + LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){ + computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) + }) + + LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){ + #print(x) + computeMutationTypes(matInput[x,2]) + }) + + LisGLs_Exp = lapply(1:dim(matInput)[1], function(x){ + computeExpected(LisGLs_Targeting[[x]],LisGLs_MutationTypes[[x]]) + }) + + ul_LisGLs_Exp = unlist(LisGLs_Exp) + return(matrix(ul_LisGLs_Exp,ncol=4,nrow=(length(ul_LisGLs_Exp)/4),byrow=T)) + + } + } + + # Compute mutabilities of sequence based on the tri-nucleotide model + computeMutabilities <- function(paramSeq){ + seqLen = nchar(paramSeq) + seqMutabilites = rep(NA,seqLen) + + gaplessSeq = gsub("-", "", paramSeq) + gaplessSeqLen = nchar(gaplessSeq) + gaplessSeqMutabilites = rep(NA,gaplessSeqLen) + + if(mutabilityModel!=5){ + pos<- 3:(gaplessSeqLen) + subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2)) + gaplessSeqMutabilites[pos] = + tapply( c( + getMutability( substr(subSeq,1,3), 3) , + getMutability( substr(subSeq,2,4), 2), + getMutability( substr(subSeq,3,5), 1) + ),rep(1:(gaplessSeqLen-2),3),mean,na.rm=TRUE + ) + #Pos 1 + subSeq = substr(gaplessSeq,1,3) + gaplessSeqMutabilites[1] = getMutability(subSeq , 1) + #Pos 2 + subSeq = substr(gaplessSeq,1,4) + gaplessSeqMutabilites[2] = mean( c( + getMutability( substr(subSeq,1,3), 2) , + getMutability( substr(subSeq,2,4), 1) + ),na.rm=T + ) + seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites + return(seqMutabilites) + }else{ + + pos<- 3:(gaplessSeqLen) + subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2)) + gaplessSeqMutabilites[pos] = sapply(subSeq,function(x){ getMutability5(x) }, simplify=T) + seqMutabilites[which(s2c(paramSeq)!="-")]<- gaplessSeqMutabilites + return(seqMutabilites) + } + + } + + # Returns the mutability of a triplet at a given position + getMutability <- function(codon, pos=1:3){ + triplets <- rownames(mutability) + mutability[ match(codon,triplets) ,pos] + } + + getMutability5 <- function(fivemer){ + return(mutability[fivemer]) + } + + # Returns the substitution probabilty + getTransistionProb <- function(nuc){ + substitution[nuc,] + } + + getTransistionProb5 <- function(fivemer){ + if(any(which(fivemer==colnames(substitution)))){ + return(substitution[,fivemer]) + }else{ + return(array(NA,4)) + } + } + + # Given a nuc, returns the other 3 nucs it can mutate to + canMutateTo <- function(nuc){ + NUCLEOTIDES[- which(NUCLEOTIDES==nuc)] + } + + # Given a nucleotide, returns the probabilty of other nucleotide it can mutate to + canMutateToProb <- function(nuc){ + substitution[nuc,canMutateTo(nuc)] + } + + # Compute targeting, based on precomputed mutatbility & substitution + computeTargeting <- function(param_strSeq,param_vecMutabilities){ + + if(substitutionModel!=5){ + vecSeq = s2c(param_strSeq) + matTargeting = sapply( 1:length(vecSeq), function(x) { param_vecMutabilities[x] * getTransistionProb(vecSeq[x]) } ) + #matTargeting = apply( rbind(vecSeq,param_vecMutabilities),2, function(x) { as.vector(as.numeric(x[2]) * getTransistionProb(x[1])) } ) + dimnames( matTargeting ) = list(NUCLEOTIDES,1:(length(vecSeq))) + return (matTargeting) + }else{ + + seqLen = nchar(param_strSeq) + seqsubstitution = matrix(NA,ncol=seqLen,nrow=4) + paramSeq <- param_strSeq + gaplessSeq = gsub("-", "", paramSeq) + gaplessSeqLen = nchar(gaplessSeq) + gaplessSeqSubstitution = matrix(NA,ncol=gaplessSeqLen,nrow=4) + + pos<- 3:(gaplessSeqLen) + subSeq = substr(rep(gaplessSeq,gaplessSeqLen-2),(pos-2),(pos+2)) + gaplessSeqSubstitution[,pos] = sapply(subSeq,function(x){ getTransistionProb5(x) }, simplify=T) + seqsubstitution[,which(s2c(paramSeq)!="-")]<- gaplessSeqSubstitution + #matTargeting <- param_vecMutabilities %*% seqsubstitution + matTargeting <- sweep(seqsubstitution,2,param_vecMutabilities,`*`) + dimnames( matTargeting ) = list(NUCLEOTIDES,1:(seqLen)) + return (matTargeting) + } + } + + # Compute the mutations types + computeMutationTypes <- function(param_strSeq){ + #cat(param_strSeq,"\n") + #vecSeq = trimToLastCodon(param_strSeq) + lenSeq = nchar(param_strSeq) + vecCodons = sapply({1:(lenSeq/3)}*3-2,function(x){substr(param_strSeq,x,x+2)}) + matMutationTypes = matrix( unlist(CODON_TABLE[,vecCodons]) ,ncol=lenSeq,nrow=4, byrow=F) + dimnames( matMutationTypes ) = list(NUCLEOTIDES,1:(ncol(matMutationTypes))) + return(matMutationTypes) + } + computeMutationTypesFast <- function(param_strSeq){ + matMutationTypes = matrix( CODON_TABLE[,param_strSeq] ,ncol=3,nrow=4, byrow=F) + #dimnames( matMutationTypes ) = list(NUCLEOTIDES,1:(length(vecSeq))) + return(matMutationTypes) + } + mutationTypeOptimized <- function( matOfCodons ){ + apply( matOfCodons,1,function(x){ mutationType(x[2],x[1]) } ) + } + + # Returns a vector of codons 1 mutation away from the given codon + permutateAllCodon <- function(codon){ + cCodon = s2c(codon) + matCodons = t(array(cCodon,dim=c(3,12))) + matCodons[1:4,1] = NUCLEOTIDES + matCodons[5:8,2] = NUCLEOTIDES + matCodons[9:12,3] = NUCLEOTIDES + apply(matCodons,1,c2s) + } + + # Given two codons, tells you if the mutation is R or S (based on your definition) + mutationType <- function(codonFrom,codonTo){ + if(testID==4){ + if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){ + return(NA) + }else{ + mutationType = "S" + if( translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonFrom)) != translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonTo)) ){ + mutationType = "R" + } + if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){ + mutationType = "Stop" + } + return(mutationType) + } + }else if(testID==5){ + if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){ + return(NA) + }else{ + if(codonFrom==codonTo){ + mutationType = "S" + }else{ + codonFrom = s2c(codonFrom) + codonTo = s2c(codonTo) + mutationType = "Stop" + nucOfI = codonFrom[which(codonTo!=codonFrom)] + if(nucOfI=="C"){ + mutationType = "R" + }else if(nucOfI=="G"){ + mutationType = "S" + } + } + return(mutationType) + } + }else{ + if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){ + return(NA) + }else{ + mutationType = "S" + if( translateCodonToAminoAcid(codonFrom) != translateCodonToAminoAcid(codonTo) ){ + mutationType = "R" + } + if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){ + mutationType = "Stop" + } + return(mutationType) + } + } + } + + + #given a mat of targeting & it's corresponding mutationtypes returns + #a vector of Exp_RCDR,Exp_SCDR,Exp_RFWR,Exp_RFWR + computeExpected <- function(paramTargeting,paramMutationTypes){ + # Replacements + RPos = which(paramMutationTypes=="R") + #FWR + Exp_R_FWR = sum(paramTargeting[ RPos[which(FWR_Nuc_Mat[RPos]==T)] ],na.rm=T) + #CDR + Exp_R_CDR = sum(paramTargeting[ RPos[which(CDR_Nuc_Mat[RPos]==T)] ],na.rm=T) + # Silents + SPos = which(paramMutationTypes=="S") + #FWR + Exp_S_FWR = sum(paramTargeting[ SPos[which(FWR_Nuc_Mat[SPos]==T)] ],na.rm=T) + #CDR + Exp_S_CDR = sum(paramTargeting[ SPos[which(CDR_Nuc_Mat[SPos]==T)] ],na.rm=T) + + return(c(Exp_R_CDR,Exp_S_CDR,Exp_R_FWR,Exp_S_FWR)) + } + + # Count the mutations in a sequence + # each mutation is treated independently + analyzeMutations2NucUri_website <- function( rev_in_matrix ){ + paramGL = rev_in_matrix[2,] + paramSeq = rev_in_matrix[1,] + + #Fill seq with GL seq if gapped + #if( any(paramSeq=="-") ){ + # gapPos_Seq = which(paramSeq=="-") + # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "-"] + # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] + #} + + + #if( any(paramSeq=="N") ){ + # gapPos_Seq = which(paramSeq=="N") + # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"] + # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] + #} + + analyzeMutations2NucUri( matrix(c( paramGL, paramSeq ),2,length(paramGL),byrow=T) ) + + } + + #1 = GL + #2 = Seq + analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=T) ){ + paramGL = in_matrix[2,] + paramSeq = in_matrix[1,] + paramSeqUri = paramGL + #mutations = apply(rbind(paramGL,paramSeq), 2, function(x){!x[1]==x[2]}) + mutations_val = paramGL != paramSeq + if(any(mutations_val)){ + mutationPos = {1:length(mutations_val)}[mutations_val] + mutationPos = mutationPos[sapply(mutationPos, function(x){!any(paramSeq[getCodonPos(x)]=="N")})] + length_mutations =length(mutationPos) + mutationInfo = rep(NA,length_mutations) + if(any(mutationPos)){ + + pos<- mutationPos + pos_array<-array(sapply(pos,getCodonPos)) + codonGL = paramGL[pos_array] + + codonSeq = sapply(pos,function(x){ + seqP = paramGL[getCodonPos(x)] + muCodonPos = {x-1}%%3+1 + seqP[muCodonPos] = paramSeq[x] + return(seqP) + }) + GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s) + Seqcodons = apply(codonSeq,2,c2s) + mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + names(mutationInfo) = mutationPos + } + if(any(!is.na(mutationInfo))){ + return(mutationInfo[!is.na(mutationInfo)]) + }else{ + return(NA) + } + + + }else{ + return (NA) + } + } + + processNucMutations2 <- function(mu){ + if(!is.na(mu)){ + #R + if(any(mu=="R")){ + Rs = mu[mu=="R"] + nucNumbs = as.numeric(names(Rs)) + R_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T) + R_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T) + }else{ + R_CDR = 0 + R_FWR = 0 + } + + #S + if(any(mu=="S")){ + Ss = mu[mu=="S"] + nucNumbs = as.numeric(names(Ss)) + S_CDR = sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=T) + S_FWR = sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=T) + }else{ + S_CDR = 0 + S_FWR = 0 + } + + + retVec = c(R_CDR,S_CDR,R_FWR,S_FWR) + retVec[is.na(retVec)]=0 + return(retVec) + }else{ + return(rep(0,4)) + } + } + + + ## Z-score Test + computeZScore <- function(mat, test="Focused"){ + matRes <- matrix(NA,ncol=2,nrow=(nrow(mat))) + if(test=="Focused"){ + #Z_Focused_CDR + #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T ) + P = apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}) + R_mean = apply(cbind(mat[,c(1,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,1] = (mat[,1]-R_mean)/R_sd + + #Z_Focused_FWR + #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T ) + P = apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}) + R_mean = apply(cbind(mat[,c(3,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,2] = (mat[,3]-R_mean)/R_sd + } + + if(test=="Local"){ + #Z_Focused_CDR + #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T ) + P = apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}) + R_mean = apply(cbind(mat[,c(1,2)],P),1,function(x){x[3]*(sum(x[1:2]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,1] = (mat[,1]-R_mean)/R_sd + + #Z_Focused_FWR + #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T ) + P = apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}) + R_mean = apply(cbind(mat[,c(3,4)],P),1,function(x){x[3]*(sum(x[1:2]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,2] = (mat[,3]-R_mean)/R_sd + } + + if(test=="Imbalanced"){ + #Z_Focused_CDR + #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T ) + P = apply(mat[,5:8],1,function(x){((x[1]+x[2])/sum(x))}) + R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,1] = (mat[,1]-R_mean)/R_sd + + #Z_Focused_FWR + #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T ) + P = apply(mat[,5:8],1,function(x){((x[3]+x[4])/sum(x))}) + R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,2] = (mat[,3]-R_mean)/R_sd + } + + matRes[is.nan(matRes)] = NA + return(matRes) + } + + # Return a p-value for a z-score + z2p <- function(z){ + p=NA + if( !is.nan(z) && !is.na(z)){ + if(z>0){ + p = (1 - pnorm(z,0,1)) + } else if(z<0){ + p = (-1 * pnorm(z,0,1)) + } else{ + p = 0.5 + } + }else{ + p = NA + } + return(p) + } + + + ## Bayesian Test + + # Fitted parameter for the bayesian framework +BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986) + CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2)) + + # Given x, M & p, returns a pdf + calculate_bayes <- function ( x=3, N=10, p=0.33, + i=CONST_i, + max_sigma=20,length_sigma=4001 + ){ + if(!0%in%N){ + G <- max(length(x),length(N),length(p)) + x=array(x,dim=G) + N=array(N,dim=G) + p=array(p,dim=G) + sigma_s<-seq(-max_sigma,max_sigma,length.out=length_sigma) + sigma_1<-log({i/{1-i}}/{p/{1-p}}) + index<-min(N,60) + y<-dbeta(i,x+BAYESIAN_FITTED[index],N+BAYESIAN_FITTED[index]-x)*(1-p)*p*exp(sigma_1)/({1-p}^2+2*p*{1-p}*exp(sigma_1)+{p^2}*exp(2*sigma_1)) + if(!sum(is.na(y))){ + tmp<-approx(sigma_1,y,sigma_s)$y + tmp/sum(tmp)/{2*max_sigma/{length_sigma-1}} + }else{ + return(NA) + } + }else{ + return(NA) + } + } + # Given a mat of observed & expected, return a list of CDR & FWR pdf for selection + computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){ + flagOneSeq = F + if(nrow(mat)==1){ + mat=rbind(mat,mat) + flagOneSeq = T + } + if(test=="Focused"){ + #CDR + P = c(apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(1,2,4)],1,function(x){(sum(x))}),0) + X = c(mat[,1],0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(3,2,4)],1,function(x){(sum(x))}),0) + X = c(mat[,3],0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(test=="Local"){ + #CDR + P = c(apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(1,2)],1,function(x){(sum(x))}),0) + X = c(mat[,1],0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(3,4)],1,function(x){(sum(x))}),0) + X = c(mat[,3],0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(test=="Imbalanced"){ + #CDR + P = c(apply(mat[,c(5:8)],1,function(x){((x[1]+x[2])/sum(x))}),0.5) + N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(1:2)],1,function(x){(sum(x))}),0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(5:8)],1,function(x){((x[3]+x[4])/sum(x))}),0.5) + N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(3:4)],1,function(x){(sum(x))}),0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(test=="ImbalancedSilent"){ + #CDR + P = c(apply(mat[,c(6,8)],1,function(x){((x[1])/sum(x))}),0.5) + N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(2,4)],1,function(x){(x[1])}),0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(6,8)],1,function(x){((x[2])/sum(x))}),0.5) + N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(2,4)],1,function(x){(x[2])}),0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(flagOneSeq==T){ + bayesCDR = bayesCDR[1] + bayesFWR = bayesFWR[1] + } + return( list("CDR"=bayesCDR, "FWR"=bayesFWR) ) + } + + ##Covolution + break2chunks<-function(G=1000){ + base<-2^round(log(sqrt(G),2),0) + return(c(rep(base,floor(G/base)-1),base+G-(floor(G/base)*base))) + } + + PowersOfTwo <- function(G=100){ + exponents <- array() + i = 0 + while(G > 0){ + i=i+1 + exponents[i] <- floor( log2(G) ) + G <- G-2^exponents[i] + } + return(exponents) + } + + convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){ + G = ncol(cons) + if(G>1){ + for(gen in log(G,2):1){ + ll<-seq(from=2,to=2^gen,by=2) + sapply(ll,function(l){cons[,l/2]<<-weighted_conv(cons[,l],cons[,l-1],length_sigma=length_sigma)}) + } + } + return( cons[,1] ) + } + + convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){ + if(length(ncol(cons))) G<-ncol(cons) + groups <- PowersOfTwo(G) + matG <- matrix(NA, ncol=length(groups), nrow=length(cons)/G ) + startIndex = 1 + for( i in 1:length(groups) ){ + stopIndex <- 2^groups[i] + startIndex - 1 + if(stopIndex!=startIndex){ + matG[,i] <- convolutionPowersOfTwo( cons[,startIndex:stopIndex], length_sigma=length_sigma ) + startIndex = stopIndex + 1 + } + else { + if(G>1) matG[,i] <- cons[,startIndex:stopIndex] + else matG[,i] <- cons + #startIndex = stopIndex + 1 + } + } + return( list( matG, groups ) ) + } + + weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){ + lx<-length(x) + ly<-length(y) + if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){ + if(w<1){ + y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y + x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y + lx<-length(x1) + ly<-length(y1) + } + else { + y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y + x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y + lx<-length(x1) + ly<-length(y1) + } + } + else{ + x1<-x + y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y + ly<-length(y1) + } + tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y + tmp[tmp<=0] = 0 + return(tmp/sum(tmp)) + } + + calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){ + matG <- listMatG[[1]] + groups <- listMatG[[2]] + i = 1 + resConv <- matG[,i] + denom <- 2^groups[i] + if(length(groups)>1){ + while( i<length(groups) ){ + i = i + 1 + resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma) + #cat({{2^groups[i]}/denom},"\n") + denom <- denom + 2^groups[i] + } + } + return(resConv) + } + + # Given a list of PDFs, returns a convoluted PDF + groupPosteriors <- function( listPosteriors, max_sigma=20, length_sigma=4001 ,Threshold=2 ){ + listPosteriors = listPosteriors[ !is.na(listPosteriors) ] + Length_Postrior<-length(listPosteriors) + if(Length_Postrior>1 & Length_Postrior<=Threshold){ + cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors)) + listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma) + y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma) + return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) + }else if(Length_Postrior==1) return(listPosteriors[[1]]) + else if(Length_Postrior==0) return(NA) + else { + cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors)) + y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma ) + return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) + } + } + + fastConv<-function(cons, max_sigma=20, length_sigma=4001){ + chunks<-break2chunks(G=ncol(cons)) + if(ncol(cons)==3) chunks<-2:1 + index_chunks_end <- cumsum(chunks) + index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1) + index_chunks <- cbind(index_chunks_start,index_chunks_end) + + case <- sum(chunks!=chunks[1]) + if(case==1) End <- max(1,((length(index_chunks)/2)-1)) + else End <- max(1,((length(index_chunks)/2))) + + firsts <- sapply(1:End,function(i){ + indexes<-index_chunks[i,1]:index_chunks[i,2] + convolutionPowersOfTwoByTwos(cons[ ,indexes])[[1]] + }) + if(case==0){ + result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts) ) + }else if(case==1){ + last<-list(calculate_bayesGHelper( + convolutionPowersOfTwoByTwos( cons[ ,index_chunks[length(index_chunks)/2,1]:index_chunks[length(index_chunks)/2,2]] ) + ),0) + result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts)) + result<-calculate_bayesGHelper( + list( + cbind( + result_first,last[[1]]), + c(log(index_chunks_end[length(index_chunks)/2-1],2),log(index_chunks[length(index_chunks)/2,2]-index_chunks[length(index_chunks)/2,1]+1,2)) + ) + ) + } + return(as.vector(result)) + } + + # Computes the 95% CI for a pdf + calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){ + if(length(Pdf)!=length_sigma) return(NA) + sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma) + cdf = cumsum(Pdf) + cdf = cdf/cdf[length(cdf)] + return( c(sigma_s[findInterval(low,cdf)-1] , sigma_s[findInterval(up,cdf)]) ) + } + + # Computes a mean for a pdf + calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){ + if(length(Pdf)!=length_sigma) return(NA) + sigma_s=seq(-max_sigma,max_sigma,length.out=length_sigma) + norm = {length_sigma-1}/2/max_sigma + return( (Pdf%*%sigma_s/norm) ) + } + + # Returns the mean, and the 95% CI for a pdf + calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){ + if(is.na(Pdf)) + return(rep(NA,3)) + bCI = calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma) + bMean = calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma) + return(c(bMean, bCI)) + } + + # Computes the p-value of a pdf + computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){ + if(length(Pdf)>1){ + norm = {length_sigma-1}/2/max_sigma + pVal = {sum(Pdf[1:{{length_sigma-1}/2}]) + Pdf[{{length_sigma+1}/2}]/2}/norm + if(pVal>0.5){ + pVal = pVal-1 + } + return(pVal) + }else{ + return(NA) + } + } + + # Compute p-value of two distributions + compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){ + #print(c(length(dens1),length(dens2))) + if(length(dens1)>1 & length(dens2)>1 ){ + dens1<-dens1/sum(dens1) + dens2<-dens2/sum(dens2) + cum2 <- cumsum(dens2)-dens2/2 + tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i]))) + #print(tmp) + if(tmp>0.5)tmp<-tmp-1 + return( tmp ) + } + else { + return(NA) + } + #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N) + } + + # get number of seqeunces contributing to the sigma (i.e. seqeunces with mutations) + numberOfSeqsWithMutations <- function(matMutations,test=1){ + if(test==4)test=2 + cdrSeqs <- 0 + fwrSeqs <- 0 + if(test==1){#focused + cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2,4)]) }) + fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4,2)]) }) + if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0) + if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0) + } + if(test==2){#local + cdrMutations <- apply(matMutations, 1, function(x){ sum(x[c(1,2)]) }) + fwrMutations <- apply(matMutations, 1, function(x){ sum(x[c(3,4)]) }) + if( any(which(cdrMutations>0)) ) cdrSeqs <- sum(cdrMutations>0) + if( any(which(fwrMutations>0)) ) fwrSeqs <- sum(fwrMutations>0) + } + return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs)) +} + + + +shadeColor <- function(sigmaVal=NA,pVal=NA){ + if(is.na(sigmaVal) & is.na(pVal)) return(NA) + if(is.na(sigmaVal) & !is.na(pVal)) sigmaVal=sign(pVal) + if(is.na(pVal) || pVal==1 || pVal==0){ + returnColor = "#FFFFFF"; + }else{ + colVal=abs(pVal); + + if(sigmaVal<0){ + if(colVal>0.1) + returnColor = "#CCFFCC"; + if(colVal<=0.1) + returnColor = "#99FF99"; + if(colVal<=0.050) + returnColor = "#66FF66"; + if(colVal<=0.010) + returnColor = "#33FF33"; + if(colVal<=0.005) + returnColor = "#00FF00"; + + }else{ + if(colVal>0.1) + returnColor = "#FFCCCC"; + if(colVal<=0.1) + returnColor = "#FF9999"; + if(colVal<=0.05) + returnColor = "#FF6666"; + if(colVal<=0.01) + returnColor = "#FF3333"; + if(colVal<0.005) + returnColor = "#FF0000"; + } + } + + return(returnColor) +} + + + +plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){ + if(!log){ + x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac + y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac + }else { + if(log==2){ + x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac + y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac) + } + if(log==1){ + x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac) + y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac + } + if(log==3){ + x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac) + y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac) + } + } + return(c("x"=x,"y"=y)) +} + +# SHMulation + + # Based on targeting, introduce a single mutation & then update the targeting + oneMutation <- function(){ + # Pick a postion + mutation + posMutation = sample(1:(seqGermlineLen*4),1,replace=F,prob=as.vector(seqTargeting)) + posNucNumb = ceiling(posMutation/4) # Nucleotide number + posNucKind = 4 - ( (posNucNumb*4) - posMutation ) # Nuc the position mutates to + + #mutate the simulation sequence + seqSimVec <- s2c(seqSim) + seqSimVec[posNucNumb] <- NUCLEOTIDES[posNucKind] + seqSim <<- c2s(seqSimVec) + + #update Mutability, Targeting & MutationsTypes + updateMutabilityNTargeting(posNucNumb) + + #return(c(posNucNumb,NUCLEOTIDES[posNucKind])) + return(posNucNumb) + } + + updateMutabilityNTargeting <- function(position){ + min_i<-max((position-2),1) + max_i<-min((position+2),nchar(seqSim)) + min_ii<-min(min_i,3) + + #mutability - update locally + seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)] + + + #targeting - compute locally + seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i]) + seqTargeting[is.na(seqTargeting)] <<- 0 + #mutCodonPos = getCodonPos(position) + mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3]) + #cat(mutCodonPos,"\n") + mutTypeCodon = getCodonPos(position) + seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) ) + # Stop = 0 + if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){ + seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0 + } + + + #Selection + selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R")) + # CDR + selectedCDR = selectedPos[which(matCDR[selectedPos]==T)] + seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR] * exp(selCDR) + seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K + + # FWR + selectedFWR = selectedPos[which(matFWR[selectedPos]==T)] + seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR] * exp(selFWR) + seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K + + } + + + + # Validate the mutation: if the mutation has not been sampled before validate it, else discard it. + validateMutation <- function(){ + if( !(mutatedPos%in%mutatedPositions) ){ # if it's a new mutation + uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1 + mutatedPositions[uniqueMutationsIntroduced] <<- mutatedPos + }else{ + if(substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos)){ # back to germline mutation + mutatedPositions <<- mutatedPositions[-which(mutatedPositions==mutatedPos)] + uniqueMutationsIntroduced <<- uniqueMutationsIntroduced - 1 + } + } + } + + + + # Places text (labels) at normalized coordinates + myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){ + par(xpd=TRUE) + if(!log) + text(par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac,par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac,w,cex=cex,adj=adj,col=thecol) + else { + if(log==2) + text( + par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac, + 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac), + w,cex=cex,adj=adj,col=thecol) + if(log==1) + text( + 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac), + par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac, + w,cex=cex,adj=adj,col=thecol) + if(log==3) + text( + 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac), + 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac), + w,cex=cex,adj=adj,col=thecol) + } + par(xpd=FALSE) + } + + + + # Count the mutations in a sequence + analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){ + + paramGL = s2c(matInput[inputMatrixIndex,2]) + paramSeq = s2c(matInput[inputMatrixIndex,1]) + + #if( any(paramSeq=="N") ){ + # gapPos_Seq = which(paramSeq=="N") + # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"] + # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] + #} + mutations_val = paramGL != paramSeq + + if(any(mutations_val)){ + mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val] + length_mutations =length(mutationPos) + mutationInfo = rep(NA,length_mutations) + + pos<- mutationPos + pos_array<-array(sapply(pos,getCodonPos)) + codonGL = paramGL[pos_array] + codonSeqWhole = paramSeq[pos_array] + codonSeq = sapply(pos,function(x){ + seqP = paramGL[getCodonPos(x)] + muCodonPos = {x-1}%%3+1 + seqP[muCodonPos] = paramSeq[x] + return(seqP) + }) + GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s) + SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s) + Seqcodons = apply(codonSeq,2,c2s) + + mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + names(mutationInfo) = mutationPos + + mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + names(mutationInfoWhole) = mutationPos + + mutationInfo <- mutationInfo[!is.na(mutationInfo)] + mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)] + + if(any(!is.na(mutationInfo))){ + + #Filter based on Stop (at the codon level) + if(seqWithStops==1){ + nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"]) + mutationInfo = mutationInfo[nucleotidesAtStopCodons] + mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons] + }else{ + countStops = sum(mutationInfoWhole=="Stop") + if(seqWithStops==2 & countStops==0) mutationInfo = NA + if(seqWithStops==3 & countStops>0) mutationInfo = NA + } + + if(any(!is.na(mutationInfo))){ + #Filter mutations based on multipleMutation + if(multipleMutation==1 & !is.na(mutationInfo)){ + mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole))) + tableMutationCodons <- table(mutationCodons) + codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1])) + if(any(codonsWithMultipleMutations)){ + #remove the nucleotide mutations in the codons with multiple mutations + mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)] + #replace those codons with Ns in the input sequence + paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N" + matInput[inputMatrixIndex,1] <<- c2s(paramSeq) + } + } + + #Filter mutations based on the model + if(any(mutationInfo)==T | is.na(any(mutationInfo))){ + + if(model==1 & !is.na(mutationInfo)){ + mutationInfo <- mutationInfo[mutationInfo=="S"] + } + if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(mutationInfo) + else return(NA) + }else{ + return(NA) + } + }else{ + return(NA) + } + + + }else{ + return(NA) + } + + + }else{ + return (NA) + } + } + + analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){ + + paramGL = s2c(inputArray[2]) + paramSeq = s2c(inputArray[1]) + inputSeq <- inputArray[1] + #if( any(paramSeq=="N") ){ + # gapPos_Seq = which(paramSeq=="N") + # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"] + # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] + #} + mutations_val = paramGL != paramSeq + + if(any(mutations_val)){ + mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val] + length_mutations =length(mutationPos) + mutationInfo = rep(NA,length_mutations) + + pos<- mutationPos + pos_array<-array(sapply(pos,getCodonPos)) + codonGL = paramGL[pos_array] + codonSeqWhole = paramSeq[pos_array] + codonSeq = sapply(pos,function(x){ + seqP = paramGL[getCodonPos(x)] + muCodonPos = {x-1}%%3+1 + seqP[muCodonPos] = paramSeq[x] + return(seqP) + }) + GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s) + SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s) + Seqcodons = apply(codonSeq,2,c2s) + + mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + names(mutationInfo) = mutationPos + + mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + names(mutationInfoWhole) = mutationPos + + mutationInfo <- mutationInfo[!is.na(mutationInfo)] + mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)] + + if(any(!is.na(mutationInfo))){ + + #Filter based on Stop (at the codon level) + if(seqWithStops==1){ + nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"]) + mutationInfo = mutationInfo[nucleotidesAtStopCodons] + mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons] + }else{ + countStops = sum(mutationInfoWhole=="Stop") + if(seqWithStops==2 & countStops==0) mutationInfo = NA + if(seqWithStops==3 & countStops>0) mutationInfo = NA + } + + if(any(!is.na(mutationInfo))){ + #Filter mutations based on multipleMutation + if(multipleMutation==1 & !is.na(mutationInfo)){ + mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole))) + tableMutationCodons <- table(mutationCodons) + codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1])) + if(any(codonsWithMultipleMutations)){ + #remove the nucleotide mutations in the codons with multiple mutations + mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)] + #replace those codons with Ns in the input sequence + paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N" + #matInput[inputMatrixIndex,1] <<- c2s(paramSeq) + inputSeq <- c2s(paramSeq) + } + } + + #Filter mutations based on the model + if(any(mutationInfo)==T | is.na(any(mutationInfo))){ + + if(model==1 & !is.na(mutationInfo)){ + mutationInfo <- mutationInfo[mutationInfo=="S"] + } + if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq)) + else return(list(NA,inputSeq)) + }else{ + return(list(NA,inputSeq)) + } + }else{ + return(list(NA,inputSeq)) + } + + + }else{ + return(list(NA,inputSeq)) + } + + + }else{ + return (list(NA,inputSeq)) + } + } + + # triMutability Background Count + buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){ + + #rowOrigMatInput = matInput[inputMatrixIndex,] + seqGL = gsub("-", "", matInput[inputMatrixIndex,2]) + seqInput = gsub("-", "", matInput[inputMatrixIndex,1]) + #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL) + tempInput <- cbind(seqInput,seqGL) + seqLength = nchar(seqGL) + list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops) + mutationCount <- list_analyzeMutationsFixed[[1]] + seqInput <- list_analyzeMutationsFixed[[2]] + BackgroundMatrix = mutabilityMatrix + MutationMatrix = mutabilityMatrix + MutationCountMatrix = mutabilityMatrix + if(!is.na(mutationCount)){ + if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ + + fivermerStartPos = 1:(seqLength-4) + fivemerLength <- length(fivermerStartPos) + fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4)) + fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4)) + + #Background + for(fivemerIndex in 1:fivemerLength){ + fivemer = fivemerGL[fivemerIndex] + if(!any(grep("N",fivemer))){ + fivemerCodonPos = fivemerCodon(fivemerIndex) + fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) + fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3]) + + # All mutations model + #if(!any(grep("N",fivemerReadingFrameCodon))){ + if(model==0){ + if(stopMutations==0){ + if(!any(grep("N",fivemerReadingFrameCodonInputSeq))) + BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1) + }else{ + if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){ + positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1] + BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon]) + } + } + }else{ # Only silent mutations + if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){ + positionWithinCodon = which(fivemerCodonPos==3) + BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon]) + } + } + #} + } + } + + #Mutations + if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"] + if(model==1) mutationCount = mutationCount[mutationCount=="S"] + mutationPositions = as.numeric(names(mutationCount)) + mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)] + mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)] + countMutations = 0 + for(mutationPosition in mutationPositions){ + fivemerIndex = mutationPosition-2 + fivemer = fivemerSeq[fivemerIndex] + GLfivemer = fivemerGL[fivemerIndex] + fivemerCodonPos = fivemerCodon(fivemerIndex) + fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) + fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3]) + if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){ + if(model==0){ + countMutations = countMutations + 1 + MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1) + MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1) + }else{ + if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){ + countMutations = countMutations + 1 + positionWithinCodon = which(fivemerCodonPos==3) + glNuc = substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon) + inputNuc = substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon) + MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc]) + MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1) + } + } + } + } + + seqMutability = MutationMatrix/BackgroundMatrix + seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE) + #cat(inputMatrixIndex,"\t",countMutations,"\n") + return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix)) + + } + } + + } + + #Returns the codon position containing the middle nucleotide + fivemerCodon <- function(fivemerIndex){ + codonPos = list(2:4,1:3,3:5) + fivemerType = fivemerIndex%%3 + return(codonPos[[fivemerType+1]]) + } + + #returns probability values for one mutation in codons resulting in R, S or Stop + probMutations <- function(typeOfMutation){ + matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(words(alphabet = c(NUCLEOTIDES,"N"), length=3),c(1:3))) + for(codon in rownames(matMutationProb)){ + if( !any(grep("N",codon)) ){ + for(muPos in 1:3){ + matCodon = matrix(rep(s2c(codon),3),nrow=3,ncol=3,byrow=T) + glNuc = matCodon[1,muPos] + matCodon[,muPos] = canMutateTo(glNuc) + substitutionRate = substitution[glNuc,matCodon[,muPos]] + typeOfMutations = apply(rbind(rep(codon,3),apply(matCodon,1,c2s)),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + matMutationProb[codon,muPos] <- sum(substitutionRate[typeOfMutations==typeOfMutation]) + } + } + } + + return(matMutationProb) + } + + + + +#Mapping Trinucleotides to fivemers +mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){ + rownames(triMutability) <- triMutability_Names + Fivemer<-rep(NA,1024) + names(Fivemer)<-words(alphabet=NUCLEOTIDES,length=5) + Fivemer<-sapply(names(Fivemer),function(Word)return(sum( c(triMutability[substring(Word,3,5),1],triMutability[substring(Word,2,4),2],triMutability[substring(Word,1,3),3]),na.rm=TRUE))) + Fivemer<-Fivemer/sum(Fivemer) + return(Fivemer) +} + +collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){ + Indices<-substring(names(Fivemer),3,3)==NUC + Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position)) + tapply(which(Indices),Factors,function(i)weighted.mean(Fivemer[i],Weights[i],na.rm=TRUE)) +} + + + +CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){ + Indices<-substring(names(Fivemer),3,3)==NUC + Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position)) + tapply(which(Indices),Factors,function(i)sum(Weights[i],na.rm=TRUE)) +} + +#Uses the real counts of the mutated fivemers +CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){ + Indices<-substring(names(Fivemer),3,3)==NUC + Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position)) + tapply(which(Indices),Factors,function(i)sum(Counts[i],na.rm=TRUE)) +} + +bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){ +N<-sum(x) +if(N){ +p<-x/N +k<-length(x)-1 +tmp<-rmultinom(M, size = N, prob=p) +tmp_p<-apply(tmp,2,function(y)y/N) +(apply(tmp_p,1,function(y)quantile(y,c(alpha/2/k,1-alpha/2/k)))) +} +else return(matrix(0,2,length(x))) +} + + + + +bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){ + +N<-sum(x) +k<-length(x) +y<-rep(1:k,x) +tmp<-sapply(1:M,function(i)sample(y,n)) +if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))/n +if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))/n +(apply(tmp_p,1,function(z)quantile(z,c(alpha/2/(k-1),1-alpha/2/(k-1))))) +} + + + +p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){ +n=sum(x_obs) +N<-sum(x) +k<-length(x) +y<-rep(1:k,x) +tmp<-sapply(1:M,function(i)sample(y,n)) +if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i))) +if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i))) +tmp<-rbind(sapply(1:3,function(i)sum(tmp_p[i,]>=x_obs[i])/M), +sapply(1:3,function(i)sum(tmp_p[i,]<=x_obs[i])/M)) +sapply(1:3,function(i){if(tmp[1,i]>=tmp[2,i])return(-tmp[2,i])else return(tmp[1,i])}) +} + +#"D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA" +# Remove SNPs from IMGT germline segment alleles +generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){ + repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F) + alleleNames <- sapply(names(repertoireIn),function(x)strsplit(x,"|",fixed=TRUE)[[1]][2]) + SNPs <- tapply(repertoireIn,sapply(alleleNames,function(x)strsplit(x,"*",fixed=TRUE)[[1]][1]),function(x){ + Indices<-NULL + for(i in 1:length(x)){ + firstSeq = s2c(x[[1]]) + iSeq = s2c(x[[i]]) + Indices<-c(Indices,which(firstSeq[1:320]!=iSeq[1:320] & firstSeq[1:320]!="." & iSeq[1:320]!="." )) + } + return(sort(unique(Indices))) + }) + repertoireOut <- repertoireIn + repertoireOut <- lapply(names(repertoireOut), function(repertoireName){ + alleleName <- strsplit(repertoireName,"|",fixed=TRUE)[[1]][2] + geneSegmentName <- strsplit(alleleName,"*",fixed=TRUE)[[1]][1] + alleleSeq <- s2c(repertoireOut[[repertoireName]]) + alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N" + alleleSeq <- c2s(alleleSeq) + repertoireOut[[repertoireName]] <- alleleSeq + }) + names(repertoireOut) <- names(repertoireIn) + write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile) + +} + + + + + + +############ +groupBayes2 = function(indexes, param_resultMat){ + + BayesGDist_Focused_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[4])})) + BayesGDist_Focused_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[2]+x[4])})) + #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])})) + #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])})) + #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])})) + #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])})) + return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR, + "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) ) + #"BayesGDist_Local_CDR"=BayesGDist_Local_CDR, + #"BayesGDist_Local_FWR" = BayesGDist_Local_FWR)) +# "BayesGDist_Global_CDR" = BayesGDist_Global_CDR, +# "BayesGDist_Global_FWR" = BayesGDist_Global_FWR) ) + + +} + + +calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){ + G <- max(length(x),length(N),length(p)) + x=array(x,dim=G) + N=array(N,dim=G) + p=array(p,dim=G) + + indexOfZero = N>0 & p>0 + N = N[indexOfZero] + x = x[indexOfZero] + p = p[indexOfZero] + G <- length(x) + + if(G){ + + cons<-array( dim=c(length_sigma,G) ) + if(G==1) { + return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma)) + } + else { + for(g in 1:G) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma) + listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma) + y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma) + return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) + } + }else{ + return(NA) + } +} + + +calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){ + matG <- listMatG[[1]] + groups <- listMatG[[2]] + i = 1 + resConv <- matG[,i] + denom <- 2^groups[i] + if(length(groups)>1){ + while( i<length(groups) ){ + i = i + 1 + resConv <- weighted_conv(resConv, matG[,i], w= {{2^groups[i]}/denom} ,length_sigma=length_sigma) + #cat({{2^groups[i]}/denom},"\n") + denom <- denom + 2^groups[i] + } + } + return(resConv) +} + +weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){ +lx<-length(x) +ly<-length(y) +if({lx<m}| {{lx*w}<m}| {{ly}<m}| {{ly*w}<m}){ +if(w<1){ +y1<-approx(1:ly,y,seq(1,ly,length.out=m))$y +x1<-approx(1:lx,x,seq(1,lx,length.out=m/w))$y +lx<-length(x1) +ly<-length(y1) +} +else { +y1<-approx(1:ly,y,seq(1,ly,length.out=m*w))$y +x1<-approx(1:lx,x,seq(1,lx,length.out=m))$y +lx<-length(x1) +ly<-length(y1) +} +} +else{ +x1<-x +y1<-approx(1:ly,y,seq(1,ly,length.out=floor(lx*w)))$y +ly<-length(y1) +} +tmp<-approx(x=1:(lx+ly-1),y=convolve(x1,rev(y1),type="open"),xout=seq(1,lx+ly-1,length.out=length_sigma))$y +tmp[tmp<=0] = 0 +return(tmp/sum(tmp)) +} + +######################## + + + + +mutabilityMatrixONE<-rep(0,4) +names(mutabilityMatrixONE)<-NUCLEOTIDES + + # triMutability Background Count + buildMutabilityModelONE <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){ + + #rowOrigMatInput = matInput[inputMatrixIndex,] + seqGL = gsub("-", "", matInput[inputMatrixIndex,2]) + seqInput = gsub("-", "", matInput[inputMatrixIndex,1]) + matInput[inputMatrixIndex,] <<- c(seqInput,seqGL) + seqLength = nchar(seqGL) + mutationCount <- analyzeMutations(inputMatrixIndex, model, multipleMutation, seqWithStops) + BackgroundMatrix = mutabilityMatrixONE + MutationMatrix = mutabilityMatrixONE + MutationCountMatrix = mutabilityMatrixONE + if(!is.na(mutationCount)){ + if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")<length(mutationCount))) | (model==1 & (sum(mutationCount=="S")>0)) ){ + +# ONEmerStartPos = 1:(seqLength) +# ONEmerLength <- length(ONEmerStartPos) + ONEmerGL <- s2c(seqGL) + ONEmerSeq <- s2c(seqInput) + + #Background + for(ONEmerIndex in 1:seqLength){ + ONEmer = ONEmerGL[ONEmerIndex] + if(ONEmer!="N"){ + ONEmerCodonPos = getCodonPos(ONEmerIndex) + ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos]) + ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] ) + + # All mutations model + #if(!any(grep("N",ONEmerReadingFrameCodon))){ + if(model==0){ + if(stopMutations==0){ + if(!any(grep("N",ONEmerReadingFrameCodonInputSeq))) + BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1) + }else{ + if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){ + positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1] + BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon]) + } + } + }else{ # Only silent mutations + if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){ + positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex) + BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon]) + } + } + } + } + } + + #Mutations + if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"] + if(model==1) mutationCount = mutationCount[mutationCount=="S"] + mutationPositions = as.numeric(names(mutationCount)) + mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)] + mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)] + countMutations = 0 + for(mutationPosition in mutationPositions){ + ONEmerIndex = mutationPosition + ONEmer = ONEmerSeq[ONEmerIndex] + GLONEmer = ONEmerGL[ONEmerIndex] + ONEmerCodonPos = getCodonPos(ONEmerIndex) + ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos]) + ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos]) + if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){ + if(model==0){ + countMutations = countMutations + 1 + MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1) + MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1) + }else{ + if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){ + countMutations = countMutations + 1 + positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex) + glNuc = substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon) + inputNuc = substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon) + MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc]) + MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1) + } + } + } + } + + seqMutability = MutationMatrix/BackgroundMatrix + seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE) + #cat(inputMatrixIndex,"\t",countMutations,"\n") + return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix)) +# tmp<-list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix) + } + } + +################ +# $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $ + +trim <- function(s, recode.factor=TRUE, ...) + UseMethod("trim", s) + +trim.default <- function(s, recode.factor=TRUE, ...) + s + +trim.character <- function(s, recode.factor=TRUE, ...) +{ + s <- sub(pattern="^ +", replacement="", x=s) + s <- sub(pattern=" +$", replacement="", x=s) + s +} + +trim.factor <- function(s, recode.factor=TRUE, ...) +{ + levels(s) <- trim(levels(s)) + if(recode.factor) { + dots <- list(x=s, ...) + if(is.null(dots$sort)) dots$sort <- sort + s <- do.call(what=reorder.factor, args=dots) + } + s +} + +trim.list <- function(s, recode.factor=TRUE, ...) + lapply(s, trim, recode.factor=recode.factor, ...) + +trim.data.frame <- function(s, recode.factor=TRUE, ...) +{ + s[] <- trim.list(s, recode.factor=recode.factor, ...) + s +} +####################################### +# Compute the expected for each sequence-germline pair by codon +getExpectedIndividualByCodon <- function(matInput){ +if( any(grep("multicore",search())) ){ + facGL <- factor(matInput[,2]) + facLevels = levels(facGL) + LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){ + computeMutabilities(facLevels[x]) + }) + facIndex = match(facGL,facLevels) + + LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){ + cInput = rep(NA,nchar(matInput[x,1])) + cInput[s2c(matInput[x,1])!="N"] = 1 + LisGLs_MutabilityU[[facIndex[x]]] * cInput + }) + + LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){ + computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) + }) + + LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){ + #print(x) + computeMutationTypes(matInput[x,2]) + }) + + LisGLs_R_Exp = mclapply(1:nrow(matInput), function(x){ + Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") + sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) + } + ) + }) + + LisGLs_S_Exp = mclapply(1:nrow(matInput), function(x){ + Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S") + sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T ) + } + ) + }) + + Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) ) + }else{ + facGL <- factor(matInput[,2]) + facLevels = levels(facGL) + LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){ + computeMutabilities(facLevels[x]) + }) + facIndex = match(facGL,facLevels) + + LisGLs_Mutability = lapply(1:nrow(matInput), function(x){ + cInput = rep(NA,nchar(matInput[x,1])) + cInput[s2c(matInput[x,1])!="N"] = 1 + LisGLs_MutabilityU[[facIndex[x]]] * cInput + }) + + LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){ + computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) + }) + + LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){ + #print(x) + computeMutationTypes(matInput[x,2]) + }) + + LisGLs_R_Exp = lapply(1:nrow(matInput), function(x){ + Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") + sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) + } + ) + }) + + LisGLs_S_Exp = lapply(1:nrow(matInput), function(x){ + Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S") + sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T ) + } + ) + }) + + Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) ) + } +} + +# getObservedMutationsByCodon <- function(listMutations){ +# numbSeqs <- length(listMutations) +# obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3)))) +# obsMu_S <- obsMu_R +# temp <- mclapply(1:length(listMutations), function(i){ +# arrMutations = listMutations[[i]] +# RPos = as.numeric(names(arrMutations)[arrMutations=="R"]) +# RPos <- sapply(RPos,getCodonNumb) +# if(any(RPos)){ +# tabR <- table(RPos) +# obsMu_R[i,as.numeric(names(tabR))] <<- tabR +# } +# +# SPos = as.numeric(names(arrMutations)[arrMutations=="S"]) +# SPos <- sapply(SPos,getCodonNumb) +# if(any(SPos)){ +# tabS <- table(SPos) +# obsMu_S[i,names(tabS)] <<- tabS +# } +# } +# ) +# return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) +# } + +getObservedMutationsByCodon <- function(listMutations){ + numbSeqs <- length(listMutations) + obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3)))) + obsMu_S <- obsMu_R + temp <- lapply(1:length(listMutations), function(i){ + arrMutations = listMutations[[i]] + RPos = as.numeric(names(arrMutations)[arrMutations=="R"]) + RPos <- sapply(RPos,getCodonNumb) + if(any(RPos)){ + tabR <- table(RPos) + obsMu_R[i,as.numeric(names(tabR))] <<- tabR + } + + SPos = as.numeric(names(arrMutations)[arrMutations=="S"]) + SPos <- sapply(SPos,getCodonNumb) + if(any(SPos)){ + tabS <- table(SPos) + obsMu_S[i,names(tabS)] <<- tabS + } + } + ) + return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/Baseline_Main.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,388 @@ +######################################################################################### +# License Agreement +# +# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE +# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER +# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE +# OR COPYRIGHT LAW IS PROHIBITED. +# +# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE +# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED +# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN +# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. +# +# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences +# Coded by: Mohamed Uduman & Gur Yaari +# Copyright 2012 Kleinstein Lab +# Version: 1.3 (01/23/2014) +######################################################################################### + +op <- options(); +options(showWarnCalls=FALSE, showErrorCalls=FALSE, warn=-1) +library('seqinr') +if( F & Sys.info()[1]=="Linux"){ + library("multicore") +} + +# Load functions and initialize global variables +source("Baseline_Functions.r") + +# Initialize parameters with user provided arguments + arg <- commandArgs(TRUE) + #arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample") + #arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample") + #arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu") + testID <- as.numeric(arg[1]) # 1 = Focused, 2 = Local + species <- as.numeric(arg[2]) # 1 = Human. 2 = Mouse + substitutionModel <- as.numeric(arg[3]) # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS + mutabilityModel <- as.numeric(arg[4]) # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 2002) , 5 = FiveS + clonal <- as.numeric(arg[5]) # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations + fixIndels <- as.numeric(arg[6]) # 0 = Do nothing, 1 = Try and fix Indels + region <- as.numeric(strsplit(arg[7],":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3 + inputFilePath <- arg[8] # Full path to input file + outputPath <- arg[9] # Full path to location of output files + outputID <- arg[10] # ID for session output + + + if(testID==5){ + traitChangeModel <- 1 + if( !is.na(any(arg[11])) ) traitChangeModel <- as.numeric(arg[11]) # 1 <- Chothia 1998 + initializeTraitChange(traitChangeModel) + } + +# Initialize other parameters/variables + + # Initialzie the codon table ( definitions of R/S ) + computeCodonTable(testID) + + # Initialize + # Test Name + testName<-"Focused" + if(testID==2) testName<-"Local" + if(testID==3) testName<-"Imbalanced" + if(testID==4) testName<-"ImbalancedSilent" + + # Indel placeholders initialization + indelPos <- NULL + delPos <- NULL + insPos <- NULL + + # Initialize in Tranistion & Mutability matrixes + substitution <- initializeSubstitutionMatrix(substitutionModel,species) + mutability <- initializeMutabilityMatrix(mutabilityModel,species) + + # FWR/CDR boundaries + flagTrim <- F + if( is.na(region[7])){ + flagTrim <- T + region[7]<-region[6] + } + readStart = min(region,na.rm=T) + readEnd = max(region,na.rm=T) + if(readStart>1){ + region = region - (readStart - 1) + } + region_Nuc = c( (region[1]*3-2) , (region[2:7]*3) ) + region_Cod = region + + readStart = (readStart*3)-2 + readEnd = (readEnd*3) + + FWR_Nuc <- c( rep(TRUE,(region_Nuc[2])), + rep(FALSE,(region_Nuc[3]-region_Nuc[2])), + rep(TRUE,(region_Nuc[4]-region_Nuc[3])), + rep(FALSE,(region_Nuc[5]-region_Nuc[4])), + rep(TRUE,(region_Nuc[6]-region_Nuc[5])), + rep(FALSE,(region_Nuc[7]-region_Nuc[6])) + ) + CDR_Nuc <- (1-FWR_Nuc) + CDR_Nuc <- as.logical(CDR_Nuc) + FWR_Nuc_Mat <- matrix( rep(FWR_Nuc,4), ncol=length(FWR_Nuc), nrow=4, byrow=T) + CDR_Nuc_Mat <- matrix( rep(CDR_Nuc,4), ncol=length(CDR_Nuc), nrow=4, byrow=T) + + FWR_Codon <- c( rep(TRUE,(region[2])), + rep(FALSE,(region[3]-region[2])), + rep(TRUE,(region[4]-region[3])), + rep(FALSE,(region[5]-region[4])), + rep(TRUE,(region[6]-region[5])), + rep(FALSE,(region[7]-region[6])) + ) + CDR_Codon <- (1-FWR_Codon) + CDR_Codon <- as.logical(CDR_Codon) + + +# Read input FASTA file + tryCatch( + inputFASTA <- baseline.read.fasta(inputFilePath, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F) + , error = function(ex){ + cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n") + q() + } + ) + + if (length(inputFASTA)==1) { + cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n") + q() + } + + # Process sequence IDs/names + names(inputFASTA) <- sapply(names(inputFASTA),function(x){trim(x)}) + + # Convert non nucleotide characters to N + inputFASTA[length(inputFASTA)] = gsub("\t","",inputFASTA[length(inputFASTA)]) + inputFASTA <- lapply(inputFASTA,replaceNonFASTAChars) + + # Process the FASTA file and conver to Matrix[inputSequence, germlineSequence] + processedInput <- processInputAdvanced(inputFASTA) + matInput <- processedInput[[1]] + germlines <- processedInput[[2]] + lenGermlines = length(unique(germlines)) + groups <- processedInput[[3]] + lenGroups = length(unique(groups)) + rm(processedInput) + rm(inputFASTA) + +# # remove clones with less than 2 seqeunces +# tableGL <- table(germlines) +# singletons <- which(tableGL<8) +# rowsToRemove <- match(singletons,germlines) +# if(any(rowsToRemove)){ +# matInput <- matInput[-rowsToRemove,] +# germlines <- germlines[-rowsToRemove] +# groups <- groups[-rowsToRemove] +# } +# +# # remove unproductive seqs +# nonFuctionalSeqs <- sapply(rownames(matInput),function(x){any(grep("unproductive",x))}) +# if(any(nonFuctionalSeqs)){ +# if(sum(nonFuctionalSeqs)==length(germlines)){ +# write.table("Unproductive",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) +# q() +# } +# matInput <- matInput[-which(nonFuctionalSeqs),] +# germlines <- germlines[-which(nonFuctionalSeqs)] +# germlines[1:length(germlines)] <- 1:length(germlines) +# groups <- groups[-which(nonFuctionalSeqs)] +# } +# +# if(class(matInput)=="character"){ +# write.table("All unproductive seqs",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) +# q() +# } +# +# if(nrow(matInput)<10 | is.null(nrow(matInput))){ +# write.table(paste(nrow(matInput), "seqs only",sep=""),file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) +# q() +# } + +# replace leading & trailing "-" with "N: + matInput <- t(apply(matInput,1,replaceLeadingTrailingDashes,readEnd)) + + # Trim (nucleotide) input sequences to the last codon + #matInput[,1] <- apply(matrix(matInput[,1]),1,trimToLastCodon) + +# # Check for Indels +# if(fixIndels){ +# delPos <- fixDeletions(matInput) +# insPos <- fixInsertions(matInput) +# }else{ +# # Check for indels +# indelPos <- checkForInDels(matInput) +# indelPos <- apply(cbind(indelPos[[1]],indelPos[[2]]),1,function(x){(x[1]==T & x[2]==T)}) +# } + + # If indels are present, remove mutations in the seqeunce & throw warning at end + #matInput[indelPos,] <- apply(matrix(matInput[indelPos,],nrow=sum(indelPos),ncol=2),1,function(x){x[1]=x[2]; return(x) }) + + colnames(matInput)=c("Input","Germline") + + # If seqeunces are clonal, create effective sequence for each clone & modify germline/group definitions + germlinesOriginal = NULL + if(clonal){ + germlinesOriginal <- germlines + collapseCloneResults <- tapply(1:nrow(matInput),germlines,function(i){ + collapseClone(matInput[i,1],matInput[i[1],2],readEnd,nonTerminalOnly=(clonal-1)) + }) + matInput = t(sapply(collapseCloneResults,function(x){return(x[[1]])})) + names_groups = tapply(groups,germlines,function(x){names(x[1])}) + groups = tapply(groups,germlines,function(x){array(x[1],dimnames=names(x[1]))}) + names(groups) = names_groups + + names_germlines = tapply(germlines,germlines,function(x){names(x[1])}) + germlines = tapply( germlines,germlines,function(x){array(x[1],dimnames=names(x[1]))} ) + names(germlines) = names_germlines + matInputErrors = sapply(collapseCloneResults,function(x){return(x[[2]])}) + } + + +# Selection Analysis + + +# if (length(germlines)>sequenceLimit) { +# # Code to parallelize processing goes here +# stop( paste("Error: Cannot process more than ", Upper_limit," sequences",sep="") ) +# } + +# if (length(germlines)<sequenceLimit) {} + + # Compute expected mutation frequencies + matExpected <- getExpectedIndividual(matInput) + + # Count observed number of mutations in the different regions + mutations <- lapply( 1:nrow(matInput), function(i){ + #cat(i,"\n") + seqI = s2c(matInput[i,1]) + seqG = s2c(matInput[i,2]) + matIGL = matrix(c(seqI,seqG),ncol=length(seqI),nrow=2,byrow=T) + retVal <- NA + tryCatch( + retVal <- analyzeMutations2NucUri(matIGL) + , error = function(ex){ + retVal <- NA + } + ) + + + return( retVal ) + }) + + matObserved <- t(sapply( mutations, processNucMutations2 )) + numberOfSeqsWithMutations <- numberOfSeqsWithMutations(matObserved, testID) + + #if(sum(numberOfSeqsWithMutations)==0){ + # write.table("No mutated sequences",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) + # q() + #} + + matMutationInfo <- cbind(matObserved,matExpected) + rm(matObserved,matExpected) + + + #Bayesian PDFs + bayes_pdf = computeBayesianScore(matMutationInfo, test=testName, max_sigma=20,length_sigma=4001) + bayesPDF_cdr = bayes_pdf[[1]] + bayesPDF_fwr = bayes_pdf[[2]] + rm(bayes_pdf) + + bayesPDF_germlines_cdr = tapply(bayesPDF_cdr,germlines,function(x) groupPosteriors(x,length_sigma=4001)) + bayesPDF_germlines_fwr = tapply(bayesPDF_fwr,germlines,function(x) groupPosteriors(x,length_sigma=4001)) + + bayesPDF_groups_cdr = tapply(bayesPDF_cdr,groups,function(x) groupPosteriors(x,length_sigma=4001)) + bayesPDF_groups_fwr = tapply(bayesPDF_fwr,groups,function(x) groupPosteriors(x,length_sigma=4001)) + + if(lenGroups>1){ + groups <- c(groups,lenGroups+1) + names(groups)[length(groups)] = "All sequences combined" + bayesPDF_groups_cdr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_cdr,length_sigma=4001) + bayesPDF_groups_fwr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_fwr,length_sigma=4001) + } + + #Bayesian Outputs + bayes_cdr = t(sapply(bayesPDF_cdr,calcBayesOutputInfo)) + bayes_fwr = t(sapply(bayesPDF_fwr,calcBayesOutputInfo)) + bayes_germlines_cdr = t(sapply(bayesPDF_germlines_cdr,calcBayesOutputInfo)) + bayes_germlines_fwr = t(sapply(bayesPDF_germlines_fwr,calcBayesOutputInfo)) + bayes_groups_cdr = t(sapply(bayesPDF_groups_cdr,calcBayesOutputInfo)) + bayes_groups_fwr = t(sapply(bayesPDF_groups_fwr,calcBayesOutputInfo)) + + #P-values + simgaP_cdr = sapply(bayesPDF_cdr,computeSigmaP) + simgaP_fwr = sapply(bayesPDF_fwr,computeSigmaP) + + simgaP_germlines_cdr = sapply(bayesPDF_germlines_cdr,computeSigmaP) + simgaP_germlines_fwr = sapply(bayesPDF_germlines_fwr,computeSigmaP) + + simgaP_groups_cdr = sapply(bayesPDF_groups_cdr,computeSigmaP) + simgaP_groups_fwr = sapply(bayesPDF_groups_fwr,computeSigmaP) + + + #Format output + + # Round expected mutation frequencies to 3 decimal places + matMutationInfo[germlinesOriginal[indelPos],] = NA + if(nrow(matMutationInfo)==1){ + matMutationInfo[5:8] = round(matMutationInfo[,5:8]/sum(matMutationInfo[,5:8],na.rm=T),3) + }else{ + matMutationInfo[,5:8] = t(round(apply(matMutationInfo[,5:8],1,function(x){ return(x/sum(x,na.rm=T)) }),3)) + } + + listPDFs = list() + nRows = length(unique(groups)) + length(unique(germlines)) + length(groups) + + matOutput = matrix(NA,ncol=18,nrow=nRows) + rowNumb = 1 + for(G in unique(groups)){ + #print(G) + matOutput[rowNumb,c(1,2,11:18)] = c("Group",names(groups)[groups==G][1],bayes_groups_cdr[G,],bayes_groups_fwr[G,],simgaP_groups_cdr[G],simgaP_groups_fwr[G]) + listPDFs[[rowNumb]] = list("CDR"=bayesPDF_groups_cdr[[G]],"FWR"=bayesPDF_groups_fwr[[G]]) + names(listPDFs)[rowNumb] = names(groups[groups==paste(G)])[1] + #if(names(groups)[which(groups==G)[1]]!="All sequences combined"){ + gs = unique(germlines[groups==G]) + rowNumb = rowNumb+1 + if( !is.na(gs) ){ + for( g in gs ){ + matOutput[rowNumb,c(1,2,11:18)] = c("Germline",names(germlines)[germlines==g][1],bayes_germlines_cdr[g,],bayes_germlines_fwr[g,],simgaP_germlines_cdr[g],simgaP_germlines_fwr[g]) + listPDFs[[rowNumb]] = list("CDR"=bayesPDF_germlines_cdr[[g]],"FWR"=bayesPDF_germlines_fwr[[g]]) + names(listPDFs)[rowNumb] = names(germlines[germlines==paste(g)])[1] + rowNumb = rowNumb+1 + indexesOfInterest = which(germlines==g) + numbSeqsOfInterest = length(indexesOfInterest) + rowNumb = seq(rowNumb,rowNumb+(numbSeqsOfInterest-1)) + matOutput[rowNumb,] = matrix( c( rep("Sequence",numbSeqsOfInterest), + rownames(matInput)[indexesOfInterest], + c(matMutationInfo[indexesOfInterest,1:4]), + c(matMutationInfo[indexesOfInterest,5:8]), + c(bayes_cdr[indexesOfInterest,]), + c(bayes_fwr[indexesOfInterest,]), + c(simgaP_cdr[indexesOfInterest]), + c(simgaP_fwr[indexesOfInterest]) + ), ncol=18, nrow=numbSeqsOfInterest,byrow=F) + increment=0 + for( ioi in indexesOfInterest){ + listPDFs[[min(rowNumb)+increment]] = list("CDR"=bayesPDF_cdr[[ioi]] , "FWR"=bayesPDF_fwr[[ioi]]) + names(listPDFs)[min(rowNumb)+increment] = rownames(matInput)[ioi] + increment = increment + 1 + } + rowNumb=max(rowNumb)+1 + + } + } + } + colsToFormat = 11:18 + matOutput[,colsToFormat] = formatC( matrix(as.numeric(matOutput[,colsToFormat]), nrow=nrow(matOutput), ncol=length(colsToFormat)) , digits=3) + matOutput[matOutput== " NaN"] = NA + + + + colnames(matOutput) = c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S", + "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S", + paste( rep(testName,6), rep(c("Sigma","CIlower","CIupper"),2),rep(c("CDR","FWR"),each=3), sep="_"), + paste( rep(testName,2), rep("P",2),c("CDR","FWR"), sep="_") + ) + fileName = paste(outputPath,outputID,".txt",sep="") + write.table(matOutput,file=fileName,quote=F,sep="\t",row.names=T,col.names=NA) + fileName = paste(outputPath,outputID,".RData",sep="") + save(listPDFs,file=fileName) + +indelWarning = FALSE +if(sum(indelPos)>0){ + indelWarning = "<P>Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis."; + indelWarning = paste( indelWarning , "<UL>", sep="" ) + for(indels in names(indelPos)[indelPos]){ + indelWarning = paste( indelWarning , "<LI>", indels, "</LI>", sep="" ) + } + indelWarning = paste( indelWarning , "</UL></P>", sep="" ) +} + +cloneWarning = FALSE +if(clonal==1){ + if(sum(matInputErrors)>0){ + cloneWarning = "<P>Warning: The following clones have sequences of unequal length."; + cloneWarning = paste( cloneWarning , "<UL>", sep="" ) + for(clone in names(matInputErrors)[matInputErrors]){ + cloneWarning = paste( cloneWarning , "<LI>", names(germlines)[as.numeric(clone)], "</LI>", sep="" ) + } + cloneWarning = paste( cloneWarning , "</UL></P>", sep="" ) + } +} +cat(paste("Success",outputID,indelWarning,cloneWarning,sep="|"))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,703 @@ +>IGHV1-18*01 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-18*02 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc +>IGHV1-18*03 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1-18*04 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga +>IGHV1-2*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*04 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*05 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga +>IGHV1-24*01 +caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga +>IGHV1-3*01 +caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga +>IGHV1-3*02 +caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga +>IGHV1-38-4*01 +caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca +>IGHV1-45*01 +cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana +>IGHV1-45*02 +cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata +>IGHV1-45*03 +.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga +>IGHV1-46*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-46*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-46*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga +>IGHV1-58*01 +caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga +>IGHV1-58*02 +caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga +>IGHV1-68*01 +caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata +>IGHV1-69*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*02 +caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1-69*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc +>IGHV1-69*04 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*05 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1-69*06 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*07 +.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag +>IGHV1-69*08 +caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*09 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*10 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*11 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*12 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*13 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*14 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69-2*01 +gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga +>IGHV1-69-2*02 +.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag +>IGHV1-69D*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-8*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg +>IGHV1-8*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg +>IGHV1-NL1*01 +caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga +>IGHV1/OR15-1*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga +>IGHV1/OR15-1*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga +>IGHV1/OR15-1*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga +>IGHV1/OR15-1*04 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga +>IGHV1/OR15-2*01 +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1/OR15-2*02 +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1/OR15-2*03 +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1/OR15-3*01 +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1/OR15-3*02 +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1/OR15-3*03 +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1/OR15-4*01 +caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1/OR15-5*01 +.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga +>IGHV1/OR15-5*02 +caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga +>IGHV1/OR15-9*01 +caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga +>IGHV1/OR21-1*01 +caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga +>IGHV2-10*01 +caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac +>IGHV2-26*01 +caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac +>IGHV2-5*01 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*02 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*03 +................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt +>IGHV2-5*04| +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac +>IGHV2-5*05 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*06 +cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga +>IGHV2-5*08 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*09 +caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-70*01 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70*02 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*03 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*04 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac +>IGHV2-70*05 +..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga +>IGHV2-70*06 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*07 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*08 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*09 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg +>IGHV2-70*10 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70*11 +cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70*12 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-70*13 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac +>IGHV2-70D*04 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70D*14 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2/OR16-5*01 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag +>IGHV3-11*01 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-11*03 +caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga +>IGHV3-11*04 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-11*05 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-11*06 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-13*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-13*02 +gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-13*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga +>IGHV3-13*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-13*05 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-15*01 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*02 +gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*03 +gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*04 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*05 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*06 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*07 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*08 +gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg +>IGHV3-16*01 +gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-16*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-19*01 +acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-20*01 +gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga +>IGHV3-20*02 +gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga +>IGHV3-21*01 +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-21*02 +gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-21*03 +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga +>IGHV3-21*04 +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-22*01 +gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga +>IGHV3-22*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga +>IGHV3-23*01 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*02 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*03 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*05 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa +>IGHV3-23D*01 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23D*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-25*01 +gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga +>IGHV3-25*02 +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga +>IGHV3-25*03 +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga +>IGHV3-25*04 +gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga +>IGHV3-25*05 +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga +>IGHV3-29*01 +gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt +>IGHV3-30*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*02 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30*03 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*04 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*05 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga +>IGHV3-30*06 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*07 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*08 +caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga +>IGHV3-30*09 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*10 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*11 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*12 +caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*13 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*14 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*15 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*16 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*17 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*18 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30*19 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-2*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca +>IGHV3-30-22*01 +gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc +>IGHV3-30-3*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-3*02 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-3*03 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-33*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg +>IGHV3-30-42*01 +gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt +>IGHV3-30-5*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-5*02 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-52*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg +>IGHV3-32*01 +gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc +>AIGHV3-33*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*02 +caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*03 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-33*04 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*05 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*06 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-33-2*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca +>IGHV3-35*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa +>IGHV3-38*01| +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata +>IGHV3-38*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata +>IGHV3-38*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata +>IGHV3-38-3*01 +gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga +>IGHV3-43*01 +gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-43*02 +gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-43D*01 +gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-47*01 +gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga +>IGHV3-47*02 +gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga +>IGHV3-48*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-48*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga +>IGHV3-48*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga +>IGHV3-48*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-49*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*05 +gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-52*01 +gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg +>IGHV3-52*02 +gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga +>IGHV3-52*03 +gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga +>IGHV3-53*01 +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-53*02 +gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-53*03 +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga +>IGHV3-53*04 +gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga +>IGHV3-54*01 +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt +>IGHV3-54*02 +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg +>IGHV3-54*04 +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt +>IGHV3-62*01 +gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga +>IGHV3-63*01 +gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt +>IGHV3-63*02 +gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa +>IGHV3-64*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga +>IGHV3-64*02 +gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga +>IGHV3-64*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-64*04 +caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-64*05 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-64D*06 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-66*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-66*02 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga +>IGHV3-66*03 +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-66*04 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca +>IGHV3-69-1*01 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-69-1*02 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga +>IGHV3-7*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-7*02 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga +>IGHV3-7*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-71*01 +gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-71*02 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga +>IGHV3-71*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-72*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga +>IGHV3-72*02 +....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat +>IGHV3-73*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca +>IGHV3-73*02 +gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca +>IGHV3-74*01 +gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga +>IGHV3-74*02 +gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga +>IGHV3-74*03 +gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga +>IGHV3-9*01 +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata +>IGHV3-9*02 +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata +>IGHV3-9*03 +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata +>IGHV3-NL1*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3/OR15-7*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga +>IGHV3/OR15-7*02 +gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga +>IGHV3/OR15-7*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga +>IGHV3/OR15-7*05 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga +>IGHV3/OR16-10*01 +gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga +>IGHV3/OR16-10*02 +gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga +>IGHV3/OR16-10*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga +>IGHV3/OR16-12*01 +gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga +>IGHV3/OR16-13*01 +gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga +>IGHV3/OR16-14*01 +gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga +>IGHV3/OR16-15*01 +gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa +>IGHV3/OR16-15*02 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga +>IGHV3/OR16-16*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga +>IGHV3/OR16-6*02 +gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg +>IGHV3/OR16-8*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa +>IGHV3/OR16-8*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca +>IGHV3/OR16-9*01 +gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa +>IGHV4-28*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga +>IGHV4-28*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga +>IGHV4-28*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*06 +caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*07 +caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-30-2*01 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-30-2*02 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg +>IGHV4-30-2*03 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca +>IGHV4-30-2*04 +...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-30-2*05 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-2*06 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-30-4*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>XIGHV4-30-4*04 +caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg +>IGHV4-30-4*05 +..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*06 +...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-31*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-31*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-31*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-31*04 +caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg +>IGHV4-31*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg +>IGHV4-31*06 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>IGHV4-31*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>IGHV4-31*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>IGHV4-31*09 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-31*10 +caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga +>IGHV4-34*01 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*02 +caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*03 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-34*04 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*05 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*06 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-34*07 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-34*08 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg +>IGHV4-34*09 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-34*10 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-34*11 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga +>IGHV4-34*12 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga +>IGHV4-34*13 +...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-38-2*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga +>IGHV4-38-2*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-39*01 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca +>IGHV4-39*02 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga +>IGHV4-39*03 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg +>IGHV4-39*04 +..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac +>IGHV4-39*05 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg +>IGHV4-39*06 +cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-39*07 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga +>IGHV4-4*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-4*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-4*05 +caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-4*06 +............................................................ +...............tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-55*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-55*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-55*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-55*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-55*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-55*06 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg +>IGHV4-55*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg +>IGHV4-55*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-55*09 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-59*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-59*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-59*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg +>IGHV4-59*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg +>IGHV4-59*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg +>IGHV4-59*06 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg +>IGHV4-59*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga +>IGHV4-59*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca +>IGHV4-59*09 +...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg +>IGHV4-59*10 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-61*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-61*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-61*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-61*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg +>IGHV4-61*05 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga +>IGHV4-61*06 +...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-61*07 +...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca +>IGHV4-61*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4/OR15-8*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4/OR15-8*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4/OR15-8*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV5-10-1*01 +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-10-1*02 +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca +>IGHV5-10-1*03 +gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-10-1*04 +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-51*01 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca +>IGHV5-51*02 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca +>IGHV5-51*03 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-51*04 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-51*05 +.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg +>IGHV5-78*01 +gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga +>IGHV6-1*01 +caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga +>IGHV6-1*02 +caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga +>IGHV7-34-1*01 +...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta +>IGHV7-34-1*02 +...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta +>IGHV7-4-1*01 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga +>IGHV7-4-1*02 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga +>IGHV7-4-1*03 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg +>IGHV7-4-1*04 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga +>IGHV7-4-1*05 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga +>AIGHV7-40*03| +ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga +>IGHV7-81*01 +caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/IMGTVHreferencedataset20161215.fa Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,1 @@ +>IGHV1-18*01 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-18*02 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc >IGHV1-18*03 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1-18*04 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga >IGHV1-2*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*04 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*05 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga >IGHV1-24*01 caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga >IGHV1-3*01 caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga >IGHV1-3*02 caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga >IGHV1-38-4*01 caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca >IGHV1-45*01 cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana >IGHV1-45*02 cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata >IGHV1-45*03 .....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga >IGHV1-46*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-46*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-46*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga >IGHV1-58*01 caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga >IGHV1-58*02 caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga >IGHV1-68*01 caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata >IGHV1-69*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*02 caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1-69*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc >IGHV1-69*04 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*05 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1-69*06 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*07 .....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag >IGHV1-69*08 caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*09 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*10 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*11 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*12 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*13 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*14 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69-2*01 gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga >IGHV1-69-2*02 .....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag >IGHV1-69D*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-8*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg >IGHV1-8*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg >IGHV1-NL1*01 caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga >IGHV1/OR15-1*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga >IGHV1/OR15-1*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga >IGHV1/OR15-1*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga >IGHV1/OR15-1*04 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga >IGHV1/OR15-2*01 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-2*02 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-2*03 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-3*01 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-3*02 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1/OR15-3*03 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-4*01 caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-5*01 .....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga >IGHV1/OR15-5*02 caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga >IGHV1/OR15-9*01 caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga >IGHV1/OR21-1*01 caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga >IGHV2-10*01 caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac >IGHV2-26*01 caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac >IGHV2-5*01 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*02 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*03 ................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt >IGHV2-5*04 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac >IGHV2-5*05 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*06 cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga >IGHV2-5*08 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*09 caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-70*01 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*02 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*03 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*04 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac >IGHV2-70*05 ..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga >IGHV2-70*06 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*07 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*08 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*09 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg >IGHV2-70*10 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*11 cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*12 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-70*13 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac >IGHV2-70D*04 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70D*14 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2/OR16-5*01 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag >IGHV3-11*01 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-11*03 caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga >IGHV3-11*04 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-11*05 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-11*06 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-13*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*02 gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga >IGHV3-13*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*05 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-15*01 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*02 gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*03 gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*04 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*05 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*06 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*07 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*08 gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg >IGHV3-16*01 gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-16*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-19*01 acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-20*01 gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga >IGHV3-20*02 gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga >IGHV3-21*01 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-21*02 gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-21*03 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga >IGHV3-21*04 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-22*01 gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga >IGHV3-22*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga >IGHV3-23*01 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*02 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*03 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*05 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa >IGHV3-23D*01 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-25*01 gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-25*02 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-25*03 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga >IGHV3-25*04 gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga >IGHV3-25*05 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-29*01 gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt >IGHV3-30*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*04 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*05 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga >IGHV3-30*06 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*07 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*08 caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga >IGHV3-30*09 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*10 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*11 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*12 caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*13 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*14 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*15 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*16 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*17 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*18 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30*19 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-2*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca >IGHV3-30-22*01 gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc >IGHV3-30-3*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-3*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-3*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-33*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg >IGHV3-30-42*01 gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt >IGHV3-30-5*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-5*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-52*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg >IGHV3-32*01 gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc >IGHV3-33*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*02 caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga >IGHV3-33*04 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*05 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*06 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga >IGHV3-33-2*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca >IGHV3-35*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa >IGHV3-38*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata >IGHV3-38*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata >IGHV3-38*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata >IGHV3-38-3*01 gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga >IGHV3-43*01 gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata >IGHV3-43*02 gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata >IGHV3-43D*01 gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata >IGHV3-47*01 gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga >IGHV3-47*02 gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga >IGHV3-48*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-48*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga >IGHV3-48*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga >IGHV3-48*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-49*01 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*02 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*03 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*04 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*05 gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-52*01 gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg >IGHV3-52*02 gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga >IGHV3-52*03 gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga >IGHV3-53*01 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-53*02 gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-53*03 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga >IGHV3-53*04 gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga >IGHV3-54*01 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt >IGHV3-54*02 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg >IGHV3-54*04 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt >IGHV3-62*01 gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga >IGHV3-63*01 gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt >IGHV3-63*02 gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa >IGHV3-64*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga >IGHV3-64*02 gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga >IGHV3-64*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-64*04 caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-64*05 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-64D*06 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-66*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-66*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga >IGHV3-66*03 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-66*04 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca >IGHV3-69-1*01 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-69-1*02 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga >IGHV3-7*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-7*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga >IGHV3-7*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-71*01 gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-71*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga >IGHV3-71*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-72*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga >IGHV3-72*02 ....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat >IGHV3-73*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca >IGHV3-73*02 gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca >IGHV3-74*01 gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga >IGHV3-74*02 gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga >IGHV3-74*03 gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga >IGHV3-9*01 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata >IGHV3-9*02 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata >IGHV3-9*03 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata >IGHV3-NL1*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3/OR15-7*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*02 gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*05 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga >IGHV3/OR16-10*01 gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga >IGHV3/OR16-10*02 gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga >IGHV3/OR16-10*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga >IGHV3/OR16-12*01 gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga >IGHV3/OR16-13*01 gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga >IGHV3/OR16-14*01 gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga >IGHV3/OR16-15*01 gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa >IGHV3/OR16-15*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga >IGHV3/OR16-16*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga >IGHV3/OR16-6*02 gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg >IGHV3/OR16-8*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa >IGHV3/OR16-8*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca >IGHV3/OR16-9*01 gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa >IGHV4-28*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga >IGHV4-28*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga >IGHV4-28*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*06 caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa >IGHV4-28*07 caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-30-2*01 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-30-2*02 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg >IGHV4-30-2*03 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca >IGHV4-30-2*04 ...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-30-2*05 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-2*06 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-30-4*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-30-4*04 caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg >IGHV4-30-4*05 ..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*06 ...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-31*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*04 caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg >IGHV4-31*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg >IGHV4-31*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-31*10 caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga >IGHV4-34*01 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*02 caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*03 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*04 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*05 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*06 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*07 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*08 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg >IGHV4-34*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-34*10 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-34*11 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga >IGHV4-34*12 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga >IGHV4-34*13 ...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-38-2*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga >IGHV4-38-2*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-39*01 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca >IGHV4-39*02 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga >IGHV4-39*03 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg >IGHV4-39*04 ..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac >IGHV4-39*05 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-39*06 cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-39*07 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga >IGHV4-4*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*05 caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*06 ...........................................................................tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-55*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-55*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-55*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg >IGHV4-55*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg >IGHV4-55*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-55*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-59*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-59*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-59*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg >IGHV4-59*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-59*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-59*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg >IGHV4-59*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga >IGHV4-59*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca >IGHV4-59*09 ...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg >IGHV4-59*10 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-61*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-61*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-61*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-61*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg >IGHV4-61*05 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga >IGHV4-61*06 ...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-61*07 ...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca >IGHV4-61*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV5-10-1*01 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-10-1*02 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca >IGHV5-10-1*03 gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-10-1*04 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*01 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca >IGHV5-51*02 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca >IGHV5-51*03 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*04 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*05 .....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg >IGHV5-78*01 gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga >IGHV6-1*01 caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga >IGHV6-1*02 caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga >IGHV7-34-1*01 ...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta >IGHV7-34-1*02 ...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta >IGHV7-4-1*01 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga >IGHV7-4-1*02 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga >IGHV7-4-1*03 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg >IGHV7-4-1*04 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga >IGHV7-4-1*05 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga >IGHV7-40*03 ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga >IGHV7-81*01 caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/IMGTVHreferencedataset20161215.fasta Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,1 @@ +>IGHV1-18*01 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-18*02 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc >IGHV1-18*03 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1-18*04 caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga >IGHV1-2*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*04 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga >IGHV1-2*05 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga >IGHV1-24*01 caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga >IGHV1-3*01 caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga >IGHV1-3*02 caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga >IGHV1-38-4*01 caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca >IGHV1-45*01 cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana >IGHV1-45*02 cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata >IGHV1-45*03 .....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga >IGHV1-46*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-46*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-46*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga >IGHV1-58*01 caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga >IGHV1-58*02 caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga >IGHV1-68*01 caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata >IGHV1-69*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*02 caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1-69*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc >IGHV1-69*04 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*05 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1-69*06 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*07 .....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag >IGHV1-69*08 caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*09 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*10 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*11 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*12 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*13 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69*14 caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-69-2*01 gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga >IGHV1-69-2*02 .....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag >IGHV1-69D*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1-8*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg >IGHV1-8*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg >IGHV1-NL1*01 caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga >IGHV1/OR15-1*01 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga >IGHV1/OR15-1*02 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga >IGHV1/OR15-1*03 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga >IGHV1/OR15-1*04 caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga >IGHV1/OR15-2*01 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-2*02 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-2*03 caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga >IGHV1/OR15-3*01 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-3*02 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga >IGHV1/OR15-3*03 caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-4*01 caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga >IGHV1/OR15-5*01 .....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga >IGHV1/OR15-5*02 caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga >IGHV1/OR15-9*01 caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga >IGHV1/OR21-1*01 caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga >IGHV2-10*01 caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac >IGHV2-26*01 caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac >IGHV2-5*01 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*02 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*03 ................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt >IGHV2-5*04 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac >IGHV2-5*05 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*06 cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga >IGHV2-5*08 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-5*09 caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-70*01 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*02 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*03 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*04 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac >IGHV2-70*05 ..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga >IGHV2-70*06 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*07 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*08 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg >IGHV2-70*09 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg >IGHV2-70*10 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*11 cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70*12 cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac >IGHV2-70*13 caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac >IGHV2-70D*04 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2-70D*14 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac >IGHV2/OR16-5*01 caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag >IGHV3-11*01 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-11*03 caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga >IGHV3-11*04 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-11*05 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-11*06 caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-13*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*02 gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga >IGHV3-13*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-13*05 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga >IGHV3-15*01 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*02 gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*03 gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*04 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*05 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*06 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*07 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga >IGHV3-15*08 gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg >IGHV3-16*01 gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-16*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-19*01 acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa >IGHV3-20*01 gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga >IGHV3-20*02 gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga >IGHV3-21*01 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-21*02 gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-21*03 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga >IGHV3-21*04 gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-22*01 gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga >IGHV3-22*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga >IGHV3-23*01 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*02 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*03 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-23*05 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa >IGHV3-23D*01 gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga >IGHV3-25*01 gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-25*02 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-25*03 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga >IGHV3-25*04 gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga >IGHV3-25*05 gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga >IGHV3-29*01 gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt >IGHV3-30*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*04 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*05 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga >IGHV3-30*06 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*07 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*08 caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga >IGHV3-30*09 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*10 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*11 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*12 caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*13 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*14 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*15 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*16 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*17 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30*18 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30*19 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-2*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca >IGHV3-30-22*01 gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc >IGHV3-30-3*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-3*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-3*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-30-33*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg >IGHV3-30-42*01 gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt >IGHV3-30-5*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-5*02 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3-30-52*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg >IGHV3-32*01 gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc >IGHV3-33*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*02 caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*03 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga >IGHV3-33*04 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*05 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-33*06 caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga >IGHV3-33-2*01 gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca >IGHV3-35*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa >IGHV3-38*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata >IGHV3-38*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata >IGHV3-38*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata >IGHV3-38-3*01 gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga >IGHV3-43*01 gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata >IGHV3-43*02 gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata >IGHV3-43D*01 gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata >IGHV3-47*01 gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga >IGHV3-47*02 gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga >IGHV3-48*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-48*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga >IGHV3-48*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga >IGHV3-48*04 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-49*01 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*02 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*03 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*04 gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-49*05 gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga >IGHV3-52*01 gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg >IGHV3-52*02 gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga >IGHV3-52*03 gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga >IGHV3-53*01 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-53*02 gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-53*03 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga >IGHV3-53*04 gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga >IGHV3-54*01 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt >IGHV3-54*02 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg >IGHV3-54*04 gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt >IGHV3-62*01 gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga >IGHV3-63*01 gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt >IGHV3-63*02 gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa >IGHV3-64*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga >IGHV3-64*02 gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga >IGHV3-64*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-64*04 caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-64*05 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-64D*06 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga >IGHV3-66*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-66*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga >IGHV3-66*03 gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga >IGHV3-66*04 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca >IGHV3-69-1*01 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-69-1*02 gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga >IGHV3-7*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-7*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga >IGHV3-7*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-71*01 gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga >IGHV3-71*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga >IGHV3-71*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga >IGHV3-72*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga >IGHV3-72*02 ....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat >IGHV3-73*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca >IGHV3-73*02 gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca >IGHV3-74*01 gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga >IGHV3-74*02 gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga >IGHV3-74*03 gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga >IGHV3-9*01 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata >IGHV3-9*02 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata >IGHV3-9*03 gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata >IGHV3-NL1*01 caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga >IGHV3/OR15-7*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*02 gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*03 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga >IGHV3/OR15-7*05 gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga >IGHV3/OR16-10*01 gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga >IGHV3/OR16-10*02 gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga >IGHV3/OR16-10*03 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga >IGHV3/OR16-12*01 gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga >IGHV3/OR16-13*01 gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga >IGHV3/OR16-14*01 gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga >IGHV3/OR16-15*01 gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa >IGHV3/OR16-15*02 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga >IGHV3/OR16-16*01 gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga >IGHV3/OR16-6*02 gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg >IGHV3/OR16-8*01 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa >IGHV3/OR16-8*02 gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca >IGHV3/OR16-9*01 gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa >IGHV4-28*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga >IGHV4-28*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga >IGHV4-28*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-28*06 caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa >IGHV4-28*07 caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-30-2*01 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-30-2*02 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg >IGHV4-30-2*03 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca >IGHV4-30-2*04 ...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-30-2*05 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-2*06 cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-30-4*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-30-4*04 caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg >IGHV4-30-4*05 ..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*06 ...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga >IGHV4-30-4*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-31*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-31*04 caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg >IGHV4-31*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg >IGHV4-31*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg >IGHV4-31*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-31*10 caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga >IGHV4-34*01 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*02 caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*03 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*04 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*05 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-34*06 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*07 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-34*08 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg >IGHV4-34*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-34*10 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-34*11 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga >IGHV4-34*12 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga >IGHV4-34*13 ...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg >IGHV4-38-2*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga >IGHV4-38-2*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-39*01 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca >IGHV4-39*02 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga >IGHV4-39*03 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg >IGHV4-39*04 ..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac >IGHV4-39*05 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-39*06 cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-39*07 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga >IGHV4-4*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*05 caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-4*06 ...........................................................................tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-4*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-55*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-55*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-55*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg >IGHV4-55*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg >IGHV4-55*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg >IGHV4-55*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4-55*09 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa >IGHV4-59*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-59*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-59*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg >IGHV4-59*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-59*05 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg >IGHV4-59*06 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg >IGHV4-59*07 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga >IGHV4-59*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca >IGHV4-59*09 ...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg >IGHV4-59*10 caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata >IGHV4-61*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-61*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga >IGHV4-61*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4-61*04 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg >IGHV4-61*05 cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga >IGHV4-61*06 ...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga >IGHV4-61*07 ...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca >IGHV4-61*08 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*01 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*02 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV4/OR15-8*03 caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga >IGHV5-10-1*01 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-10-1*02 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca >IGHV5-10-1*03 gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-10-1*04 gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*01 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca >IGHV5-51*02 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca >IGHV5-51*03 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*04 gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga >IGHV5-51*05 .....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg >IGHV5-78*01 gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga >IGHV6-1*01 caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga >IGHV6-1*02 caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga >IGHV7-34-1*01 ...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta >IGHV7-34-1*02 ...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta >IGHV7-4-1*01 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga >IGHV7-4-1*02 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga >IGHV7-4-1*03 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg >IGHV7-4-1*04 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga >IGHV7-4-1*05 caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga >IGHV7-40*03 ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga >IGHV7-81*01 caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/baseline_url.txt Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,1 @@ +http://selection.med.yale.edu/baseline/ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/comparePDFs.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,225 @@ +options("warn"=-1) + +#from http://selection.med.yale.edu/baseline/Archive/Baseline%20Version%201.3/Baseline_Functions_Version1.3.r +# Compute p-value of two distributions +compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){ +#print(c(length(dens1),length(dens2))) +if(length(dens1)>1 & length(dens2)>1 ){ + dens1<-dens1/sum(dens1) + dens2<-dens2/sum(dens2) + cum2 <- cumsum(dens2)-dens2/2 + tmp<- sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i]))) + #print(tmp) + if(tmp>0.5)tmp<-tmp-1 + return( tmp ) + } + else { + return(NA) + } + #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N) +} + + +require("grid") +arg <- commandArgs(TRUE) +#arg <- c("300143","4","5") +arg[!arg=="clonal"] +input <- arg[1] +output <- arg[2] +rowIDs <- as.numeric( sapply(arg[3:(max(3,length(arg)))],function(x){ gsub("chkbx","",x) } ) ) + +numbSeqs = length(rowIDs) + +if ( is.na(rowIDs[1]) | numbSeqs>10 ) { + stop( paste("Error: Please select between one and 10 seqeunces to compare.") ) +} + +#load( paste("output/",sessionID,".RData",sep="") ) +load( input ) +#input + +xMarks = seq(-20,20,length.out=4001) + +plot_grid_s<-function(pdf1,pdf2,Sample=100,cex=1,xlim=NULL,xMarks = seq(-20,20,length.out=4001)){ + yMax = max(c(abs(as.numeric(unlist(listPDFs[pdf1]))),abs(as.numeric(unlist(listPDFs[pdf2]))),0),na.rm=T) * 1.1 + + if(length(xlim==2)){ + xMin=xlim[1] + xMax=xlim[2] + } else { + xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1] + xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1] + xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])] + xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])] + + xMin_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][1] + xMin_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][1] + xMax_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001])] + xMax_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001])] + + xMin=min(c(xMin_CDR,xMin_FWR,xMin_CDR2,xMin_FWR2,0),na.rm=TRUE) + xMax=max(c(xMax_CDR,xMax_FWR,xMax_CDR2,xMax_FWR2,0),na.rm=TRUE) + } + + sigma<-approx(xMarks,xout=seq(xMin,xMax,length.out=Sample))$x + grid.rect(gp = gpar(col=gray(0.6),fill="white",cex=cex)) + x <- sigma + pushViewport(viewport(x=0.175,y=0.175,width=0.825,height=0.825,just=c("left","bottom"),default.units="npc")) + #pushViewport(plotViewport(c(1.8, 1.8, 0.25, 0.25)*cex)) + pushViewport(dataViewport(x, c(yMax,-yMax),gp = gpar(cex=cex),extension=c(0.05))) + grid.polygon(c(0,0,1,1),c(0,0.5,0.5,0),gp=gpar(col=grey(0.95),fill=grey(0.95)),default.units="npc") + grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.9),fill=grey(0.9)),default.units="npc") + grid.rect() + grid.xaxis(gp = gpar(cex=cex/1.1)) + yticks = pretty(c(-yMax,yMax),8) + yticks = yticks[yticks>(-yMax) & yticks<(yMax)] + grid.yaxis(at=yticks,label=abs(yticks),gp = gpar(cex=cex/1.1)) + if(length(listPDFs[pdf1][[1]][["CDR"]])>1){ + ycdr<-approx(xMarks,listPDFs[pdf1][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y + grid.lines(unit(x,"native"), unit(ycdr,"native"),gp=gpar(col=2,lwd=2)) + } + if(length(listPDFs[pdf1][[1]][["FWR"]])>1){ + yfwr<-approx(xMarks,listPDFs[pdf1][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y + grid.lines(unit(x,"native"), unit(-yfwr,"native"),gp=gpar(col=4,lwd=2)) + } + + if(length(listPDFs[pdf2][[1]][["CDR"]])>1){ + ycdr2<-approx(xMarks,listPDFs[pdf2][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y + grid.lines(unit(x,"native"), unit(ycdr2,"native"),gp=gpar(col=2,lwd=2,lty=2)) + } + if(length(listPDFs[pdf2][[1]][["FWR"]])>1){ + yfwr2<-approx(xMarks,listPDFs[pdf2][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y + grid.lines(unit(x,"native"), unit(-yfwr2,"native"),gp=gpar(col=4,lwd=2,lty=2)) + } + + grid.lines(unit(c(0,1),"npc"), unit(c(0.5,0.5),"npc"),gp=gpar(col=1)) + grid.lines(unit(c(0,0),"native"), unit(c(0,1),"npc"),gp=gpar(col=1,lwd=1,lty=3)) + + grid.text("All", x = unit(-2.5, "lines"), rot = 90,gp = gpar(cex=cex)) + grid.text( expression(paste("Selection Strength (", Sigma, ")", sep="")) , y = unit(-2.5, "lines"),gp = gpar(cex=cex)) + + if(pdf1==pdf2 & length(listPDFs[pdf2][[1]][["FWR"]])>1 & length(listPDFs[pdf2][[1]][["CDR"]])>1 ){ + pCDRFWR = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf1]][["FWR"]]) + pval = formatC(as.numeric(pCDRFWR),digits=3) + grid.text( substitute(expression(paste(P[CDR/FWR], "=", x, sep="")),list(x=pval))[[2]] , x = unit(0.02, "npc"),y = unit(0.98, "npc"),just=c("left", "top"),gp = gpar(cex=cex*1.2)) + } + grid.text(paste("CDR"), x = unit(0.98, "npc"),y = unit(0.98, "npc"),just=c("right", "top"),gp = gpar(cex=cex*1.5)) + grid.text(paste("FWR"), x = unit(0.98, "npc"),y = unit(0.02, "npc"),just=c("right", "bottom"),gp = gpar(cex=cex*1.5)) + popViewport(2) +} +#plot_grid_s(1) + + +p2col<-function(p=0.01){ + breaks=c(-.51,-0.1,-.05,-0.01,-0.005,0,0.005,0.01,0.05,0.1,0.51) + i<-findInterval(p,breaks) + cols = c( rgb(0.8,1,0.8), rgb(0.6,1,0.6), rgb(0.4,1,0.4), rgb(0.2,1,0.2) , rgb(0,1,0), + rgb(1,0,0), rgb(1,.2,.2), rgb(1,.4,.4), rgb(1,.6,.6) , rgb(1,.8,.8) ) + return(cols[i]) +} + + +plot_pvals<-function(pdf1,pdf2,cex=1,upper=TRUE){ + if(upper){ + pCDR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf2]][["FWR"]]) + pFWR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["FWR"]], dens2=listPDFs[[pdf2]][["FWR"]]) + pFWR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["FWR"]]) + pCDR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["CDR"]]) + grid.polygon(c(0.5,0.5,1,1),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1FWR2),fill=p2col(pFWR1FWR2)),default.units="npc") + grid.polygon(c(0.5,0.5,1,1),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1FWR2),fill=p2col(pCDR1FWR2)),default.units="npc") + grid.polygon(c(0.5,0.5,0,0),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1CDR2),fill=p2col(pCDR1CDR2)),default.units="npc") + grid.polygon(c(0.5,0.5,0,0),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1CDR2),fill=p2col(pFWR1CDR2)),default.units="npc") + + grid.lines(c(0,1),0.5,gp=gpar(lty=2,col=gray(0.925))) + grid.lines(0.5,c(0,1),gp=gpar(lty=2,col=gray(0.925))) + + grid.text(formatC(as.numeric(pFWR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) + grid.text(formatC(as.numeric(pCDR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) + grid.text(formatC(as.numeric(pCDR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) + grid.text(formatC(as.numeric(pFWR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) + + + # grid.text(paste("P = ",formatC(pCDRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.98, "npc"),just=c("center", "top"),gp = gpar(cex=cex)) + # grid.text(paste("P = ",formatC(pFWRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.02, "npc"),just=c("center", "bottom"),gp = gpar(cex=cex)) + } + else{ + } +} + + +################################################################################## +################## The whole OCD's matrix ######################################## +################################################################################## + +#pdf(width=4*numbSeqs+1/3,height=4*numbSeqs+1/3) +pdf( output ,width=4*numbSeqs+1/3,height=4*numbSeqs+1/3) + +pushViewport(viewport(x=0.02,y=0.02,just = c("left", "bottom"),w =0.96,height=0.96,layout = grid.layout(numbSeqs+1,numbSeqs+1,widths=unit.c(unit(rep(1,numbSeqs),"null"),unit(4,"lines")),heights=unit.c(unit(4,"lines"),unit(rep(1,numbSeqs),"null"))))) + +for( seqOne in 1:numbSeqs+1){ + pushViewport(viewport(layout.pos.col = seqOne-1, layout.pos.row = 1)) + if(seqOne>2){ + grid.polygon(c(0,0,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc") + grid.polygon(c(1,1,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc") + grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.5)),default.units="npc") + + grid.text(y=.25,x=0.75,"FWR",gp = gpar(cex=1.5),just="center") + grid.text(y=.25,x=0.25,"CDR",gp = gpar(cex=1.5),just="center") + } + grid.rect(gp = gpar(col=grey(0.9))) + grid.text(y=.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),just="center") + popViewport(1) +} + +for( seqOne in 1:numbSeqs+1){ + pushViewport(viewport(layout.pos.row = seqOne, layout.pos.col = numbSeqs+1)) + if(seqOne<=numbSeqs){ + grid.polygon(c(0,0.5,0.5,0),c(0,0,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc") + grid.polygon(c(0,0.5,0.5,0),c(1,1,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc") + grid.polygon(c(1,0.5,0.5,1),c(0,0,1,1),gp=gpar(col=grey(0.5)),default.units="npc") + grid.text(x=.25,y=0.75,"CDR",gp = gpar(cex=1.5),just="center",rot=270) + grid.text(x=.25,y=0.25,"FWR",gp = gpar(cex=1.5),just="center",rot=270) + } + grid.rect(gp = gpar(col=grey(0.9))) + grid.text(x=0.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),rot=270,just="center") + popViewport(1) +} + +for( seqOne in 1:numbSeqs+1){ + for(seqTwo in 1:numbSeqs+1){ + pushViewport(viewport(layout.pos.col = seqTwo-1, layout.pos.row = seqOne)) + if(seqTwo>seqOne){ + plot_pvals(rowIDs[seqOne-1],rowIDs[seqTwo-1],cex=2) + grid.rect() + } + popViewport(1) + } +} + + +xMin=0 +xMax=0.01 +for(pdf1 in rowIDs){ + xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1] + xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1] + xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])] + xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])] + xMin=min(c(xMin_CDR,xMin_FWR,xMin),na.rm=TRUE) + xMax=max(c(xMax_CDR,xMax_FWR,xMax),na.rm=TRUE) +} + + + +for(i in 1:numbSeqs+1){ + for(j in (i-1):numbSeqs){ + pushViewport(viewport(layout.pos.col = i-1, layout.pos.row = j+1)) + grid.rect() + plot_grid_s(rowIDs[i-1],rowIDs[j],cex=1) + popViewport(1) + } +} + +dev.off() + +cat("Success", paste(rowIDs,collapse="_"),sep=":") +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/filter.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,55 @@ +arg = commandArgs(TRUE) +summaryfile = arg[1] +gappedfile = arg[2] +selection = arg[3] +output = arg[4] +print(paste("selection = ", selection)) + + +summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote = "") +gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote = "") + +fix_column_names = function(df){ + if("V.DOMAIN.Functionality" %in% names(df)){ + names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality" + print("found V.DOMAIN.Functionality, changed") + } + if("V.DOMAIN.Functionality.comment" %in% names(df)){ + names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment" + print("found V.DOMAIN.Functionality.comment, changed") + } + return(df) +} + +gappeddat = fix_column_names(gappeddat) + +#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T)) + +dat = cbind(gappeddat, summarydat$AA.JUNCTION) + +colnames(dat)[length(dat)] = "AA.JUNCTION" + +dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) +dat$VGene = gsub("[*].*", "", dat$VGene) + +dat$DGene = gsub("^Homsap ", "", dat$D.GENE.and.allele) +dat$DGene = gsub("[*].*", "", dat$DGene) + +dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) +dat$JGene = gsub("[*].*", "", dat$JGene) + +print(str(dat)) + +dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":")) + +dat = dat[!duplicated(dat$past), ] + +print(paste("Sequences remaining after duplicate filter:", nrow(dat))) + +dat = dat[dat$Functionality != "No results" & dat$Functionality != "unproductive",] + +print(paste("Sequences remaining after functionality filter:", nrow(dat))) + +print(paste("Sequences remaining:", nrow(dat))) + +write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/script_imgt.py Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,86 @@ +#import xlrd #avoid dep +import argparse +import re + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence") +parser.add_argument("--ref", help="Reference file") +parser.add_argument("--output", help="Output file") +parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") + +args = parser.parse_args() + +print "script_imgt.py" +print "input:", args.input +print "ref:", args.ref +print "output:", args.output +print "id:", args.id + +refdic = dict() +with open(args.ref, 'rU') as ref: + currentSeq = "" + currentId = "" + for line in ref: + if line.startswith(">"): + if currentSeq is not "" and currentId is not "": + refdic[currentId[1:]] = currentSeq + currentId = line.rstrip() + currentSeq = "" + else: + currentSeq += line.rstrip() + refdic[currentId[1:]] = currentSeq + +print "Have", str(len(refdic)), "reference sequences" + +vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, +# r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", +# r"(IGKV[0-3]D?-[0-9]{1,2})", +# r"(IGLV[0-9]-[0-9]{1,2})", +# r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)", +# r"(TRGV[234589])", +# r"(TRDV[1-3])"] + +#vPattern = re.compile(r"|".join(vPattern)) +vPattern = re.compile("|".join(vPattern)) + +def filterGene(s, pattern): + if type(s) is not str: + return None + res = pattern.search(s) + if res: + return res.group(0) + return None + + + +currentSeq = "" +currentId = "" +first=True +with open(args.input, 'r') as i: + with open(args.output, 'a') as o: + o.write(">>>" + args.id + "\n") + outputdic = dict() + for line in i: + if first: + first = False + continue + linesplt = line.split("\t") + ref = filterGene(linesplt[1], vPattern) + if not ref or not linesplt[2].rstrip(): + continue + if ref in outputdic: + outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] + else: + outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] + #print outputdic + + for k in outputdic.keys(): + if k in refdic: + o.write(">>" + k + "\n") + o.write(refdic[k] + "\n") + for seq in outputdic[k]: + #print seq + o.write(">" + seq[0] + "\n") + o.write(seq[1] + "\n") + else: + print k + " not in reference, skipping " + k
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/script_xlsx.py Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,58 @@ +import xlrd +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence") +parser.add_argument("--ref", help="Reference file") +parser.add_argument("--output", help="Output file") + +args = parser.parse_args() + +gene_column = 6 +id_column = 7 +seq_column = 8 +LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"] + + +refdic = dict() +with open(args.ref, 'r') as ref: + currentSeq = "" + currentId = "" + for line in ref.readlines(): + if line[0] is ">": + if currentSeq is not "" and currentId is not "": + refdic[currentId[1:]] = currentSeq + currentId = line.rstrip() + currentSeq = "" + else: + currentSeq += line.rstrip() + refdic[currentId[1:]] = currentSeq + +currentSeq = "" +currentId = "" +with xlrd.open_workbook(args.input, 'r') as wb: + with open(args.output, 'a') as o: + for sheet in wb.sheets(): + if sheet.cell(1,gene_column).value.find("IGHV") < 0: + print "Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name + continue + o.write(">>>" + sheet.name + "\n") + outputdic = dict() + for rowindex in range(1, sheet.nrows): + ref = sheet.cell(rowindex, gene_column).value.replace(">", "") + if ref in outputdic: + outputdic[ref] += [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)] + else: + outputdic[ref] = [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)] + #print outputdic + + for k in outputdic.keys(): + if k in refdic: + o.write(">>" + k + "\n") + o.write(refdic[k] + "\n") + for seq in outputdic[k]: + #print seq + o.write(">" + seq[0] + "\n") + o.write(seq[1] + "\n") + else: + print k + " not in reference, skipping " + k
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/baseline/wrapper.sh Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,92 @@ +#!/bin/bash +dir="$(cd "$(dirname "$0")" && pwd)" + +testID=$1 +species=$2 +substitutionModel=$3 +mutabilityModel=$4 +clonal=$5 +fixIndels=$6 +region=$7 +inputs=$8 +inputs=($inputs) +IDs=$9 +IDs=($IDs) +ref=${10} +output=${11} +selection=${12} +output_table=${13} +outID="result" + +echo "$PWD" + +echo "testID = $testID" +echo "species = $species" +echo "substitutionModel = $substitutionModel" +echo "mutabilityModel = $mutabilityModel" +echo "clonal = $clonal" +echo "fixIndels = $fixIndels" +echo "region = $region" +echo "inputs = ${inputs[@]}" +echo "IDs = ${IDs[@]}" +echo "ref = $ref" +echo "output = $output" +echo "outID = $outID" + +fasta="$PWD/baseline.fasta" + + +count=0 +for current in ${inputs[@]} +do + f=$(file $current) + zipType="Zip archive" + if [[ "$f" == *"Zip archive"* ]] || [[ "$f" == *"XZ compressed data"* ]] + then + id=${IDs[$count]} + echo "id=$id" + if [[ "$f" == *"Zip archive"* ]] ; then + echo "Zip archive" + echo "unzip $input -d $PWD/files/" + unzip $current -d "$PWD/$id/" + elif [[ "$f" == *"XZ compressed data"* ]] ; then + echo "ZX archive" + echo "tar -xJf $input -C $PWD/files/" + mkdir -p "$PWD/$id/files" + tar -xJf $current -C "$PWD/$id/files/" + fi + filtered="$PWD/filtered_${id}.txt" + imgt_1_file="`find $PWD/$id -name '1_*.txt'`" + imgt_2_file="`find $PWD/$id -name '2_*.txt'`" + echo "1_Summary file: ${imgt_1_file}" + echo "2_IMGT-gapped file: ${imgt_2_file}" + echo "filter.r for $id" + Rscript $dir/filter.r ${imgt_1_file} ${imgt_2_file} "$selection" $filtered 2>&1 + + final="$PWD/final_${id}.txt" + cat $filtered | cut -f2,4,7 > $final + python $dir/script_imgt.py --input $final --ref $ref --output $fasta --id $id + else + python $dir/script_xlsx.py --input $current --ref $ref --output $fasta + fi + count=$((count+1)) +done +workdir="$PWD" +cd $dir +echo "file: ${inputs[0]}" +#Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region ${inputs[0]} $workdir/ $outID 2>&1 +Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region $fasta $workdir/ $outID 2>&1 + +echo "$workdir/${outID}.txt" + +rows=`tail -n +2 $workdir/${outID}.txt | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' '` +rows=($rows) +#unset rows[${#rows[@]}-1] + +cd $dir +Rscript --verbose $dir/comparePDFs.r $workdir/${outID}.RData $output ${rows[@]} 2>&1 +cp $workdir/result.txt ${output_table} + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/change_o/change_o_url.txt Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,1 @@ +https://changeo.readthedocs.io/en/version-0.4.4/ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/change_o/define_clones.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,15 @@ +args <- commandArgs(trailingOnly = TRUE) + +input=args[1] +output=args[2] + +change.o = read.table(input, header=T, sep="\t", quote="", stringsAsFactors=F) + +freq = data.frame(table(change.o$CLONE)) +freq2 = data.frame(table(freq$Freq)) + +freq2$final = as.numeric(freq2$Freq) * as.numeric(as.character(freq2$Var1)) + +names(freq2) = c("Clone size", "Nr of clones", "Nr of sequences") + +write.table(x=freq2, file=output, sep="\t",quote=F,row.names=F,col.names=T)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/change_o/define_clones.sh Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,39 @@ +#!/bin/bash +dir="$(cd "$(dirname "$0")" && pwd)" + +#define_clones.sh $input $noparse $scores $regions $out_file + +type=$1 +input=$2 + +mkdir -p $PWD/outdir + +cp $input $PWD/input.tab #file has to have a ".tab" extension + +if [ "bygroup" == "$type" ] ; then + mode=$3 + act=$4 + model=$5 + norm=$6 + sym=$7 + link=$8 + dist=$9 + output=${10} + output2=${11} + + DefineClones.py -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link + + Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1 +else + method=$3 + output=$4 + output2=$5 + + DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method + + Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1 +fi + +cp $PWD/outdir/output_clone-pass.tab $output + +rm -rf $PWD/outdir/
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/change_o/makedb.sh Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,36 @@ +#!/bin/bash +dir="$(cd "$(dirname "$0")" && pwd)" + +input=$1 +noparse=$2 +scores=$3 +regions=$4 +output=$5 + +if [ "true" == "$noparse" ] ; then + noparse="--noparse" +else + noparse="" +fi + +if [ "true" == "$scores" ] ; then + scores="--scores" +else + scores="" +fi + +if [ "true" == "$regions" ] ; then + regions="--regions" +else + regions="" +fi + +mkdir $PWD/outdir + +echo "makedb: $PWD/outdir" + +MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions + +mv $PWD/outdir/output_db-pass.tab $output + +rm -rf $PWD/outdir/
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/change_o/select_first_in_clone.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,16 @@ +args <- commandArgs(trailingOnly = TRUE) + +input.file = args[1] +output.file = args[2] + +print("select_in_first_clone.r") +print(input.file) +print(output.file) + +input = read.table(input.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") + +input = input[!duplicated(input$CLONE),] + +names(input)[1] = "Sequence.ID" + +write.table(input, output.file, quote=F, sep="\t", row.names=F, col.names=T, na="")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/check_unique_id.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,25 @@ +args <- commandArgs(trailingOnly = TRUE) #first argument must be the summary file so it can grab the + +current_file = args[1] + +current = read.table(current_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="", check.names=F) + +if(!("Sequence number" %in% names(current))){ + stop("First argument doesn't contain the 'Sequence number' column") +} + +tbl = table(current[,"Sequence ID"]) +l_tbl = length(tbl) +check = any(tbl > 1) + +#if(l_tbl != nrow(current)){ # non unique IDs? +if(check){ + print("Sequence.ID is not unique for every sequence, adding sequence number to IDs") + for(i in 1:length(args)){ + current_file = args[i] + print(paste("Appending 'Sequence number' column to 'Sequence ID' column in", current_file)) + current = read.table(current_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="", check.names=F) + current[,"Sequence ID"] = paste(current[,"Sequence ID"], current[,"Sequence number"], sep="_") + write.table(x = current, file = current_file, quote = F, sep = "\t", na = "", row.names = F, col.names = T) + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/datatypes_conf.xml Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<datatypes> + <registration> + <datatype extension="imgt_archive" type="galaxy.datatypes.binary:CompressedArchive" display_in_upload="True" subclass="True"/> + </registration> +</datatypes>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/gene_identification.py Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,226 @@ +import re +import argparse +import time +starttime= int(time.time() * 1000) + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="The 1_Summary file from an IMGT zip file") +parser.add_argument("--output", help="The annotated output file to be merged back with the summary file") + +args = parser.parse_args() + +infile = args.input +#infile = "test_VH-Ca_Cg_25nt/1_Summary_test_VH-Ca_Cg_25nt_241013.txt" +output = args.output +#outfile = "identified.txt" + +dic = dict() +total = 0 + + +first = True +IDIndex = 0 +seqIndex = 0 + +with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence + for line in f: + total += 1 + linesplt = line.split("\t") + if first: + print "linesplt", linesplt + IDIndex = linesplt.index("Sequence ID") + seqIndex = linesplt.index("Sequence") + first = False + continue + + ID = linesplt[IDIndex] + if len(linesplt) < 28: #weird rows without a sequence + dic[ID] = "" + else: + dic[ID] = linesplt[seqIndex] + +print "Number of input sequences:", len(dic) + +#old cm sequence: gggagtgcatccgccccaacccttttccccctcgtctcctgtgagaattccc +#old cg sequence: ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggccctgggctgcctggtcaaggactacttccccgaaccggtgacggtgtcgtggaactcaggcgccctgaccag + +#lambda/kappa reference sequence +searchstrings = {"ca": "catccccgaccagccccaaggtcttcccgctgagcctctgcagcacccagccagatgggaacgtggtcatcgcctgcctgg", + "cg": "ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggcc", + "ce": "gcctccacacagagcccatccgtcttccccttgacccgctgctgcaaaaacattccctcc", + "cm": "gggagtgcatccgccccaacc"} #new (shorter) cm sequence + +compiledregex = {"ca": [], + "cg": [], + "ce": [], + "cm": []} + +#lambda/kappa reference sequence variable nucleotides +ca1 = {38: 't', 39: 'g', 48: 'a', 49: 'g', 51: 'c', 68: 'a', 73: 'c'} +ca2 = {38: 'g', 39: 'a', 48: 'c', 49: 'c', 51: 'a', 68: 'g', 73: 'a'} +cg1 = {0: 'c', 33: 'a', 38: 'c', 44: 'a', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'} +cg2 = {0: 'c', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'g', 132: 't'} +cg3 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'} +cg4 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'c', 132: 'c'} + +#remove last snp for shorter cg sequence --- note, also change varsInCG +del cg1[132] +del cg2[132] +del cg3[132] +del cg4[132] + +#reference sequences are cut into smaller parts of 'chunklength' length, and with 'chunklength' / 2 overlap +chunklength = 8 + +#create the chunks of the reference sequence with regular expressions for the variable nucleotides +for i in range(0, len(searchstrings["ca"]) - chunklength, chunklength / 2): + pos = i + chunk = searchstrings["ca"][i:i+chunklength] + result = "" + varsInResult = 0 + for c in chunk: + if pos in ca1.keys(): + varsInResult += 1 + result += "[" + ca1[pos] + ca2[pos] + "]" + else: + result += c + pos += 1 + compiledregex["ca"].append((re.compile(result), varsInResult)) + +for i in range(0, len(searchstrings["cg"]) - chunklength, chunklength / 2): + pos = i + chunk = searchstrings["cg"][i:i+chunklength] + result = "" + varsInResult = 0 + for c in chunk: + if pos in cg1.keys(): + varsInResult += 1 + result += "[" + "".join(set([cg1[pos], cg2[pos], cg3[pos], cg4[pos]])) + "]" + else: + result += c + pos += 1 + compiledregex["cg"].append((re.compile(result), varsInResult)) + +for i in range(0, len(searchstrings["cm"]) - chunklength, chunklength / 2): + compiledregex["cm"].append((re.compile(searchstrings["cm"][i:i+chunklength]), False)) + +for i in range(0, len(searchstrings["ce"]) - chunklength + 1, chunklength / 2): + compiledregex["ce"].append((re.compile(searchstrings["ce"][i:i+chunklength]), False)) + +def removeAndReturnMaxIndex(x): #simplifies a list comprehension + m = max(x) + index = x.index(m) + x[index] = 0 + return index + + +start_location = dict() +hits = dict() +alltotal = 0 +for key in compiledregex.keys(): #for ca/cg/cm/ce + regularexpressions = compiledregex[key] #get the compiled regular expressions + for ID in dic.keys()[0:]: #for every ID + if ID not in hits.keys(): #ensure that the dictionairy that keeps track of the hits for every gene exists + hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ce_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0} + currentIDHits = hits[ID] + seq = dic[ID] + lastindex = 0 + start_zero = len(searchstrings[key]) #allows the reference sequence to start before search sequence (start_locations of < 0) + start = [0] * (len(seq) + start_zero) + for i, regexp in enumerate(regularexpressions): #for every regular expression + relativeStartLocation = lastindex - (chunklength / 2) * i + if relativeStartLocation >= len(seq): + break + regex, hasVar = regexp + matches = regex.finditer(seq[lastindex:]) + for match in matches: #for every match with the current regex, only uses the first hit because of the break at the end of this loop + lastindex += match.start() + start[relativeStartLocation + start_zero] += 1 + if hasVar: #if the regex has a variable nt in it + chunkstart = chunklength / 2 * i #where in the reference does this chunk start + chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end + if key == "ca": #just calculate the variable nt score for 'ca', cheaper + currentIDHits["ca1"] += len([1 for x in ca1 if chunkstart <= x < chunkend and ca1[x] == seq[lastindex + x - chunkstart]]) + currentIDHits["ca2"] += len([1 for x in ca2 if chunkstart <= x < chunkend and ca2[x] == seq[lastindex + x - chunkstart]]) + elif key == "cg": #just calculate the variable nt score for 'cg', cheaper + currentIDHits["cg1"] += len([1 for x in cg1 if chunkstart <= x < chunkend and cg1[x] == seq[lastindex + x - chunkstart]]) + currentIDHits["cg2"] += len([1 for x in cg2 if chunkstart <= x < chunkend and cg2[x] == seq[lastindex + x - chunkstart]]) + currentIDHits["cg3"] += len([1 for x in cg3 if chunkstart <= x < chunkend and cg3[x] == seq[lastindex + x - chunkstart]]) + currentIDHits["cg4"] += len([1 for x in cg4 if chunkstart <= x < chunkend and cg4[x] == seq[lastindex + x - chunkstart]]) + else: #key == "cm" #no variable regions in 'cm' or 'ce' + pass + break #this only breaks when there was a match with the regex, breaking means the 'else:' clause is skipped + else: #only runs if there were no hits + continue + #print "found ", regex.pattern , "at", lastindex, "adding one to", (lastindex - chunklength / 2 * i), "to the start array of", ID, "gene", key, "it's now:", start[lastindex - chunklength / 2 * i] + currentIDHits[key + "_hits"] += 1 + start_location[ID + "_" + key] = str([(removeAndReturnMaxIndex(start) + 1 - start_zero) for x in range(5) if len(start) > 0 and max(start) > 1]) + #start_location[ID + "_" + key] = str(start.index(max(start))) + + +varsInCA = float(len(ca1.keys()) * 2) +varsInCG = float(len(cg1.keys()) * 2) - 2 # -2 because the sliding window doesn't hit the first and last nt twice +varsInCM = 0 +varsInCE = 0 + +def round_int(val): + return int(round(val)) + +first = True +seq_write_count=0 +with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence + with open(output, 'w') as o: + for line in f: + total += 1 + if first: + o.write("Sequence ID\tbest_match\tnt_hit_percentage\tchunk_hit_percentage\tstart_locations\n") + first = False + continue + linesplt = line.split("\t") + if linesplt[2] == "No results": + pass + ID = linesplt[1] + currentIDHits = hits[ID] + possibleca = float(len(compiledregex["ca"])) + possiblecg = float(len(compiledregex["cg"])) + possiblecm = float(len(compiledregex["cm"])) + possiblece = float(len(compiledregex["ce"])) + cahits = currentIDHits["ca_hits"] + cghits = currentIDHits["cg_hits"] + cmhits = currentIDHits["cm_hits"] + cehits = currentIDHits["ce_hits"] + if cahits >= cghits and cahits >= cmhits and cahits >= cehits: #its a ca gene + ca1hits = currentIDHits["ca1"] + ca2hits = currentIDHits["ca2"] + if ca1hits >= ca2hits: + o.write(ID + "\tIGA1\t" + str(round_int(ca1hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") + else: + o.write(ID + "\tIGA2\t" + str(round_int(ca2hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") + elif cghits >= cahits and cghits >= cmhits and cghits >= cehits: #its a cg gene + cg1hits = currentIDHits["cg1"] + cg2hits = currentIDHits["cg2"] + cg3hits = currentIDHits["cg3"] + cg4hits = currentIDHits["cg4"] + if cg1hits >= cg2hits and cg1hits >= cg3hits and cg1hits >= cg4hits: #cg1 gene + o.write(ID + "\tIGG1\t" + str(round_int(cg1hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") + elif cg2hits >= cg1hits and cg2hits >= cg3hits and cg2hits >= cg4hits: #cg2 gene + o.write(ID + "\tIGG2\t" + str(round_int(cg2hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") + elif cg3hits >= cg1hits and cg3hits >= cg2hits and cg3hits >= cg4hits: #cg3 gene + o.write(ID + "\tIGG3\t" + str(round_int(cg3hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") + else: #cg4 gene + o.write(ID + "\tIGG4\t" + str(round_int(cg4hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") + else: #its a cm or ce gene + if cmhits >= cehits: + o.write(ID + "\tIGM\t100\t" + str(round_int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cm"] + "\n") + else: + o.write(ID + "\tIGE\t100\t" + str(round_int(cehits / possiblece * 100)) + "\t" + start_location[ID + "_ce"] + "\n") + seq_write_count += 1 + +print "Time: %i" % (int(time.time() * 1000) - starttime) + +print "Number of sequences written to file:", seq_write_count + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/imgt_loader.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,98 @@ +args <- commandArgs(trailingOnly = TRUE) + +summ.file = args[1] +aa.file = args[2] +junction.file = args[3] +out.file = args[4] + +summ = read.table(summ.file, sep="\t", header=T, quote="", fill=T) +aa = read.table(aa.file, sep="\t", header=T, quote="", fill=T) +junction = read.table(junction.file, sep="\t", header=T, quote="", fill=T) + +fix_column_names = function(df){ + if("V.DOMAIN.Functionality" %in% names(df)){ + names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality" + print("found V.DOMAIN.Functionality, changed") + } + if("V.DOMAIN.Functionality.comment" %in% names(df)){ + names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment" + print("found V.DOMAIN.Functionality.comment, changed") + } + return(df) +} + +summ = fix_column_names(summ) +aa = fix_column_names(aa) +junction = fix_column_names(junction) + +old_summary_columns=c('Sequence.ID','JUNCTION.frame','V.GENE.and.allele','D.GENE.and.allele','J.GENE.and.allele','CDR1.IMGT.length','CDR2.IMGT.length','CDR3.IMGT.length','Orientation') +old_sequence_columns=c('CDR1.IMGT','CDR2.IMGT','CDR3.IMGT') +old_junction_columns=c('JUNCTION') + +added_summary_columns=c('Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence') +added_sequence_columns=c('FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT') + +added_junction_columns=c('P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION') +added_junction_columns=c(added_junction_columns, 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb') + +out=summ[,c("Sequence.ID","JUNCTION.frame","V.GENE.and.allele","D.GENE.and.allele","J.GENE.and.allele")] + +out[,"CDR1.Seq"] = aa[,"CDR1.IMGT"] +out[,"CDR1.Length"] = summ[,"CDR1.IMGT.length"] + +out[,"CDR2.Seq"] = aa[,"CDR2.IMGT"] +out[,"CDR2.Length"] = summ[,"CDR2.IMGT.length"] + +out[,"CDR3.Seq"] = aa[,"CDR3.IMGT"] +out[,"CDR3.Length"] = summ[,"CDR3.IMGT.length"] + +out[,"CDR3.Seq.DNA"] = junction[,"JUNCTION"] +out[,"CDR3.Length.DNA"] = nchar(as.character(junction[,"JUNCTION"])) +out[,"Strand"] = summ[,"Orientation"] +out[,"CDR3.Found.How"] = "a" + +out[,added_summary_columns] = summ[,added_summary_columns] + +out[,added_sequence_columns] = aa[,added_sequence_columns] + +out[,added_junction_columns] = junction[,added_junction_columns] + +out[,"Top V Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"V.GENE.and.allele"])) +out[,"Top D Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"D.GENE.and.allele"])) +out[,"Top J Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"J.GENE.and.allele"])) + +out = out[,c('Sequence.ID','JUNCTION.frame','Top V Gene','Top D Gene','Top J Gene','CDR1.Seq','CDR1.Length','CDR2.Seq','CDR2.Length','CDR3.Seq','CDR3.Length','CDR3.Seq.DNA','CDR3.Length.DNA','Strand','CDR3.Found.How','Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence','FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT','P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb')] + +names(out) = c('ID','VDJ Frame','Top V Gene','Top D Gene','Top J Gene','CDR1 Seq','CDR1 Length','CDR2 Seq','CDR2 Length','CDR3 Seq','CDR3 Length','CDR3 Seq DNA','CDR3 Length DNA','Strand','CDR3 Found How','Functionality','V-REGION identity %','V-REGION identity nt','D-REGION reading frame','AA JUNCTION','Functionality comment','Sequence','FR1-IMGT','FR2-IMGT','FR3-IMGT','CDR3-IMGT','JUNCTION','J-REGION','FR4-IMGT','P3V-nt nb','N-REGION-nt nb','N1-REGION-nt nb','P5D-nt nb','P3D-nt nb','N2-REGION-nt nb','P5J-nt nb','3V-REGION trimmed-nt nb','5D-REGION trimmed-nt nb','3D-REGION trimmed-nt nb','5J-REGION trimmed-nt nb','N-REGION','N1-REGION','N2-REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb') + +out[,"VDJ Frame"] = as.character(out[,"VDJ Frame"]) + +fltr = out[,"VDJ Frame"] == "in-frame" +if(any(fltr, na.rm = T)){ + out[fltr, "VDJ Frame"] = "In-frame" +} + +fltr = out[,"VDJ Frame"] == "null" +if(any(fltr, na.rm = T)){ + out[fltr, "VDJ Frame"] = "Out-of-frame" +} + +fltr = out[,"VDJ Frame"] == "out-of-frame" +if(any(fltr, na.rm = T)){ + out[fltr, "VDJ Frame"] = "Out-of-frame" +} + +fltr = out[,"VDJ Frame"] == "" +if(any(fltr, na.rm = T)){ + out[fltr, "VDJ Frame"] = "Out-of-frame" +} + +for(col in c('Top V Gene','Top D Gene','Top J Gene')){ + out[,col] = as.character(out[,col]) + fltr = out[,col] == "" + if(any(fltr, na.rm = T)){ + out[fltr,col] = "NA" + } +} + +write.table(out, out.file, sep="\t", quote=F, row.names=F, col.names=T)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/merge.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,27 @@ +args <- commandArgs(trailingOnly = TRUE) + +input.1 = args[1] +input.2 = args[2] + +fields.1 = args[3] +fields.2 = args[4] + +field.1 = args[5] +field.2 = args[6] + +output = args[7] + +dat1 = read.table(input.1, header=T, sep="\t", quote="", stringsAsFactors=F, fill=T, row.names=NULL) +if(fields.1 != "all"){ + fields.1 = unlist(strsplit(fields.1, ",")) + dat1 = dat1[,fields.1] +} +dat2 = read.table(input.2, header=T, sep="\t", quote="", stringsAsFactors=F, fill=T, row.names=NULL) +if(fields.2 != "all"){ + fields.2 = unlist(strsplit(fields.2, ",")) + dat2 = dat2[,fields.2] +} + +dat3 = merge(dat1, dat2, by.x=field.1, by.y=field.2) + +write.table(dat3, output, sep="\t",quote=F,row.names=F,col.names=T)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/merge_and_filter.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,304 @@ +args <- commandArgs(trailingOnly = TRUE) + + +summaryfile = args[1] +sequencesfile = args[2] +mutationanalysisfile = args[3] +mutationstatsfile = args[4] +hotspotsfile = args[5] +aafile = args[6] +gene_identification_file= args[7] +output = args[8] +before.unique.file = args[9] +unmatchedfile = args[10] +method=args[11] +functionality=args[12] +unique.type=args[13] +filter.unique=args[14] +filter.unique.count=as.numeric(args[15]) +class.filter=args[16] +empty.region.filter=args[17] + +print(paste("filter.unique.count:", filter.unique.count)) + +summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +mutationstats = read.table(mutationstatsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +hotspots = read.table(hotspotsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +AAs = read.table(aafile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +gene_identification = read.table(gene_identification_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") + +fix_column_names = function(df){ + if("V.DOMAIN.Functionality" %in% names(df)){ + names(df)[names(df) == "V.DOMAIN.Functionality"] = "Functionality" + print("found V.DOMAIN.Functionality, changed") + } + if("V.DOMAIN.Functionality.comment" %in% names(df)){ + names(df)[names(df) == "V.DOMAIN.Functionality.comment"] = "Functionality.comment" + print("found V.DOMAIN.Functionality.comment, changed") + } + return(df) +} + +fix_non_unique_ids = function(df){ + df$Sequence.ID = paste(df$Sequence.ID, 1:nrow(df)) + return(df) +} + +summ = fix_column_names(summ) +sequences = fix_column_names(sequences) +mutationanalysis = fix_column_names(mutationanalysis) +mutationstats = fix_column_names(mutationstats) +hotspots = fix_column_names(hotspots) +AAs = fix_column_names(AAs) + +if(method == "blastn"){ + #"qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" + gene_identification = gene_identification[!duplicated(gene_identification$qseqid),] + ref_length = data.frame(sseqid=c("ca1", "ca2", "cg1", "cg2", "cg3", "cg4", "cm"), ref.length=c(81,81,141,141,141,141,52)) + gene_identification = merge(gene_identification, ref_length, by="sseqid", all.x=T) + gene_identification$chunk_hit_percentage = (gene_identification$length / gene_identification$ref.length) * 100 + gene_identification = gene_identification[,c("qseqid", "chunk_hit_percentage", "pident", "qstart", "sseqid")] + colnames(gene_identification) = c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match") +} + +#print("Summary analysis files columns") +#print(names(summ)) + + + +input.sequence.count = nrow(summ) +print(paste("Number of sequences in summary file:", input.sequence.count)) + +filtering.steps = data.frame(character(0), numeric(0)) + +filtering.steps = rbind(filtering.steps, c("Input", input.sequence.count)) + +filtering.steps[,1] = as.character(filtering.steps[,1]) +filtering.steps[,2] = as.character(filtering.steps[,2]) +#filtering.steps[,3] = as.numeric(filtering.steps[,3]) + +#print("summary files columns") +#print(names(summ)) + +summ = merge(summ, gene_identification, by="Sequence.ID") + +print(paste("Number of sequences after merging with gene identification:", nrow(summ))) + +summ = summ[summ$Functionality != "No results",] + +print(paste("Number of sequences after 'No results' filter:", nrow(summ))) + +filtering.steps = rbind(filtering.steps, c("After 'No results' filter", nrow(summ))) + +if(functionality == "productive"){ + summ = summ[summ$Functionality == "productive (see comment)" | summ$Functionality == "productive",] +} else if (functionality == "unproductive"){ + summ = summ[summ$Functionality == "unproductive (see comment)" | summ$Functionality == "unproductive",] +} else if (functionality == "remove_unknown"){ + summ = summ[summ$Functionality != "No results" & summ$Functionality != "unknown (see comment)" & summ$Functionality != "unknown",] +} + +print(paste("Number of sequences after functionality filter:", nrow(summ))) + +filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ))) + +if(F){ #to speed up debugging + set.seed(1) + summ = summ[sample(nrow(summ), floor(nrow(summ) * 0.03)),] + print(paste("Number of sequences after sampling 3%:", nrow(summ))) + + filtering.steps = rbind(filtering.steps, c("Number of sequences after sampling 3%", nrow(summ))) +} + +print("mutation analysis files columns") +print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])])) + +result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID") + +print(paste("Number of sequences after merging with mutation analysis file:", nrow(result))) + +#print("mutation stats files columns") +#print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])])) + +result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID") + +print(paste("Number of sequences after merging with mutation stats file:", nrow(result))) + +print("hotspots files columns") +print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])])) + +result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID") + +print(paste("Number of sequences after merging with hotspots file:", nrow(result))) + +print("sequences files columns") +print(c("FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")) + +sequences = sequences[,c("Sequence.ID", "FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")] +names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq") +result = merge(result, sequences, by="Sequence.ID", all.x=T) + +AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")] +names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA") +result = merge(result, AAs, by="Sequence.ID", all.x=T) + +print(paste("Number of sequences in result after merging with sequences:", nrow(result))) + +result$VGene = gsub("^Homsap ", "", result$V.GENE.and.allele) +result$VGene = gsub("[*].*", "", result$VGene) +result$DGene = gsub("^Homsap ", "", result$D.GENE.and.allele) +result$DGene = gsub("[*].*", "", result$DGene) +result$JGene = gsub("^Homsap ", "", result$J.GENE.and.allele) +result$JGene = gsub("[*].*", "", result$JGene) + +splt = strsplit(class.filter, "_")[[1]] +chunk_hit_threshold = as.numeric(splt[1]) +nt_hit_threshold = as.numeric(splt[2]) + +higher_than=(result$chunk_hit_percentage >= chunk_hit_threshold & result$nt_hit_percentage >= nt_hit_threshold) + +if(!all(higher_than, na.rm=T)){ #check for no unmatched + result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"]) +} + +if(class.filter == "101_101"){ + result$best_match = "all" +} + +write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T) + +print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == "", na.rm=T))) +print(paste("Number of empty FR2 sequences:", sum(result$FR2.IMGT.seq == "", na.rm=T))) +print(paste("Number of empty CDR2 sequences:", sum(result$CDR2.IMGT.seq == "", na.rm=T))) +print(paste("Number of empty FR3 sequences:", sum(result$FR3.IMGT.seq == "", na.rm=T))) + +if(empty.region.filter == "leader"){ + result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] +} else if(empty.region.filter == "FR1"){ + result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] +} else if(empty.region.filter == "CDR1"){ + result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] +} else if(empty.region.filter == "FR2"){ + result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] +} + +print(paste("After removal sequences that are missing a gene region:", nrow(result))) +filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result))) + +if(empty.region.filter == "leader"){ + result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] +} else if(empty.region.filter == "FR1"){ + result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] +} else if(empty.region.filter == "CDR1"){ + result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] +} else if(empty.region.filter == "FR2"){ + result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] +} + +print(paste("Number of sequences in result after n filtering:", nrow(result))) +filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result))) + +cleanup_columns = c("FR1.IMGT.Nb.of.mutations", + "CDR1.IMGT.Nb.of.mutations", + "FR2.IMGT.Nb.of.mutations", + "CDR2.IMGT.Nb.of.mutations", + "FR3.IMGT.Nb.of.mutations") + +for(col in cleanup_columns){ + result[,col] = gsub("\\(.*\\)", "", result[,col]) + result[,col] = as.numeric(result[,col]) + result[is.na(result[,col]),] = 0 +} + +write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T) + + +if(filter.unique != "no"){ + clmns = names(result) + if(filter.unique == "remove_vjaa"){ + result$unique.def = paste(result$VGene, result$JGene, result$CDR3.IMGT.AA) + } else if(empty.region.filter == "leader"){ + result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "FR1"){ + result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "CDR1"){ + result$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "FR2"){ + result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } + + if(grepl("remove", filter.unique)){ + result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] + unique.defs = data.frame(table(result$unique.def)) + unique.defs = unique.defs[unique.defs$Freq >= filter.unique.count,] + result = result[result$unique.def %in% unique.defs$Var1,] + } + + if(filter.unique != "remove_vjaa"){ + result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it + } + + result = result[!duplicated(result$unique.def),] +} + +write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T) + +filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) + +print(paste("Number of sequences in result after unique filtering:", nrow(result))) + +if(nrow(summ) == 0){ + stop("No data remaining after filter") +} + +result$best_match_class = gsub(",.*", "", result$best_match) #gsub so the unmatched don't have a class after it + +#result$past = "" +#cls = unlist(strsplit(unique.type, ",")) +#for (i in 1:nrow(result)){ +# result[i,"past"] = paste(result[i,cls], collapse=":") +#} + + + +result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":")) + +result.matched = result[!grepl("unmatched", result$best_match),] +result.unmatched = result[grepl("unmatched", result$best_match),] + +result = rbind(result.matched, result.unmatched) + +result = result[!(duplicated(result$past)), ] + +result = result[,!(names(result) %in% c("past", "best_match_class"))] + +print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result))) + +filtering.steps = rbind(filtering.steps, c("After remove duplicates based on filter", nrow(result))) + +unmatched = result[grepl("^unmatched", result$best_match),c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")] + +print(paste("Number of rows in result:", nrow(result))) +print(paste("Number of rows in unmatched:", nrow(unmatched))) + +matched.sequences = result[!grepl("^unmatched", result$best_match),] + +write.table(x=matched.sequences, file=gsub("merged.txt$", "filtered.txt", output), sep="\t",quote=F,row.names=F,col.names=T) + +matched.sequences.count = nrow(matched.sequences) +unmatched.sequences.count = sum(grepl("^unmatched", result$best_match)) +if(matched.sequences.count <= unmatched.sequences.count){ + print("WARNING NO MATCHED (SUB)CLASS SEQUENCES!!") +} + +filtering.steps = rbind(filtering.steps, c("Number of matched sequences", matched.sequences.count)) +filtering.steps = rbind(filtering.steps, c("Number of unmatched sequences", unmatched.sequences.count)) +filtering.steps[,2] = as.numeric(filtering.steps[,2]) +filtering.steps$perc = round(filtering.steps[,2] / input.sequence.count * 100, 2) + +write.table(x=filtering.steps, file=gsub("unmatched", "filtering_steps", unmatchedfile), sep="\t",quote=F,row.names=F,col.names=F) + +write.table(x=result, file=output, sep="\t",quote=F,row.names=F,col.names=T) +write.table(x=unmatched, file=unmatchedfile, sep="\t",quote=F,row.names=F,col.names=T)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/mutation_column_checker.py Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,27 @@ +import re + +mutationMatcher = re.compile("^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?") + +with open("7_V-REGION-mutation-and-AA-change-table.txt", 'r') as file_handle: + first = True + fr3_index = -1 + for i, line in enumerate(file_handle): + line_split = line.split("\t") + if first: + fr3_index = line_split.index("FR3-IMGT") + first = False + continue + + if len(line_split) < fr3_index: + continue + + fr3_data = line_split[fr3_index] + if len(fr3_data) > 5: + try: + test = [mutationMatcher.match(x).groups() for x in fr3_data.split("|") if x] + except: + print(line_split[1]) + print("Something went wrong at line {line} with:".format(line=line_split[0])) + #print([x for x in fr3_data.split("|") if not mutationMatcher.match(x)]) + if i % 100000 == 0: + print(i)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/naive_output.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,45 @@ +args <- commandArgs(trailingOnly = TRUE) + +naive.file = args[1] +shm.file = args[2] +output.file.ca = args[3] +output.file.cg = args[4] +output.file.cm = args[5] + +naive = read.table(naive.file, sep="\t", header=T, quote="", fill=T) +shm.merge = read.table(shm.file, sep="\t", header=T, quote="", fill=T) + + +final = merge(naive, shm.merge[,c("Sequence.ID", "best_match")], by.x="ID", by.y="Sequence.ID") +print(paste("nrow final:", nrow(final))) +names(final)[names(final) == "best_match"] = "Sample" +final.numeric = final[,sapply(final, is.numeric)] +final.numeric[is.na(final.numeric)] = 0 +final[,sapply(final, is.numeric)] = final.numeric + +final.ca = final[grepl("^ca", final$Sample),] +final.cg = final[grepl("^cg", final$Sample),] +final.cm = final[grepl("^cm", final$Sample),] + +if(nrow(final.ca) > 0){ + final.ca$Replicate = 1 +} + +if(nrow(final.cg) > 0){ + final.cg$Replicate = 1 +} + +if(nrow(final.cm) > 0){ + final.cm$Replicate = 1 +} + +#print(paste("nrow final:", nrow(final))) +#final2 = final +#final2$Sample = gsub("[0-9]", "", final2$Sample) +#final = rbind(final, final2) +#final$Replicate = 1 + +write.table(final.ca, output.file.ca, quote=F, sep="\t", row.names=F, col.names=T) +write.table(final.cg, output.file.cg, quote=F, sep="\t", row.names=F, col.names=T) +write.table(final.cm, output.file.cm, quote=F, sep="\t", row.names=F, col.names=T) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/new_imgt.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,40 @@ +args <- commandArgs(trailingOnly = TRUE) + +imgt.dir = args[1] +merged.file = args[2] +gene = args[3] + +merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, comment.char="", quote="") + +if(!("Sequence.ID" %in% names(merged))){ #change-o db + print("Change-O DB changing 'SEQUENCE_ID' to 'Sequence.ID'") + names(merged)[which(names[merged] == "SEQUENCE_ID")] = "Sequence.ID" +} + +if(gene != "-"){ + merged = merged[grepl(paste("^", gene, sep=""), merged$best_match),] +} + +if("best_match" %in% names(merged)){ + merged = merged[!grepl("unmatched", merged$best_match),] +} + +nrow_dat = 0 + +for(f in list.files(imgt.dir, pattern="*.txt$")){ + #print(paste("filtering", f)) + path = file.path(imgt.dir, f) + dat = read.table(path, header=T, sep="\t", fill=T, quote="", stringsAsFactors=F, check.names=FALSE, comment.char="") + + dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,] + + nrow_dat = nrow(dat) + + if(nrow(dat) > 0 & grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." file + dat[,grepl("^FR1", names(dat))] = 0 + } + + write.table(dat, path, quote=F, sep="\t", row.names=F, col.names=T, na="") +} + +print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences"))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/pattern_plots.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,178 @@ +library(ggplot2) +library(reshape2) +library(scales) + +args <- commandArgs(trailingOnly = TRUE) + +input.file = args[1] #the data that's get turned into the "SHM overview" table in the html report "data_sum.txt" + +plot1.path = args[2] +plot1.png = paste(plot1.path, ".png", sep="") +plot1.txt = paste(plot1.path, ".txt", sep="") +plot1.pdf = paste(plot1.path, ".pdf", sep="") + +plot2.path = args[3] +plot2.png = paste(plot2.path, ".png", sep="") +plot2.txt = paste(plot2.path, ".txt", sep="") +plot2.pdf = paste(plot2.path, ".pdf", sep="") + +plot3.path = args[4] +plot3.png = paste(plot3.path, ".png", sep="") +plot3.txt = paste(plot3.path, ".txt", sep="") +plot3.pdf = paste(plot3.path, ".pdf", sep="") + +clean.output = args[5] + +dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1) + +classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE") +xyz = c("x", "y", "z") +new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep=".")) + +names(dat) = new.names + +clean.dat = dat +clean.dat = clean.dat[,c(paste(rep(classes, each=3), xyz, sep="."), paste("all", xyz, sep="."), paste("un", xyz, sep="."))] + +write.table(clean.dat, clean.output, quote=F, sep="\t", na="", row.names=T, col.names=NA) + +dat["RGYW.WRCY",] = colSums(dat[c(14,15),], na.rm=T) +dat["TW.WA",] = colSums(dat[c(16,17),], na.rm=T) + +data1 = dat[c("RGYW.WRCY", "TW.WA"),] + +data1 = data1[,names(data1)[grepl(".z", names(data1))]] +names(data1) = gsub("\\..*", "", names(data1)) + +data1 = melt(t(data1)) + +names(data1) = c("Class", "Type", "value") + +chk = is.na(data1$value) +if(any(chk)){ + data1[chk, "value"] = 0 +} + +data1 = data1[order(data1$Type),] + +write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) + +p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) + ggtitle("Percentage of mutations in AID and pol eta motives") +p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4")) +#p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4")) +png(filename=plot1.png, width=510, height=300) +print(p) +dev.off() + +ggsave(plot1.pdf, p) + +data2 = dat[c(1, 5:8),] + +data2 = data2[,names(data2)[grepl("\\.x", names(data2))]] +names(data2) = gsub(".x", "", names(data2)) + +data2["A/T",] = dat["Targeting of A T (%)",names(dat)[grepl("\\.z", names(dat))]] + +data2["G/C transitions",] = round(data2["Transitions at G C (%)",] / data2["Number of Mutations (%)",] * 100, 1) + +data2["mutation.at.gc",] = dat["Transitions at G C (%)",names(dat)[grepl("\\.y", names(dat))]] +data2["G/C transversions",] = round((data2["mutation.at.gc",] - data2["Transitions at G C (%)",]) / data2["Number of Mutations (%)",] * 100, 1) + +data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0 +data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0 +data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0 +data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0 + +data2 = melt(t(data2[c("A/T","G/C transitions","G/C transversions"),])) + +names(data2) = c("Class", "Type", "value") + +chk = is.na(data2$value) +if(any(chk)){ + data2[chk, "value"] = 0 +} + +data2 = data2[order(data2$Type),] + +write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) + +p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") + ggtitle("Relative mutation patterns") +p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) +#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) +png(filename=plot2.png, width=480, height=300) +print(p) +dev.off() + +ggsave(plot2.pdf, p) + +data3 = dat[c(5, 6, 8, 18:21),] +data3 = data3[,names(data3)[grepl("\\.x", names(data3))]] +names(data3) = gsub(".x", "", names(data3)) + +data3["G/C transitions",] = round(data3["Transitions at G C (%)",] / (data3["C",] + data3["G",]) * 100, 1) + +data3["G/C transversions",] = round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / (data3["C",] + data3["G",]) * 100, 1) + +data3["A/T",] = round(data3["Targeting of A T (%)",] / (data3["A",] + data3["T",]) * 100, 1) + +data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0 +data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0 + +data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0 +data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0 + +data3["A/T",is.nan(unlist(data3["A/T",]))] = 0 +data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0 + +data3 = melt(t(data3[8:10,])) +names(data3) = c("Class", "Type", "value") + +chk = is.na(data3$value) +if(any(chk)){ + data3[chk, "value"] = 0 +} + +data3 = data3[order(data3$Type),] + +write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) + +p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) + ggtitle("Absolute mutation patterns") +p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) +#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) +png(filename=plot3.png, width=480, height=300) +print(p) +dev.off() + +ggsave(plot3.pdf, p) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/plot_pdf.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,17 @@ +library(ggplot2) + +args <- commandArgs(trailingOnly = TRUE) +print(args) + +input = args[1] +outputdir = args[2] +setwd(outputdir) + +load(input) + +print(names(pdfplots)) + +for(n in names(pdfplots)){ + print(paste("n:", n)) + ggsave(pdfplots[[n]], file=n) +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/sequence_overview.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,363 @@ +library(reshape2) + +args <- commandArgs(trailingOnly = TRUE) + +before.unique.file = args[1] +merged.file = args[2] +outputdir = args[3] +gene.classes = unlist(strsplit(args[4], ",")) +hotspot.analysis.sum.file = args[5] +NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") +NTsum.file = paste(outputdir, "ntsum.txt", sep="/") +main.html = "index.html" +empty.region.filter = args[6] + + +setwd(outputdir) + +before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") + +#before.unique = before.unique[!grepl("unmatched", before.unique$best_match),] + +if(empty.region.filter == "leader"){ + before.unique$seq_conc = paste(before.unique$FR1.IMGT.seq, before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) +} else if(empty.region.filter == "FR1"){ + before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) +} else if(empty.region.filter == "CDR1"){ + before.unique$seq_conc = paste(before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) +} else if(empty.region.filter == "FR2"){ + before.unique$seq_conc = paste(before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) +} + +IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] +IDs$best_match = as.character(IDs$best_match) + +dat = data.frame(table(before.unique$seq_conc)) + +names(dat) = c("seq_conc", "Freq") + +dat$seq_conc = factor(dat$seq_conc) + +dat = dat[order(as.character(dat$seq_conc)),] + +#writing html from R... +get.bg.color = function(val){ + if(val %in% c("TRUE", "FALSE", "T", "F")){ #if its a logical value, give the background a green/red color + return(ifelse(val,"#eafaf1","#f9ebea")) + } else if (!is.na(as.numeric(val))) { #if its a numerical value, give it a grey tint if its >0 + return(ifelse(val > 0,"#eaecee","white")) + } else { + return("white") + } +} +td = function(val) { + return(paste("<td bgcolor='", get.bg.color(val), "'>", val, "</td>", sep="")) +} +tr = function(val) { + return(paste(c("<tr>", sapply(val, td), "</tr>"), collapse="")) +} + +make.link = function(id, clss, val) { + paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") +} +tbl = function(df) { + res = "<table border='1'>" + for(i in 1:nrow(df)){ + res = paste(res, tr(df[i,]), sep="") + } + res = paste(res, "</table>") +} + +cat("<center><img src=''> Please note that this tab is based on all sequences before filter unique sequences and the remove duplicates based on filters are applied. In this table only sequences occuring more than once are included. </center>", file=main.html, append=F) +cat("<table border='1' class='pure-table pure-table-striped'>", file=main.html, append=T) + +if(empty.region.filter == "leader"){ + cat("<caption>FR1+CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) +} else if(empty.region.filter == "FR1"){ + cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) +} else if(empty.region.filter == "CDR1"){ + cat("<caption>FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) +} else if(empty.region.filter == "FR2"){ + cat("<caption>CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) +} + +cat("<tr>", file=main.html, append=T) +cat("<th>Sequence</th><th>Functionality</th><th>IGA1</th><th>IGA2</th><th>IGG1</th><th>IGG2</th><th>IGG3</th><th>IGG4</th><th>IGM</th><th>IGE</th><th>UN</th>", file=main.html, append=T) +cat("<th>total IGA</th><th>total IGG</th><th>total IGM</th><th>total IGE</th><th>number of subclasses</th><th>present in both IGA and IGG</th><th>present in IGA, IGG and IGM</th><th>present in IGA, IGG and IGE</th><th>present in IGA, IGG, IGM and IGE</th><th>IGA1+IGA2</th>", file=main.html, append=T) +cat("<th>IGG1+IGG2</th><th>IGG1+IGG3</th><th>IGG1+IGG4</th><th>IGG2+IGG3</th><th>IGG2+IGG4</th><th>IGG3+IGG4</th>", file=main.html, append=T) +cat("<th>IGG1+IGG2+IGG3</th><th>IGG2+IGG3+IGG4</th><th>IGG1+IGG2+IGG4</th><th>IGG1+IGG3+IGG4</th><th>IGG1+IGG2+IGG3+IGG4</th>", file=main.html, append=T) +cat("</tr>", file=main.html, append=T) + + + +single.sequences=0 #sequence only found once, skipped +in.multiple=0 #same sequence across multiple subclasses +multiple.in.one=0 #same sequence multiple times in one subclass +unmatched=0 #all of the sequences are unmatched +some.unmatched=0 #one or more sequences in a clone are unmatched +matched=0 #should be the same als matched sequences + +sequence.id.page="by_id.html" + +for(i in 1:nrow(dat)){ + + ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGA1", IDs$best_match),] + ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGA2", IDs$best_match),] + + cg1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG1", IDs$best_match),] + cg2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG2", IDs$best_match),] + cg3 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG3", IDs$best_match),] + cg4 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGG4", IDs$best_match),] + + cm = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGM", IDs$best_match),] + + ce = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^IGE", IDs$best_match),] + + un = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^unmatched", IDs$best_match),] + + allc = rbind(ca1, ca2, cg1, cg2, cg3, cg4, cm, ce, un) + + ca1.n = nrow(ca1) + ca2.n = nrow(ca2) + + cg1.n = nrow(cg1) + cg2.n = nrow(cg2) + cg3.n = nrow(cg3) + cg4.n = nrow(cg4) + + cm.n = nrow(cm) + + ce.n = nrow(ce) + + un.n = nrow(un) + + classes = c(ca1.n, ca2.n, cg1.n, cg2.n, cg3.n, cg4.n, cm.n, ce.n, un.n) + + classes.sum = sum(classes) + + if(classes.sum == 1){ + single.sequences = single.sequences + 1 + next + } + + if(un.n == classes.sum){ + unmatched = unmatched + 1 + next + } + + classes.no.un = classes[-length(classes)] + + in.classes = sum(classes.no.un > 0) + + matched = matched + in.classes #count in how many subclasses the sequence occurs. + + if(any(classes == classes.sum)){ + multiple.in.one = multiple.in.one + 1 + } else if (un.n > 0) { + some.unmatched = some.unmatched + 1 + } else { + in.multiple = in.multiple + 1 + } + + id = as.numeric(dat[i,"seq_conc"]) + + functionality = paste(unique(allc[,"Functionality"]), collapse=",") + + by.id.row = c() + + if(ca1.n > 0){ + cat(tbl(ca1), file=paste("IGA1_", id, ".html", sep="")) + } + + if(ca2.n > 0){ + cat(tbl(ca2), file=paste("IGA2_", id, ".html", sep="")) + } + + if(cg1.n > 0){ + cat(tbl(cg1), file=paste("IGG1_", id, ".html", sep="")) + } + + if(cg2.n > 0){ + cat(tbl(cg2), file=paste("IGG2_", id, ".html", sep="")) + } + + if(cg3.n > 0){ + cat(tbl(cg3), file=paste("IGG3_", id, ".html", sep="")) + } + + if(cg4.n > 0){ + cat(tbl(cg4), file=paste("IGG4_", id, ".html", sep="")) + } + + if(cm.n > 0){ + cat(tbl(cm), file=paste("IGM_", id, ".html", sep="")) + } + + if(ce.n > 0){ + cat(tbl(ce), file=paste("IGE_", id, ".html", sep="")) + } + + if(un.n > 0){ + cat(tbl(un), file=paste("un_", id, ".html", sep="")) + } + + ca1.html = make.link(id, "IGA1", ca1.n) + ca2.html = make.link(id, "IGA2", ca2.n) + + cg1.html = make.link(id, "IGG1", cg1.n) + cg2.html = make.link(id, "IGG2", cg2.n) + cg3.html = make.link(id, "IGG3", cg3.n) + cg4.html = make.link(id, "IGG4", cg4.n) + + cm.html = make.link(id, "IGM", cm.n) + + ce.html = make.link(id, "IGE", ce.n) + + un.html = make.link(id, "un", un.n) + + #extra columns + ca.n = ca1.n + ca2.n + + cg.n = cg1.n + cg2.n + cg3.n + cg4.n + + #in.classes + + in.ca.cg = (ca.n > 0 & cg.n > 0) + + in.ca.cg.cm = (ca.n > 0 & cg.n > 0 & cm.n > 0) + + in.ca.cg.ce = (ca.n > 0 & cg.n > 0 & ce.n > 0) + + in.ca.cg.cm.ce = (ca.n > 0 & cg.n > 0 & cm.n > 0 & ce.n > 0) + + in.ca1.ca2 = (ca1.n > 0 & ca2.n > 0) + + in.cg1.cg2 = (cg1.n > 0 & cg2.n > 0) + in.cg1.cg3 = (cg1.n > 0 & cg3.n > 0) + in.cg1.cg4 = (cg1.n > 0 & cg4.n > 0) + in.cg2.cg3 = (cg2.n > 0 & cg3.n > 0) + in.cg2.cg4 = (cg2.n > 0 & cg4.n > 0) + in.cg3.cg4 = (cg3.n > 0 & cg4.n > 0) + + in.cg1.cg2.cg3 = (cg1.n > 0 & cg2.n > 0 & cg3.n > 0) + in.cg2.cg3.cg4 = (cg2.n > 0 & cg3.n > 0 & cg4.n > 0) + in.cg1.cg2.cg4 = (cg1.n > 0 & cg2.n > 0 & cg4.n > 0) + in.cg1.cg3.cg4 = (cg1.n > 0 & cg3.n > 0 & cg4.n > 0) + + in.cg.all = (cg1.n > 0 & cg2.n > 0 & cg3.n > 0 & cg4.n > 0) + + #rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, un.html) + rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, ce.html, un.html) + rw = c(rw, ca.n, cg.n, cm.n, ce.n, in.classes, in.ca.cg, in.ca.cg.cm, in.ca.cg.ce, in.ca.cg.cm.ce, in.ca1.ca2, in.cg1.cg2, in.cg1.cg3, in.cg1.cg4, in.cg2.cg3, in.cg2.cg4, in.cg3.cg4, in.cg1.cg2.cg3, in.cg2.cg3.cg4, in.cg1.cg2.cg4, in.cg1.cg3.cg4, in.cg.all) + + + + cat(tr(rw), file=main.html, append=T) + + + for(i in 1:nrow(allc)){ #generate html by id + html = make.link(id, allc[i,"best_match"], allc[i,"Sequence.ID"]) + cat(paste(html, "<br />"), file=sequence.id.page, append=T) + } +} + +cat("</table>", file=main.html, append=T) + +print(paste("Single sequences:", single.sequences)) +print(paste("Sequences in multiple subclasses:", in.multiple)) +print(paste("Multiple sequences in one subclass:", multiple.in.one)) +print(paste("Matched with unmatched:", some.unmatched)) +print(paste("Count that should match 'matched' sequences:", matched)) + +#ACGT overview + +#NToverview = merged[!grepl("^unmatched", merged$best_match),] +NToverview = merged + +if(empty.region.filter == "leader"){ + NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) +} else if(empty.region.filter == "FR1"){ + NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) +} else if(empty.region.filter == "CDR1"){ + NToverview$seq = paste(NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) +} else if(empty.region.filter == "FR2"){ + NToverview$seq = paste(NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) +} + +NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) +NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) +NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) +NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) + +#Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) + +#NToverview = rbind(NToverview, NTsum) + +NTresult = data.frame(nt=c("A", "C", "T", "G")) + +for(clazz in gene.classes){ + print(paste("class:", clazz)) + NToverview.sub = NToverview[grepl(paste("^", clazz, sep=""), NToverview$best_match),] + print(paste("nrow:", nrow(NToverview.sub))) + new.col.x = c(sum(NToverview.sub$A), sum(NToverview.sub$C), sum(NToverview.sub$T), sum(NToverview.sub$G)) + new.col.y = sum(new.col.x) + new.col.z = round(new.col.x / new.col.y * 100, 2) + + tmp = names(NTresult) + NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z)) + names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep="")) +} + +NToverview.tmp = NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")] + +names(NToverview.tmp) = c("Sequence.ID", "best_match", "Sequence of the analysed region", "A", "C", "G", "T") + +write.table(NToverview.tmp, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T) + +NToverview = NToverview[!grepl("unmatched", NToverview$best_match),] + +new.col.x = c(sum(NToverview$A), sum(NToverview$C), sum(NToverview$T), sum(NToverview$G)) +new.col.y = sum(new.col.x) +new.col.z = round(new.col.x / new.col.y * 100, 2) + +tmp = names(NTresult) +NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z)) +names(NTresult) = c(tmp, paste("all", c("x", "y", "z"), sep="")) + +names(hotspot.analysis.sum) = names(NTresult) + +hotspot.analysis.sum = rbind(hotspot.analysis.sum, NTresult) + +write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0") + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_clonality.htm Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,144 @@ +<html> + +<head> +<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> +<meta name=Generator content="Microsoft Word 14 (filtered)"> +<style> +<!-- + /* Font Definitions */ + @font-face + {font-family:Calibri; + panose-1:2 15 5 2 2 2 4 3 2 4;} +@font-face + {font-family:Tahoma; + panose-1:2 11 6 4 3 5 4 4 2 4;} + /* Style Definitions */ + p.MsoNormal, li.MsoNormal, div.MsoNormal + {margin-top:0in; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:11.0pt; + font-family:"Calibri","sans-serif";} +a:link, span.MsoHyperlink + {color:blue; + text-decoration:underline;} +a:visited, span.MsoHyperlinkFollowed + {color:purple; + text-decoration:underline;} +p + {margin-right:0in; + margin-left:0in; + font-size:12.0pt; + font-family:"Times New Roman","serif";} +p.MsoAcetate, li.MsoAcetate, div.MsoAcetate + {mso-style-link:"Balloon Text Char"; + margin:0in; + margin-bottom:.0001pt; + font-size:8.0pt; + font-family:"Tahoma","sans-serif";} +p.msochpdefault, li.msochpdefault, div.msochpdefault + {mso-style-name:msochpdefault; + margin-right:0in; + margin-left:0in; + font-size:12.0pt; + font-family:"Calibri","sans-serif";} +p.msopapdefault, li.msopapdefault, div.msopapdefault + {mso-style-name:msopapdefault; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:12.0pt; + font-family:"Times New Roman","serif";} +span.apple-converted-space + {mso-style-name:apple-converted-space;} +span.BalloonTextChar + {mso-style-name:"Balloon Text Char"; + mso-style-link:"Balloon Text"; + font-family:"Tahoma","sans-serif";} +.MsoChpDefault + {font-size:10.0pt; + font-family:"Calibri","sans-serif";} +.MsoPapDefault + {margin-bottom:10.0pt; + line-height:115%;} +@page WordSection1 + {size:8.5in 11.0in; + margin:1.0in 1.0in 1.0in 1.0in;} +div.WordSection1 + {page:WordSection1;} +--> +</style> + +</head> + +<body lang=EN-US link=blue vlink=purple> + +<div class=WordSection1> + +<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; +text-align:justify;background:white'><b><span lang=EN-GB style='color:black'>References</span></b></p> + +<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; +text-align:justify;background:white'><span lang=EN-GB style='color:black'>Gupta, +Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria, +Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). <a name="OLE_LINK106"></a><a +name="OLE_LINK107"></a>Change-O: a toolkit for analyzing large-scale B cell +immunoglobulin repertoire sequencing data: Table 1. In<span +class=apple-converted-space> </span><em>Bioinformatics, 31 (20), pp. +3356–3358.</em><span class=apple-converted-space><i> </i></span>[</span><a +href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span +lang=EN-GB style='color:#303030'>doi:10.1093/bioinformatics/btv359</span></a><span +lang=EN-GB style='color:black'>][</span><a +href="http://dx.doi.org/10.1093/bioinformatics/btv359" target="_blank"><span +lang=EN-GB style='color:#303030'>Link</span></a><span lang=EN-GB +style='color:black'>]</span></p> + +<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; +text-align:justify;background:white'><span lang=EN-GB style='color:black'> </span></p> + +<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; +text-align:justify;background:white'><a name="OLE_LINK110"><u><span lang=EN-GB +style='color:black'>All, IGA, IGG, IGM and IGE tabs</span></u></a></p> + +<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; +text-align:justify;background:white'><span lang=EN-GB style='color:black'>In +these tabs information on the clonal relation of transcripts can be found. To +calculate clonal relation Change-O is used (Gupta et al, PMID: 26069265). +Transcripts are considered clonally related if they have maximal three nucleotides +difference in their CDR3 sequence and the same first V segment (as assigned by +IMGT). Results are represented in a table format showing the clone size and the +number of clones or sequences with this clone size. Change-O settings used are +the </span><span lang=EN-GB>nucleotide hamming distance substitution model with +a complete distance of maximal three. For clonal assignment the first gene +segments were used, and the distances were not normalized. In case of +asymmetric distances, the minimal distance was used.<span style='color:black'> </span></span></p> + +<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; +text-align:justify;background:white'><span lang=EN-GB style='color:black'> </span></p> + +<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; +text-align:justify;background:white'><u><span lang=EN-GB style='color:black'>Overlap +tab</span></u><span lang=EN-GB style='color:black'> </span></p> + +<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; +text-align:justify;background:white'><span lang=EN-GB style='color:black'>This +tab gives information on with which (sub)classe(s) each unique analyzed region +(based on the exact nucleotide sequence of the analyzes region and the CDR3 +nucleotide sequence) is found with. This gives information if the combination +of the exact same nucleotide sequence of the analyzed region and the CDR3 +sequence can be found in multiple (sub)classes.</span></p> + +<p style='margin-top:0in;margin-right:0in;margin-bottom:6.4pt;margin-left:0in; +text-align:justify;background:white'><span style='color:black'><img src=""> Please note that this tab is based on all +sequences before filter unique sequences and the remove duplicates based on +filters are applied. In this table only sequences occuring more than once are +included. </span></p> + +</div> + +</body> + +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_csr.htm Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,95 @@ +<html> + +<head> +<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> +<meta name=Generator content="Microsoft Word 14 (filtered)"> +<style> +<!-- + /* Font Definitions */ + @font-face + {font-family:Calibri; + panose-1:2 15 5 2 2 2 4 3 2 4;} + /* Style Definitions */ + p.MsoNormal, li.MsoNormal, div.MsoNormal + {margin-top:0in; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:11.0pt; + font-family:"Calibri","sans-serif";} +a:link, span.MsoHyperlink + {color:blue; + text-decoration:underline;} +a:visited, span.MsoHyperlinkFollowed + {color:purple; + text-decoration:underline;} +span.apple-converted-space + {mso-style-name:apple-converted-space;} +.MsoChpDefault + {font-family:"Calibri","sans-serif";} +.MsoPapDefault + {margin-bottom:10.0pt; + line-height:115%;} +@page WordSection1 + {size:8.5in 11.0in; + margin:1.0in 1.0in 1.0in 1.0in;} +div.WordSection1 + {page:WordSection1;} +--> +</style> + +</head> + +<body lang=EN-US link=blue vlink=purple> + +<div class=WordSection1> + +<p class=MsoNormalCxSpFirst style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The +graphs in this tab give insight into the subclass distribution of IGG and IGA +transcripts. </span><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'>Human Cµ, Cα, Cγ and Cε +constant genes are assigned using a </span><span lang=EN-GB style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>custom script +specifically designed for human (sub)class assignment in repertoire data as +described in van Schouwenburg and IJspeert et al, submitted for publication. In +this script the reference sequences for the subclasses are divided in 8 +nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are +then individually aligned in the right order to each input sequence. The +percentage of the chunks identified in each rearrangement is calculated in the +‘chunk hit percentage’. </span><span lang=EN-GB style='font-size:12.0pt; +line-height:115%;font-family:"Times New Roman","serif"'>Cα and Cγ +subclasses are very homologous and only differ in a few nucleotides. To assign +subclasses the </span><span lang=EN-GB style='font-size:12.0pt;line-height: +115%;font-family:"Times New Roman","serif"'>‘nt hit percentage’ is calculated. +This percentage indicates how well the chunks covering the subclass specific +nucleotide match with the different subclasses. </span><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Information +on normal distribution of subclasses in healthy individuals of different ages +can be found in IJspeert and van Schouwenburg et al, PMID: 27799928.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK100"></a><a +name="OLE_LINK99"></a><a name="OLE_LINK25"><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGA +subclass distribution</span></u></a></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie +chart showing the relative distribution of IGA1 and IGA2 transcripts in the +sample.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IGG +subclass distribution</span></u></p> + +<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Pie +chart showing the relative distribution of IGG1, IGG2, IGG3 and IGG4 +transcripts in the sample.</span></p> + +</div> + +</body> + +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_csr.py Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,508 @@ +import argparse +import logging +import sys +import os +import re + +from collections import defaultdict + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--input", help="The '7_V-REGION-mutation-and-AA-change-table' and '10_V-REGION-mutation-hotspots' merged together, with an added 'best_match' annotation") + parser.add_argument("--genes", help="The genes available in the 'best_match' column") + parser.add_argument("--empty_region_filter", help="Where does the sequence start?", choices=['leader', 'FR1', 'CDR1', 'FR2']) + parser.add_argument("--output", help="Output file") + + args = parser.parse_args() + + infile = args.input + genes = str(args.genes).split(",") + empty_region_filter = args.empty_region_filter + outfile = args.output + + genedic = dict() + + mutationdic = dict() + mutationMatcher = re.compile("^(.)(\d+).(.),?[ ]?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?") + mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?.?([A-Z])?(.*)?") + mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?") + mutationMatcher = re.compile("^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?") + NAMatchResult = (None, None, None, None, None, None, '') + geneMatchers = {gene: re.compile("^" + gene + ".*") for gene in genes} + linecount = 0 + + IDIndex = 0 + best_matchIndex = 0 + fr1Index = 0 + cdr1Index = 0 + fr2Index = 0 + cdr2Index = 0 + fr3Index = 0 + first = True + IDlist = [] + mutationList = [] + mutationListByID = {} + cdr1LengthDic = {} + cdr2LengthDic = {} + + fr1LengthDict = {} + fr2LengthDict = {} + fr3LengthDict = {} + + cdr1LengthIndex = 0 + cdr2LengthIndex = 0 + + fr1SeqIndex = 0 + fr2SeqIndex = 0 + fr3SeqIndex = 0 + + tandem_sum_by_class = defaultdict(int) + expected_tandem_sum_by_class = defaultdict(float) + + with open(infile, 'ru') as i: + for line in i: + if first: + linesplt = line.split("\t") + IDIndex = linesplt.index("Sequence.ID") + best_matchIndex = linesplt.index("best_match") + fr1Index = linesplt.index("FR1.IMGT") + cdr1Index = linesplt.index("CDR1.IMGT") + fr2Index = linesplt.index("FR2.IMGT") + cdr2Index = linesplt.index("CDR2.IMGT") + fr3Index = linesplt.index("FR3.IMGT") + cdr1LengthIndex = linesplt.index("CDR1.IMGT.length") + cdr2LengthIndex = linesplt.index("CDR2.IMGT.length") + fr1SeqIndex = linesplt.index("FR1.IMGT.seq") + fr2SeqIndex = linesplt.index("FR2.IMGT.seq") + fr3SeqIndex = linesplt.index("FR3.IMGT.seq") + first = False + continue + linecount += 1 + linesplt = line.split("\t") + ID = linesplt[IDIndex] + genedic[ID] = linesplt[best_matchIndex] + + mutationdic[ID + "_FR1"] = [] + if len(linesplt[fr1Index]) > 5 and empty_region_filter == "leader": + mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] + + mutationdic[ID + "_CDR1"] = [] + if len(linesplt[cdr1Index]) > 5 and empty_region_filter in ["leader", "FR1"]: + mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] + + mutationdic[ID + "_FR2"] = [] + if len(linesplt[fr2Index]) > 5 and empty_region_filter in ["leader", "FR1", "CDR1"]: + mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] + + mutationdic[ID + "_CDR2"] = [] + if len(linesplt[cdr2Index]) > 5: + mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] + + mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + + mutationdic[ID + "_FR3"] = [] + if len(linesplt[fr3Index]) > 5: + mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] + + mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] + mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] + + try: + cdr1Length = int(linesplt[cdr1LengthIndex]) + except: + cdr1Length = 0 + + try: + cdr2Length = int(linesplt[cdr2LengthIndex]) + except: + cdr2Length = 0 + + #print linesplt[fr2SeqIndex] + fr1Length = len(linesplt[fr1SeqIndex]) if empty_region_filter == "leader" else 0 + fr2Length = len(linesplt[fr2SeqIndex]) if empty_region_filter in ["leader", "FR1", "CDR1"] else 0 + fr3Length = len(linesplt[fr3SeqIndex]) + + cdr1LengthDic[ID] = cdr1Length + cdr2LengthDic[ID] = cdr2Length + + fr1LengthDict[ID] = fr1Length + fr2LengthDict[ID] = fr2Length + fr3LengthDict[ID] = fr3Length + + IDlist += [ID] + print "len(mutationdic) =", len(mutationdic) + + with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle: + for ID, lst in mutationdic.iteritems(): + for mut in lst: + out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in mut]))) + + #tandem mutation stuff + tandem_frequency = defaultdict(int) + mutation_frequency = defaultdict(int) + + mutations_by_id_dic = {} + first = True + mutation_by_id_file = os.path.join(os.path.dirname(outfile), "mutation_by_id.txt") + with open(mutation_by_id_file, 'r') as mutation_by_id: + for l in mutation_by_id: + if first: + first = False + continue + splt = l.split("\t") + mutations_by_id_dic[splt[0]] = int(splt[1]) + + tandem_file = os.path.join(os.path.dirname(outfile), "tandems_by_id.txt") + with open(tandem_file, 'w') as o: + highest_tandem_length = 0 + + o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n") + for ID in IDlist: + mutations = mutationListByID[ID] + if len(mutations) == 0: + continue + last_mut = max(mutations, key=lambda x: int(x[1])) + + last_mut_pos = int(last_mut[1]) + + mut_positions = [False] * (last_mut_pos + 1) + + for mutation in mutations: + frm, where, to, frmAA, whereAA, toAA, thing = mutation + where = int(where) + mut_positions[where] = True + + tandem_muts = [] + tandem_start = -1 + tandem_length = 0 + for i in range(len(mut_positions)): + if mut_positions[i]: + if tandem_start == -1: + tandem_start = i + tandem_length += 1 + #print "".join(["1" if x else "0" for x in mut_positions[:i+1]]) + else: + if tandem_length > 1: + tandem_muts.append((tandem_start, tandem_length)) + #print "{0}{1} {2}:{3}".format(" " * (i - tandem_length), "^" * tandem_length, tandem_start, tandem_length) + tandem_start = -1 + tandem_length = 0 + if tandem_length > 1: # if the sequence ends with a tandem mutation + tandem_muts.append((tandem_start, tandem_length)) + + if len(tandem_muts) > 0: + if highest_tandem_length < len(tandem_muts): + highest_tandem_length = len(tandem_muts) + + region_length = fr1LengthDict[ID] + cdr1LengthDic[ID] + fr2LengthDict[ID] + cdr2LengthDic[ID] + fr3LengthDict[ID] + longest_tandem = max(tandem_muts, key=lambda x: x[1]) if len(tandem_muts) else (0, 0) + num_mutations = mutations_by_id_dic[ID] # len(mutations) + f_num_mutations = float(num_mutations) + num_tandem_muts = len(tandem_muts) + expected_tandem_muts = f_num_mutations * (f_num_mutations - 1.0) / float(region_length) + o.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n".format(ID, + str(num_mutations), + str(num_tandem_muts), + str(region_length), + str(round(expected_tandem_muts, 2)), + str(longest_tandem[1]), + str(tandem_muts))) + gene = genedic[ID] + if gene.find("unmatched") == -1: + tandem_sum_by_class[gene] += num_tandem_muts + expected_tandem_sum_by_class[gene] += expected_tandem_muts + + tandem_sum_by_class["all"] += num_tandem_muts + expected_tandem_sum_by_class["all"] += expected_tandem_muts + + gene = gene[:3] + if gene in ["IGA", "IGG"]: + tandem_sum_by_class[gene] += num_tandem_muts + expected_tandem_sum_by_class[gene] += expected_tandem_muts + else: + tandem_sum_by_class["unmatched"] += num_tandem_muts + expected_tandem_sum_by_class["unmatched"] += expected_tandem_muts + + + for tandem_mut in tandem_muts: + tandem_frequency[str(tandem_mut[1])] += 1 + #print "\t".join([ID, str(len(tandem_muts)), str(longest_tandem[1]) , str(tandem_muts)]) + + tandem_freq_file = os.path.join(os.path.dirname(outfile), "tandem_frequency.txt") + with open(tandem_freq_file, 'w') as o: + for frq in sorted([int(x) for x in tandem_frequency.keys()]): + o.write("{0}\t{1}\n".format(frq, tandem_frequency[str(frq)])) + + tandem_row = [] + genes_extra = list(genes) + genes_extra.append("all") + for x, y, in zip([tandem_sum_by_class[x] for x in genes_extra], [expected_tandem_sum_by_class[x] for x in genes_extra]): + if y != 0: + tandem_row += [x, round(y, 2), round(x / y, 2)] + else: + tandem_row += [x, round(y, 2), 0] + + tandem_freq_file = os.path.join(os.path.dirname(outfile), "shm_overview_tandem_row.txt") + with open(tandem_freq_file, 'w') as o: + o.write("Tandems/Expected (ratio),{0}\n".format(",".join([str(x) for x in tandem_row]))) + + #print mutationList, linecount + + AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] and i[5] != ";" else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent + if AALength < 60: + AALength = 64 + + AA_mutation = [0] * AALength + AA_mutation_dic = {"IGA": AA_mutation[:], "IGG": AA_mutation[:], "IGM": AA_mutation[:], "IGE": AA_mutation[:], "unm": AA_mutation[:], "all": AA_mutation[:]} + AA_mutation_empty = AA_mutation[:] + + print "AALength:", AALength + aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/aa_id_mutations.txt" + with open(aa_mutations_by_id_file, 'w') as o: + o.write("ID\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n") + for ID in mutationListByID.keys(): + AA_mutation_for_ID = AA_mutation_empty[:] + for mutation in mutationListByID[ID]: + if mutation[4] and mutation[5] != ";": + AA_mutation_position = int(mutation[4]) + try: + AA_mutation[AA_mutation_position] += 1 + AA_mutation_for_ID[AA_mutation_position] += 1 + except Exception as e: + print e + print mutation + sys.exit() + clss = genedic[ID][:3] + AA_mutation_dic[clss][AA_mutation_position] += 1 + o.write(ID + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in AA_mutation_for_ID[1:]]) + "\n") + + + + #absent AA stuff + absentAACDR1Dic = defaultdict(list) + absentAACDR1Dic[5] = range(29,36) + absentAACDR1Dic[6] = range(29,35) + absentAACDR1Dic[7] = range(30,35) + absentAACDR1Dic[8] = range(30,34) + absentAACDR1Dic[9] = range(31,34) + absentAACDR1Dic[10] = range(31,33) + absentAACDR1Dic[11] = [32] + + absentAACDR2Dic = defaultdict(list) + absentAACDR2Dic[0] = range(55,65) + absentAACDR2Dic[1] = range(56,65) + absentAACDR2Dic[2] = range(56,64) + absentAACDR2Dic[3] = range(57,64) + absentAACDR2Dic[4] = range(57,63) + absentAACDR2Dic[5] = range(58,63) + absentAACDR2Dic[6] = range(58,62) + absentAACDR2Dic[7] = range(59,62) + absentAACDR2Dic[8] = range(59,61) + absentAACDR2Dic[9] = [60] + + absentAA = [len(IDlist)] * (AALength-1) + for k, cdr1Length in cdr1LengthDic.iteritems(): + for c in absentAACDR1Dic[cdr1Length]: + absentAA[c] -= 1 + + for k, cdr2Length in cdr2LengthDic.iteritems(): + for c in absentAACDR2Dic[cdr2Length]: + absentAA[c] -= 1 + + + aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/absent_aa_id.txt" + with open(aa_mutations_by_id_file, 'w') as o: + o.write("ID\tcdr1length\tcdr2length\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n") + for ID in IDlist: + absentAAbyID = [1] * (AALength-1) + cdr1Length = cdr1LengthDic[ID] + for c in absentAACDR1Dic[cdr1Length]: + absentAAbyID[c] -= 1 + + cdr2Length = cdr2LengthDic[ID] + for c in absentAACDR2Dic[cdr2Length]: + absentAAbyID[c] -= 1 + o.write(ID + "\t" + str(cdr1Length) + "\t" + str(cdr2Length) + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in absentAAbyID]) + "\n") + + if linecount == 0: + print "No data, exiting" + with open(outfile, 'w') as o: + o.write("RGYW (%)," + ("0,0,0\n" * len(genes))) + o.write("WRCY (%)," + ("0,0,0\n" * len(genes))) + o.write("WA (%)," + ("0,0,0\n" * len(genes))) + o.write("TW (%)," + ("0,0,0\n" * len(genes))) + import sys + + sys.exit() + + hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)") + RGYWCount = {} + WRCYCount = {} + WACount = {} + TWCount = {} + + #IDIndex = 0 + ataIndex = 0 + tatIndex = 0 + aggctatIndex = 0 + atagcctIndex = 0 + first = True + with open(infile, 'ru') as i: + for line in i: + if first: + linesplt = line.split("\t") + ataIndex = linesplt.index("X.a.t.a") + tatIndex = linesplt.index("t.a.t.") + aggctatIndex = linesplt.index("X.a.g.g.c.t..a.t.") + atagcctIndex = linesplt.index("X.a.t..a.g.c.c.t.") + first = False + continue + linesplt = line.split("\t") + gene = linesplt[best_matchIndex] + ID = linesplt[IDIndex] + RGYW = [(int(x), int(y), z) for (x, y, z) in + [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] + WRCY = [(int(x), int(y), z) for (x, y, z) in + [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] + WA = [(int(x), int(y), z) for (x, y, z) in + [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]] + TW = [(int(x), int(y), z) for (x, y, z) in + [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] + RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0, 0, 0, 0 + + with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "RGYW.txt"), 'a') as out_handle: + for hotspot in RGYW: + out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in hotspot]))) + + mutationList = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] + for mutation in mutationList: + frm, where, to, AAfrm, AAwhere, AAto, junk = mutation + mutation_in_RGYW = any(((start <= int(where) <= end) for (start, end, region) in RGYW)) + mutation_in_WRCY = any(((start <= int(where) <= end) for (start, end, region) in WRCY)) + mutation_in_WA = any(((start <= int(where) <= end) for (start, end, region) in WA)) + mutation_in_TW = any(((start <= int(where) <= end) for (start, end, region) in TW)) + + in_how_many_motifs = sum([mutation_in_RGYW, mutation_in_WRCY, mutation_in_WA, mutation_in_TW]) + + if in_how_many_motifs > 0: + RGYWCount[ID] += (1.0 * int(mutation_in_RGYW)) / in_how_many_motifs + WRCYCount[ID] += (1.0 * int(mutation_in_WRCY)) / in_how_many_motifs + WACount[ID] += (1.0 * int(mutation_in_WA)) / in_how_many_motifs + TWCount[ID] += (1.0 * int(mutation_in_TW)) / in_how_many_motifs + + mutations_in_motifs_file = os.path.join(os.path.dirname(os.path.abspath(infile)), "mutation_in_motifs.txt") + if not os.path.exists(mutation_by_id_file): + with open(mutations_in_motifs_file, 'w') as out_handle: + out_handle.write("{0}\n".format("\t".join([ + "Sequence.ID", + "mutation_position", + "region", + "from_nt", + "to_nt", + "mutation_position_AA", + "from_AA", + "to_AA", + "motif", + "motif_start_nt", + "motif_end_nt", + "rest" + ]))) + + with open(mutations_in_motifs_file, 'a') as out_handle: + motif_dic = {"RGYW": RGYW, "WRCY": WRCY, "WA": WA, "TW": TW} + for mutation in mutationList: + frm, where, to, AAfrm, AAwhere, AAto, junk = mutation + for motif in motif_dic.keys(): + + for start, end, region in motif_dic[motif]: + if start <= int(where) <= end: + out_handle.write("{0}\n".format( + "\t".join([ + ID, + where, + region, + frm, + to, + str(AAwhere), + str(AAfrm), + str(AAto), + motif, + str(start), + str(end), + str(junk) + ]) + )) + + + + def mean(lst): + return (float(sum(lst)) / len(lst)) if len(lst) > 0 else 0.0 + + + def median(lst): + lst = sorted(lst) + l = len(lst) + if l == 0: + return 0 + if l == 1: + return lst[0] + + l = int(l / 2) + + if len(lst) % 2 == 0: + return float(lst[l] + lst[(l - 1)]) / 2.0 + else: + return lst[l] + + funcs = {"mean": mean, "median": median, "sum": sum} + + directory = outfile[:outfile.rfind("/") + 1] + value = 0 + valuedic = dict() + + for fname in funcs.keys(): + for gene in genes: + with open(directory + gene + "_" + fname + "_value.txt", 'r') as v: + valuedic[gene + "_" + fname] = float(v.readlines()[0].rstrip()) + with open(directory + "all_" + fname + "_value.txt", 'r') as v: + valuedic["total_" + fname] = float(v.readlines()[0].rstrip()) + + + def get_xyz(lst, gene, f, fname): + x = round(round(f(lst), 1)) + y = valuedic[gene + "_" + fname] + z = str(round(x / float(y) * 100, 1)) if y != 0 else "0" + return (str(x), str(y), z) + + dic = {"RGYW": RGYWCount, "WRCY": WRCYCount, "WA": WACount, "TW": TWCount} + arr = ["RGYW", "WRCY", "WA", "TW"] + + for fname in funcs.keys(): + func = funcs[fname] + foutfile = outfile[:outfile.rindex("/")] + "/hotspot_analysis_" + fname + ".txt" + with open(foutfile, 'w') as o: + for typ in arr: + o.write(typ + " (%)") + curr = dic[typ] + for gene in genes: + geneMatcher = geneMatchers[gene] + if valuedic[gene + "_" + fname] is 0: + o.write(",0,0,0") + else: + x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.iteritems() if geneMatcher.match(z)]], gene, func, fname) + o.write("," + x + "," + y + "," + z) + x, y, z = get_xyz([y for x, y in curr.iteritems() if not genedic[x].startswith("unmatched")], "total", func, fname) + #x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname) + o.write("," + x + "," + y + "," + z + "\n") + + + # for testing + seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt" + with open(seq_motif_file, 'w') as o: + o.write("ID\tRGYW\tWRCY\tWA\tTW\n") + for ID in IDlist: + #o.write(ID + "\t" + str(round(RGYWCount[ID], 2)) + "\t" + str(round(WRCYCount[ID], 2)) + "\t" + str(round(WACount[ID], 2)) + "\t" + str(round(TWCount[ID], 2)) + "\n") + o.write(ID + "\t" + str(RGYWCount[ID]) + "\t" + str(WRCYCount[ID]) + "\t" + str(WACount[ID]) + "\t" + str(TWCount[ID]) + "\n") + +if __name__ == "__main__": + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_csr.r Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,561 @@ +library(data.table) +library(ggplot2) +library(reshape2) + +args <- commandArgs(trailingOnly = TRUE) + +input = args[1] +genes = unlist(strsplit(args[2], ",")) +outputdir = args[3] +empty.region.filter = args[4] +setwd(outputdir) + +#dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) + +dat = data.frame(fread(input, sep="\t", header=T, stringsAsFactors=F)) #fread because read.table suddenly skips certain rows... + +if(length(dat$Sequence.ID) == 0){ + setwd(outputdir) + result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) + row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)") + write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) + transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) + row.names(transitionTable) = c("A", "C", "G", "T") + transitionTable["A","A"] = NA + transitionTable["C","C"] = NA + transitionTable["G","G"] = NA + transitionTable["T","T"] = NA + + write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) + cat("0", file="n.txt") + stop("No data") +} + +cleanup_columns = c("FR1.IMGT.c.a", + "FR2.IMGT.g.t", + "CDR1.IMGT.Nb.of.nucleotides", + "CDR2.IMGT.t.a", + "FR1.IMGT.c.g", + "CDR1.IMGT.c.t", + "FR2.IMGT.a.c", + "FR2.IMGT.Nb.of.mutations", + "FR2.IMGT.g.c", + "FR2.IMGT.a.g", + "FR3.IMGT.t.a", + "FR3.IMGT.t.c", + "FR2.IMGT.g.a", + "FR3.IMGT.c.g", + "FR1.IMGT.Nb.of.mutations", + "CDR1.IMGT.g.a", + "CDR1.IMGT.t.g", + "CDR1.IMGT.g.c", + "CDR2.IMGT.Nb.of.nucleotides", + "FR2.IMGT.a.t", + "CDR1.IMGT.Nb.of.mutations", + "CDR3.IMGT.Nb.of.nucleotides", + "CDR1.IMGT.a.g", + "FR3.IMGT.a.c", + "FR1.IMGT.g.a", + "FR3.IMGT.a.g", + "FR1.IMGT.a.t", + "CDR2.IMGT.a.g", + "CDR2.IMGT.Nb.of.mutations", + "CDR2.IMGT.g.t", + "CDR2.IMGT.a.c", + "CDR1.IMGT.t.c", + "FR3.IMGT.g.c", + "FR1.IMGT.g.t", + "FR3.IMGT.g.t", + "CDR1.IMGT.a.t", + "FR1.IMGT.a.g", + "FR3.IMGT.a.t", + "FR3.IMGT.Nb.of.nucleotides", + "FR2.IMGT.t.c", + "CDR2.IMGT.g.a", + "FR2.IMGT.t.a", + "CDR1.IMGT.t.a", + "FR2.IMGT.t.g", + "FR3.IMGT.t.g", + "FR2.IMGT.Nb.of.nucleotides", + "FR1.IMGT.t.a", + "FR1.IMGT.t.g", + "FR3.IMGT.c.t", + "FR1.IMGT.t.c", + "CDR2.IMGT.a.t", + "FR2.IMGT.c.t", + "CDR1.IMGT.g.t", + "CDR2.IMGT.t.g", + "FR1.IMGT.Nb.of.nucleotides", + "CDR1.IMGT.c.g", + "CDR2.IMGT.t.c", + "FR3.IMGT.g.a", + "CDR1.IMGT.a.c", + "FR2.IMGT.c.a", + "FR3.IMGT.Nb.of.mutations", + "FR2.IMGT.c.g", + "CDR2.IMGT.g.c", + "FR1.IMGT.g.c", + "CDR2.IMGT.c.t", + "FR3.IMGT.c.a", + "CDR1.IMGT.c.a", + "CDR2.IMGT.c.g", + "CDR2.IMGT.c.a", + "FR1.IMGT.c.t", + "FR1.IMGT.Nb.of.silent.mutations", + "FR2.IMGT.Nb.of.silent.mutations", + "FR3.IMGT.Nb.of.silent.mutations", + "FR1.IMGT.Nb.of.nonsilent.mutations", + "FR2.IMGT.Nb.of.nonsilent.mutations", + "FR3.IMGT.Nb.of.nonsilent.mutations") + +print("Cleaning up columns") + +for(col in cleanup_columns){ + dat[,col] = gsub("\\(.*\\)", "", dat[,col]) + #dat[dat[,col] == "",] = "0" + dat[,col] = as.numeric(dat[,col]) + dat[is.na(dat[,col]),col] = 0 +} + +regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3") +if(empty.region.filter == "FR1") { + regions = c("CDR1", "FR2", "CDR2", "FR3") +} else if (empty.region.filter == "CDR1") { + regions = c("FR2", "CDR2", "FR3") +} else if (empty.region.filter == "FR2") { + regions = c("CDR2", "FR3") +} + +pdfplots = list() #save() this later to create the pdf plots in another script (maybe avoids the "address (nil), cause memory not mapped") + +sum_by_row = function(x, columns) { sum(as.numeric(x[columns]), na.rm=T) } + +print("aggregating data into new columns") + +VRegionMutations_columns = paste(regions, ".IMGT.Nb.of.mutations", sep="") +dat$VRegionMutations = apply(dat, FUN=sum_by_row, 1, columns=VRegionMutations_columns) + +VRegionNucleotides_columns = paste(regions, ".IMGT.Nb.of.nucleotides", sep="") +dat$FR3.IMGT.Nb.of.nucleotides = nchar(dat$FR3.IMGT.seq) +dat$VRegionNucleotides = apply(dat, FUN=sum_by_row, 1, columns=VRegionNucleotides_columns) + +transitionMutations_columns = paste(rep(regions, each=4), c(".IMGT.a.g", ".IMGT.g.a", ".IMGT.c.t", ".IMGT.t.c"), sep="") +dat$transitionMutations = apply(dat, FUN=sum_by_row, 1, columns=transitionMutations_columns) + +transversionMutations_columns = paste(rep(regions, each=8), c(".IMGT.a.c",".IMGT.c.a",".IMGT.a.t",".IMGT.t.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t",".IMGT.t.g"), sep="") +dat$transversionMutations = apply(dat, FUN=sum_by_row, 1, columns=transversionMutations_columns) + +transitionMutationsAtGC_columns = paste(rep(regions, each=2), c(".IMGT.g.a",".IMGT.c.t"), sep="") +dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) + +totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.c.g",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.g.a",".IMGT.g.t"), sep="") +#totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.c.g",".IMGT.g.t"), sep="") +dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) + +transitionMutationsAtAT_columns = paste(rep(regions, each=2), c(".IMGT.a.g",".IMGT.t.c"), sep="") +dat$transitionMutationsAtAT = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtAT_columns) + +totalMutationsAtAT_columns = paste(rep(regions, each=6), c(".IMGT.a.g",".IMGT.a.c",".IMGT.a.t",".IMGT.t.g",".IMGT.t.c",".IMGT.t.a"), sep="") +#totalMutationsAtAT_columns = paste(rep(regions, each=5), c(".IMGT.a.g",".IMGT.t.c",".IMGT.a.c",".IMGT.g.c",".IMGT.t.g"), sep="") +dat$totalMutationsAtAT = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtAT_columns) + +FRRegions = regions[grepl("FR", regions)] +CDRRegions = regions[grepl("CDR", regions)] + +FR_silentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.silent.mutations", sep="") +dat$silentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_silentMutations_columns) + +CDR_silentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.silent.mutations", sep="") +dat$silentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_silentMutations_columns) + +FR_nonSilentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="") +dat$nonSilentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_nonSilentMutations_columns) + +CDR_nonSilentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="") +dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns) + +mutation.sum.columns = c("Sequence.ID", "VRegionMutations", "VRegionNucleotides", "transitionMutations", "transversionMutations", "transitionMutationsAtGC", "transitionMutationsAtAT", "silentMutationsFR", "nonSilentMutationsFR", "silentMutationsCDR", "nonSilentMutationsCDR") +write.table(dat[,mutation.sum.columns], "mutation_by_id.txt", sep="\t",quote=F,row.names=F,col.names=T) + +setwd(outputdir) + +write.table(dat, input, sep="\t",quote=F,row.names=F,col.names=T) + +base.order.x = data.frame(base=c("A", "C", "G", "T"), order.x=1:4) +base.order.y = data.frame(base=c("T", "G", "C", "A"), order.y=1:4) + +calculate_result = function(i, gene, dat, matrx, f, fname, name){ + tmp = dat[grepl(paste("^", gene, ".*", sep=""), dat$best_match),] + + j = i - 1 + x = (j * 3) + 1 + y = (j * 3) + 2 + z = (j * 3) + 3 + + if(nrow(tmp) > 0){ + if(fname == "sum"){ + matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) + matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1) + matrx[1,z] = round(f(matrx[1,x] / matrx[1,y]) * 100, digits=1) + } else { + matrx[1,x] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) + matrx[1,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1) + matrx[1,z] = round(f(tmp$VRegionMutations / tmp$VRegionNucleotides) * 100, digits=1) + } + + matrx[2,x] = round(f(tmp$transitionMutations, na.rm=T), digits=1) + matrx[2,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) + matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1) + + matrx[3,x] = round(f(tmp$transversionMutations, na.rm=T), digits=1) + matrx[3,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) + matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1) + + matrx[4,x] = round(f(tmp$transitionMutationsAtGC, na.rm=T), digits=1) + matrx[4,y] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1) + matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) + + matrx[5,x] = round(f(tmp$totalMutationsAtGC, na.rm=T), digits=1) + matrx[5,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) + matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) + + matrx[6,x] = round(f(tmp$transitionMutationsAtAT, na.rm=T), digits=1) + matrx[6,y] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1) + matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) + + matrx[7,x] = round(f(tmp$totalMutationsAtAT, na.rm=T), digits=1) + matrx[7,y] = round(f(tmp$VRegionMutations, na.rm=T), digits=1) + matrx[7,z] = round(matrx[7,x] / matrx[7,y] * 100, digits=1) + + matrx[8,x] = round(f(tmp$nonSilentMutationsFR, na.rm=T), digits=1) + matrx[8,y] = round(f(tmp$silentMutationsFR, na.rm=T), digits=1) + matrx[8,z] = round(matrx[8,x] / matrx[8,y], digits=1) + + matrx[9,x] = round(f(tmp$nonSilentMutationsCDR, na.rm=T), digits=1) + matrx[9,y] = round(f(tmp$silentMutationsCDR, na.rm=T), digits=1) + matrx[9,z] = round(matrx[9,x] / matrx[9,y], digits=1) + + if(fname == "sum"){ + + regions.fr = regions[grepl("FR", regions)] + regions.fr = paste(regions.fr, ".IMGT.Nb.of.nucleotides", sep="") + regions.cdr = regions[grepl("CDR", regions)] + regions.cdr = paste(regions.cdr, ".IMGT.Nb.of.nucleotides", sep="") + + if(length(regions.fr) > 1){ #in case there is only on FR region (rowSums needs >1 column) + matrx[10,x] = round(f(rowSums(tmp[,regions.fr], na.rm=T)), digits=1) + } else { + matrx[10,x] = round(f(tmp[,regions.fr], na.rm=T), digits=1) + } + matrx[10,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1) + matrx[10,z] = round(matrx[10,x] / matrx[10,y] * 100, digits=1) + + if(length(regions.cdr) > 1){ #in case there is only on CDR region + matrx[11,x] = round(f(rowSums(tmp[,regions.cdr], na.rm=T)), digits=1) + } else { + matrx[11,x] = round(f(tmp[,regions.cdr], na.rm=T), digits=1) + } + matrx[11,y] = round(f(tmp$VRegionNucleotides, na.rm=T), digits=1) + matrx[11,z] = round(matrx[11,x] / matrx[11,y] * 100, digits=1) + } + } + + transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) + row.names(transitionTable) = c("A", "C", "G", "T") + transitionTable["A","A"] = NA + transitionTable["C","C"] = NA + transitionTable["G","G"] = NA + transitionTable["T","T"] = NA + + if(nrow(tmp) > 0){ + for(nt1 in nts){ + for(nt2 in nts){ + if(nt1 == nt2){ + next + } + NT1 = LETTERS[letters == nt1] + NT2 = LETTERS[letters == nt2] + FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") + CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") + FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") + CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") + FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") + if (empty.region.filter == "leader"){ + transitionTable[NT1,NT2] = sum(tmp[,c(FR1, CDR1, FR2, CDR2, FR3)]) + } else if (empty.region.filter == "FR1") { + transitionTable[NT1,NT2] = sum(tmp[,c(CDR1, FR2, CDR2, FR3)]) + } else if (empty.region.filter == "CDR1") { + transitionTable[NT1,NT2] = sum(tmp[,c(FR2, CDR2, FR3)]) + } else if (empty.region.filter == "FR2") { + transitionTable[NT1,NT2] = sum(tmp[,c(CDR2, FR3)]) + } + } + } + transition = transitionTable + transition$id = names(transition) + + transition2 = melt(transition, id.vars="id") + + transition2 = merge(transition2, base.order.x, by.x="id", by.y="base") + + transition2 = merge(transition2, base.order.y, by.x="variable", by.y="base") + + transition2[is.na(transition2$value),]$value = 0 + + if(any(transition2$value != 0)){ #having a transition table filled with 0 is bad + print("Plotting heatmap and transition") + png(filename=paste("transitions_stacked_", name, ".png", sep="")) + p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar + p = p + xlab("From base") + ylab("") + ggtitle("Bargraph transition information") + guides(fill=guide_legend(title=NULL)) + p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) + #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black")) + print(p) + dev.off() + + pdfplots[[paste("transitions_stacked_", name, ".pdf", sep="")]] <<- p + + png(filename=paste("transitions_heatmap_", name, ".png", sep="")) + p = ggplot(transition2, aes(factor(reorder(variable, -order.y)), factor(reorder(id, -order.x)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap + p = p + xlab("To base") + ylab("From Base") + ggtitle("Heatmap transition information") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + print(p) + dev.off() + + pdfplots[[paste("transitions_heatmap_", name, ".pdf", sep="")]] <<- p + } else { + #print("No data to plot") + } + } + + #print(paste("writing value file: ", name, "_", fname, "_value.txt" ,sep="")) + write.table(x=transitionTable, file=paste("transitions_", name ,"_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) + write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", name , "_", fname, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + cat(matrx[1,x], file=paste(name, "_", fname, "_value.txt" ,sep="")) + cat(nrow(tmp), file=paste(name, "_", fname, "_n.txt" ,sep="")) + #print(paste(fname, name, nrow(tmp))) + matrx +} +nts = c("a", "c", "g", "t") +zeros=rep(0, 4) +funcs = c(median, sum, mean) +fnames = c("median", "sum", "mean") + +print("Creating result tables") + +for(i in 1:length(funcs)){ + func = funcs[[i]] + fname = fnames[[i]] + + print(paste("Creating table for", fname)) + + rows = 9 + if(fname == "sum"){ + rows = 11 + } + matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows) + for(i in 1:length(genes)){ + matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i]) + } + matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all") + + result = data.frame(matrx) + if(fname == "sum"){ + row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR") + } else { + row.names(result) = c("Number of Mutations (%)", "Transitions (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)") + } + write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F) +} + +print("Adding median number of mutations to sum table") +sum.table = read.table("mutations_sum.txt", sep=",", header=F) +median.table = read.table("mutations_median.txt", sep=",", header=F) + +new.table = sum.table[1,] +new.table[2,] = median.table[1,] +new.table[3:12,] = sum.table[2:11,] +new.table[,1] = as.character(new.table[,1]) +new.table[2,1] = "Median of Number of Mutations (%)" + +#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of G C (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),] + +write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F) + +print("Plotting IGA piechart") + +dat = dat[!grepl("^unmatched", dat$best_match),] + +#blegh + +genesForPlot = dat[grepl("IGA", dat$best_match),]$best_match + +if(length(genesForPlot) > 0){ + genesForPlot = data.frame(table(genesForPlot)) + colnames(genesForPlot) = c("Gene","Freq") + genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) + + pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) + pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGA1" = "lightblue1", "IGA2" = "blue4")) + pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) + pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) + pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclass distribution", "( n =", sum(genesForPlot$Freq), ")")) + write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) + + png(filename="IGA.png") + print(pc) + dev.off() + + pdfplots[["IGA.pdf"]] <- pc +} + +print("Plotting IGG piechart") + +genesForPlot = dat[grepl("IGG", dat$best_match),]$best_match + +if(length(genesForPlot) > 0){ + genesForPlot = data.frame(table(genesForPlot)) + colnames(genesForPlot) = c("Gene","Freq") + genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) + + pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) + pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred")) + pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) + pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) + pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclass distribution", "( n =", sum(genesForPlot$Freq), ")")) + write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) + + png(filename="IGG.png") + print(pc) + dev.off() + + pdfplots[["IGG.pdf"]] <- pc +} + +print("Plotting scatterplot") + +dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) +dat.clss = dat + +dat.clss$best_match = substr(dat.clss$best_match, 0, 3) + +dat.clss = rbind(dat, dat.clss) + +p = ggplot(dat.clss, aes(best_match, percentage_mutations)) +p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) +p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) +p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) +p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) + +png(filename="scatter.png") +print(p) +dev.off() + +pdfplots[["scatter.pdf"]] <- p + +write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) + +print("Plotting frequency ranges plot") + +dat$best_match_class = substr(dat$best_match, 0, 3) +freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20") +dat$frequency_bins = cut(dat$percentage_mutations, breaks=c(-Inf, 0, 2,5,10,15,20, Inf), labels=freq_labels) + +frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match_class")]) + +frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match_class", "frequency_bins")]) + +frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match_class") + +frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) + +p = ggplot(frequency_bins_data, aes(frequency_bins, frequency)) +p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) +p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) + +png(filename="frequency_ranges.png") +print(p) +dev.off() + +pdfplots[["frequency_ranges.pdf"]] <- p + +save(pdfplots, file="pdfplots.RData") + +frequency_bins_data_by_class = frequency_bins_data + +frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),] + +frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins) +frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher" +frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower" + +write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T) + +frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) + +frequency_bins_sum = data.frame(data.table(dat)[, list(class_sum=sum(.N)), by=c("best_match")]) + +frequency_bins_data = merge(frequency_bins_data, frequency_bins_sum, by="best_match") + +frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) + +frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),] +frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins) +frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher" +frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower" + +write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_csr.xml Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,240 @@ +<tool id="shm_csr" name="SHM & CSR pipeline" version="1.0"> + <description></description> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.16.0">numpy</requirement> + <requirement type="package" version="1.2.0">xlrd</requirement> + <requirement type="package" version="3.0.0">r-ggplot2</requirement> + <requirement type="package" version="1.4.3">r-reshape2</requirement> + <requirement type="package" version="0.5.0">r-scales</requirement> + <requirement type="package" version="3.4_5">r-seqinr</requirement> + <requirement type="package" version="1.11.4">r-data.table</requirement> + </requirements> + <command interpreter="bash"> + #if str ( $filter_unique.filter_unique_select ) == "remove": + wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select $filter_unique.filter_unique_clone_count $class_filter_cond.class_filter $empty_region_filter $fast + #else: + wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select 2 $class_filter_cond.class_filter $empty_region_filter $fast + #end if + </command> + <inputs> + <param name="in_file" type="data" format="data" label="IMGT zip file to be analysed" /> + <param name="empty_region_filter" type="select" label="Sequence starts at" help="" > + <option value="leader" selected="true">Leader: include FR1, CDR1, FR2, CDR2, FR3 in filters</option> + <option value="FR1" selected="true">FR1: include CDR1,FR2,CDR2,FR3 in filters</option> + <option value="CDR1">CDR1: include FR2,CDR2,FR3 in filters</option> + <option value="FR2">FR2: include CDR2,FR3 in filters</option> + </param> + <param name="functionality" type="select" label="Functionality filter" help="" > + <option value="productive" selected="true">Productive (Productive and Productive see comment)</option> + <option value="unproductive">Unproductive (Unproductive and Unproductive see comment)</option> + <option value="remove_unknown">Productive and Unproductive (Productive, Productive see comment, Unproductive, Unproductive and Unproductive see comment)</option> + </param> + <conditional name="filter_unique"> + <param name="filter_unique_select" type="select" label="Filter unique sequences" help="See below for an example."> + <option value="remove" selected="true">Remove uniques (Based on nucleotide sequence + C)</option> + <option value="remove_vjaa">Remove uniques (Based on V+J+CDR3 (AA))</option> + <option value="keep">Keep uniques (Based on nucleotide sequence + C)</option> + <option value="no">No</option> + </param> + <when value="remove"> + <param name="filter_unique_clone_count" size="4" type="integer" label="How many sequences should be in a group to keep 1 of them" value="2" min="2"/> + </when> + <when value="keep"></when> + <when value="no"></when> + </conditional> + <param name="unique" type="select" label="Remove duplicates based on" help="" > + <option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option> + <option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option> + <option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option> + <option value="CDR3.IMGT.AA">CDR3 (AA)</option> + + <option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3 (nt), C region</option> + <option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option> + <option value="CDR3.IMGT.seq,best_match_class">CDR3 (nt), C region</option> + <option value="CDR3.IMGT.seq">CDR3 (nt)</option> + <option value="Sequence.ID" selected="true">Don't remove duplicates</option> + </param> + <conditional name="class_filter_cond"> + <param name="class_filter" type="select" label="Human Class/Subclass filter" help="" > + <option value="70_70" selected="true">>70% class and >70% subclass</option> + <option value="60_55">>60% class and >55% subclass</option> + <option value="70_0">>70% class</option> + <option value="60_0">>60% class</option> + <option value="19_0">>19% class</option> + <option value="101_101">Do not assign (sub)class</option> + </param> + <when value="70_70"></when> + <when value="60_55"></when> + <when value="70_0"></when> + <when value="60_0"></when> + <when value="19_0"></when> + <when value="101_101"></when> + </conditional> + <conditional name="naive_output_cond"> + <param name="naive_output" type="select" label="Output new IMGT archives per class into your history?"> + <option value="yes">Yes</option> + <option value="no" selected="true">No</option> + </param> + <when value="yes"></when> + <when value="no"></when> + </conditional> + <param name="fast" type="select" label="Fast" help="Skips generating the new ZIP files and Change-O/Baseline" > + <option value="yes">Yes</option> + <option value="no" selected="true">No</option> + </param> + </inputs> + <outputs> + <data format="html" name="out_file" label = "SHM & CSR on ${in_file.name}"/> + <data format="imgt_archive" name="naive_output_ca" label = "Filtered IMGT IGA: ${in_file.name}" > + <filter>naive_output_cond['naive_output'] == "yes"</filter> + <filter>class_filter_cond['class_filter'] != "101_101"</filter> + </data> + <data format="imgt_archive" name="naive_output_cg" label = "Filtered IMGT IGG: ${in_file.name}" > + <filter>naive_output_cond['naive_output'] == "yes"</filter> + <filter>class_filter_cond['class_filter'] != "101_101"</filter> + </data> + <data format="imgt_archive" name="naive_output_cm" label = "Filtered IMGT IGM: ${in_file.name}" > + <filter>naive_output_cond['naive_output'] == "yes"</filter> + <filter>class_filter_cond['class_filter'] != "101_101"</filter> + </data> + <data format="imgt_archive" name="naive_output_ce" label = "Filtered IMGT IGE: ${in_file.name}" > + <filter>naive_output_cond['naive_output'] == "yes"</filter> + <filter>class_filter_cond['class_filter'] != "101_101"</filter> + </data> + <data format="imgt_archive" name="naive_output_all" label = "Filtered IMGT all: ${in_file.name}" > + <filter>naive_output_cond['naive_output'] == "yes"</filter> + <filter>class_filter_cond['class_filter'] == "101_101"</filter> + </data> + </outputs> + <tests> + <test> + <param name="fast" value="yes"/> + <output name="out_file" file="test1.html"/> + </test> + </tests> + <help> +<![CDATA[ +**References** + +Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying selection in high-throughput Immunoglobulin sequencing data sets. In *Nucleic Acids Research, 40 (17), pp. e134–e134.* [`doi:10.1093/nar/gks457`_] + +.. _doi:10.1093/nar/gks457: http://dx.doi.org/10.1093/nar/gks457 + +Gupta, Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria, Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. *In Bioinformatics, 31 (20), pp. 3356–3358.* [`doi:10.1093/bioinformatics/btv359`_] + +.. _doi:10.1093/bioinformatics/btv359: http://dx.doi.org/10.1093/bioinformatics/btv359 + +----- + +**Input files** + +IMGT/HighV-QUEST .zip and .txz are accepted as input files. The file to be analysed can be selected using the dropdown menu. + +.. class:: infomark + +Note: Files can be uploaded by using “get data†and “upload file†and selecting “IMGT archive“ as a file type. Special characters should be prevented in the file names of the uploaded samples as these can give errors when running the immune repertoire pipeline. Underscores are allowed in the file names. + +----- + +**Sequence starts at** + +Identifies the region which will be included in the analysis (analysed region) + +- Sequences which are missing a gene region (FR1/CDR1 etc) in the analysed region are excluded. +- Sequences containing an ambiguous base in the analysed region or the CDR3 are excluded. +- All other filtering/analysis is based on the analysed region. + +----- + +**Functionality filter** + +Allows filtering on productive rearrangements, unproductive rearrangements or both based on the assignment provided by IMGT. + +**Filter unique sequences** + +*Remove unique:* + + +This filter consists of two different steps. + +Step 1: removes all sequences of which the nucleotide sequence in the “analysed region†and the CDR3 (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step. + +Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region, the CDR3 and the same (sub)class). + +.. class:: infomark + +This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes. + +*Keep unique:* + +Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). + +Example of the sequences that are included using either the “remove unique filter†or the “keep unique filter†+ ++--------------------------+ +| unique filter | ++--------+--------+--------+ +| values | remove | keep | ++--------+--------+--------+ +| A | A | A | ++--------+--------+--------+ +| A | B | B | ++--------+--------+--------+ +| B | D | C | ++--------+--------+--------+ +| B | | D | ++--------+--------+--------+ +| C | | | ++--------+--------+--------+ +| D | | | ++--------+--------+--------+ +| D | | | ++--------+--------+--------+ + +----- + +**Remove duplicates based on** + +Allows the selection of a single sequence per clone. Different definitions of a clone can be chosen. + +.. class:: infomark + +Note: The first sequence (in the data set) of each clone is always included in the analysis. When the first matched sequence is unmatched (no subclass assigned) the first matched sequence will be included. This means that altering the data order (by for instance sorting) can change the sequence which is included in the analysis and therefore slightly influences the results. + +----- + +**Human Class/Subclass filter** + +.. class:: warningmark + +Note: This filter should only be applied when analysing human IGH data in which a (sub)class specific sequence is present. Otherwise please select the do not assign (sub)class option to prevent errors when running the pipeline. + +The class percentage is based on the ‘chunk hit percentage’ (see below). The subclass percentage is based on the ‘nt hit percentage’ (see below). + +The SHM & CSR pipeline identifies human Cµ, Cα, Cγ and Cε constant genes by dividing the reference sequences for the subclasses (NG_001019) in 8 nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are then individually aligned in the right order to each input sequence. This alignment is used to calculate the chunck hit percentage and the nt hit percentage. + +*Chunk hit percentage*: The percentage of the chunks that is aligned + +*Nt hit percentage*: The percentage of chunks covering the subclass specific nucleotide match with the different subclasses. The most stringent filter for the subclass is 70% ‘nt hit percentage’ which means that 5 out of 7 subclass specific nucleotides for Cα or 6 out of 8 subclass specific nucleotides of Cγ should match with the specific subclass. +The option “>25% class†can be chosen when you only are interested in the class (Cα/Cγ/Cµ/Cɛ) of your sequences and the length of your sequence is not long enough to assign the subclasses. + +----- + +**Output new IMGT archives per class into your history?** + +If yes is selected, additional output files (one for each class) will be added to the history which contain information of the sequences that passed the selected filtering criteria. These files are in the same format as the IMGT/HighV-QUEST output files and therefore are also compatible with many other analysis programs, such as the Immune repertoire pipeline. + +----- + +**Execute** + +Upon pressing execute a new analysis is added to your history (right side of the page). Initially this analysis will be grey, after initiating the analysis colour of the analysis in the history will change to yellow. When the analysis is finished it will turn green in the history. Now the analysis can be opened by clicking on the eye icon on the analysis of interest. When an analysis turns red an error has occurred when running the analysis. If you click on the analysis title additional information can be found on the analysis. In addition a bug icon appears. Here more information on the error can be found. + +]]> + </help> + <citations> + <citation type="doi">10.1093/nar/gks457</citation> + <citation type="doi">10.1093/bioinformatics/btv359</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_downloads.htm Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,538 @@ +<html> + +<head> +<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> +<meta name=Generator content="Microsoft Word 14 (filtered)"> +<style> +<!-- + /* Font Definitions */ + @font-face + {font-family:Calibri; + panose-1:2 15 5 2 2 2 4 3 2 4;} + /* Style Definitions */ + p.MsoNormal, li.MsoNormal, div.MsoNormal + {margin-top:0in; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:11.0pt; + font-family:"Calibri","sans-serif";} +a:link, span.MsoHyperlink + {color:blue; + text-decoration:underline;} +a:visited, span.MsoHyperlinkFollowed + {color:purple; + text-decoration:underline;} +p.MsoNoSpacing, li.MsoNoSpacing, div.MsoNoSpacing + {margin:0in; + margin-bottom:.0001pt; + font-size:11.0pt; + font-family:"Calibri","sans-serif";} +.MsoChpDefault + {font-family:"Calibri","sans-serif";} +.MsoPapDefault + {margin-bottom:10.0pt; + line-height:115%;} +@page WordSection1 + {size:8.5in 11.0in; + margin:1.0in 1.0in 1.0in 1.0in;} +div.WordSection1 + {page:WordSection1;} +--> +</style> + +</head> + +<body lang=EN-US link=blue vlink=purple> + +<div class=WordSection1> + +<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Info</span></b></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The complete +dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +Allows downloading of the complete parsed data set.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The filtered +dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +Allows downloading of all parsed IMGT information of all transcripts that +passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The alignment +info on the unmatched sequences:</span></u><span lang=EN-GB style='font-size: +12.0pt;font-family:"Times New Roman","serif"'> Provides information of the subclass +alignment of all unmatched sequences. For each sequence the chunck hit +percentage and the nt hit percentage is shown together with the best matched +subclass.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Overview</span></b></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The SHM Overview +table as a dataset:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Allows downloading of the SHM Overview +table as a data set. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Motif data per +sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family: +"Times New Roman","serif"'> Provides a file that contains information for each +transcript on the number of mutations present in WA/TW and RGYW/WRCY motives.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Mutation data +per sequence ID: </span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'>Provides a file containing information +on the number of sequences bases, the number and location of mutations and the +type of mutations found in each transcript. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Base count for +every sequence:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family: +"Times New Roman","serif"'> links to a page showing for each transcript the +sequence of the analysed region (as dependent on the sequence starts at filter), +the assigned subclass and the number of sequenced A,C,G and T’s.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to +generate the percentage of mutations in AID and pol eta motives plot:</span></u><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +Provides a file containing the values used to generate the percentage of +mutations in AID and pol eta motives plot in the SHM overview tab.</span></p> + +<p class=MsoNormalCxSpFirst style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The +data used to generate the relative mutation patterns plot:</span></u><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +Provides a download with the data used to generate the relative mutation +patterns plot in the SHM overview tab.</span></p> + +<p class=MsoNormalCxSpLast style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The +data used to generate the absolute mutation patterns plot:</span></u><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +Provides a download with the data used to generate the absolute mutation +patterns plot in the SHM overview tab. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Frequency</span></b></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data +generate the frequency scatter plot:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows +downloading the data used to generate the frequency scatter plot in the SHM +frequency tab. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to +generate the frequency by class plot:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows +downloading the data used to generate frequency by class plot included in the +SHM frequency tab. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for +frequency by subclass:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Provides information of the number and +percentage of sequences that have 0%, 0-2%, 2-5%, 5-10%, 10-15%, 15-20%, +>20% SHM. Information is provided for each subclass.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Transition +Tables</span></b></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'all' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGA' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all IGA sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGA1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all IGA1 sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGA2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all IGA2 sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGG' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all IGG sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGG1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all IGG1 sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGG2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all IGG2 sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGG3' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all IGG3 sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGG4' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all IGG4 sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGM' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the information used to +generate the transition table for all IGM sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +'IGE' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Contains the +information used to generate the transition table for all IGE sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Antigen +selection</span></b></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>AA mutation data +per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family: +"Times New Roman","serif"'> Provides for each transcript information on whether +there is replacement mutation at each amino acid location (as defined by IMGT). +For all amino acids outside of the analysed region the value 0 is given.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Presence of AA +per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family: +"Times New Roman","serif"'> Provides for each transcript information on which +amino acid location (as defined by IMGT) is present. </span><span lang=NL +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>0 is absent, 1 +is present. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to +generate the aa mutation frequency plot:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the +data used to generate the aa mutation frequency plot for all sequences in the +antigen selection tab.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to +generate the aa mutation frequency plot for IGA:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the +data used to generate the aa mutation frequency plot for all IGA sequences in +the antigen selection tab.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to +generate the aa mutation frequency plot for IGG:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the +data used to generate the aa mutation frequency plot for all IGG sequences in +the antigen selection tab.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to +generate the aa mutation frequency plot for IGM:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the +data used to generate the aa mutation frequency plot for all IGM sequences in +the antigen selection tab.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to +generate the aa mutation frequency plot for IGE:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the +data used to generate the aa mutation frequency plot for all IGE sequences in +the antigen selection tab.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline PDF (</span></u><span +lang=EN-GB><a href="http://selection.med.yale.edu/baseline/"><span +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>http://selection.med.yale.edu/baseline/</span></a></span><u><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>):</span></u><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF +containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: +"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all +sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline data:</span></u><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +Table output of the BASELINe analysis. Calculation of antigen selection as +performed by BASELINe are shown for each individual sequence and the sum of all +sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA +PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: +"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all +sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA +data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +Table output of the BASELINe analysis. Calculation of antigen selection as +performed by BASELINe are shown for each individual IGA sequence and the sum of +all IGA sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG +PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: +"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGG +sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG +data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +Table output of the BASELINe analysis. Calculation of antigen selection as +performed by BASELINe are shown for each individual IGG sequence and the sum of +all IGG sequences. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM PDF:</span></u><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF +containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: +"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGM +sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM +data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +Table output of the BASELINe analysis. Calculation of antigen selection as +performed by BASELINe are shown for each individual IGM sequence and the sum of +all IGM sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE +PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: +"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGE +sequences.</span><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE +data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +Table output of the BASELINe analysis. Calculation of antigen selection as +performed by BASELINe are shown for each individual IGE sequence and the sum of +all IGE sequences.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>CSR</span></b></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGA +subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> </span><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Data used for +the generation of the </span><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'>IGA subclass distribution plot provided +in the CSR tab. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the +</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGA +subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Data used for the generation of the </span><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGG +subclass distribution plot provided in the CSR tab. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><b><span lang=NL +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Clonal relation</span></b></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Sequence overlap +between subclasses:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Link to the overlap table as provided +under the clonality overlap tab. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +file with defined clones and subclass annotation:</span></u><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> +Downloads a table with the calculation of clonal relation between all +sequences. For each individual transcript the results of the clonal assignment +as provided by Change-O are provided. Sequences with the same number in the CLONE +column are considered clonally related. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +defined clones summary file:</span></u><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'> Gives a summary of the total number of +clones in all sequences and their clone size. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +file with defined clones of IGA:</span></u><span lang=EN-GB style='font-size: +12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the +calculation of clonal relation between all IGA sequences. For each individual +transcript the results of the clonal assignment as provided by Change-O are +provided. Sequences with the same number in the CLONE column are considered +clonally related. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +defined clones summary file of IGA:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary +of the total number of clones in all IGA sequences and their clone size.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +file with defined clones of IGG:</span></u><span lang=EN-GB style='font-size: +12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the +calculation of clonal relation between all IGG sequences. For each individual +transcript the results of the clonal assignment as provided by Change-O are +provided. Sequences with the same number in the CLONE column are considered +clonally related. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +defined clones summary file of IGG:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary +of the total number of clones in all IGG sequences and their clone size.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +file with defined clones of IGM:</span></u><span lang=EN-GB style='font-size: +12.0pt;font-family:"Times New Roman","serif"'> Downloads a table +with the calculation of clonal relation between all IGM sequences. For each +individual transcript the results of the clonal assignment as provided by +Change-O are provided. Sequences with the same number in the CLONE column are +considered clonally related. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +defined clones summary file of IGM:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary +of the total number of clones in all IGM sequences and their clone size.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +file with defined clones of IGE:</span></u><span lang=EN-GB style='font-size: +12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the +calculation of clonal relation between all IGE sequences. For each individual +transcript the results of the clonal assignment as provided by Change-O are +provided. Sequences with the same number in the CLONE column are considered +clonally related. </span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB +defined clones summary file of IGE:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary +of the total number of clones in all IGE sequences and their clone size.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Filtered IMGT +output files</span></b></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a +.txz file with the same format as downloaded IMGT files that contains all +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGA sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a +.txz file with the same format as downloaded IMGT files that contains all IGA +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGA1 sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a +.txz file with the same format as downloaded IMGT files that contains all IGA1 +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGA2 sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz +file with the same format as downloaded IMGT files that contains all IGA2 +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGG sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz +file with the same format as downloaded IMGT files that contains all IGG +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGG1 sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a +.txz file with the same format as downloaded IMGT files that contains all IGG1 +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGG2 sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a +.txz file with the same format as downloaded IMGT files that contains all IGG2 +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGG3 sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz +file with the same format as downloaded IMGT files that contains all IGG3 +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGG4 sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a +.txz file with the same format as downloaded IMGT files that contains all IGG4 +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGM sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz +file with the same format as downloaded IMGT files that contains all IGM +sequences that have passed the chosen filter settings.</span></p> + +<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive +with just the matched and filtered IGE sequences:</span></u><span lang=EN-GB +style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a +.txz file with the same format as downloaded IMGT files that contains all IGE +sequences that have passed the chosen filter settings.</span></p> + +</div> + +</body> + +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_first.htm Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,127 @@ +<html> + +<head> +<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> +<meta name=Generator content="Microsoft Word 14 (filtered)"> +<style> +<!-- + /* Font Definitions */ + @font-face + {font-family:Calibri; + panose-1:2 15 5 2 2 2 4 3 2 4;} + /* Style Definitions */ + p.MsoNormal, li.MsoNormal, div.MsoNormal + {margin-top:0in; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:11.0pt; + font-family:"Calibri","sans-serif";} +.MsoChpDefault + {font-family:"Calibri","sans-serif";} +.MsoPapDefault + {margin-bottom:10.0pt; + line-height:115%;} +@page WordSection1 + {size:8.5in 11.0in; + margin:1.0in 1.0in 1.0in 1.0in;} +div.WordSection1 + {page:WordSection1;} +--> +</style> + +</head> + +<body lang=EN-US> + +<div class=WordSection1> + +<p class=MsoNormalCxSpFirst style='margin-bottom:0in;margin-bottom:.0001pt; +text-align:justify;line-height:normal'><span lang=EN-GB style='font-size:12.0pt; +font-family:"Times New Roman","serif"'>Table showing the order of each +filtering step and the number and percentage of sequences after each filtering +step. </span></p> + +<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt; +text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size: +12.0pt;font-family:"Times New Roman","serif"'>Input:</span></u><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The +number of sequences in the original IMGT file. This is always 100% of the +sequences.</span></p> + +<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt; +text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size: +12.0pt;font-family:"Times New Roman","serif"'>After "no results" filter: </span></u><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IMGT +classifies sequences either as "productive", "unproductive", "unknown", or "no +results". Here, the number and percentages of sequences that are not classified +as "no results" are reported.</span></p> + +<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt; +text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size: +12.0pt;font-family:"Times New Roman","serif"'>After functionality filter:</span></u><span +lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The +number and percentages of sequences that have passed the functionality filter. The +filtering performed is dependent on the settings of the functionality filter. +Details on the functionality filter <a name="OLE_LINK12"></a><a +name="OLE_LINK11"></a><a name="OLE_LINK10">can be found on the start page of +the SHM&CSR pipeline</a>.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After +removal sequences that are missing a gene region:</span></u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +In this step all sequences that are missing a gene region (FR1, CDR1, FR2, +CDR2, FR3) that should be present are removed from analysis. The sequence +regions that should be present are dependent on the settings of the sequence +starts at filter. <a name="OLE_LINK9"></a><a name="OLE_LINK8">The number and +percentage of sequences that pass this filter step are reported.</a> </span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After +N filter:</span></u><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'> In this step all sequences that contain +an ambiguous base (n) in the analysed region or the CDR3 are removed from the +analysis. The analysed region is determined by the setting of the sequence +starts at filter. The number and percentage of sequences that pass this filter +step are reported.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After +filter unique sequences</span></u><span lang=EN-GB style='font-size:12.0pt; +line-height:115%;font-family:"Times New Roman","serif"'>: The number and +percentage of sequences that pass the "filter unique sequences" filter. Details +on this filter </span><span lang=EN-GB style='font-size:12.0pt;line-height: +115%;font-family:"Times New Roman","serif"'>can be found on the start page of +the SHM&CSR pipeline</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After +remove duplicate based on filter:</span></u><span lang=EN-GB style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> The number and +percentage of sequences that passed the remove duplicate filter. Details on the +"remove duplicate filter based on filter" can be found on the start page of the +SHM&CSR pipeline.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK17"></a><a +name="OLE_LINK16"><u><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'>Number of matches sequences:</span></u></a><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +The number and percentage of sequences that passed all the filters described +above and have a (sub)class assigned.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number +of unmatched sequences</span></u><span lang=EN-GB style='font-size:12.0pt; +line-height:115%;font-family:"Times New Roman","serif"'>: The number and percentage +of sequences that passed all the filters described above and do not have +subclass assigned.</span></p> + +<p class=MsoNormal><span lang=EN-GB> </span></p> + +</div> + +</body> + +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_frequency.htm Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,87 @@ +<html> + +<head> +<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> +<meta name=Generator content="Microsoft Word 14 (filtered)"> +<style> +<!-- + /* Style Definitions */ + p.MsoNormal, li.MsoNormal, div.MsoNormal + {margin-top:0in; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:11.0pt; + font-family:"Calibri","sans-serif";} +.MsoChpDefault + {font-family:"Calibri","sans-serif";} +.MsoPapDefault + {margin-bottom:10.0pt; + line-height:115%;} +@page WordSection1 + {size:8.5in 11.0in; + margin:1.0in 1.0in 1.0in 1.0in;} +div.WordSection1 + {page:WordSection1;} +--> +</style> + +</head> + +<body lang=EN-US> + +<div class=WordSection1> + +<p class=MsoNormalCxSpFirst style='text-align:justify'><b><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>SHM +frequency tab</span></u></b></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These +graphs give insight into the level of SHM. The data represented in these graphs +can be downloaded in the download tab. <a name="OLE_LINK24"></a><a +name="OLE_LINK23"></a><a name="OLE_LINK90"></a><a name="OLE_LINK89">More +information on the values found in healthy individuals of different ages can be +found in IJspeert and van Schouwenburg et al, PMID: 27799928. </a></span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Frequency +scatter plot</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A +dot plot showing the percentage of SHM in each transcript divided into the +different (sub)classes. </span><span lang=NL style='font-size:12.0pt; +line-height:115%;font-family:"Times New Roman","serif"'>In the graph each dot +represents an individual transcript.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation +frequency by class</span></u></p> + +<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A +bar graph showing the percentage of transcripts that contain 0%, 0-2%, 2-5%, +5-10% 10-15%, 15-20% or more than 20% SHM for each subclass. </span></p> + +<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van +Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen, +Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation +of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and +Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a +href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span +style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a +href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span +style='color:windowtext'>Link</span></a>]</span></p> + +</div> + +</body> + +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_overview.htm Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,332 @@ +<html> + +<head> +<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> +<meta name=Generator content="Microsoft Word 14 (filtered)"> +<style> +<!-- + /* Font Definitions */ + @font-face + {font-family:Calibri; + panose-1:2 15 5 2 2 2 4 3 2 4;} + /* Style Definitions */ + p.MsoNormal, li.MsoNormal, div.MsoNormal + {margin-top:0in; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:11.0pt; + font-family:"Calibri","sans-serif";} +.MsoChpDefault + {font-family:"Calibri","sans-serif";} +.MsoPapDefault + {margin-bottom:10.0pt; + line-height:115%;} +@page WordSection1 + {size:8.5in 11.0in; + margin:1.0in 1.0in 1.0in 1.0in;} +div.WordSection1 + {page:WordSection1;} +--> +</style> + +</head> + +<body lang=EN-US> + +<div class=WordSection1> + +<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Info +table</span></b></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>This +table contains information on different characteristics of SHM. For all +characteristics information can be found for all sequences or only sequences of +a certain (sub)class. All results are based on the sequences that passed the filter +settings chosen on the start page of the SHM & CSR pipeline and only +include details on the analysed region as determined by the setting of the +sequence starts at filter. All data in this table can be downloaded via the +“downloads” tab.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation +frequency:</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK83"></a><a +name="OLE_LINK82"></a><a name="OLE_LINK81"><span lang=EN-GB style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values +give information on the level of SHM. </span></a><a name="OLE_LINK22"></a><a +name="OLE_LINK21"></a><a name="OLE_LINK20"><span lang=EN-GB style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information +on the values found in healthy individuals of different ages can be found in </span></a><a +name="OLE_LINK15"></a><a name="OLE_LINK14"></a><a name="OLE_LINK13"><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IJspeert +and van Schouwenburg et al, PMID: 27799928</span></a></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number +of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height: +115%;font-family:"Times New Roman","serif"'> Shows the number of total +mutations / the number of sequenced bases (the % of mutated bases).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Median +number of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt; +line-height:115%;font-family:"Times New Roman","serif"'> Shows the median % of +SHM of all sequences.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Patterns +of SHM:</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK72"></a><a +name="OLE_LINK71"></a><a name="OLE_LINK70"><span lang=EN-GB style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values +give insights into the targeting and patterns of SHM. These values can give +insight into the repair pathways used to repair the U:G mismatches introduced +by AID. </span></a><a name="OLE_LINK40"></a><a name="OLE_LINK39"></a><a +name="OLE_LINK38"></a><a name="OLE_LINK60"><span lang=EN-GB style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information +on the values found in healthy individuals of different ages can be found in +IJspeert and van Schouwenburg et al, PMID: 27799928</span></a></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions:</span></i><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +Shows the number of transition mutations / the number of total mutations (the +percentage of mutations that are transitions). Transition mutations are C>T, +T>C, A>G, G>A. </span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transversions:</span></i><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +Shows the number of transversion mutations / the number of total mutations (the +percentage of mutations that are transitions). Transversion mutations are +C>A, C>G, T>A, T>G, A>T, A>C, G>T, G>C.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions +at GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'> <a name="OLE_LINK2"></a><a +name="OLE_LINK1">Shows the number of transitions at GC locations (C>T, +G>A) / the total number of mutations at GC locations (the percentage of +mutations at GC locations that are transitions).</a></span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting +of GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'> <a name="OLE_LINK7"></a><a +name="OLE_LINK6"></a><a name="OLE_LINK3">Shows the number of mutations at GC +locations / the total number of mutations (the percentage of total mutations +that are at GC locations).</a> </span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions +at AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'> Shows the number of transitions at AT +locations (T>C, A>G) / the total number of mutations at AT locations (the +percentage of mutations at AT locations that are transitions).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting +of AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'> Shows the number of mutations at AT +locations / the total number of mutations (the percentage of total mutations +that are at AT locations).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>RGYW:</span></i><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +<a name="OLE_LINK28"></a><a name="OLE_LINK27"></a><a name="OLE_LINK26">Shows +the number of mutations that are in a RGYW motive / The number of total mutations +(the percentage of mutations that are in a RGYW motive). </a><a +name="OLE_LINK62"></a><a name="OLE_LINK61">RGYW motives are known to be +preferentially targeted by AID </a></span><span lang=EN-GB style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine, +Y=pyrimidine, W = A or T).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WRCY:</span></i><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +<a name="OLE_LINK34"></a><a name="OLE_LINK33">Shows the number of mutations +that are in a </a><a name="OLE_LINK32"></a><a name="OLE_LINK31"></a><a +name="OLE_LINK30"></a><a name="OLE_LINK29">WRCY</a> motive / The number of +total mutations (the percentage of mutations that are in a WRCY motive). WRCY +motives are known to be preferentially targeted by AID </span><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine, +Y=pyrimidine, W = A or T).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WA:</span></i><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +<a name="OLE_LINK37"></a><a name="OLE_LINK36"></a><a name="OLE_LINK35">Shows +the number of mutations that are in a WA motive / The number of total mutations +(the percentage of mutations that are in a WA motive). It is described that +polymerase eta preferentially makes errors at WA motives </a></span><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W += A or T).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>TW:</span></i><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +Shows the number of mutations that are in a TW motive / The number of total mutations +(the percentage of mutations that are in a TW motive). It is described that +polymerase eta preferentially makes errors at TW motives </span><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W += A or T).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen +selection:</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These +values give insight into antigen selection. It has been described that during +antigen selection, there is selection against replacement mutations in the FR +regions as these can cause instability of the B-cell receptor. In contrast +replacement mutations in the CDR regions are important for changing the +affinity of the B-cell receptor and therefore there is selection for this type +of mutations. Silent mutations do not alter the amino acid sequence and +therefore do not play a role in selection. More information on the values found +in healthy individuals of different ages can be found in IJspeert and van +Schouwenburg et al, PMID: 27799928</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>FR +R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'> <a name="OLE_LINK43"></a><a +name="OLE_LINK42"></a><a name="OLE_LINK41">Shows the number of replacement +mutations in the FR regions / The number of silent mutations in the FR regions +(the number of replacement mutations in the FR regions divided by the number of +silent mutations in the FR regions)</a></span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>CDR +R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'> Shows the number of replacement +mutations in the CDR regions / The number of silent mutations in the CDR +regions (the number of replacement mutations in the CDR regions divided by the +number of silent mutations in the CDR regions)</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number +of sequences nucleotides:</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These +values give information on the number of sequenced nucleotides.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt +in FR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'> <a name="OLE_LINK46"></a><a +name="OLE_LINK45"></a><a name="OLE_LINK44">Shows the number of sequences bases +that are located in the FR regions / The total number of sequenced bases (the +percentage of sequenced bases that are present in the FR regions).</a></span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt +in CDR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'> Shows the number of sequenced bases +that are located in the CDR regions / <a name="OLE_LINK48"></a><a +name="OLE_LINK47">The total number of sequenced bases (the percentage of +sequenced bases that are present in the CDR regions).</a></span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A: +</span></i><a name="OLE_LINK51"></a><a name="OLE_LINK50"></a><a +name="OLE_LINK49"><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'>Shows the total number of sequenced +adenines / The total number of sequenced bases (the percentage of sequenced +bases that were adenines).</span></a></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>C: +</span></i><a name="OLE_LINK53"></a><a name="OLE_LINK52"><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows +the total number of sequenced cytosines / The total number of sequenced bases +(the percentage of sequenced bases that were cytosines).</span></a></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>T: +</span></i><a name="OLE_LINK57"></a><a name="OLE_LINK56"><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows +the total number of sequenced </span></a><a name="OLE_LINK55"></a><a +name="OLE_LINK54"><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'>thymines</span></a><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> +/ The total number of sequenced bases (the percentage of sequenced bases that +were thymines).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>G: +</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'>Shows the total number of sequenced <a +name="OLE_LINK59"></a><a name="OLE_LINK58">guanine</a>s / The total number of +sequenced bases (the percentage of sequenced bases that were guanines).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK69"><b><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></a></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK75"></a><a +name="OLE_LINK74"></a><a name="OLE_LINK73"><span lang=EN-GB style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs visualize +information on the patterns and targeting of SHM and thereby give information +into the repair pathways used to repair the U:G mismatches introduced by AID. The +data represented in these graphs can be downloaded in the download tab. More +information on the values found in healthy individuals of different ages can be +found in IJspeert and van Schouwenburg et al, PMID: 27799928</span></a><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>. +<a name="OLE_LINK85"></a><a name="OLE_LINK84"></a></span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Percentage +of mutations in AID and pol eta motives</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes +<a name="OLE_LINK80"></a><a name="OLE_LINK79"></a><a name="OLE_LINK78">for each +(sub)class </a>the percentage of mutations that are present in AID (RGYW or +WRCY) or polymerase eta motives (WA or TW) in the different subclasses </span><span +lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine, +Y=pyrimidine, W = A or T).</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Relative +mutation patterns</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes +for each (sub)class the distribution of mutations between mutations at AT +locations and transitions or transversions at GC locations. </span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Absolute +mutation patterns</span></u></p> + +<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualized +for each (sub)class the percentage of sequenced AT and GC bases that are +mutated. The mutations at GC bases are divided into transition and transversion +mutations<a name="OLE_LINK77"></a><a name="OLE_LINK76">. </a></span></p> + +<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van +Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen, +Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation +of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and +Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a +href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span +style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a +href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span +style='color:windowtext'>Link</span></a>]</span></p> + +</div> + +</body> + +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_selection.htm Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,128 @@ +<html> + +<head> +<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> +<meta name=Generator content="Microsoft Word 14 (filtered)"> +<style> +<!-- + /* Font Definitions */ + @font-face + {font-family:Calibri; + panose-1:2 15 5 2 2 2 4 3 2 4;} +@font-face + {font-family:UICTFontTextStyleBody;} + /* Style Definitions */ + p.MsoNormal, li.MsoNormal, div.MsoNormal + {margin-top:0in; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:11.0pt; + font-family:"Calibri","sans-serif";} +a:link, span.MsoHyperlink + {color:blue; + text-decoration:underline;} +a:visited, span.MsoHyperlinkFollowed + {color:purple; + text-decoration:underline;} +span.apple-converted-space + {mso-style-name:apple-converted-space;} +.MsoChpDefault + {font-family:"Calibri","sans-serif";} +.MsoPapDefault + {margin-bottom:10.0pt; + line-height:115%;} +@page WordSection1 + {size:8.5in 11.0in; + margin:1.0in 1.0in 1.0in 1.0in;} +div.WordSection1 + {page:WordSection1;} +--> +</style> + +</head> + +<body lang=EN-US link=blue vlink=purple> + +<div class=WordSection1> + +<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>References</span></b></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"; +color:black'>Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying +selection in high-throughput Immunoglobulin sequencing data sets. In<span +class=apple-converted-space> </span><em>Nucleic Acids Research, 40 (17), +pp. e134–e134.</em><span class=apple-converted-space><i> </i></span>[</span><span +lang=EN-GB><a href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"; +color:#303030'>doi:10.1093/nar/gks457</span></a></span><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"; +color:black'>][</span><span lang=EN-GB><a +href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"; +color:#303030'>Link</span></a></span><span lang=EN-GB style='font-size:12.0pt; +line-height:115%;font-family:"Times New Roman","serif";color:black'>]</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>AA +mutation frequency</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>For +each class, the frequency of replacement mutations at each amino acid position +is shown, which is calculated by dividing the number of replacement mutations +at a particular amino acid position/the number sequences that have an amino +acid at that particular position. Since the length of the CDR1 and CDR2 region +is not the same for every VH gene, some amino acids positions are absent. +Therefore we calculate the frequency using the number of amino acids present at +that that particular location. </span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen +selection (BASELINe)</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows +the results of the analysis of antigen selection as performed using BASELINe. +Details on the analysis performed by BASELINe can be found in Yaari et al, +PMID: 22641856. The settings used for the analysis are</span><span lang=EN-GB +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>: +focused, SHM targeting model: human Tri-nucleotide, custom bounderies. The +custom boundries are dependent on the ‘sequence starts at filter’. </span></p> + +<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL +style='font-family:UICTFontTextStyleBody;color:black'>Leader: +1:26:38:55:65:104:-</span></p> + +<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL +style='font-family:UICTFontTextStyleBody;color:black'>FR1: 27:27:38:55:65:104:-</span></p> + +<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL +style='font-family:UICTFontTextStyleBody;color:black'>CDR1: 27:27:38:55:65:104:-</span></p> + +<p class=MsoNormalCxSpLast style='line-height:normal'><span lang=NL +style='font-family:UICTFontTextStyleBody;color:black'>FR2: 27:27:38:55:65:104:-</span></p> + +<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van +Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen, +Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation +of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and +Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a +href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span +style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a +href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span +style='color:windowtext'>Link</span></a>]</span></p> + +</div> + +</body> + +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/shm_transition.htm Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,120 @@ +<html> + +<head> +<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> +<meta name=Generator content="Microsoft Word 14 (filtered)"> +<style> +<!-- + /* Font Definitions */ + @font-face + {font-family:Calibri; + panose-1:2 15 5 2 2 2 4 3 2 4;} + /* Style Definitions */ + p.MsoNormal, li.MsoNormal, div.MsoNormal + {margin-top:0in; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:11.0pt; + font-family:"Calibri","sans-serif";} +a:link, span.MsoHyperlink + {color:blue; + text-decoration:underline;} +a:visited, span.MsoHyperlinkFollowed + {color:purple; + text-decoration:underline;} +p.msochpdefault, li.msochpdefault, div.msochpdefault + {mso-style-name:msochpdefault; + margin-right:0in; + margin-left:0in; + font-size:12.0pt; + font-family:"Calibri","sans-serif";} +p.msopapdefault, li.msopapdefault, div.msopapdefault + {mso-style-name:msopapdefault; + margin-right:0in; + margin-bottom:10.0pt; + margin-left:0in; + line-height:115%; + font-size:12.0pt; + font-family:"Times New Roman","serif";} +span.apple-converted-space + {mso-style-name:apple-converted-space;} +.MsoChpDefault + {font-size:10.0pt; + font-family:"Calibri","sans-serif";} +.MsoPapDefault + {margin-bottom:10.0pt; + line-height:115%;} +@page WordSection1 + {size:8.5in 11.0in; + margin:1.0in 1.0in 1.0in 1.0in;} +div.WordSection1 + {page:WordSection1;} +--> +</style> + +</head> + +<body lang=EN-US link=blue vlink=purple> + +<div class=WordSection1> + +<p class=MsoNormalCxSpFirst style='text-align:justify'><span style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs and +tables give insight into the targeting and patterns of SHM. This can give +insight into the DNA repair pathways used to solve the U:G mismatches +introduced by AID. More information on the values found in healthy individuals +of different ages can be found in IJspeert and van Schouwenburg et al, PMID: +27799928.</span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs +</span></b></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK93"></a><a +name="OLE_LINK92"></a><a name="OLE_LINK91"><u><span style='font-size:12.0pt; +line-height:115%;font-family:"Times New Roman","serif"'>Heatmap transition +information</span></u></a></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK98"></a><a +name="OLE_LINK97"><span style='font-size:12.0pt;line-height:115%;font-family: +"Times New Roman","serif"'>Heatmaps visualizing for each subclass the frequency +of all possible substitutions. On the x-axes the original base is shown, while +the y-axes shows the new base. The darker the shade of blue, the more frequent +this type of substitution is occurring. </span></a></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bargraph +transition information</span></u></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bar graph +visualizing for each original base the distribution of substitutions into the other +bases. A graph is included for each (sub)class. </span></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Tables</span></b></p> + +<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size: +12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transition +tables are shown for each (sub)class. All the original bases are listed +horizontally, while the new bases are listed vertically. </span></p> + +<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%; +font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van +Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen, +Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span +style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation +of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and +Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a +href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span +style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a +href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span +style='color:windowtext'>Link</span></a>]</span></p> + +</div> + +</body> + +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/summary_to_fasta.py Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,42 @@ +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file") +parser.add_argument("--fasta", help="The output fasta file") + +args = parser.parse_args() + +infile = args.input +fasta = args.fasta + +with open(infile, 'r') as i, open(fasta, 'w') as o: + first = True + id_col = 0 + seq_col = 0 + no_results = 0 + no_seqs = 0 + passed = 0 + for line in i: + splt = line.split("\t") + if first: + id_col = splt.index("Sequence ID") + seq_col = splt.index("Sequence") + first = False + continue + if len(splt) < 5: + no_results += 1 + continue + + ID = splt[id_col] + seq = splt[seq_col] + + if not len(seq) > 0: + no_seqs += 1 + continue + + o.write(">" + ID + "\n" + seq + "\n") + passed += 1 + + print "No results:", no_results + print "No sequences:", no_seqs + print "Written to fasta file:", passed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/shm_csr/wrapper.sh Fri Feb 19 15:08:51 2021 +0000 @@ -0,0 +1,913 @@ +#!/bin/bash +#set -e +dir="$(cd "$(dirname "$0")" && pwd)" +input=$1 +method=$2 +log=$3 #becomes the main html page at the end +outdir=$4 +output="$outdir/index.html" #copied to $log location at the end +title="$5" +include_fr1=$6 +functionality=$7 +unique=$8 +naive_output=$9 +naive_output_ca=${10} +naive_output_cg=${11} +naive_output_cm=${12} +naive_output_ce=${13} +naive_output_all=${14} +filter_unique=${15} +filter_unique_count=${16} +class_filter=${17} +empty_region_filter=${18} +fast=${19} + +mkdir $outdir + +tar -xzf $dir/style.tar.gz -C $outdir + +echo "---------------- read parameters ----------------" +echo "---------------- read parameters ----------------<br />" > $log + +echo "unpacking IMGT file" + +type="`file $input`" +if [[ "$type" == *"Zip archive"* ]] ; then + echo "Zip archive" + echo "unzip $input -d $PWD/files/" + unzip $input -d $PWD/files/ +elif [[ "$type" == *"XZ compressed data"* ]] ; then + echo "ZX archive" + echo "tar -xJf $input -C $PWD/files/" + mkdir -p "$PWD/files/$title" + tar -xJf $input -C "$PWD/files/$title" +else + echo "Unrecognized format $type" + echo "Unrecognized format $type" > $log + exit 1 +fi + +cat "`find $PWD/files/ -name "1_*"`" > $PWD/summary.txt +cat "`find $PWD/files/ -name "2_*"`" > $PWD/gapped_nt.txt +cat "`find $PWD/files/ -name "3_*"`" > $PWD/sequences.txt +cat "`find $PWD/files/ -name "4_*"`" > $PWD/gapped_aa.txt +cat "`find $PWD/files/ -name "5_*"`" > $PWD/aa.txt +cat "`find $PWD/files/ -name "6_*"`" > $PWD/junction.txt +cat "`find $PWD/files/ -name "7_*"`" > $PWD/mutationanalysis.txt +cat "`find $PWD/files/ -name "8_*"`" > $PWD/mutationstats.txt +cat "`find $PWD/files/ -name "9_*"`" > $PWD/aa_change_stats.txt +cat "`find $PWD/files/ -name "10_*"`" > $PWD/hotspots.txt + +echo "---------------- unique id check ----------------" + +Rscript $dir/check_unique_id.r $PWD/summary.txt $PWD/gapped_nt.txt $PWD/sequences.txt $PWD/gapped_aa.txt $PWD/aa.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/aa_change_stats.txt $PWD/hotspots.txt + +if [[ ${#BLASTN_DIR} -ge 5 ]] ; then + echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}" +else + BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin" + echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}" +fi + +echo "---------------- class identification ----------------" +echo "---------------- class identification ----------------<br />" >> $log + +python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt + +echo "---------------- merge_and_filter.r ----------------" +echo "---------------- merge_and_filter.r ----------------<br />" >> $log + +Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${filter_unique_count} ${class_filter} ${empty_region_filter} 2>&1 + +if [[ "${naive_output}" == "yes" ]] || [[ "$fast" == "no" ]] ; then + + echo "---------------- creating new IMGT zips ----------------" + echo "---------------- creating new IMGT zips ----------------<br />" >> $log + + mkdir $outdir/new_IMGT + + cp $PWD/summary.txt "$outdir/new_IMGT/1_Summary.txt" + cp $PWD/gapped_nt.txt "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" + cp $PWD/sequences.txt "$outdir/new_IMGT/3_Nt-sequences.txt" + cp $PWD/gapped_aa.txt "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt" + cp $PWD/aa.txt "$outdir/new_IMGT/5_AA-sequences.txt" + cp $PWD/junction.txt "$outdir/new_IMGT/6_Junction.txt" + cp $PWD/mutationanalysis.txt "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt" + cp $PWD/mutationstats.txt "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt" + cp $PWD/aa_change_stats.txt "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt" + cp $PWD/hotspots.txt "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt" + + mkdir $outdir/new_IMGT_IGA + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA + + mkdir $outdir/new_IMGT_IGA1 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1 + + mkdir $outdir/new_IMGT_IGA2 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2 + + mkdir $outdir/new_IMGT_IGG + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG + + mkdir $outdir/new_IMGT_IGG1 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1 + + mkdir $outdir/new_IMGT_IGG2 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2 + + mkdir $outdir/new_IMGT_IGG3 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3 + + mkdir $outdir/new_IMGT_IGG4 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4 + + mkdir $outdir/new_IMGT_IGM + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM + + mkdir $outdir/new_IMGT_IGE + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE + + Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1 + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1 + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1 + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1 + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE/ $outdir/merged.txt "IGE" 2>&1 + + + tmp="$PWD" + cd $outdir/new_IMGT/ #tar weirdness... + tar -cJf ../new_IMGT.txz * + + cd $outdir/new_IMGT_IGA/ + tar -cJf ../new_IMGT_IGA.txz * + + cd $outdir/new_IMGT_IGA1/ + tar -cJf ../new_IMGT_IGA1.txz * + + cd $outdir/new_IMGT_IGA2/ + tar -cJf ../new_IMGT_IGA2.txz * + + cd $outdir/new_IMGT_IGG/ + tar -cJf ../new_IMGT_IGG.txz * + + cd $outdir/new_IMGT_IGG1/ + tar -cJf ../new_IMGT_IGG1.txz * + + cd $outdir/new_IMGT_IGG2/ + tar -cJf ../new_IMGT_IGG2.txz * + + cd $outdir/new_IMGT_IGG3/ + tar -cJf ../new_IMGT_IGG3.txz * + + cd $outdir/new_IMGT_IGG4/ + tar -cJf ../new_IMGT_IGG4.txz * + + cd $outdir/new_IMGT_IGM/ + tar -cJf ../new_IMGT_IGM.txz * + + cd $outdir/new_IMGT_IGE/ + tar -cJf ../new_IMGT_IGE.txz * + + cd $tmp +fi + +echo "---------------- shm_csr.r ----------------" +echo "---------------- shm_csr.r ----------------<br />" >> $log + +classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched" +echo "R mutation analysis" +Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1 + +echo "---------------- plot_pdfs.r ----------------" +echo "---------------- plot_pdfs.r ----------------<br />" >> $log + +echo "Rscript $dir/shm_csr.r $outdir/pdfplots.RData $outdir 2>&1" + +Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir" 2>&1 + +echo "---------------- shm_csr.py ----------------" +echo "---------------- shm_csr.py ----------------<br />" >> $log + +python $dir/shm_csr.py --input $outdir/merged.txt --genes $classes --empty_region_filter "${empty_region_filter}" --output $outdir/hotspot_analysis.txt + +echo "---------------- aa_histogram.r ----------------" +echo "---------------- aa_histogram.r ----------------<br />" >> $log + +Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" $outdir/ 2>&1 +if [ -e "$outdir/aa_histogram_.png" ]; then + mv $outdir/aa_histogram_.png $outdir/aa_histogram.png + mv $outdir/aa_histogram_.pdf $outdir/aa_histogram.pdf + mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt + mv $outdir/aa_histogram_absent_.txt $outdir/aa_histogram_absent.txt + mv $outdir/aa_histogram_count_.txt $outdir/aa_histogram_count.txt + mv $outdir/aa_histogram_sum_.txt $outdir/aa_histogram_sum.txt +fi + +genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE) + +funcs=(sum mean median) +funcs=(sum) + +echo "---------------- sequence_overview.r ----------------" +echo "---------------- sequence_overview.r ----------------<br />" >> $log + +mkdir $outdir/sequence_overview + +Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt ${empty_region_filter} 2>&1 + +echo "<table border='1'>" > $outdir/base_overview.html + +while IFS=$'\t' read ID class seq A C G T +do + echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html +done < $outdir/sequence_overview/ntoverview.txt + +echo "<html><center><h1>$title</h1></center>" > $output +echo "<meta name='viewport' content='width=device-width, initial-scale=1'>" >> $output +echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $output +echo "<script type='text/javascript' src='tabber.js'></script>" >> $output +echo "<script type='text/javascript' src='script.js'></script>" >> $output +echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> $output +echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>" >> $output + +matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`" +unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`" +total_count=$((matched_count + unmatched_count)) +perc_count=$((unmatched_count / total_count * 100)) +perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"` +perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"` + +echo "<center><h2>Total: ${total_count}</h2></center>" >> $output +echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output +echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output + +echo "---------------- main tables ----------------" +echo "---------------- main tables ----------------<br />" >> $log + +echo "<div class='tabber'>" >> $output +echo "<div class='tabbertab' title='SHM Overview' style='width: 3000px;'>" >> $output + +for func in ${funcs[@]} +do + + echo "---------------- $func table ----------------" + echo "---------------- $func table ----------------<br />" >> $log + + cat $outdir/mutations_${func}.txt $outdir/shm_overview_tandem_row.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt + + echo "---------------- pattern_plots.r ----------------" + echo "---------------- pattern_plots.r ----------------<br />" >> $log + + Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/absolute_mutations $outdir/shm_overview.txt 2>&1 + + echo "<table class='pure-table pure-table-striped'>" >> $output + echo "<thead><tr><th>info</th>" >> $output + + if [ "${class_filter}" != "101_101" ] ; then + + for gene in ${genes[@]} + do + tmp=`cat $outdir/${gene}_${func}_n.txt` + echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output + done + + tmp=`cat $outdir/all_${func}_n.txt` + echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output + tmp=`cat $outdir/unmatched_${func}_n.txt` + echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th><tr></thead>" >> $output + + while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz + do + if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] || [ "$name" == "Tandems/Expected (ratio)" ] ; then #meh + echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${cex}/${cey} (${cez})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>" >> $output + elif [ "$name" == "Median of Number of Mutations (%)" ] ; then + echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${cez}%</td><td>${allz}%</td><td>${unz}%</td></tr>" >> $output + else + echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${cex}/${cey} (${cez}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output + fi + done < $outdir/data_${func}.txt + + else + tmp=`cat $outdir/all_${func}_n.txt` + echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output + + while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz + do + if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh + echo "<tr><td>$name</td><td>${allx}/${ally}</td></tr>" >> $output + elif [ "$name" == "Median of Number of Mutations (%)" ] ; then + echo "<tr><td>$name</td><td>${allz}%</td></tr>" >> $output + else + echo "<tr><td>$name</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output + fi + done < $outdir/data_${func}.txt + + fi + echo "</table>" >> $output + #echo "<a href='data_${func}.txt'>Download data</a>" >> $output +done + +echo "<a href='aid_motives.pdf'><img src='aid_motives.png' /></a><br />" >> $output +echo "<a href='relative_mutations.pdf'><img src='relative_mutations.png' /></a><br />" >> $output +echo "<a href='absolute_mutations.pdf'><img src='absolute_mutations.png' /></a><br />" >> $output +echo "<br />" >> $output +cat $dir/shm_overview.htm >> $output +echo "</div>" >> $output #SHM overview tab end + +echo "---------------- images ----------------" +echo "---------------- images ----------------<br />" >> $log + +echo "<div class='tabbertab' title='SHM Frequency' style='width: 3000px;'></a>" >> $output + +if [ -a $outdir/scatter.png ] +then + echo "<a href='scatter.pdf'><img src='scatter.png'/><br />" >> $output +fi +if [ -a $outdir/frequency_ranges.png ] +then + echo "<a href='frequency_ranges.pdf'><img src='frequency_ranges.png'/></a><br />" >> $output +fi + +echo "<br />" >> $output +cat $dir/shm_frequency.htm >> $output + +echo "</div>" >> $output #SHM frequency tab end + +echo "<div class='tabbertab' title='Transition tables' style='width: 3000px;'>" >> $output + +echo "<table border='0'>" >> $output + +for gene in ${genes[@]} +do + echo "<tr>" >> $output + echo "<td><h1>${gene}</h1></td>" >> $output + + if [ -e $outdir/transitions_heatmap_${gene}.png ] + then + echo "<td><a href='transitions_heatmap_${gene}.pdf'><img src='transitions_heatmap_${gene}.png' /></a></td>" >> $output + else + echo "<td></td>" >> $output + fi + + if [ -e $outdir/transitions_stacked_${gene}.png ] + then + echo "<td><a href='transitions_stacked_${gene}.pdf'><img src='transitions_stacked_${gene}.png' /></a></td>" >> $output + else + echo "<td></td>" >> $output + fi + + echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output + echo "<tr><td></td><td colspan="5"><center>To</center></td></tr>" >> $output + first="true" + while IFS=, read from a c g t + do + if [ "$first" == "true" ] ; then + echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output + first="false" + else + echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output + fi + done < $outdir/transitions_${gene}_sum.txt + echo "</table></td>" >> $output + + echo "</tr>" >> $output +done + +echo "<tr>" >> $output +echo "<td><h1>All</h1></td>" >> $output +echo "<td><a href='transitions_heatmap_all.pdf'><img src='transitions_heatmap_all.png' /></a></td>" >> $output +echo "<td><a href='transitions_stacked_all.pdf'><img src='transitions_stacked_all.png' /></a></td>" >> $output +echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output +echo "<tr><td></td><td colspan="5"><center>To</center></td></tr>" >> $output +first="true" +while IFS=, read from a c g t + do + if [ "$first" == "true" ] ; then + echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output + first="false" + else + echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output + fi +done < $outdir/transitions_all_sum.txt +echo "</table></td>" >> $output + +echo "</tr>" >> $output + +echo "</table>" >> $output + +echo "<br />" >> $output +cat $dir/shm_transition.htm >> $output + +echo "</div>" >> $output #transition tables tab end + +echo "<div class='tabbertab' title='Antigen Selection'>" >> $output + +if [ -e $outdir/aa_histogram.png ] +then + echo "<a href='aa_histogram.pdf'><img src='aa_histogram.png'/></a><br />" >> $output +fi + +if [ -e $outdir/aa_histogram_IGA.png ] +then + echo "<a href='aa_histogram_IGA.pdf'><img src='aa_histogram_IGA.png'/></a><br />" >> $output +fi + +if [ -e $outdir/aa_histogram_IGG.png ] +then + echo "<a href='aa_histogram_IGG.pdf'><img src='aa_histogram_IGG.png'/></a><br />" >> $output +fi + +if [ -e $outdir/aa_histogram_IGM.png ] +then + echo "<a href='aa_histogram_IGM.pdf'><img src='aa_histogram_IGM.png'/></a><br />" >> $output +fi + +if [ -e $outdir/aa_histogram_IGE.png ] +then + echo "<a href='aa_histogram_IGE.pdf'><img src='aa_histogram_IGE.png'/></a><br />" >> $output +fi + + + +if [[ "$fast" == "no" ]] ; then + + + + echo "---------------- baseline ----------------" + echo "---------------- baseline ----------------<br />" >> $log + tmp="$PWD" + + mkdir $outdir/baseline + + echo "<center><h1>BASELINe</h1>" >> $output + header_substring="Based on CDR1, FR2, CDR2, FR3 (27:27:38:55:65:104:-)" + + baseline_boundaries="27:27:38:55:65:104:-" + + if [[ "${empty_region_filter}" == "leader" ]] ; then + baseline_boundaries="1:26:38:55:65:104:-" + header_substring="Based on FR1, CDR1, FR2, CDR2, FR3 (1:26:38:55:65:104,-)" + fi + + echo "<p>${header_substring}</p></center>" >> $output + + mkdir $outdir/baseline/IGA_IGG_IGM + if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGA_IGG_IGM + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" + else + echo "No sequences" > "$outdir/baseline.txt" + fi + + mkdir $outdir/baseline/IGA + if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGA + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt" + else + echo "No IGA sequences" > "$outdir/baseline_IGA.txt" + fi + + mkdir $outdir/baseline/IGG + if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGG + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGG.txz "IGG" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt" + else + echo "No IGG sequences" > "$outdir/baseline_IGG.txt" + fi + + mkdir $outdir/baseline/IGM + if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGM + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt" + else + echo "No IGM sequences" > "$outdir/baseline_IGM.txt" + fi + + mkdir $outdir/baseline/IGE + if [[ $(wc -l < $outdir/new_IMGT_IGE/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGE + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGE.txz "IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGE.pdf" "Sequence.ID" "$outdir/baseline_IGE.txt" + else + echo "No IGE sequences" > "$outdir/baseline_IGE.txt" + fi + + cd $tmp + + echo "Cleaning up *.RData files" + find $outdir/baseline -name "*.RData" -type f -delete + + if [ -e $outdir/baseline.pdf ] + then + echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output + fi + + if [ -e $outdir/baseline_IGA.pdf ] + then + echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output + fi + + if [ -e $outdir/baseline_IGG.pdf ] + then + echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output + fi + + if [ -e $outdir/baseline_IGM.pdf ] + then + echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output + fi + + if [ -e $outdir/baseline_IGE.pdf ] + then + echo "<embed src='baseline_IGE.pdf' width='700px' height='1000px'>" >> $output + fi +fi + +echo "<br />" >> $output +cat $dir/shm_selection.htm >> $output + +echo "</div>" >> $output #antigen selection tab end + +echo "<div class='tabbertab' title='CSR'>" >> $output #CSR tab + +if [ -e $outdir/IGA.png ] +then + echo "<a href='IGA.pdf'><img src='IGA.png'/></a><br />" >> $output +fi +if [ -e $outdir/IGG.png ] +then + echo "<a href='IGG.pdf'><img src='IGG.png'/></a><br />" >> $output +fi + +echo "<br />" >> $output +cat $dir/shm_csr.htm >> $output + +echo "</div>" >> $output #CSR tab end + +if [[ "$fast" == "no" ]] ; then + + echo "---------------- change-o MakeDB ----------------" + + mkdir $outdir/change_o + + tmp="$PWD" + + cd $outdir/change_o + + bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt + Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-db-defined_first_clones.txt 2>&1 + + mkdir $outdir/new_IMGT_changeo + cp $outdir/new_IMGT/* $outdir/new_IMGT_changeo + + Rscript $dir/new_imgt.r $outdir/new_IMGT_changeo $outdir/change_o/change-o-db-defined_first_clones.txt "-" 2>&1 + + cd $outdir/new_IMGT_changeo + tar -cJf ../new_IMGT_first_seq_of_clone.txz * + cd $outdir/change_o + + rm -rf $outdir/new_IMGT_changeo + + Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1 + echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1" + + if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt + Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-db-defined_first_clones-IGA.txt 2>&1 + + mkdir $outdir/new_IMGT_IGA_changeo + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA_changeo + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA_changeo $outdir/change_o/change-o-db-defined_first_clones-IGA.txt "-" 2>&1 + + cd $outdir/new_IMGT_IGA_changeo + tar -cJf ../new_IMGT_IGA_first_seq_of_clone.txz * + + rm -rf $outdir/new_IMGT_IGA_changeo + + cd $outdir/change_o + else + echo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt" + echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt" + fi + + if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt + Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-db-defined_first_clones-IGG.txt 2>&1 + + mkdir $outdir/new_IMGT_IGG_changeo + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG_changeo + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG_changeo $outdir/change_o/change-o-db-defined_first_clones-IGG.txt "-" 2>&1 + + cd $outdir/new_IMGT_IGG_changeo + tar -cJf ../new_IMGT_IGG_first_seq_of_clone.txz * + rm -rf $outdir/new_IMGT_IGG_changeo + + cd $outdir/change_o + else + echo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt" + echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt" + fi + + if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt + Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-db-defined_first_clones-IGM.txt 2>&1 + + mkdir $outdir/new_IMGT_IGM_changeo + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM_changeo + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM_changeo $outdir/change_o/change-o-db-defined_first_clones-IGM.txt "-" 2>&1 + + cd $outdir/new_IMGT_IGM_changeo + tar -cJf ../new_IMGT_IGM_first_seq_of_clone.txz * + + rm -rf $outdir/new_IMGT_IGM_changeo + + cd $outdir/change_o + else + echo "No IGM sequences" > "$outdir/change_o/change-o-db-defined_clones-IGM.txt" + echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt" + fi + + if [[ $(wc -l < $outdir/new_IMGT_IGE/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGE.txz false false false $outdir/change_o/change-o-db-IGE.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGE.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-defined_clones-summary-IGE.txt + Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-db-defined_first_clones-IGE.txt 2>&1 + + mkdir $outdir/new_IMGT_IGE_changeo + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE_changeo + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE_changeo $outdir/change_o/change-o-db-defined_first_clones-IGE.txt "-" 2>&1 + + cd $outdir/new_IMGT_IGE_changeo + tar -cJf ../new_IMGT_IGE_first_seq_of_clone.txz * + + rm -rf $outdir/new_IMGT_IGE_changeo + + cd $outdir/change_o + else + echo "No IGE sequences" > "$outdir/change_o/change-o-db-defined_clones-IGE.txt" + echo "No IGE sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGE.txt" + fi + + cd "$tmp" + + rm -rf $outdir/new_IMGT + rm -rf $outdir/new_IMGT_IGA/ + rm -rf $outdir/new_IMGT_IGA1/ + rm -rf $outdir/new_IMGT_IGA2/ + rm -rf $outdir/new_IMGT_IGG/ + rm -rf $outdir/new_IMGT_IGG1/ + rm -rf $outdir/new_IMGT_IGG2/ + rm -rf $outdir/new_IMGT_IGG3/ + rm -rf $outdir/new_IMGT_IGG4/ + rm -rf $outdir/new_IMGT_IGM/ + rm -rf $outdir/new_IMGT_IGE/ + + echo "<div class='tabbertab' title='Clonal Relation' style='width: 7000px;'>" >> $output #clonality tab + + function clonality_table { + local infile=$1 + local outfile=$2 + + echo "<table class='pure-table pure-table-striped'>" >> $outfile + echo "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>" >> $outfile + + first='true' + + while read size clones seqs + do + if [[ "$first" == "true" ]]; then + first="false" + continue + fi + echo "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>" >> $outfile + done < $infile + + echo "</table>" >> $outfile + } + echo "<div class='tabber'>" >> $output + + echo "<div class='tabbertab' title='All'>" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary.txt $output + echo "</div>" >> $output + + echo "<div class='tabbertab' title='IGA'>" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary-IGA.txt $output + echo "</div>" >> $output + + echo "<div class='tabbertab' title='IGG'>" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary-IGG.txt $output + echo "</div>" >> $output + + echo "<div class='tabbertab' title='IGM'>" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output + echo "</div>" >> $output + + echo "<div class='tabbertab' title='IGE'>" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output + echo "</div>" >> $output + + echo "<div class='tabbertab' title='Overlap' style='width: 7000px;'>" >> $output + cat "$outdir/sequence_overview/index.html" | sed -e 's:</td>:</td>\n:g' | sed "s:href='\(.*\).html:href='sequence_overview/\1.html:g" >> $output # rewrite href to 'sequence_overview/..." + echo "</div>" >> $output + + echo "</div>" >> $output #clonality tabber end + + echo "<br />" >> $output + cat $dir/shm_clonality.htm >> $output + + echo "</div>" >> $output #clonality tab end + +fi + +echo "<div class='tabbertab' title='Downloads'>" >> $output + +echo "<table class='pure-table pure-table-striped'>" >> $output +echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> $output +echo "<tr><td>The complete dataset</td><td><a href='merged.txt' download='merged.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The filtered dataset</td><td><a href='filtered.txt' download='filtered.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt' download='unmatched.txt' >Download</a></td></tr>" >> $output + +echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Overview</td></tr>" >> $output +echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='shm_overview.txt' download='shm_overview.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the percentage of mutations in AID and pol eta motives plot</td><td><a href='aid_motives.txt' download='aid_motives.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the relative mutation patterns plot</td><td><a href='relative_mutations.txt' download='relative_mutations.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the absolute mutation patterns plot</td><td><a href='absolute_mutations.txt' download='absolute_mutations.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Data about tandem mutations by ID</td><td><a href='tandems_by_id.txt' download='tandems_by_id.txt' >Download</a></td></tr>" >> $output + +echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Frequency</td></tr>" >> $output +echo "<tr><td>The data generate the frequency scatter plot</td><td><a href='scatter.txt' download='scatter.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the frequency by class plot</td><td><a href='frequency_ranges_classes.txt' download='frequency_ranges_classes.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for frequency by subclass</td><td><a href='frequency_ranges_subclasses.txt' download='frequency_ranges_subclasses.txt' >Download</a></td></tr>" >> $output + +echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Transition Tables</td></tr>" >> $output +echo "<tr><td>The data for the 'all' transition plot</td><td><a href='transitions_all_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transitions_IGA_sum.txt' download='transitions_IGA_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA1_sum.txt' download='transitions_IGA1_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGA2' transition plot</td><td><a href='transitions_IGA2_sum.txt' download='transitions_IGA2_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGG' transition plot</td><td><a href='transitions_IGG_sum.txt' download='transitions_IGG_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGG1' transition plot</td><td><a href='transitions_IGG1_sum.txt' download='transitions_IGG1_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGG2' transition plot</td><td><a href='transitions_IGG2_sum.txt' download='transitions_IGG2_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGG3' transition plot</td><td><a href='transitions_IGG3_sum.txt' download='transitions_IGG3_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGG4' transition plot</td><td><a href='transitions_IGG4_sum.txt' download='transitions_IGG4_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGM' transition plot</td><td><a href='transitions_IGM_sum.txt' download='transitions_IGM_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGE' transition plot</td><td><a href='transitions_IGE_sum.txt' download='transitions_IGE_sum.txt' >Download</a></td></tr>" >> $output + +echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Antigen Selection</td></tr>" >> $output +echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt' download='aa_id_mutations.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Presence of AA per sequence ID</td><td><a href='absent_aa_id.txt' download='absent_aa_id.txt' >Download</a></td></tr>" >> $output + +echo "<tr><td>The data used to generate the aa mutation frequency plot</td><td><a href='aa_histogram_sum.txt' download='aa_histogram_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the aa mutation frequency plot for IGA</td><td><a href='aa_histogram_sum_IGA.txt' download='aa_histogram_sum_IGA.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the aa mutation frequency plot for IGG</td><td><a href='aa_histogram_sum_IGG.txt' download='aa_histogram_sum_IGG.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the aa mutation frequency plot for IGM</td><td><a href='aa_histogram_sum_IGM.txt' download='aa_histogram_sum_IGM.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data used to generate the aa mutation frequency plot for IGE</td><td><a href='aa_histogram_sum_IGE.txt' download='aa_histogram_sum_IGE.txt' >Download</a></td></tr>" >> $output + +echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf' download='baseline.pdf' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline data</td><td><a href='baseline.txt' download='baseline.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline IGA PDF</td><td><a href='baseline_IGA.pdf' download='baseline_IGA.pdf' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline IGA data</td><td><a href='baseline_IGA.txt' download='baseline_IGA.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline IGG PDF</td><td><a href='baseline_IGG.pdf' download='baseline_IGG.pdf' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline IGG data</td><td><a href='baseline_IGG.txt' download='baseline_IGG.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline IGM PDF</td><td><a href='baseline_IGM.pdf' download='baseline_IGM.pdf' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline IGM data</td><td><a href='baseline_IGM.txt' download='baseline_IGM.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline IGE PDF</td><td><a href='baseline_IGE.pdf' download='baseline_IGE.pdf' >Download</a></td></tr>" >> $output +echo "<tr><td>Baseline IGE data</td><td><a href='baseline_IGE.txt' download='baseline_IGE.txt' >Download</a></td></tr>" >> $output + +echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>CSR</td></tr>" >> $output +echo "<tr><td>The data for the IGA subclass distribution plot</td><td><a href='IGA_pie.txt' download='IGA_pie.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the IGG subclass distribution plot</td><td><a href='IGG_pie.txt' download='IGG_pie.txt' >Download</a></td></tr>" >> $output + + +echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Clonal Relation</td></tr>" >> $output +echo "<tr><td>Sequence overlap between subclasses</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just just the first sequence of a clone</td><td><a href='new_IMGT_first_seq_of_clone.txz' download='new_IMGT_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output + +echo "<tr><td>The Change-O DB file with defined clones of IGA</td><td><a href='change_o/change-o-db-defined_clones-IGA.txt' download='change_o/change-o-db-defined_clones-IGA.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB defined clones summary file of IGA</td><td><a href='change_o/change-o-defined_clones-summary-IGA.txt' download='change_o/change-o-defined_clones-summary-IGA.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGA)</td><td><a href='new_IMGT_IGA_first_seq_of_clone.txz' download='new_IMGT_IGA_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output + +echo "<tr><td>The Change-O DB file with defined clones of IGG</td><td><a href='change_o/change-o-db-defined_clones-IGG.txt' download='change_o/change-o-db-defined_clones-IGG.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB defined clones summary file of IGG</td><td><a href='change_o/change-o-defined_clones-summary-IGG.txt' download='change_o/change-o-defined_clones-summary-IGG.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGG)</td><td><a href='new_IMGT_IGG_first_seq_of_clone.txz' download='new_IMGT_IGG_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output + +echo "<tr><td>The Change-O DB file with defined clones of IGM</td><td><a href='change_o/change-o-db-defined_clones-IGM.txt' download='change_o/change-o-db-defined_clones-IGM.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB defined clones summary file of IGM</td><td><a href='change_o/change-o-defined_clones-summary-IGM.txt' download='change_o/change-o-defined_clones-summary-IGM.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGM)</td><td><a href='new_IMGT_IGM_first_seq_of_clone.txz' download='new_IMGT_IGM_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output + +echo "<tr><td>The Change-O DB file with defined clones of IGE</td><td><a href='change_o/change-o-db-defined_clones-IGE.txt' download='change_o/change-o-db-defined_clones-IGE.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The Change-O DB defined clones summary file of IGE</td><td><a href='change_o/change-o-defined_clones-summary-IGE.txt' download='change_o/change-o-defined_clones-summary-IGE.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGE)</td><td><a href='new_IMGT_IGE_first_seq_of_clone.txz' download='new_IMGT_IGE_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output + +echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Filtered IMGT output files</td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz' download='new_IMGT.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGA sequences</td><td><a href='new_IMGT_IGA.txz' download='new_IMGT_IGA.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGA1 sequences</td><td><a href='new_IMGT_IGA1.txz' download='new_IMGT_IGA1.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGA2 sequences</td><td><a href='new_IMGT_IGA2.txz' download='new_IMGT_IGA2.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGG sequences</td><td><a href='new_IMGT_IGG.txz' download='new_IMGT_IGG.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGG1 sequences</td><td><a href='new_IMGT_IGG1.txz' download='new_IMGT_IGG1.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGG2 sequences</td><td><a href='new_IMGT_IGG2.txz' download='new_IMGT_IGG2.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGG3 sequences</td><td><a href='new_IMGT_IGG3.txz' download='new_IMGT_IGG3.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGG4 sequences</td><td><a href='new_IMGT_IGG4.txz' download='new_IMGT_IGG4.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGM sequences</td><td><a href='new_IMGT_IGM.txz' download='new_IMGT_IGM.txz' >Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered IGE sequences</td><td><a href='new_IMGT_IGE.txz' download='new_IMGT_IGE.txz' >Download</a></td></tr>" >> $output + +echo "</table>" >> $output + +echo "<br />" >> $output +cat $dir/shm_downloads.htm >> $output + +echo "</div>" >> $output #downloads tab end + +echo "</div>" >> $output #tabs end + +echo "</html>" >> $output + + +echo "---------------- naive_output.r ----------------" +echo "---------------- naive_output.r ----------------<br />" >> $log + +if [[ "$naive_output" == "yes" ]] +then + echo "output naive output" + if [[ "${class_filter}" == "101_101" ]] + then + echo "copy new_IMGT.txz to ${naive_output_all}" + cp $outdir/new_IMGT.txz ${naive_output_all} + else + echo "copy for classes" + cp $outdir/new_IMGT_IGA.txz ${naive_output_ca} + cp $outdir/new_IMGT_IGG.txz ${naive_output_cg} + cp $outdir/new_IMGT_IGM.txz ${naive_output_cm} + cp $outdir/new_IMGT_IGE.txz ${naive_output_ce} + fi +fi + +echo "</table>" >> $outdir/base_overview.html + +mv $log $outdir/log.html + +echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $log +echo "<table border = 1>" >> $log +echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> $log +tIFS="$TMP" +IFS=$'\t' +while read step seq perc + do + echo "<tr>" >> $log + echo "<td>$step</td>" >> $log + echo "<td>$seq</td>" >> $log + echo "<td>${perc}%</td>" >> $log + echo "</tr>" >> $log +done < $outdir/filtering_steps.txt +echo "</table>" >> $log +echo "<br />" >> $log +cat $dir/shm_first.htm >> $log +echo "</center></html>" >> $log + +IFS="$tIFS" + + +echo "---------------- Done! ----------------" +echo "---------------- Done! ----------------<br />" >> $outdir/log.html + + + + + + + + + + + + + + + + + + + + +
--- a/shm_downloads.htm Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,538 +0,0 @@ -<html> - -<head> -<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> -<meta name=Generator content="Microsoft Word 14 (filtered)"> -<style> -<!-- - /* Font Definitions */ - @font-face - {font-family:Calibri; - panose-1:2 15 5 2 2 2 4 3 2 4;} - /* Style Definitions */ - p.MsoNormal, li.MsoNormal, div.MsoNormal - {margin-top:0in; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:11.0pt; - font-family:"Calibri","sans-serif";} -a:link, span.MsoHyperlink - {color:blue; - text-decoration:underline;} -a:visited, span.MsoHyperlinkFollowed - {color:purple; - text-decoration:underline;} -p.MsoNoSpacing, li.MsoNoSpacing, div.MsoNoSpacing - {margin:0in; - margin-bottom:.0001pt; - font-size:11.0pt; - font-family:"Calibri","sans-serif";} -.MsoChpDefault - {font-family:"Calibri","sans-serif";} -.MsoPapDefault - {margin-bottom:10.0pt; - line-height:115%;} -@page WordSection1 - {size:8.5in 11.0in; - margin:1.0in 1.0in 1.0in 1.0in;} -div.WordSection1 - {page:WordSection1;} ---> -</style> - -</head> - -<body lang=EN-US link=blue vlink=purple> - -<div class=WordSection1> - -<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Info</span></b></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The complete -dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -Allows downloading of the complete parsed data set.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The filtered -dataset:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -Allows downloading of all parsed IMGT information of all transcripts that -passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The alignment -info on the unmatched sequences:</span></u><span lang=EN-GB style='font-size: -12.0pt;font-family:"Times New Roman","serif"'> Provides information of the subclass -alignment of all unmatched sequences. For each sequence the chunck hit -percentage and the nt hit percentage is shown together with the best matched -subclass.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Overview</span></b></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The SHM Overview -table as a dataset:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Allows downloading of the SHM Overview -table as a data set. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Motif data per -sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family: -"Times New Roman","serif"'> Provides a file that contains information for each -transcript on the number of mutations present in WA/TW and RGYW/WRCY motives.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Mutation data -per sequence ID: </span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'>Provides a file containing information -on the number of sequences bases, the number and location of mutations and the -type of mutations found in each transcript. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Base count for -every sequence:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family: -"Times New Roman","serif"'> links to a page showing for each transcript the -sequence of the analysed region (as dependent on the sequence starts at filter), -the assigned subclass and the number of sequenced A,C,G and T’s.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to -generate the percentage of mutations in AID and pol eta motives plot:</span></u><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -Provides a file containing the values used to generate the percentage of -mutations in AID and pol eta motives plot in the SHM overview tab.</span></p> - -<p class=MsoNormalCxSpFirst style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The -data used to generate the relative mutation patterns plot:</span></u><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -Provides a download with the data used to generate the relative mutation -patterns plot in the SHM overview tab.</span></p> - -<p class=MsoNormalCxSpLast style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>The -data used to generate the absolute mutation patterns plot:</span></u><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -Provides a download with the data used to generate the absolute mutation -patterns plot in the SHM overview tab. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>SHM Frequency</span></b></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data -generate the frequency scatter plot:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows -downloading the data used to generate the frequency scatter plot in the SHM -frequency tab. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to -generate the frequency by class plot:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Allows -downloading the data used to generate frequency by class plot included in the -SHM frequency tab. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for -frequency by subclass:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Provides information of the number and -percentage of sequences that have 0%, 0-2%, 2-5%, 5-10%, 10-15%, 15-20%, ->20% SHM. Information is provided for each subclass.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Transition -Tables</span></b></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'all' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGA' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all IGA sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGA1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all IGA1 sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGA2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all IGA2 sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGG' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all IGG sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGG1' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all IGG1 sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGG2' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all IGG2 sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGG3' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all IGG3 sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGG4' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all IGG4 sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGM' transition plot :</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the information used to -generate the transition table for all IGM sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -'IGE' transition plot:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Contains the -information used to generate the transition table for all IGE sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Antigen -selection</span></b></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>AA mutation data -per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family: -"Times New Roman","serif"'> Provides for each transcript information on whether -there is replacement mutation at each amino acid location (as defined by IMGT). -For all amino acids outside of the analysed region the value 0 is given.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Presence of AA -per sequence ID:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family: -"Times New Roman","serif"'> Provides for each transcript information on which -amino acid location (as defined by IMGT) is present. </span><span lang=NL -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>0 is absent, 1 -is present. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to -generate the aa mutation frequency plot:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the -data used to generate the aa mutation frequency plot for all sequences in the -antigen selection tab.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to -generate the aa mutation frequency plot for IGA:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the -data used to generate the aa mutation frequency plot for all IGA sequences in -the antigen selection tab.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to -generate the aa mutation frequency plot for IGG:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the -data used to generate the aa mutation frequency plot for all IGG sequences in -the antigen selection tab.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to -generate the aa mutation frequency plot for IGM:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the -data used to generate the aa mutation frequency plot for all IGM sequences in -the antigen selection tab.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data used to -generate the aa mutation frequency plot for IGE:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Provides the -data used to generate the aa mutation frequency plot for all IGE sequences in -the antigen selection tab.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline PDF (</span></u><span -lang=EN-GB><a href="http://selection.med.yale.edu/baseline/"><span -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>http://selection.med.yale.edu/baseline/</span></a></span><u><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>):</span></u><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF -containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: -"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all -sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline data:</span></u><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -Table output of the BASELINe analysis. Calculation of antigen selection as -performed by BASELINe are shown for each individual sequence and the sum of all -sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA -PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: -"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all -sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGA -data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -Table output of the BASELINe analysis. Calculation of antigen selection as -performed by BASELINe are shown for each individual IGA sequence and the sum of -all IGA sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG -PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: -"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGG -sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGG -data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -Table output of the BASELINe analysis. Calculation of antigen selection as -performed by BASELINe are shown for each individual IGG sequence and the sum of -all IGG sequences. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM PDF:</span></u><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> PDF -containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: -"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGM -sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGM -data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -Table output of the BASELINe analysis. Calculation of antigen selection as -performed by BASELINe are shown for each individual IGM sequence and the sum of -all IGM sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE -PDF:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -PDF containing the </span><span lang=EN-GB style='font-size:12.0pt;font-family: -"Times New Roman","serif"'>Antigen selection (BASELINe) graph for all IGE -sequences.</span><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Baseline IGE -data:</span></u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -Table output of the BASELINe analysis. Calculation of antigen selection as -performed by BASELINe are shown for each individual IGE sequence and the sum of -all IGE sequences.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>CSR</span></b></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGA -subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> </span><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Data used for -the generation of the </span><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'>IGA subclass distribution plot provided -in the CSR tab. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The data for the -</span></u><u><span lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGA -subclass distribution plot :</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Data used for the generation of the </span><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IGG -subclass distribution plot provided in the CSR tab. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><b><span lang=NL -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Clonal relation</span></b></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Sequence overlap -between subclasses:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Link to the overlap table as provided -under the clonality overlap tab. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -file with defined clones and subclass annotation:</span></u><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> -Downloads a table with the calculation of clonal relation between all -sequences. For each individual transcript the results of the clonal assignment -as provided by Change-O are provided. Sequences with the same number in the CLONE -column are considered clonally related. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -defined clones summary file:</span></u><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'> Gives a summary of the total number of -clones in all sequences and their clone size. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -file with defined clones of IGA:</span></u><span lang=EN-GB style='font-size: -12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the -calculation of clonal relation between all IGA sequences. For each individual -transcript the results of the clonal assignment as provided by Change-O are -provided. Sequences with the same number in the CLONE column are considered -clonally related. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -defined clones summary file of IGA:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary -of the total number of clones in all IGA sequences and their clone size.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -file with defined clones of IGG:</span></u><span lang=EN-GB style='font-size: -12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the -calculation of clonal relation between all IGG sequences. For each individual -transcript the results of the clonal assignment as provided by Change-O are -provided. Sequences with the same number in the CLONE column are considered -clonally related. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -defined clones summary file of IGG:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary -of the total number of clones in all IGG sequences and their clone size.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -file with defined clones of IGM:</span></u><span lang=EN-GB style='font-size: -12.0pt;font-family:"Times New Roman","serif"'> Downloads a table -with the calculation of clonal relation between all IGM sequences. For each -individual transcript the results of the clonal assignment as provided by -Change-O are provided. Sequences with the same number in the CLONE column are -considered clonally related. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -defined clones summary file of IGM:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary -of the total number of clones in all IGM sequences and their clone size.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -file with defined clones of IGE:</span></u><span lang=EN-GB style='font-size: -12.0pt;font-family:"Times New Roman","serif"'> Downloads a table with the -calculation of clonal relation between all IGE sequences. For each individual -transcript the results of the clonal assignment as provided by Change-O are -provided. Sequences with the same number in the CLONE column are considered -clonally related. </span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>The Change-O DB -defined clones summary file of IGE:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Gives a summary -of the total number of clones in all IGE sequences and their clone size.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>Filtered IMGT -output files</span></b></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a -.txz file with the same format as downloaded IMGT files that contains all -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGA sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a -.txz file with the same format as downloaded IMGT files that contains all IGA -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGA1 sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a -.txz file with the same format as downloaded IMGT files that contains all IGA1 -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGA2 sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz -file with the same format as downloaded IMGT files that contains all IGA2 -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGG sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz -file with the same format as downloaded IMGT files that contains all IGG -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGG1 sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a -.txz file with the same format as downloaded IMGT files that contains all IGG1 -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGG2 sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a -.txz file with the same format as downloaded IMGT files that contains all IGG2 -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGG3 sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz -file with the same format as downloaded IMGT files that contains all IGG3 -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGG4 sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a -.txz file with the same format as downloaded IMGT files that contains all IGG4 -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGM sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a .txz -file with the same format as downloaded IMGT files that contains all IGM -sequences that have passed the chosen filter settings.</span></p> - -<p class=MsoNoSpacing style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'>An IMGT archive -with just the matched and filtered IGE sequences:</span></u><span lang=EN-GB -style='font-size:12.0pt;font-family:"Times New Roman","serif"'> Downloads a -.txz file with the same format as downloaded IMGT files that contains all IGE -sequences that have passed the chosen filter settings.</span></p> - -</div> - -</body> - -</html>
--- a/shm_first.htm Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,127 +0,0 @@ -<html> - -<head> -<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> -<meta name=Generator content="Microsoft Word 14 (filtered)"> -<style> -<!-- - /* Font Definitions */ - @font-face - {font-family:Calibri; - panose-1:2 15 5 2 2 2 4 3 2 4;} - /* Style Definitions */ - p.MsoNormal, li.MsoNormal, div.MsoNormal - {margin-top:0in; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:11.0pt; - font-family:"Calibri","sans-serif";} -.MsoChpDefault - {font-family:"Calibri","sans-serif";} -.MsoPapDefault - {margin-bottom:10.0pt; - line-height:115%;} -@page WordSection1 - {size:8.5in 11.0in; - margin:1.0in 1.0in 1.0in 1.0in;} -div.WordSection1 - {page:WordSection1;} ---> -</style> - -</head> - -<body lang=EN-US> - -<div class=WordSection1> - -<p class=MsoNormalCxSpFirst style='margin-bottom:0in;margin-bottom:.0001pt; -text-align:justify;line-height:normal'><span lang=EN-GB style='font-size:12.0pt; -font-family:"Times New Roman","serif"'>Table showing the order of each -filtering step and the number and percentage of sequences after each filtering -step. </span></p> - -<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt; -text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size: -12.0pt;font-family:"Times New Roman","serif"'>Input:</span></u><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The -number of sequences in the original IMGT file. This is always 100% of the -sequences.</span></p> - -<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt; -text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size: -12.0pt;font-family:"Times New Roman","serif"'>After "no results" filter: </span></u><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'>IMGT -classifies sequences either as "productive", "unproductive", "unknown", or "no -results". Here, the number and percentages of sequences that are not classified -as "no results" are reported.</span></p> - -<p class=MsoNormalCxSpMiddle style='margin-bottom:0in;margin-bottom:.0001pt; -text-align:justify;line-height:normal'><u><span lang=EN-GB style='font-size: -12.0pt;font-family:"Times New Roman","serif"'>After functionality filter:</span></u><span -lang=EN-GB style='font-size:12.0pt;font-family:"Times New Roman","serif"'> The -number and percentages of sequences that have passed the functionality filter. The -filtering performed is dependent on the settings of the functionality filter. -Details on the functionality filter <a name="OLE_LINK12"></a><a -name="OLE_LINK11"></a><a name="OLE_LINK10">can be found on the start page of -the SHM&CSR pipeline</a>.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After -removal sequences that are missing a gene region:</span></u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -In this step all sequences that are missing a gene region (FR1, CDR1, FR2, -CDR2, FR3) that should be present are removed from analysis. The sequence -regions that should be present are dependent on the settings of the sequence -starts at filter. <a name="OLE_LINK9"></a><a name="OLE_LINK8">The number and -percentage of sequences that pass this filter step are reported.</a> </span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After -N filter:</span></u><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'> In this step all sequences that contain -an ambiguous base (n) in the analysed region or the CDR3 are removed from the -analysis. The analysed region is determined by the setting of the sequence -starts at filter. The number and percentage of sequences that pass this filter -step are reported.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After -filter unique sequences</span></u><span lang=EN-GB style='font-size:12.0pt; -line-height:115%;font-family:"Times New Roman","serif"'>: The number and -percentage of sequences that pass the "filter unique sequences" filter. Details -on this filter </span><span lang=EN-GB style='font-size:12.0pt;line-height: -115%;font-family:"Times New Roman","serif"'>can be found on the start page of -the SHM&CSR pipeline</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>After -remove duplicate based on filter:</span></u><span lang=EN-GB style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> The number and -percentage of sequences that passed the remove duplicate filter. Details on the -"remove duplicate filter based on filter" can be found on the start page of the -SHM&CSR pipeline.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK17"></a><a -name="OLE_LINK16"><u><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'>Number of matches sequences:</span></u></a><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -The number and percentage of sequences that passed all the filters described -above and have a (sub)class assigned.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number -of unmatched sequences</span></u><span lang=EN-GB style='font-size:12.0pt; -line-height:115%;font-family:"Times New Roman","serif"'>: The number and percentage -of sequences that passed all the filters described above and do not have -subclass assigned.</span></p> - -<p class=MsoNormal><span lang=EN-GB> </span></p> - -</div> - -</body> - -</html>
--- a/shm_frequency.htm Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -<html> - -<head> -<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> -<meta name=Generator content="Microsoft Word 14 (filtered)"> -<style> -<!-- - /* Style Definitions */ - p.MsoNormal, li.MsoNormal, div.MsoNormal - {margin-top:0in; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:11.0pt; - font-family:"Calibri","sans-serif";} -.MsoChpDefault - {font-family:"Calibri","sans-serif";} -.MsoPapDefault - {margin-bottom:10.0pt; - line-height:115%;} -@page WordSection1 - {size:8.5in 11.0in; - margin:1.0in 1.0in 1.0in 1.0in;} -div.WordSection1 - {page:WordSection1;} ---> -</style> - -</head> - -<body lang=EN-US> - -<div class=WordSection1> - -<p class=MsoNormalCxSpFirst style='text-align:justify'><b><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>SHM -frequency tab</span></u></b></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These -graphs give insight into the level of SHM. The data represented in these graphs -can be downloaded in the download tab. <a name="OLE_LINK24"></a><a -name="OLE_LINK23"></a><a name="OLE_LINK90"></a><a name="OLE_LINK89">More -information on the values found in healthy individuals of different ages can be -found in IJspeert and van Schouwenburg et al, PMID: 27799928. </a></span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Frequency -scatter plot</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A -dot plot showing the percentage of SHM in each transcript divided into the -different (sub)classes. </span><span lang=NL style='font-size:12.0pt; -line-height:115%;font-family:"Times New Roman","serif"'>In the graph each dot -represents an individual transcript.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation -frequency by class</span></u></p> - -<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A -bar graph showing the percentage of transcripts that contain 0%, 0-2%, 2-5%, -5-10% 10-15%, 15-20% or more than 20% SHM for each subclass. </span></p> - -<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van -Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen, -Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation -of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and -Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a -href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span -style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a -href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span -style='color:windowtext'>Link</span></a>]</span></p> - -</div> - -</body> - -</html>
--- a/shm_overview.htm Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,332 +0,0 @@ -<html> - -<head> -<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> -<meta name=Generator content="Microsoft Word 14 (filtered)"> -<style> -<!-- - /* Font Definitions */ - @font-face - {font-family:Calibri; - panose-1:2 15 5 2 2 2 4 3 2 4;} - /* Style Definitions */ - p.MsoNormal, li.MsoNormal, div.MsoNormal - {margin-top:0in; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:11.0pt; - font-family:"Calibri","sans-serif";} -.MsoChpDefault - {font-family:"Calibri","sans-serif";} -.MsoPapDefault - {margin-bottom:10.0pt; - line-height:115%;} -@page WordSection1 - {size:8.5in 11.0in; - margin:1.0in 1.0in 1.0in 1.0in;} -div.WordSection1 - {page:WordSection1;} ---> -</style> - -</head> - -<body lang=EN-US> - -<div class=WordSection1> - -<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Info -table</span></b></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>This -table contains information on different characteristics of SHM. For all -characteristics information can be found for all sequences or only sequences of -a certain (sub)class. All results are based on the sequences that passed the filter -settings chosen on the start page of the SHM & CSR pipeline and only -include details on the analysed region as determined by the setting of the -sequence starts at filter. All data in this table can be downloaded via the -“downloads” tab.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Mutation -frequency:</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK83"></a><a -name="OLE_LINK82"></a><a name="OLE_LINK81"><span lang=EN-GB style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values -give information on the level of SHM. </span></a><a name="OLE_LINK22"></a><a -name="OLE_LINK21"></a><a name="OLE_LINK20"><span lang=EN-GB style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information -on the values found in healthy individuals of different ages can be found in </span></a><a -name="OLE_LINK15"></a><a name="OLE_LINK14"></a><a name="OLE_LINK13"><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>IJspeert -and van Schouwenburg et al, PMID: 27799928</span></a></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number -of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height: -115%;font-family:"Times New Roman","serif"'> Shows the number of total -mutations / the number of sequenced bases (the % of mutated bases).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Median -number of mutations:</span></i><span lang=EN-GB style='font-size:12.0pt; -line-height:115%;font-family:"Times New Roman","serif"'> Shows the median % of -SHM of all sequences.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Patterns -of SHM:</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK72"></a><a -name="OLE_LINK71"></a><a name="OLE_LINK70"><span lang=EN-GB style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These values -give insights into the targeting and patterns of SHM. These values can give -insight into the repair pathways used to repair the U:G mismatches introduced -by AID. </span></a><a name="OLE_LINK40"></a><a name="OLE_LINK39"></a><a -name="OLE_LINK38"></a><a name="OLE_LINK60"><span lang=EN-GB style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>More information -on the values found in healthy individuals of different ages can be found in -IJspeert and van Schouwenburg et al, PMID: 27799928</span></a></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions:</span></i><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -Shows the number of transition mutations / the number of total mutations (the -percentage of mutations that are transitions). Transition mutations are C>T, -T>C, A>G, G>A. </span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transversions:</span></i><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -Shows the number of transversion mutations / the number of total mutations (the -percentage of mutations that are transitions). Transversion mutations are -C>A, C>G, T>A, T>G, A>T, A>C, G>T, G>C.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions -at GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'> <a name="OLE_LINK2"></a><a -name="OLE_LINK1">Shows the number of transitions at GC locations (C>T, -G>A) / the total number of mutations at GC locations (the percentage of -mutations at GC locations that are transitions).</a></span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting -of GC:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'> <a name="OLE_LINK7"></a><a -name="OLE_LINK6"></a><a name="OLE_LINK3">Shows the number of mutations at GC -locations / the total number of mutations (the percentage of total mutations -that are at GC locations).</a> </span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transitions -at AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'> Shows the number of transitions at AT -locations (T>C, A>G) / the total number of mutations at AT locations (the -percentage of mutations at AT locations that are transitions).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Targeting -of AT:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'> Shows the number of mutations at AT -locations / the total number of mutations (the percentage of total mutations -that are at AT locations).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>RGYW:</span></i><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -<a name="OLE_LINK28"></a><a name="OLE_LINK27"></a><a name="OLE_LINK26">Shows -the number of mutations that are in a RGYW motive / The number of total mutations -(the percentage of mutations that are in a RGYW motive). </a><a -name="OLE_LINK62"></a><a name="OLE_LINK61">RGYW motives are known to be -preferentially targeted by AID </a></span><span lang=EN-GB style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine, -Y=pyrimidine, W = A or T).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WRCY:</span></i><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -<a name="OLE_LINK34"></a><a name="OLE_LINK33">Shows the number of mutations -that are in a </a><a name="OLE_LINK32"></a><a name="OLE_LINK31"></a><a -name="OLE_LINK30"></a><a name="OLE_LINK29">WRCY</a> motive / The number of -total mutations (the percentage of mutations that are in a WRCY motive). WRCY -motives are known to be preferentially targeted by AID </span><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine, -Y=pyrimidine, W = A or T).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>WA:</span></i><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -<a name="OLE_LINK37"></a><a name="OLE_LINK36"></a><a name="OLE_LINK35">Shows -the number of mutations that are in a WA motive / The number of total mutations -(the percentage of mutations that are in a WA motive). It is described that -polymerase eta preferentially makes errors at WA motives </a></span><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W -= A or T).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>TW:</span></i><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -Shows the number of mutations that are in a TW motive / The number of total mutations -(the percentage of mutations that are in a TW motive). It is described that -polymerase eta preferentially makes errors at TW motives </span><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(W -= A or T).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen -selection:</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These -values give insight into antigen selection. It has been described that during -antigen selection, there is selection against replacement mutations in the FR -regions as these can cause instability of the B-cell receptor. In contrast -replacement mutations in the CDR regions are important for changing the -affinity of the B-cell receptor and therefore there is selection for this type -of mutations. Silent mutations do not alter the amino acid sequence and -therefore do not play a role in selection. More information on the values found -in healthy individuals of different ages can be found in IJspeert and van -Schouwenburg et al, PMID: 27799928</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>FR -R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'> <a name="OLE_LINK43"></a><a -name="OLE_LINK42"></a><a name="OLE_LINK41">Shows the number of replacement -mutations in the FR regions / The number of silent mutations in the FR regions -(the number of replacement mutations in the FR regions divided by the number of -silent mutations in the FR regions)</a></span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>CDR -R/S:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'> Shows the number of replacement -mutations in the CDR regions / The number of silent mutations in the CDR -regions (the number of replacement mutations in the CDR regions divided by the -number of silent mutations in the CDR regions)</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Number -of sequences nucleotides:</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These -values give information on the number of sequenced nucleotides.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt -in FR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'> <a name="OLE_LINK46"></a><a -name="OLE_LINK45"></a><a name="OLE_LINK44">Shows the number of sequences bases -that are located in the FR regions / The total number of sequenced bases (the -percentage of sequenced bases that are present in the FR regions).</a></span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Nt -in CDR:</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'> Shows the number of sequenced bases -that are located in the CDR regions / <a name="OLE_LINK48"></a><a -name="OLE_LINK47">The total number of sequenced bases (the percentage of -sequenced bases that are present in the CDR regions).</a></span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>A: -</span></i><a name="OLE_LINK51"></a><a name="OLE_LINK50"></a><a -name="OLE_LINK49"><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'>Shows the total number of sequenced -adenines / The total number of sequenced bases (the percentage of sequenced -bases that were adenines).</span></a></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>C: -</span></i><a name="OLE_LINK53"></a><a name="OLE_LINK52"><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows -the total number of sequenced cytosines / The total number of sequenced bases -(the percentage of sequenced bases that were cytosines).</span></a></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>T: -</span></i><a name="OLE_LINK57"></a><a name="OLE_LINK56"><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows -the total number of sequenced </span></a><a name="OLE_LINK55"></a><a -name="OLE_LINK54"><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'>thymines</span></a><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'> -/ The total number of sequenced bases (the percentage of sequenced bases that -were thymines).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><i><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>G: -</span></i><span lang=EN-GB style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'>Shows the total number of sequenced <a -name="OLE_LINK59"></a><a name="OLE_LINK58">guanine</a>s / The total number of -sequenced bases (the percentage of sequenced bases that were guanines).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK69"><b><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></a></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK75"></a><a -name="OLE_LINK74"></a><a name="OLE_LINK73"><span lang=EN-GB style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs visualize -information on the patterns and targeting of SHM and thereby give information -into the repair pathways used to repair the U:G mismatches introduced by AID. The -data represented in these graphs can be downloaded in the download tab. More -information on the values found in healthy individuals of different ages can be -found in IJspeert and van Schouwenburg et al, PMID: 27799928</span></a><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>. -<a name="OLE_LINK85"></a><a name="OLE_LINK84"></a></span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Percentage -of mutations in AID and pol eta motives</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes -<a name="OLE_LINK80"></a><a name="OLE_LINK79"></a><a name="OLE_LINK78">for each -(sub)class </a>the percentage of mutations that are present in AID (RGYW or -WRCY) or polymerase eta motives (WA or TW) in the different subclasses </span><span -lang=EN-GB style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>(R=Purine, -Y=pyrimidine, W = A or T).</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Relative -mutation patterns</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualizes -for each (sub)class the distribution of mutations between mutations at AT -locations and transitions or transversions at GC locations. </span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=NL -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Absolute -mutation patterns</span></u></p> - -<p class=MsoNormalCxSpLast style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Visualized -for each (sub)class the percentage of sequenced AT and GC bases that are -mutated. The mutations at GC bases are divided into transition and transversion -mutations<a name="OLE_LINK77"></a><a name="OLE_LINK76">. </a></span></p> - -<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van -Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen, -Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation -of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and -Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a -href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span -style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a -href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span -style='color:windowtext'>Link</span></a>]</span></p> - -</div> - -</body> - -</html>
--- a/shm_selection.htm Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,128 +0,0 @@ -<html> - -<head> -<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> -<meta name=Generator content="Microsoft Word 14 (filtered)"> -<style> -<!-- - /* Font Definitions */ - @font-face - {font-family:Calibri; - panose-1:2 15 5 2 2 2 4 3 2 4;} -@font-face - {font-family:UICTFontTextStyleBody;} - /* Style Definitions */ - p.MsoNormal, li.MsoNormal, div.MsoNormal - {margin-top:0in; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:11.0pt; - font-family:"Calibri","sans-serif";} -a:link, span.MsoHyperlink - {color:blue; - text-decoration:underline;} -a:visited, span.MsoHyperlinkFollowed - {color:purple; - text-decoration:underline;} -span.apple-converted-space - {mso-style-name:apple-converted-space;} -.MsoChpDefault - {font-family:"Calibri","sans-serif";} -.MsoPapDefault - {margin-bottom:10.0pt; - line-height:115%;} -@page WordSection1 - {size:8.5in 11.0in; - margin:1.0in 1.0in 1.0in 1.0in;} -div.WordSection1 - {page:WordSection1;} ---> -</style> - -</head> - -<body lang=EN-US link=blue vlink=purple> - -<div class=WordSection1> - -<p class=MsoNormalCxSpFirst style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>References</span></b></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"; -color:black'>Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying -selection in high-throughput Immunoglobulin sequencing data sets. In<span -class=apple-converted-space> </span><em>Nucleic Acids Research, 40 (17), -pp. e134–e134.</em><span class=apple-converted-space><i> </i></span>[</span><span -lang=EN-GB><a href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"; -color:#303030'>doi:10.1093/nar/gks457</span></a></span><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"; -color:black'>][</span><span lang=EN-GB><a -href="http://dx.doi.org/10.1093/nar/gks457" target="_blank"><span -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"; -color:#303030'>Link</span></a></span><span lang=EN-GB style='font-size:12.0pt; -line-height:115%;font-family:"Times New Roman","serif";color:black'>]</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs</span></b></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>AA -mutation frequency</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>For -each class, the frequency of replacement mutations at each amino acid position -is shown, which is calculated by dividing the number of replacement mutations -at a particular amino acid position/the number sequences that have an amino -acid at that particular position. Since the length of the CDR1 and CDR2 region -is not the same for every VH gene, some amino acids positions are absent. -Therefore we calculate the frequency using the number of amino acids present at -that that particular location. </span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Antigen -selection (BASELINe)</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Shows -the results of the analysis of antigen selection as performed using BASELINe. -Details on the analysis performed by BASELINe can be found in Yaari et al, -PMID: 22641856. The settings used for the analysis are</span><span lang=EN-GB -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>: -focused, SHM targeting model: human Tri-nucleotide, custom bounderies. The -custom boundries are dependent on the ‘sequence starts at filter’. </span></p> - -<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL -style='font-family:UICTFontTextStyleBody;color:black'>Leader: -1:26:38:55:65:104:-</span></p> - -<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL -style='font-family:UICTFontTextStyleBody;color:black'>FR1: 27:27:38:55:65:104:-</span></p> - -<p class=MsoNormalCxSpMiddle style='line-height:normal'><span lang=NL -style='font-family:UICTFontTextStyleBody;color:black'>CDR1: 27:27:38:55:65:104:-</span></p> - -<p class=MsoNormalCxSpLast style='line-height:normal'><span lang=NL -style='font-family:UICTFontTextStyleBody;color:black'>FR2: 27:27:38:55:65:104:-</span></p> - -<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van -Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen, -Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation -of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and -Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a -href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span -style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a -href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span -style='color:windowtext'>Link</span></a>]</span></p> - -</div> - -</body> - -</html>
--- a/shm_transition.htm Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,120 +0,0 @@ -<html> - -<head> -<meta http-equiv=Content-Type content="text/html; charset=windows-1252"> -<meta name=Generator content="Microsoft Word 14 (filtered)"> -<style> -<!-- - /* Font Definitions */ - @font-face - {font-family:Calibri; - panose-1:2 15 5 2 2 2 4 3 2 4;} - /* Style Definitions */ - p.MsoNormal, li.MsoNormal, div.MsoNormal - {margin-top:0in; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:11.0pt; - font-family:"Calibri","sans-serif";} -a:link, span.MsoHyperlink - {color:blue; - text-decoration:underline;} -a:visited, span.MsoHyperlinkFollowed - {color:purple; - text-decoration:underline;} -p.msochpdefault, li.msochpdefault, div.msochpdefault - {mso-style-name:msochpdefault; - margin-right:0in; - margin-left:0in; - font-size:12.0pt; - font-family:"Calibri","sans-serif";} -p.msopapdefault, li.msopapdefault, div.msopapdefault - {mso-style-name:msopapdefault; - margin-right:0in; - margin-bottom:10.0pt; - margin-left:0in; - line-height:115%; - font-size:12.0pt; - font-family:"Times New Roman","serif";} -span.apple-converted-space - {mso-style-name:apple-converted-space;} -.MsoChpDefault - {font-size:10.0pt; - font-family:"Calibri","sans-serif";} -.MsoPapDefault - {margin-bottom:10.0pt; - line-height:115%;} -@page WordSection1 - {size:8.5in 11.0in; - margin:1.0in 1.0in 1.0in 1.0in;} -div.WordSection1 - {page:WordSection1;} ---> -</style> - -</head> - -<body lang=EN-US link=blue vlink=purple> - -<div class=WordSection1> - -<p class=MsoNormalCxSpFirst style='text-align:justify'><span style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>These graphs and -tables give insight into the targeting and patterns of SHM. This can give -insight into the DNA repair pathways used to solve the U:G mismatches -introduced by AID. More information on the values found in healthy individuals -of different ages can be found in IJspeert and van Schouwenburg et al, PMID: -27799928.</span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Graphs -</span></b></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK93"></a><a -name="OLE_LINK92"></a><a name="OLE_LINK91"><u><span style='font-size:12.0pt; -line-height:115%;font-family:"Times New Roman","serif"'>Heatmap transition -information</span></u></a></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><a name="OLE_LINK98"></a><a -name="OLE_LINK97"><span style='font-size:12.0pt;line-height:115%;font-family: -"Times New Roman","serif"'>Heatmaps visualizing for each subclass the frequency -of all possible substitutions. On the x-axes the original base is shown, while -the y-axes shows the new base. The darker the shade of blue, the more frequent -this type of substitution is occurring. </span></a></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><u><span -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bargraph -transition information</span></u></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Bar graph -visualizing for each original base the distribution of substitutions into the other -bases. A graph is included for each (sub)class. </span></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><b><span -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Tables</span></b></p> - -<p class=MsoNormalCxSpMiddle style='text-align:justify'><span style='font-size: -12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Transition -tables are shown for each (sub)class. All the original bases are listed -horizontally, while the new bases are listed vertically. </span></p> - -<p class=MsoNormal><span lang=NL style='font-size:12.0pt;line-height:115%; -font-family:"Times New Roman","serif"'>Hanna IJspeert, Pauline A. van -Schouwenburg, David van Zessen, Ingrid Pico-Knijnenburg, Gertjan J. Driessen, -Andrew P. Stubbs, and Mirjam van der Burg (2016). </span><span -style='font-size:12.0pt;line-height:115%;font-family:"Times New Roman","serif"'>Evaluation -of the Antigen-Experienced B-Cell Receptor Repertoire in Healthy Children and -Adults. In <i>Frontiers in Immunolog, 7, pp. e410-410. </i>[<a -href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span -style='color:windowtext'>doi:10.3389/fimmu.2016.00410</span></a>][<a -href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5066086/"><span -style='color:windowtext'>Link</span></a>]</span></p> - -</div> - -</body> - -</html>
--- a/summary_to_fasta.py Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file") -parser.add_argument("--fasta", help="The output fasta file") - -args = parser.parse_args() - -infile = args.input -fasta = args.fasta - -with open(infile, 'r') as i, open(fasta, 'w') as o: - first = True - id_col = 0 - seq_col = 0 - no_results = 0 - no_seqs = 0 - passed = 0 - for line in i: - splt = line.split("\t") - if first: - id_col = splt.index("Sequence ID") - seq_col = splt.index("Sequence") - first = False - continue - if len(splt) < 5: - no_results += 1 - continue - - ID = splt[id_col] - seq = splt[seq_col] - - if not len(seq) > 0: - no_seqs += 1 - continue - - o.write(">" + ID + "\n" + seq + "\n") - passed += 1 - - print "No results:", no_results - print "No sequences:", no_seqs - print "Written to fasta file:", passed
--- a/wrapper.sh Tue Sep 01 16:03:44 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,913 +0,0 @@ -#!/bin/bash -#set -e -dir="$(cd "$(dirname "$0")" && pwd)" -input=$1 -method=$2 -log=$3 #becomes the main html page at the end -outdir=$4 -output="$outdir/index.html" #copied to $log location at the end -title="$5" -include_fr1=$6 -functionality=$7 -unique=$8 -naive_output=$9 -naive_output_ca=${10} -naive_output_cg=${11} -naive_output_cm=${12} -naive_output_ce=${13} -naive_output_all=${14} -filter_unique=${15} -filter_unique_count=${16} -class_filter=${17} -empty_region_filter=${18} -fast=${19} - -mkdir $outdir - -tar -xzf $dir/style.tar.gz -C $outdir - -echo "---------------- read parameters ----------------" -echo "---------------- read parameters ----------------<br />" > $log - -echo "unpacking IMGT file" - -type="`file $input`" -if [[ "$type" == *"Zip archive"* ]] ; then - echo "Zip archive" - echo "unzip $input -d $PWD/files/" - unzip $input -d $PWD/files/ -elif [[ "$type" == *"XZ compressed data"* ]] ; then - echo "ZX archive" - echo "tar -xJf $input -C $PWD/files/" - mkdir -p "$PWD/files/$title" - tar -xJf $input -C "$PWD/files/$title" -else - echo "Unrecognized format $type" - echo "Unrecognized format $type" > $log - exit 1 -fi - -cat "`find $PWD/files/ -name "1_*"`" > $PWD/summary.txt -cat "`find $PWD/files/ -name "2_*"`" > $PWD/gapped_nt.txt -cat "`find $PWD/files/ -name "3_*"`" > $PWD/sequences.txt -cat "`find $PWD/files/ -name "4_*"`" > $PWD/gapped_aa.txt -cat "`find $PWD/files/ -name "5_*"`" > $PWD/aa.txt -cat "`find $PWD/files/ -name "6_*"`" > $PWD/junction.txt -cat "`find $PWD/files/ -name "7_*"`" > $PWD/mutationanalysis.txt -cat "`find $PWD/files/ -name "8_*"`" > $PWD/mutationstats.txt -cat "`find $PWD/files/ -name "9_*"`" > $PWD/aa_change_stats.txt -cat "`find $PWD/files/ -name "10_*"`" > $PWD/hotspots.txt - -echo "---------------- unique id check ----------------" - -Rscript $dir/check_unique_id.r $PWD/summary.txt $PWD/gapped_nt.txt $PWD/sequences.txt $PWD/gapped_aa.txt $PWD/aa.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/aa_change_stats.txt $PWD/hotspots.txt - -if [[ ${#BLASTN_DIR} -ge 5 ]] ; then - echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}" -else - BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin" - echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}" -fi - -echo "---------------- class identification ----------------" -echo "---------------- class identification ----------------<br />" >> $log - -python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt - -echo "---------------- merge_and_filter.r ----------------" -echo "---------------- merge_and_filter.r ----------------<br />" >> $log - -Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${filter_unique_count} ${class_filter} ${empty_region_filter} 2>&1 - -if [[ "${naive_output}" == "yes" ]] || [[ "$fast" == "no" ]] ; then - - echo "---------------- creating new IMGT zips ----------------" - echo "---------------- creating new IMGT zips ----------------<br />" >> $log - - mkdir $outdir/new_IMGT - - cp $PWD/summary.txt "$outdir/new_IMGT/1_Summary.txt" - cp $PWD/gapped_nt.txt "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" - cp $PWD/sequences.txt "$outdir/new_IMGT/3_Nt-sequences.txt" - cp $PWD/gapped_aa.txt "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt" - cp $PWD/aa.txt "$outdir/new_IMGT/5_AA-sequences.txt" - cp $PWD/junction.txt "$outdir/new_IMGT/6_Junction.txt" - cp $PWD/mutationanalysis.txt "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt" - cp $PWD/mutationstats.txt "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt" - cp $PWD/aa_change_stats.txt "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt" - cp $PWD/hotspots.txt "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt" - - mkdir $outdir/new_IMGT_IGA - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA - - mkdir $outdir/new_IMGT_IGA1 - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1 - - mkdir $outdir/new_IMGT_IGA2 - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2 - - mkdir $outdir/new_IMGT_IGG - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG - - mkdir $outdir/new_IMGT_IGG1 - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1 - - mkdir $outdir/new_IMGT_IGG2 - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2 - - mkdir $outdir/new_IMGT_IGG3 - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3 - - mkdir $outdir/new_IMGT_IGG4 - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4 - - mkdir $outdir/new_IMGT_IGM - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM - - mkdir $outdir/new_IMGT_IGE - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE - - Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1 - - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1 - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1 - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1 - - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1 - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1 - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1 - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1 - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1 - - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1 - - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE/ $outdir/merged.txt "IGE" 2>&1 - - - tmp="$PWD" - cd $outdir/new_IMGT/ #tar weirdness... - tar -cJf ../new_IMGT.txz * - - cd $outdir/new_IMGT_IGA/ - tar -cJf ../new_IMGT_IGA.txz * - - cd $outdir/new_IMGT_IGA1/ - tar -cJf ../new_IMGT_IGA1.txz * - - cd $outdir/new_IMGT_IGA2/ - tar -cJf ../new_IMGT_IGA2.txz * - - cd $outdir/new_IMGT_IGG/ - tar -cJf ../new_IMGT_IGG.txz * - - cd $outdir/new_IMGT_IGG1/ - tar -cJf ../new_IMGT_IGG1.txz * - - cd $outdir/new_IMGT_IGG2/ - tar -cJf ../new_IMGT_IGG2.txz * - - cd $outdir/new_IMGT_IGG3/ - tar -cJf ../new_IMGT_IGG3.txz * - - cd $outdir/new_IMGT_IGG4/ - tar -cJf ../new_IMGT_IGG4.txz * - - cd $outdir/new_IMGT_IGM/ - tar -cJf ../new_IMGT_IGM.txz * - - cd $outdir/new_IMGT_IGE/ - tar -cJf ../new_IMGT_IGE.txz * - - cd $tmp -fi - -echo "---------------- shm_csr.r ----------------" -echo "---------------- shm_csr.r ----------------<br />" >> $log - -classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched" -echo "R mutation analysis" -Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1 - -echo "---------------- plot_pdfs.r ----------------" -echo "---------------- plot_pdfs.r ----------------<br />" >> $log - -echo "Rscript $dir/shm_csr.r $outdir/pdfplots.RData $outdir 2>&1" - -Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir" 2>&1 - -echo "---------------- shm_csr.py ----------------" -echo "---------------- shm_csr.py ----------------<br />" >> $log - -python $dir/shm_csr.py --input $outdir/merged.txt --genes $classes --empty_region_filter "${empty_region_filter}" --output $outdir/hotspot_analysis.txt - -echo "---------------- aa_histogram.r ----------------" -echo "---------------- aa_histogram.r ----------------<br />" >> $log - -Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" $outdir/ 2>&1 -if [ -e "$outdir/aa_histogram_.png" ]; then - mv $outdir/aa_histogram_.png $outdir/aa_histogram.png - mv $outdir/aa_histogram_.pdf $outdir/aa_histogram.pdf - mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt - mv $outdir/aa_histogram_absent_.txt $outdir/aa_histogram_absent.txt - mv $outdir/aa_histogram_count_.txt $outdir/aa_histogram_count.txt - mv $outdir/aa_histogram_sum_.txt $outdir/aa_histogram_sum.txt -fi - -genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE) - -funcs=(sum mean median) -funcs=(sum) - -echo "---------------- sequence_overview.r ----------------" -echo "---------------- sequence_overview.r ----------------<br />" >> $log - -mkdir $outdir/sequence_overview - -Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt ${empty_region_filter} 2>&1 - -echo "<table border='1'>" > $outdir/base_overview.html - -while IFS=$'\t' read ID class seq A C G T -do - echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html -done < $outdir/sequence_overview/ntoverview.txt - -echo "<html><center><h1>$title</h1></center>" > $output -echo "<meta name='viewport' content='width=device-width, initial-scale=1'>" >> $output -echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $output -echo "<script type='text/javascript' src='tabber.js'></script>" >> $output -echo "<script type='text/javascript' src='script.js'></script>" >> $output -echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> $output -echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>" >> $output - -matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`" -unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`" -total_count=$((matched_count + unmatched_count)) -perc_count=$((unmatched_count / total_count * 100)) -perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"` -perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"` - -echo "<center><h2>Total: ${total_count}</h2></center>" >> $output -echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output -echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output - -echo "---------------- main tables ----------------" -echo "---------------- main tables ----------------<br />" >> $log - -echo "<div class='tabber'>" >> $output -echo "<div class='tabbertab' title='SHM Overview' style='width: 3000px;'>" >> $output - -for func in ${funcs[@]} -do - - echo "---------------- $func table ----------------" - echo "---------------- $func table ----------------<br />" >> $log - - cat $outdir/mutations_${func}.txt $outdir/shm_overview_tandem_row.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt - - echo "---------------- pattern_plots.r ----------------" - echo "---------------- pattern_plots.r ----------------<br />" >> $log - - Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/absolute_mutations $outdir/shm_overview.txt 2>&1 - - echo "<table class='pure-table pure-table-striped'>" >> $output - echo "<thead><tr><th>info</th>" >> $output - - if [ "${class_filter}" != "101_101" ] ; then - - for gene in ${genes[@]} - do - tmp=`cat $outdir/${gene}_${func}_n.txt` - echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output - done - - tmp=`cat $outdir/all_${func}_n.txt` - echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output - tmp=`cat $outdir/unmatched_${func}_n.txt` - echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th><tr></thead>" >> $output - - while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz - do - if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] || [ "$name" == "Tandems/Expected (ratio)" ] ; then #meh - echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${cex}/${cey} (${cez})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>" >> $output - elif [ "$name" == "Median of Number of Mutations (%)" ] ; then - echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${cez}%</td><td>${allz}%</td><td>${unz}%</td></tr>" >> $output - else - echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${cex}/${cey} (${cez}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output - fi - done < $outdir/data_${func}.txt - - else - tmp=`cat $outdir/all_${func}_n.txt` - echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output - - while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz - do - if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh - echo "<tr><td>$name</td><td>${allx}/${ally}</td></tr>" >> $output - elif [ "$name" == "Median of Number of Mutations (%)" ] ; then - echo "<tr><td>$name</td><td>${allz}%</td></tr>" >> $output - else - echo "<tr><td>$name</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output - fi - done < $outdir/data_${func}.txt - - fi - echo "</table>" >> $output - #echo "<a href='data_${func}.txt'>Download data</a>" >> $output -done - -echo "<a href='aid_motives.pdf'><img src='aid_motives.png' /></a><br />" >> $output -echo "<a href='relative_mutations.pdf'><img src='relative_mutations.png' /></a><br />" >> $output -echo "<a href='absolute_mutations.pdf'><img src='absolute_mutations.png' /></a><br />" >> $output -echo "<br />" >> $output -cat $dir/shm_overview.htm >> $output -echo "</div>" >> $output #SHM overview tab end - -echo "---------------- images ----------------" -echo "---------------- images ----------------<br />" >> $log - -echo "<div class='tabbertab' title='SHM Frequency' style='width: 3000px;'></a>" >> $output - -if [ -a $outdir/scatter.png ] -then - echo "<a href='scatter.pdf'><img src='scatter.png'/><br />" >> $output -fi -if [ -a $outdir/frequency_ranges.png ] -then - echo "<a href='frequency_ranges.pdf'><img src='frequency_ranges.png'/></a><br />" >> $output -fi - -echo "<br />" >> $output -cat $dir/shm_frequency.htm >> $output - -echo "</div>" >> $output #SHM frequency tab end - -echo "<div class='tabbertab' title='Transition tables' style='width: 3000px;'>" >> $output - -echo "<table border='0'>" >> $output - -for gene in ${genes[@]} -do - echo "<tr>" >> $output - echo "<td><h1>${gene}</h1></td>" >> $output - - if [ -e $outdir/transitions_heatmap_${gene}.png ] - then - echo "<td><a href='transitions_heatmap_${gene}.pdf'><img src='transitions_heatmap_${gene}.png' /></a></td>" >> $output - else - echo "<td></td>" >> $output - fi - - if [ -e $outdir/transitions_stacked_${gene}.png ] - then - echo "<td><a href='transitions_stacked_${gene}.pdf'><img src='transitions_stacked_${gene}.png' /></a></td>" >> $output - else - echo "<td></td>" >> $output - fi - - echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output - echo "<tr><td></td><td colspan="5"><center>To</center></td></tr>" >> $output - first="true" - while IFS=, read from a c g t - do - if [ "$first" == "true" ] ; then - echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output - first="false" - else - echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output - fi - done < $outdir/transitions_${gene}_sum.txt - echo "</table></td>" >> $output - - echo "</tr>" >> $output -done - -echo "<tr>" >> $output -echo "<td><h1>All</h1></td>" >> $output -echo "<td><a href='transitions_heatmap_all.pdf'><img src='transitions_heatmap_all.png' /></a></td>" >> $output -echo "<td><a href='transitions_stacked_all.pdf'><img src='transitions_stacked_all.png' /></a></td>" >> $output -echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> $output -echo "<tr><td></td><td colspan="5"><center>To</center></td></tr>" >> $output -first="true" -while IFS=, read from a c g t - do - if [ "$first" == "true" ] ; then - echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output - first="false" - else - echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output - fi -done < $outdir/transitions_all_sum.txt -echo "</table></td>" >> $output - -echo "</tr>" >> $output - -echo "</table>" >> $output - -echo "<br />" >> $output -cat $dir/shm_transition.htm >> $output - -echo "</div>" >> $output #transition tables tab end - -echo "<div class='tabbertab' title='Antigen Selection'>" >> $output - -if [ -e $outdir/aa_histogram.png ] -then - echo "<a href='aa_histogram.pdf'><img src='aa_histogram.png'/></a><br />" >> $output -fi - -if [ -e $outdir/aa_histogram_IGA.png ] -then - echo "<a href='aa_histogram_IGA.pdf'><img src='aa_histogram_IGA.png'/></a><br />" >> $output -fi - -if [ -e $outdir/aa_histogram_IGG.png ] -then - echo "<a href='aa_histogram_IGG.pdf'><img src='aa_histogram_IGG.png'/></a><br />" >> $output -fi - -if [ -e $outdir/aa_histogram_IGM.png ] -then - echo "<a href='aa_histogram_IGM.pdf'><img src='aa_histogram_IGM.png'/></a><br />" >> $output -fi - -if [ -e $outdir/aa_histogram_IGE.png ] -then - echo "<a href='aa_histogram_IGE.pdf'><img src='aa_histogram_IGE.png'/></a><br />" >> $output -fi - - - -if [[ "$fast" == "no" ]] ; then - - - - echo "---------------- baseline ----------------" - echo "---------------- baseline ----------------<br />" >> $log - tmp="$PWD" - - mkdir $outdir/baseline - - echo "<center><h1>BASELINe</h1>" >> $output - header_substring="Based on CDR1, FR2, CDR2, FR3 (27:27:38:55:65:104:-)" - - baseline_boundaries="27:27:38:55:65:104:-" - - if [[ "${empty_region_filter}" == "leader" ]] ; then - baseline_boundaries="1:26:38:55:65:104:-" - header_substring="Based on FR1, CDR1, FR2, CDR2, FR3 (1:26:38:55:65:104,-)" - fi - - echo "<p>${header_substring}</p></center>" >> $output - - mkdir $outdir/baseline/IGA_IGG_IGM - if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGA_IGG_IGM - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" - else - echo "No sequences" > "$outdir/baseline.txt" - fi - - mkdir $outdir/baseline/IGA - if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGA - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt" - else - echo "No IGA sequences" > "$outdir/baseline_IGA.txt" - fi - - mkdir $outdir/baseline/IGG - if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGG - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGG.txz "IGG" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt" - else - echo "No IGG sequences" > "$outdir/baseline_IGG.txt" - fi - - mkdir $outdir/baseline/IGM - if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGM - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt" - else - echo "No IGM sequences" > "$outdir/baseline_IGM.txt" - fi - - mkdir $outdir/baseline/IGE - if [[ $(wc -l < $outdir/new_IMGT_IGE/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGE - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGE.txz "IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGE.pdf" "Sequence.ID" "$outdir/baseline_IGE.txt" - else - echo "No IGE sequences" > "$outdir/baseline_IGE.txt" - fi - - cd $tmp - - echo "Cleaning up *.RData files" - find $outdir/baseline -name "*.RData" -type f -delete - - if [ -e $outdir/baseline.pdf ] - then - echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output - fi - - if [ -e $outdir/baseline_IGA.pdf ] - then - echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output - fi - - if [ -e $outdir/baseline_IGG.pdf ] - then - echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output - fi - - if [ -e $outdir/baseline_IGM.pdf ] - then - echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output - fi - - if [ -e $outdir/baseline_IGE.pdf ] - then - echo "<embed src='baseline_IGE.pdf' width='700px' height='1000px'>" >> $output - fi -fi - -echo "<br />" >> $output -cat $dir/shm_selection.htm >> $output - -echo "</div>" >> $output #antigen selection tab end - -echo "<div class='tabbertab' title='CSR'>" >> $output #CSR tab - -if [ -e $outdir/IGA.png ] -then - echo "<a href='IGA.pdf'><img src='IGA.png'/></a><br />" >> $output -fi -if [ -e $outdir/IGG.png ] -then - echo "<a href='IGG.pdf'><img src='IGG.png'/></a><br />" >> $output -fi - -echo "<br />" >> $output -cat $dir/shm_csr.htm >> $output - -echo "</div>" >> $output #CSR tab end - -if [[ "$fast" == "no" ]] ; then - - echo "---------------- change-o MakeDB ----------------" - - mkdir $outdir/change_o - - tmp="$PWD" - - cd $outdir/change_o - - bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-db-defined_first_clones.txt 2>&1 - - mkdir $outdir/new_IMGT_changeo - cp $outdir/new_IMGT/* $outdir/new_IMGT_changeo - - Rscript $dir/new_imgt.r $outdir/new_IMGT_changeo $outdir/change_o/change-o-db-defined_first_clones.txt "-" 2>&1 - - cd $outdir/new_IMGT_changeo - tar -cJf ../new_IMGT_first_seq_of_clone.txz * - cd $outdir/change_o - - rm -rf $outdir/new_IMGT_changeo - - Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1 - echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1" - - if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-db-defined_first_clones-IGA.txt 2>&1 - - mkdir $outdir/new_IMGT_IGA_changeo - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA_changeo - - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA_changeo $outdir/change_o/change-o-db-defined_first_clones-IGA.txt "-" 2>&1 - - cd $outdir/new_IMGT_IGA_changeo - tar -cJf ../new_IMGT_IGA_first_seq_of_clone.txz * - - rm -rf $outdir/new_IMGT_IGA_changeo - - cd $outdir/change_o - else - echo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt" - echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt" - fi - - if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-db-defined_first_clones-IGG.txt 2>&1 - - mkdir $outdir/new_IMGT_IGG_changeo - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG_changeo - - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG_changeo $outdir/change_o/change-o-db-defined_first_clones-IGG.txt "-" 2>&1 - - cd $outdir/new_IMGT_IGG_changeo - tar -cJf ../new_IMGT_IGG_first_seq_of_clone.txz * - rm -rf $outdir/new_IMGT_IGG_changeo - - cd $outdir/change_o - else - echo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt" - echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt" - fi - - if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-db-defined_first_clones-IGM.txt 2>&1 - - mkdir $outdir/new_IMGT_IGM_changeo - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM_changeo - - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM_changeo $outdir/change_o/change-o-db-defined_first_clones-IGM.txt "-" 2>&1 - - cd $outdir/new_IMGT_IGM_changeo - tar -cJf ../new_IMGT_IGM_first_seq_of_clone.txz * - - rm -rf $outdir/new_IMGT_IGM_changeo - - cd $outdir/change_o - else - echo "No IGM sequences" > "$outdir/change_o/change-o-db-defined_clones-IGM.txt" - echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt" - fi - - if [[ $(wc -l < $outdir/new_IMGT_IGE/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGE.txz false false false $outdir/change_o/change-o-db-IGE.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGE.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-defined_clones-summary-IGE.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-db-defined_first_clones-IGE.txt 2>&1 - - mkdir $outdir/new_IMGT_IGE_changeo - cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE_changeo - - Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE_changeo $outdir/change_o/change-o-db-defined_first_clones-IGE.txt "-" 2>&1 - - cd $outdir/new_IMGT_IGE_changeo - tar -cJf ../new_IMGT_IGE_first_seq_of_clone.txz * - - rm -rf $outdir/new_IMGT_IGE_changeo - - cd $outdir/change_o - else - echo "No IGE sequences" > "$outdir/change_o/change-o-db-defined_clones-IGE.txt" - echo "No IGE sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGE.txt" - fi - - cd "$tmp" - - rm -rf $outdir/new_IMGT - rm -rf $outdir/new_IMGT_IGA/ - rm -rf $outdir/new_IMGT_IGA1/ - rm -rf $outdir/new_IMGT_IGA2/ - rm -rf $outdir/new_IMGT_IGG/ - rm -rf $outdir/new_IMGT_IGG1/ - rm -rf $outdir/new_IMGT_IGG2/ - rm -rf $outdir/new_IMGT_IGG3/ - rm -rf $outdir/new_IMGT_IGG4/ - rm -rf $outdir/new_IMGT_IGM/ - rm -rf $outdir/new_IMGT_IGE/ - - echo "<div class='tabbertab' title='Clonal Relation' style='width: 7000px;'>" >> $output #clonality tab - - function clonality_table { - local infile=$1 - local outfile=$2 - - echo "<table class='pure-table pure-table-striped'>" >> $outfile - echo "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>" >> $outfile - - first='true' - - while read size clones seqs - do - if [[ "$first" == "true" ]]; then - first="false" - continue - fi - echo "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>" >> $outfile - done < $infile - - echo "</table>" >> $outfile - } - echo "<div class='tabber'>" >> $output - - echo "<div class='tabbertab' title='All'>" >> $output - clonality_table $outdir/change_o/change-o-defined_clones-summary.txt $output - echo "</div>" >> $output - - echo "<div class='tabbertab' title='IGA'>" >> $output - clonality_table $outdir/change_o/change-o-defined_clones-summary-IGA.txt $output - echo "</div>" >> $output - - echo "<div class='tabbertab' title='IGG'>" >> $output - clonality_table $outdir/change_o/change-o-defined_clones-summary-IGG.txt $output - echo "</div>" >> $output - - echo "<div class='tabbertab' title='IGM'>" >> $output - clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output - echo "</div>" >> $output - - echo "<div class='tabbertab' title='IGE'>" >> $output - clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output - echo "</div>" >> $output - - echo "<div class='tabbertab' title='Overlap' style='width: 7000px;'>" >> $output - cat "$outdir/sequence_overview/index.html" | sed -e 's:</td>:</td>\n:g' | sed "s:href='\(.*\).html:href='sequence_overview/\1.html:g" >> $output # rewrite href to 'sequence_overview/..." - echo "</div>" >> $output - - echo "</div>" >> $output #clonality tabber end - - echo "<br />" >> $output - cat $dir/shm_clonality.htm >> $output - - echo "</div>" >> $output #clonality tab end - -fi - -echo "<div class='tabbertab' title='Downloads'>" >> $output - -echo "<table class='pure-table pure-table-striped'>" >> $output -echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> $output -echo "<tr><td>The complete dataset</td><td><a href='merged.txt' download='merged.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The filtered dataset</td><td><a href='filtered.txt' download='filtered.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt' download='unmatched.txt' >Download</a></td></tr>" >> $output - -echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Overview</td></tr>" >> $output -echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='shm_overview.txt' download='shm_overview.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the percentage of mutations in AID and pol eta motives plot</td><td><a href='aid_motives.txt' download='aid_motives.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the relative mutation patterns plot</td><td><a href='relative_mutations.txt' download='relative_mutations.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the absolute mutation patterns plot</td><td><a href='absolute_mutations.txt' download='absolute_mutations.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Data about tandem mutations by ID</td><td><a href='tandems_by_id.txt' download='tandems_by_id.txt' >Download</a></td></tr>" >> $output - -echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Frequency</td></tr>" >> $output -echo "<tr><td>The data generate the frequency scatter plot</td><td><a href='scatter.txt' download='scatter.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the frequency by class plot</td><td><a href='frequency_ranges_classes.txt' download='frequency_ranges_classes.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for frequency by subclass</td><td><a href='frequency_ranges_subclasses.txt' download='frequency_ranges_subclasses.txt' >Download</a></td></tr>" >> $output - -echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Transition Tables</td></tr>" >> $output -echo "<tr><td>The data for the 'all' transition plot</td><td><a href='transitions_all_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transitions_IGA_sum.txt' download='transitions_IGA_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA1_sum.txt' download='transitions_IGA1_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGA2' transition plot</td><td><a href='transitions_IGA2_sum.txt' download='transitions_IGA2_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGG' transition plot</td><td><a href='transitions_IGG_sum.txt' download='transitions_IGG_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGG1' transition plot</td><td><a href='transitions_IGG1_sum.txt' download='transitions_IGG1_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGG2' transition plot</td><td><a href='transitions_IGG2_sum.txt' download='transitions_IGG2_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGG3' transition plot</td><td><a href='transitions_IGG3_sum.txt' download='transitions_IGG3_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGG4' transition plot</td><td><a href='transitions_IGG4_sum.txt' download='transitions_IGG4_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGM' transition plot</td><td><a href='transitions_IGM_sum.txt' download='transitions_IGM_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGE' transition plot</td><td><a href='transitions_IGE_sum.txt' download='transitions_IGE_sum.txt' >Download</a></td></tr>" >> $output - -echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Antigen Selection</td></tr>" >> $output -echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt' download='aa_id_mutations.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Presence of AA per sequence ID</td><td><a href='absent_aa_id.txt' download='absent_aa_id.txt' >Download</a></td></tr>" >> $output - -echo "<tr><td>The data used to generate the aa mutation frequency plot</td><td><a href='aa_histogram_sum.txt' download='aa_histogram_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the aa mutation frequency plot for IGA</td><td><a href='aa_histogram_sum_IGA.txt' download='aa_histogram_sum_IGA.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the aa mutation frequency plot for IGG</td><td><a href='aa_histogram_sum_IGG.txt' download='aa_histogram_sum_IGG.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the aa mutation frequency plot for IGM</td><td><a href='aa_histogram_sum_IGM.txt' download='aa_histogram_sum_IGM.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data used to generate the aa mutation frequency plot for IGE</td><td><a href='aa_histogram_sum_IGE.txt' download='aa_histogram_sum_IGE.txt' >Download</a></td></tr>" >> $output - -echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf' download='baseline.pdf' >Download</a></td></tr>" >> $output -echo "<tr><td>Baseline data</td><td><a href='baseline.txt' download='baseline.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Baseline IGA PDF</td><td><a href='baseline_IGA.pdf' download='baseline_IGA.pdf' >Download</a></td></tr>" >> $output -echo "<tr><td>Baseline IGA data</td><td><a href='baseline_IGA.txt' download='baseline_IGA.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Baseline IGG PDF</td><td><a href='baseline_IGG.pdf' download='baseline_IGG.pdf' >Download</a></td></tr>" >> $output -echo "<tr><td>Baseline IGG data</td><td><a href='baseline_IGG.txt' download='baseline_IGG.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Baseline IGM PDF</td><td><a href='baseline_IGM.pdf' download='baseline_IGM.pdf' >Download</a></td></tr>" >> $output -echo "<tr><td>Baseline IGM data</td><td><a href='baseline_IGM.txt' download='baseline_IGM.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>Baseline IGE PDF</td><td><a href='baseline_IGE.pdf' download='baseline_IGE.pdf' >Download</a></td></tr>" >> $output -echo "<tr><td>Baseline IGE data</td><td><a href='baseline_IGE.txt' download='baseline_IGE.txt' >Download</a></td></tr>" >> $output - -echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>CSR</td></tr>" >> $output -echo "<tr><td>The data for the IGA subclass distribution plot</td><td><a href='IGA_pie.txt' download='IGA_pie.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the IGG subclass distribution plot</td><td><a href='IGG_pie.txt' download='IGG_pie.txt' >Download</a></td></tr>" >> $output - - -echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Clonal Relation</td></tr>" >> $output -echo "<tr><td>Sequence overlap between subclasses</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just just the first sequence of a clone</td><td><a href='new_IMGT_first_seq_of_clone.txz' download='new_IMGT_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output - -echo "<tr><td>The Change-O DB file with defined clones of IGA</td><td><a href='change_o/change-o-db-defined_clones-IGA.txt' download='change_o/change-o-db-defined_clones-IGA.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB defined clones summary file of IGA</td><td><a href='change_o/change-o-defined_clones-summary-IGA.txt' download='change_o/change-o-defined_clones-summary-IGA.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGA)</td><td><a href='new_IMGT_IGA_first_seq_of_clone.txz' download='new_IMGT_IGA_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output - -echo "<tr><td>The Change-O DB file with defined clones of IGG</td><td><a href='change_o/change-o-db-defined_clones-IGG.txt' download='change_o/change-o-db-defined_clones-IGG.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB defined clones summary file of IGG</td><td><a href='change_o/change-o-defined_clones-summary-IGG.txt' download='change_o/change-o-defined_clones-summary-IGG.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGG)</td><td><a href='new_IMGT_IGG_first_seq_of_clone.txz' download='new_IMGT_IGG_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output - -echo "<tr><td>The Change-O DB file with defined clones of IGM</td><td><a href='change_o/change-o-db-defined_clones-IGM.txt' download='change_o/change-o-db-defined_clones-IGM.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB defined clones summary file of IGM</td><td><a href='change_o/change-o-defined_clones-summary-IGM.txt' download='change_o/change-o-defined_clones-summary-IGM.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGM)</td><td><a href='new_IMGT_IGM_first_seq_of_clone.txz' download='new_IMGT_IGM_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output - -echo "<tr><td>The Change-O DB file with defined clones of IGE</td><td><a href='change_o/change-o-db-defined_clones-IGE.txt' download='change_o/change-o-db-defined_clones-IGE.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The Change-O DB defined clones summary file of IGE</td><td><a href='change_o/change-o-defined_clones-summary-IGE.txt' download='change_o/change-o-defined_clones-summary-IGE.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGE)</td><td><a href='new_IMGT_IGE_first_seq_of_clone.txz' download='new_IMGT_IGE_first_seq_of_clone.txz' >Download</a></td></tr>" >> $output - -echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Filtered IMGT output files</td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz' download='new_IMGT.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGA sequences</td><td><a href='new_IMGT_IGA.txz' download='new_IMGT_IGA.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGA1 sequences</td><td><a href='new_IMGT_IGA1.txz' download='new_IMGT_IGA1.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGA2 sequences</td><td><a href='new_IMGT_IGA2.txz' download='new_IMGT_IGA2.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGG sequences</td><td><a href='new_IMGT_IGG.txz' download='new_IMGT_IGG.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGG1 sequences</td><td><a href='new_IMGT_IGG1.txz' download='new_IMGT_IGG1.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGG2 sequences</td><td><a href='new_IMGT_IGG2.txz' download='new_IMGT_IGG2.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGG3 sequences</td><td><a href='new_IMGT_IGG3.txz' download='new_IMGT_IGG3.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGG4 sequences</td><td><a href='new_IMGT_IGG4.txz' download='new_IMGT_IGG4.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGM sequences</td><td><a href='new_IMGT_IGM.txz' download='new_IMGT_IGM.txz' >Download</a></td></tr>" >> $output -echo "<tr><td>An IMGT archive with just the matched and filtered IGE sequences</td><td><a href='new_IMGT_IGE.txz' download='new_IMGT_IGE.txz' >Download</a></td></tr>" >> $output - -echo "</table>" >> $output - -echo "<br />" >> $output -cat $dir/shm_downloads.htm >> $output - -echo "</div>" >> $output #downloads tab end - -echo "</div>" >> $output #tabs end - -echo "</html>" >> $output - - -echo "---------------- naive_output.r ----------------" -echo "---------------- naive_output.r ----------------<br />" >> $log - -if [[ "$naive_output" == "yes" ]] -then - echo "output naive output" - if [[ "${class_filter}" == "101_101" ]] - then - echo "copy new_IMGT.txz to ${naive_output_all}" - cp $outdir/new_IMGT.txz ${naive_output_all} - else - echo "copy for classes" - cp $outdir/new_IMGT_IGA.txz ${naive_output_ca} - cp $outdir/new_IMGT_IGG.txz ${naive_output_cg} - cp $outdir/new_IMGT_IGM.txz ${naive_output_cm} - cp $outdir/new_IMGT_IGE.txz ${naive_output_ce} - fi -fi - -echo "</table>" >> $outdir/base_overview.html - -mv $log $outdir/log.html - -echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $log -echo "<table border = 1>" >> $log -echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> $log -tIFS="$TMP" -IFS=$'\t' -while read step seq perc - do - echo "<tr>" >> $log - echo "<td>$step</td>" >> $log - echo "<td>$seq</td>" >> $log - echo "<td>${perc}%</td>" >> $log - echo "</tr>" >> $log -done < $outdir/filtering_steps.txt -echo "</table>" >> $log -echo "<br />" >> $log -cat $dir/shm_first.htm >> $log -echo "</center></html>" >> $log - -IFS="$tIFS" - - -echo "---------------- Done! ----------------" -echo "---------------- Done! ----------------<br />" >> $outdir/log.html - - - - - - - - - - - - - - - - - - - - -