# HG changeset patch
# User davidvanzessen
# Date 1481538157 18000
# Node ID 5ffd52fc35c4a558b8b4848e0cbe44342651c7d1
# Parent beaa487ecf43b184ea56eb973266c7f7caa4653e
# Uploaded
#
# diff -r beaa487ecf43 -r 5ffd52fc35c4 aa_histogram.r
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/aa_histogram.r Mon Dec 12 05:22:37 2016 -0500
# @@ -0,0 +1,63 @@

# aa_histogram.r
#
# Plots, per gene, the amino-acid mutation frequency at every position and
# writes the underlying tables next to the plot.
#
# Command-line arguments (positional):
#   1. tab-separated file with per-sequence mutation counts
#   2. tab-separated file with per-sequence "absent AA" counts
#   3. comma-separated list of gene name prefixes
#   4. output directory
#
# Both input tables must have a `best_match` column; the first 2 columns of
# the mutation table and the first 4 columns of the absent-AA table are
# treated as non-position columns and excluded from the column sums.

library(ggplot2)

args <- commandArgs(trailingOnly = TRUE)

mutations.by.id.file <- args[1]
absent.aa.by.id.file <- args[2]
# The extra "" entry selects every row whose best_match is not "unmatched".
genes <- c(strsplit(args[3], ",")[[1]], "")
outdir <- args[4]

# Region bars drawn below the x axis. Boundaries and label positions are the
# hard-coded AA positions of the original script.
regions <- data.frame(
  label  = c("FR1", "CDR1", "FR2", "CDR2", "FR3"),
  start  = c(0.5, 26.5, 38.5, 55.5, 65.5),
  end    = c(26.5, 38.5, 55.5, 65.5, 104.5),
  bar.y  = c(-0.05, -0.07, -0.05, -0.07, -0.05),
  text.x = c(13, 32.5, 47, 60.5, 85),
  text.y = c(-0.1, -0.15, -0.1, -0.15, -0.1),
  colour = c("darkgreen", "darkblue", "darkgreen", "darkblue", "darkgreen"),
  stringsAsFactors = FALSE
)

print("---------------- read input ----------------")

mutations.by.id <- read.table(mutations.by.id.file, sep = "\t", fill = TRUE, header = TRUE, quote = "")
absent.aa.by.id <- read.table(absent.aa.by.id.file, sep = "\t", fill = TRUE, header = TRUE, quote = "")

for (gene in genes) {
  if (gene == "") {
    keep.mutations <- !grepl("unmatched", mutations.by.id$best_match)
    keep.absent <- !grepl("unmatched", absent.aa.by.id$best_match)
  } else {
    gene.pattern <- paste0("^", gene)
    keep.mutations <- grepl(gene.pattern, mutations.by.id$best_match)
    keep.absent <- grepl(gene.pattern, absent.aa.by.id$best_match)
  }
  mutations.by.id.gene <- mutations.by.id[keep.mutations, ]
  absent.aa.by.id.gene <- absent.aa.by.id[keep.absent, ]

  print(paste("nrow", gene, nrow(absent.aa.by.id.gene)))
  if (nrow(mutations.by.id.gene) == 0) {
    next
  }

  mutations.at.position <- colSums(mutations.by.id.gene[, -c(1, 2)])
  aa.at.position <- colSums(absent.aa.by.id.gene[, -c(1, 2, 3, 4)])

  dat_freq <- mutations.at.position / aa.at.position
  # 0/0 gives NaN and x/0 gives Inf; is.na() only catches the former, so use
  # is.finite() to zero every undefined frequency before plotting.
  dat_freq[!is.finite(dat_freq)] <- 0
  dat_dt <- data.frame(i = seq_along(dat_freq), freq = dat_freq)

  print("---------------- plot ----------------")

  m <- ggplot(dat_dt, aes(x = i, y = freq)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size = 13, colour = "black"))
  m <- m +
    geom_bar(stat = "identity", colour = "black", fill = "darkgrey", alpha = 0.8) +
    scale_x_continuous(breaks = dat_dt$i, labels = dat_dt$i)
  # One bar segment plus one text label per region, added in region order.
  for (r in seq_len(nrow(regions))) {
    m <- m +
      annotate("segment", x = regions$start[r], y = regions$bar.y[r],
               xend = regions$end[r], yend = regions$bar.y[r],
               colour = regions$colour[r], size = 1) +
      annotate("text", x = regions$text.x[r], y = regions$text.y[r], label = regions$label[r])
  }
  m <- m + expand_limits(y = c(-0.1, 1)) + xlab("AA position") + ylab("Frequency") +
    ggtitle(paste(gene, "AA mutation frequency"))
  m <- m + theme(panel.background = element_rect(fill = "white", colour = "black"),
                 panel.grid.major.y = element_line(colour = "black"),
                 panel.grid.major.x = element_blank())
  #m = m + scale_colour_manual(values=c("black"))

  print("---------------- write/print ----------------")

  dat.sums <- data.frame(index = seq_along(mutations.at.position),
                         mutations.at.position = mutations.at.position,
                         aa.at.position = aa.at.position)

  write.table(dat.sums, file.path(outdir, paste0("aa_histogram_sum_", gene, ".txt")),
              sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
  write.table(mutations.by.id.gene, file.path(outdir, paste0("aa_histogram_count_", gene, ".txt")),
              sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
  write.table(absent.aa.by.id.gene, file.path(outdir, paste0("aa_histogram_absent_", gene, ".txt")),
              sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
  write.table(dat_dt, file.path(outdir, paste0("aa_histogram_", gene, ".txt")),
              sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)

  png(filename = file.path(outdir, paste0("aa_histogram_", gene, ".png")), width = 1280, height = 720)
  print(m)
  dev.off()
}

# diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/Baseline_Functions.r
# ---
# /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/baseline/Baseline_Functions.r Mon Dec 12 05:22:37 2016 -0500
# @@ -0,0 +1,2287 @@
#########################################################################################
# License Agreement
#
# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE
# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER
# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE
# OR COPYRIGHT LAW IS PROHIBITED.
#
# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE
# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED
# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN
# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS.
#
# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences
# Coded by: Mohamed Uduman & Gur Yaari
# Copyright 2012 Kleinstein Lab
# Version: 1.3 (01/23/2014)
#########################################################################################

# Global variables

FILTER_BY_MUTATIONS <- 1000

# Nucleotides
NUCLEOTIDES <- c("A", "C", "G", "T")

# Amino acids, one per codon; each row below covers the 16 codons that share
# a first nucleotide (T, C, A, G) and lines up with the codon names that follow.
AMINO_ACIDS <- c(
  "F", "F", "L", "L", "S", "S", "S", "S", "Y", "Y", "*", "*", "C", "C", "*", "W",
  "L", "L", "L", "L", "P", "P", "P", "P", "H", "H", "Q", "Q", "R", "R", "R", "R",
  "I", "I", "I", "M", "T", "T", "T", "T", "N", "N", "K", "K", "S", "S", "R", "R",
  "V", "V", "V", "V", "A", "A", "A", "A", "D", "D", "E", "E", "G", "G", "G", "G"
)
names(AMINO_ACIDS) <- c(
  "TTT", "TTC", "TTA", "TTG", "TCT", "TCC", "TCA", "TCG", "TAT", "TAC", "TAA", "TAG", "TGT", "TGC", "TGA", "TGG",
  "CTT", "CTC", "CTA", "CTG", "CCT", "CCC", "CCA", "CCG", "CAT", "CAC", "CAA", "CAG", "CGT", "CGC", "CGA", "CGG",
  "ATT", "ATC", "ATA", "ATG", "ACT", "ACC", "ACA", "ACG", "AAT", "AAC", "AAA", "AAG", "AGT", "AGC", "AGA", "AGG",
  "GTT", "GTC", "GTA", "GTG", "GCT", "GCC", "GCA", "GCG", "GAT", "GAC", "GAA", "GAG", "GGT", "GGC", "GGA", "GGG"
)

# Amino acid traits, listed in the order of sort(unique(AMINO_ACIDS)):
# "*" "A" "C" "D" "E" "F" "G" "H" "I" "K" "L" "M" "N" "P" "Q" "R" "S" "T" "V" "W" "Y"
# B = "Hydrophobic/Buried"  N = "Intermediate/Neutral"  S = "Hydrophilic/Surface"
TRAITS_AMINO_ACIDS_CHOTHIA98 <- c(
  "*", "N", "B", "S", "S", "B", "N", "N", "B", "S", "B",
  "B", "S", "N", "S", "S", "N", "N", "B", "B", "N"
)
names(TRAITS_AMINO_ACIDS_CHOTHIA98) <- sort(unique(AMINO_ACIDS))
TRAITS_AMINO_ACIDS <- array(NA, 21)

# Codon table: 12 rows (the single-nucleotide neighbours of a codon) by 64
# codons; filled in later by computeCodonTable().
CODON_TABLE <- as.data.frame(matrix(NA, ncol = 64, nrow = 12))

# Substitution Model: Smith DS et al. 1996
substitution_Literature_Mouse <- matrix(
  c(0,           0.156222928, 0.601501588, 0.242275484,
    0.172506739, 0,           0.241239892, 0.586253369,
    0.54636291,  0.255795364, 0,           0.197841727,
    0.290240811, 0.467680608, 0.24207858,  0),
  nrow = 4, byrow = TRUE, dimnames = list(NUCLEOTIDES, NUCLEOTIDES)
)
substitution_Flu_Human <- matrix(
  c(0,         0.2795596, 0.5026927, 0.2177477,
    0.1693210, 0,         0.3264723, 0.5042067,
    0.4983549, 0.3328321, 0,         0.1688130,
    0.2021079, 0.4696077, 0.3282844, 0),
  4, 4, byrow = TRUE, dimnames = list(NUCLEOTIDES, NUCLEOTIDES)
)
substitution_Flu25_Human <- matrix(
  c(0,         0.2580641, 0.5163685, 0.2255674,
    0.1541125, 0,         0.3210224, 0.5248651,
    0.5239281, 0.3101292, 0,         0.1659427,
    0.1997207, 0.4579444, 0.3423350, 0),
  4, 4, byrow = TRUE, dimnames = list(NUCLEOTIDES, NUCLEOTIDES)
)
# Loaded from the working directory; the functions in this file that handle
# substitution model 5 read an object named FiveS_Substitution.
load("FiveS_Substitution.RData")

# Mutability Models: Shapiro GS et al.
# 2002
triMutability_Literature_Human <- matrix(c(
  0.24, 1.2, 0.96, 0.43, 2.14, 2, 1.11, 1.9, 0.85, 1.83, 2.36, 1.31,
  0.82, 0.52, 0.89, 1.33, 1.4, 0.82, 1.83, 0.73, 1.83, 1.62, 1.53, 0.57,
  0.92, 0.42, 0.42, 1.47, 3.44, 2.58, 1.18, 0.47, 0.39, 1.12, 1.8, 0.68,
  0.47, 2.19, 2.35, 2.19, 1.05, 1.84, 1.26, 0.28, 0.98, 2.37, 0.66, 1.58,
  0.67, 0.92, 1.76, 0.83, 0.97, 0.56, 0.75, 0.62, 2.26, 0.62, 0.74, 1.11,
  1.16, 0.61, 0.88, 0.67, 0.37, 0.07, 1.08, 0.46, 0.31, 0.94, 0.62, 0.57,
  0.29, NA, 1.44, 0.46, 0.69, 0.57, 0.24, 0.37, 1.1, 0.99, 1.39, 0.6,
  2.26, 1.24, 1.36, 0.52, 0.33, 0.26, 1.25, 0.37, 0.58, 1.03, 1.2, 0.34,
  0.49, 0.33, 2.62, 0.16, 0.4, 0.16, 0.35, 0.75, 1.85, 0.94, 1.61, 0.85,
  2.09, 1.39, 0.3, 0.52, 1.33, 0.29, 0.51, 0.26, 0.51, 3.83, 2.01, 0.71,
  0.58, 0.62, 1.07, 0.28, 1.2, 0.74, 0.25, 0.59, 1.09, 0.91, 1.36, 0.45,
  2.89, 1.27, 3.7, 0.69, 0.28, 0.41, 1.17, 0.56, 0.93, 3.41, 1, 1,
  NA, 5.9, 0.74, 2.51, 2.24, 2.24, 1.95, 3.32, 2.34, 1.3, 2.3, 1,
  0.66, 0.73, 0.93, 0.41, 0.65, 0.89, 0.65, 0.32, NA, 0.43, 0.85, 0.43,
  0.31, 0.31, 0.23, 0.29, 0.57, 0.71, 0.48, 0.44, 0.76, 0.51, 1.7, 0.85,
  0.74, 2.23, 2.08, 1.16, 0.51, 0.51, 1, 0.5, NA, NA, 0.71, 2.14
), nrow = 64, byrow = TRUE)
triMutability_Literature_Mouse <- matrix(c(
  1.31, 1.35, 1.42, 1.18, 2.02, 2.02, 1.02, 1.61, 1.99, 1.42, 2.01, 1.03,
  2.02, 0.97, 0.53, 0.71, 1.19, 0.83, 0.96, 0.96, 0, 1.7, 2.22, 0.59,
  1.24, 1.07, 0.51, 1.68, 3.36, 3.36, 1.14, 0.29, 0.33, 0.9, 1.11, 0.63,
  1.08, 2.07, 2.27, 1.74, 0.22, 1.19, 2.37, 1.15, 1.15, 1.56, 0.81, 0.34,
  0.87, 0.79, 2.13, 0.49, 0.85, 0.97, 0.36, 0.82, 0.66, 0.63, 1.15, 0.94,
  0.85, 0.25, 0.93, 1.19, 0.4, 0.2, 0.44, 0.44, 0.88, 1.06, 0.77, 0.39,
  0, 0, 0, 0, 0, 0, 0.43, 0.43, 0.86, 0.59, 0.59, 0,
  1.18, 0.86, 2.9, 1.66, 0.4, 0.2, 1.54, 0.43, 0.69, 1.71, 0.68, 0.55,
  0.91, 0.7, 1.71, 0.09, 0.27, 0.63, 0.2, 0.45, 1.01, 1.63, 0.96, 1.48,
  2.18, 1.2, 1.31, 0.66, 2.13, 0.49, 0, 0, 0, 2.97, 2.8, 0.79,
  0.4, 0.5, 0.4, 0.11, 1.68, 0.42, 0.13, 0.44, 0.93, 0.71, 1.11, 1.19,
  2.71, 1.08, 3.43, 0.4, 0.67, 0.47, 1.02, 0.14, 1.56, 1.98, 0.53, 0.33,
  0.63, 2.06, 1.77, 1.46, 3.74, 2.93, 2.1, 2.18, 0.78, 0.73, 2.93, 0.63,
  0.57, 0.17, 0.85, 0.52, 0.31, 0.31, 0, 0, 0.51, 0.29, 0.83, 0.54,
  0.28, 0.47, 0.9, 0.99, 1.24, 2.47, 0.73, 0.23, 1.13, 0.24, 2.12, 0.24,
  0.33, 0.83, 1.41, 0.62, 0.28, 0.35, 0.77, 0.17, 0.72, 0.58, 0.45, 0.41
), nrow = 64, byrow = TRUE)
# Row names for the 64 x 3 tri-nucleotide mutability tables above.
triMutability_Names <- c(
  "AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT",
  "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT",
  "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT",
  "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"
)
# Loaded from the working directory; initializeMutabilityMatrix() below reads
# an object named FiveS_Mutability for mutability model 5.
load("FiveS_Mutability.RData")

# Functions

# Translate codon to amino acid (lookup by codon name in AMINO_ACIDS;
# unknown codons give NA).
translateCodonToAminoAcid <- function(Codon) {
  return(AMINO_ACIDS[Codon])
}

# Translate amino acid to trait change (lookup in the currently active
# TRAITS_AMINO_ACIDS table).
translateAminoAcidToTraitChange <- function(AminoAcid) {
  return(TRAITS_AMINO_ACIDS[AminoAcid])
}

# Initialize Amino Acid Trait Changes.
# Sets the global TRAITS_AMINO_ACIDS either from a tab-separated file or,
# when no file is given, from TRAITS_AMINO_ACIDS_CHOTHIA98.
# traitChangeModel and species are accepted but not used here.
# A read failure prints an "Error|..." line and quits the R session.
initializeTraitChange <- function(traitChangeModel = 1, species = 1, traitChangeFileName = NULL) {
  if (is.null(traitChangeFileName)) {
    traitChange <- TRAITS_AMINO_ACIDS_CHOTHIA98
  } else {
    tryCatch(
      traitChange <- read.delim(traitChangeFileName, sep = "\t", header = TRUE),
      error = function(ex) {
        cat("Error|Error reading trait changes. Please check file name/path and format.\n")
        q()
      }
    )
  }
  TRAITS_AMINO_ACIDS <<- traitChange
}

# Read in formatted nucleotide substitution matrix.
#   substitutionModel: 0 = uniform, 1 = mouse literature, 2 = human flu,
#                      3 = human flu25, 5 = FiveS_Substitution (returned as is)
#   species:           accepted but not used here
#   subsMatFileName:   optional tab-separated 4 x 4 matrix; rows are
#                      re-normalised to sum to 1 when they do not already
# Returns a 6 x 4 matrix (rows A, C, G, T, N, -; the last two rows are NA),
# except for model 5. A read failure prints "Error|..." and quits the session.
initializeSubstitutionMatrix <- function(substitutionModel, species, subsMatFileName = NULL) {
  subsMat <- NULL
  if (!is.null(subsMatFileName)) {
    tryCatch(
      subsMat <- read.delim(subsMatFileName, sep = "\t", header = TRUE),
      error = function(ex) {
        cat("Error|Error reading substitution matrix. Please check file name/path and format.\n")
        q()
      }
    )
    if (sum(apply(subsMat, 1, sum) == 1) != 4) {
      subsMat <- t(apply(subsMat, 1, function(x) x / sum(x)))
    }
  } else {
    if (substitutionModel == 1) subsMat <- substitution_Literature_Mouse
    if (substitutionModel == 2) subsMat <- substitution_Flu_Human
    if (substitutionModel == 3) subsMat <- substitution_Flu25_Human
  }

  # Model 0 overrides whatever was selected above: every off-diagonal
  # substitution is equally likely.
  if (substitutionModel == 0) {
    subsMat <- matrix(1 / 3, 4, 4)
    diag(subsMat) <- 0
  }

  if (substitutionModel == 5) {
    return(FiveS_Substitution)
  }

  # Without this check an unsupported model number surfaced as an opaque
  # "object 'subsMat' not found" error further down.
  if (is.null(subsMat)) {
    stop("Unsupported substitutionModel: ", substitutionModel, call. = FALSE)
  }

  NUCLEOTIDESN <- c(NUCLEOTIDES, "N", "-")
  subsMat <- rbind(subsMat, rep(NA, 4), rep(NA, 4))
  return(matrix(data.matrix(subsMat), 6, 4, dimnames = list(NUCLEOTIDESN, NUCLEOTIDES)))
}


# Read in formatted Mutability file.
#   mutabilityModel:       0 = flat (all ones), 5 = FiveS_Mutability (returned
#                          as is); any other value keeps the selected table
#   species:               2 = mouse literature table, otherwise human
#   mutabilityMatFileName: optional tab-separated table replacing the
#                          literature values
# Returns a 64 x 3 matrix with rows named by triMutability_Names, except for
# model 5. A read failure prints "Error|..." and quits the session.
initializeMutabilityMatrix <- function(mutabilityModel = 1, species = 1, mutabilityMatFileName = NULL) {
  if (!is.null(mutabilityMatFileName)) {
    tryCatch(
      mutabilityMat <- read.delim(mutabilityMatFileName, sep = "\t", header = TRUE),
      error = function(ex) {
        cat("Error|Error reading mutability matrix. Please check file name/path and format.\n")
        q()
      }
    )
  } else {
    mutabilityMat <- triMutability_Literature_Human
    if (species == 2) mutabilityMat <- triMutability_Literature_Mouse
  }

  if (mutabilityModel == 0) {
    mutabilityMat <- matrix(1, 64, 3)
  }

  if (mutabilityModel == 5) {
    return(FiveS_Mutability)
  }
  return(matrix(data.matrix(mutabilityMat), 64, 3, dimnames = list(triMutability_Names, 1:3)))
}

# Read FASTA file formats
# Modified from read.fasta from the seqinR package.
#
# Differences from a plain FASTA read that are visible in this code:
#   * if the file has no ">>>" group header, one is prepended
#     (">>>All sequences combined");
#   * a placeholder sequence line is inserted after every ">>>" header so the
#     header survives as a record of its own;
#   * a header that is directly followed by another header is dropped;
#   * record names keep the whole header line minus its first character.
# Returns a list of sequences named by header (or the bare sequence strings
# when seqonly = TRUE).
baseline.read.fasta <-
  function (file = system.file("sequences/sample.fasta", package = "seqinr"),
            seqtype = c("DNA", "AA"), as.string = FALSE, forceDNAtolower = TRUE,
            set.attributes = TRUE, legacy.mode = TRUE, seqonly = FALSE,
            strip.desc = FALSE, sizeof.longlong = .Machine$sizeof.longlong,
            endian = .Platform$endian, apply.mask = TRUE)
{
  seqtype <- match.arg(seqtype)

  rawLines <- readLines(file)

  # Legacy FASTA allows ";" comment lines.
  if (legacy.mode) {
    comments <- grep("^;", rawLines)
    if (length(comments) > 0) {
      rawLines <- rawLines[-comments]
    }
  }

  if (!any(substr(rawLines, 1L, 3L) == ">>>")) {
    rawLines <- c(">>>All sequences combined", rawLines)
  }
  ind_groups <- which(substr(rawLines, 1L, 3L) == ">>>")

  # Make room for one placeholder line right after each group header; the
  # k-th header has already been shifted down by k - 1 earlier insertions.
  lines <- rep("BLA", length(ind_groups) + length(rawLines))
  id <- ind_groups + seq_along(ind_groups)
  lines[id] <- "THIS IS A FAKE SEQUENCE"
  lines[-id] <- rawLines
  rm(rawLines)

  ind <- which(substr(lines, 1L, 1L) == ">")
  nseq <- length(ind)
  if (nseq == 0) {
    stop("no line starting with a > character found")
  }
  start <- ind + 1
  end <- ind - 1

  # Drop headers immediately followed by another header (no sequence lines),
  # repeating until no such header is left.
  while (any(ind %in% end)) {
    ind <- ind[!(ind %in% end)]
    nseq <- length(ind)
    if (nseq == 0) {
      stop("no line starting with a > character found")
    }
    start <- ind + 1
    end <- ind - 1
  }

  end <- c(end[-1], length(lines))
  sequences <- lapply(seq_len(nseq), function(i) paste(lines[start[i]:end[i]], collapse = ""))
  if (seqonly) {
    return(sequences)
  }
  nomseq <- lapply(seq_len(nseq), function(i) {
    #firstword <- strsplit(lines[ind[i]], " ")[[1]][1]
    substr(lines[ind[i]], 2, nchar(lines[ind[i]]))
  })
  if (seqtype == "DNA") {
    # "." is treated as a gap and rewritten to "-".
    if (forceDNAtolower) {
      sequences <- as.list(tolower(chartr(".", "-", sequences)))
    } else {
      sequences <- as.list(toupper(chartr(".", "-", sequences)))
    }
  }
  if (as.string == FALSE) {
    sequences <- lapply(sequences, s2c)
  }
  if (set.attributes) {
    seqClass <- switch(seqtype, AA = "SeqFastaAA", DNA = "SeqFastadna")
    for (i in seq_len(nseq)) {
      Annot <- lines[ind[i]]
      if (strip.desc) {
        Annot <- substr(Annot, 2L, nchar(Annot))
      }
      attributes(sequences[[i]]) <- list(name = nomseq[[i]], Annot = Annot, class = seqClass)
    }
  }
  names(sequences) <- nomseq
  return(sequences)
}


# Replaces non FASTA characters in input files with N
replaceNonFASTAChars <- function(inSeq = "ACGTN-AApA") {
  gsub('[^ACGTNacgt[:punct:]-[:punct:].]', 'N', inSeq, perl = TRUE)
}

# Find the germlines in the FASTA list: IDs that start with exactly one ">".
# The result is named by the IDs themselves (expandTillNext relies on that).
germlinesInFile <- function(seqIDs) {
  firstChar <- vapply(seqIDs, function(x) substr(x, 1, 1), character(1))
  secondChar <- vapply(seqIDs, function(x) substr(x, 2, 2), character(1))
  return(firstChar == ">" & secondChar != ">")
}

# Find the groups in the FASTA list: IDs that start with ">>".
# The result is named by the IDs themselves.
groupsInFile <- function(seqIDs) {
  vapply(seqIDs, function(x) substr(x, 1, 2), character(1)) == ">>"
}

# In the process of finding germlines/groups, expand from the start to end of the group.
# vecPosToID is a logical vector named by sequence ID. Every position gets the
# running number of the flagged entry that governs it (positions before the
# second flagged entry belong to run 1); the result is named by that entry's
# ID with ">" characters removed.
expandTillNext <- function(vecPosToID) {
  IDs <- names(vecPosToID)
  posOfInterests <- which(vecPosToID)

  expandedIDNames <- gsub(">", "", IDs[posOfInterests])
  startIndexes <- c(1, posOfInterests[-1])
  stopIndexes <- c(posOfInterests[-1] - 1, length(IDs))
  runLengths <- stopIndexes - startIndexes + 1
  runs <- seq_along(startIndexes)

  expandedID <- rep(runs, times = runLengths)
  names(expandedID) <- rep(expandedIDNames[runs], times = runLengths)
  return(expandedID)
}

# Process FASTA (list) to return a matrix [input, germline].
# Returns a list with
#   matInput:  character matrix, one row per input sequence, columns
#              "Input" and "Germline"
#   germlines: germline run number for each input sequence
#   groups:    group run number for each input sequence
#   conservationDefinition: records pulled out as conservation rows, or NA
processInputAdvanced <- function(inputFASTA) {

  seqIDs <- names(inputFASTA)
  posGermlines1 <- germlinesInFile(seqIDs)
  consDef <- NA

  glPositionsSum <- cumsum(posGermlines1)
  glPositions <- table(glPositionsSum)
  # Find the position of the conservation row: a germline run of length one
  # is taken to be followed by the conservation definition.
  consDefPos <- as.numeric(names(glPositions[names(glPositions) != 0 & glPositions == 1])) + 1
  if (length(consDefPos) > 0) {
    consDefID <- match(consDefPos, glPositionsSum)
    # The conservation rows need to be pulled out and stored separately
    consDef <- inputFASTA[consDefID]
    inputFASTA <- inputFASTA[-consDefID]

    seqIDs <- names(inputFASTA)
    posGermlines1 <- germlinesInFile(seqIDs)
  }
  numbSeqs <- length(seqIDs)
  posGroups1 <- groupsInFile(seqIDs)

  posGroups <- expandTillNext(posGroups1)
  posGermlines <- expandTillNext(posGermlines1)
  posGermlines[posGroups1] <- 0
  names(posGermlines)[posGroups1] <- names(posGroups)[posGroups1]
  posInput <- rep(TRUE, numbSeqs)
  posInput[posGroups1 | posGermlines1] <- FALSE

  matInput <- matrix(NA, nrow = sum(posInput), ncol = 2)
  rownames(matInput) <- seqIDs[posInput]
  colnames(matInput) <- c("Input", "Germline")

  vecInputFASTA <- unlist(inputFASTA)
  matInput[, 1] <- vecInputFASTA[posInput]
  matInput[, 2] <- vecInputFASTA[ which( names(inputFASTA) %in% paste(">", names(posGermlines)[posInput], sep = "") )[ posGermlines[posInput] ] ]

  germlines <- posGermlines[posInput]
  groups <- posGroups[posInput]

  return(list("matInput" = matInput, "germlines" = germlines, "groups" = groups, "conservationDefinition" = consDef))
}


# Replace leading and trailing dashes in the
sequence + replaceLeadingTrailingDashes <- function(x,readEnd){ + iiGap = unlist(gregexpr("-",x[1])) + ggGap = unlist(gregexpr("-",x[2])) + #posToChange = intersect(iiGap,ggGap) + + + seqIn = replaceLeadingTrailingDashesHelper(x[1]) + seqGL = replaceLeadingTrailingDashesHelper(x[2]) + seqTemplate = rep('N',readEnd) + seqIn <- c(seqIn,seqTemplate[(length(seqIn)+1):readEnd]) + seqGL <- c(seqGL,seqTemplate[(length(seqGL)+1):readEnd]) +# if(posToChange!=-1){ +# seqIn[posToChange] = "-" +# seqGL[posToChange] = "-" +# } + + seqIn = c2s(seqIn[1:readEnd]) + seqGL = c2s(seqGL[1:readEnd]) + + lenGL = nchar(seqGL) + if(lenGL seqLen ) + trimmedSeq = substr(seqToTrim,1, ( (getCodonPos(seqLen)[1])-1 ) ) + + return(trimmedSeq) + } + + # Given a nuclotide position, returns the pos of the 3 nucs that made the codon + # e.g. nuc 86 is part of nucs 85,86,87 + getCodonPos <- function(nucPos){ + codonNum = (ceiling(nucPos/3))*3 + return( (codonNum-2):codonNum) + } + + # Given a nuclotide position, returns the codon number + # e.g. 
# nuc 86 = codon 29
getCodonNumb <- function(nucPos) {
  return(ceiling(nucPos / 3))
}

# Given a codon, returns all the nuc positions that make the codon
getCodonNucs <- function(codonNumb) {
  getCodonPos(codonNumb * 3)
}

# Fills the global CODON_TABLE (only when testID <= 4).
# Each of the 64 codons gets a 12-entry column: the mutation type of every
# single-nucleotide change, in the order produced by permutateAllCodon().
# Codons containing "N" or "-" get an all-NA column. The table is stored back
# into the global as a matrix.
computeCodonTable <- function(testID = 1) {

  # All triplets over `chars`, first position varying slowest.
  allTriplets <- function(chars) {
    grid <- expand.grid(third = chars, second = chars, first = chars, stringsAsFactors = FALSE)
    paste(grid$first, grid$second, grid$third, sep = "")
  }

  if (testID <= 4) {
    # Pre-compute every codon
    codons <- allTriplets(NUCLEOTIDES)
    for (intCounter in seq_along(codons)) {
      codon <- codons[intCounter]
      colnames(CODON_TABLE)[intCounter] <- codon
      CODON_TABLE[, codon] <- mutationTypeOptimized(cbind(permutateAllCodon(codon), rep(codon, 12)))
    }

    # Ambiguous codons: first everything containing "N" (gaps allowed), then
    # everything containing "-" but no "N". Column order matches the original
    # nested loops.
    withN <- allTriplets(c("N", "A", "C", "G", "T", "-"))
    withN <- withN[grepl("N", withN, fixed = TRUE)]
    withGap <- allTriplets(c("-", "A", "C", "G", "T"))
    withGap <- withGap[grepl("-", withGap, fixed = TRUE)]
    for (codon in c(withN, withGap)) {
      CODON_TABLE[, codon] <- rep(NA, 12)
    }

    CODON_TABLE <<- as.matrix(CODON_TABLE)
  }
}

# Collapse the sequences of one clone into a single consensus-like sequence.
#   vecInputSeqs:    character vector of input sequences
#   glSeq:           germline sequence
#   readEnd:         number of positions to consider
#   nonTerminalOnly: 0 = any non-germline nucleotide may be picked at a
#                    conflicting position; otherwise only nucleotides seen in
#                    more than one sequence are eligible
# Returns list(c(collapsedSequence, glSeq), errorFlag); errorFlag is TRUE when
# an "N" took part in a conflicting position. Conflicts are resolved with
# sample(), so the result depends on the RNG state.
collapseClone <- function(vecInputSeqs, glSeq, readEnd, nonTerminalOnly = 0) {
  vecInputSeqs <- unique(vecInputSeqs)
  if (length(vecInputSeqs) == 1) {
    return(list(c(vecInputSeqs, glSeq), FALSE))
  }

  # One column per sequence, one row per position.
  charInputSeqs <- sapply(vecInputSeqs, function(x) {
    s2c(x)[1:readEnd]
  })
  charGLSeq <- s2c(glSeq)

  matClone <- sapply(1:readEnd, function(i) {
    nucsHere <- charInputSeqs[i, ]
    posNucs <- unique(nucsHere)
    posGL <- charGLSeq[i]
    error <- FALSE

    # Germline gap and nothing but gaps/N in the inputs: keep the gap.
    if (posGL == "-" && sum(!(posNucs %in% c("-", "N"))) == 0) {
      return(c("-", error))
    }
    if (length(posNucs) == 1) {
      return(c(posNucs[1], error))
    }

    if ("N" %in% posNucs) {
      error <- TRUE
    }
    # Every non-N nucleotide agrees with the germline.
    if (sum(!posNucs[posNucs != "N"] %in% posGL) == 0) {
      return(c(posGL, error))
    }

    #return( c(sample(posNucs[posNucs!="N"],1),error) )
    candidates <- nucsHere[nucsHere != "N" & nucsHere != posGL]
    if (nonTerminalOnly == 0) {
      return(c(sample(candidates, 1), error))
    }
    candidateTable <- table(candidates)
    if (sum(candidateTable > 1) == 0) {
      return(c(posGL, error))
    }
    return(c(sample(candidates[candidates %in% names(candidateTable)[candidateTable > 1]], 1), error))
  })

  return(list(c(c2s(matClone[1, ]), glSeq), "TRUE" %in% matClone[2, ]))
}

# Compute the expected for each sequence-germline pair.
#   matInput: character matrix with the input sequence in column 1 and its
#             germline in column 2, one pair per row
# Returns a numeric matrix with one row per pair and 4 columns (the values
# returned by computeExpected()). An input with no rows gives a 0 x 4 matrix.
getExpectedIndividual <- function(matInput) {
  nPairs <- nrow(matInput)
  if (nPairs == 0) {
    return(matrix(NA_real_, nrow = 0, ncol = 4))
  }

  # Same pipeline either way; mclapply is used only when a "multicore"
  # package is on the search path.
  applyFun <- if (any(grep("multicore", search()))) mclapply else lapply

  # Mutabilities are computed once per distinct germline and then reused.
  facGL <- factor(matInput[, 2])
  facLevels <- levels(facGL)
  LisGLs_MutabilityU <- applyFun(seq_along(facLevels), function(x) {
    computeMutabilities(facLevels[x])
  })
  facIndex <- match(facGL, facLevels)

  # Positions where the input sequence is "N" are masked with NA.
  LisGLs_Mutability <- applyFun(seq_len(nPairs), function(x) {
    cInput <- rep(NA, nchar(matInput[x, 1]))
    cInput[s2c(matInput[x, 1]) != "N"] <- 1
    LisGLs_MutabilityU[[facIndex[x]]] * cInput
  })

  LisGLs_Targeting <- applyFun(seq_len(nPairs), function(x) {
    computeTargeting(matInput[x, 2], LisGLs_Mutability[[x]])
  })

  LisGLs_MutationTypes <- applyFun(seq_len(nPairs), function(x) {
    computeMutationTypes(matInput[x, 2])
  })

  LisGLs_Exp <- applyFun(seq_len(nPairs), function(x) {
    computeExpected(LisGLs_Targeting[[x]], LisGLs_MutationTypes[[x]])
  })

  ul_LisGLs_Exp <- unlist(LisGLs_Exp)
  return(matrix(ul_LisGLs_Exp, ncol = 4, nrow = (length(ul_LisGLs_Exp) / 4), byrow = TRUE))
}

# Compute mutabilities of sequence based on the tri-nucleotide model
# (or the 5-mer model when the global mutabilityModel is 5).
# Gap positions ("-") are skipped for the context windows and stay NA in the
# result. With the 5-mer model positions 1 and 2 of the gapless sequence are
# left NA.
# NOTE(review): a gapless sequence shorter than 3 makes 3:(gaplessSeqLen)
# count downwards; callers presumably never pass such sequences - confirm.
computeMutabilities <- function(paramSeq) {
  seqLen <- nchar(paramSeq)
  seqMutabilites <- rep(NA, seqLen)

  gaplessSeq <- gsub("-", "", paramSeq)
  gaplessSeqLen <- nchar(gaplessSeq)
  gaplessSeqMutabilites <- rep(NA, gaplessSeqLen)

  # 5-wide window centred on every position from 3 onwards.
  pos <- 3:(gaplessSeqLen)
  windows <- substr(rep(gaplessSeq, gaplessSeqLen - 2), (pos - 2), (pos + 2))

  if (mutabilityModel != 5) {
    # Average over the three triplets that contain the centre position
    # (as their 3rd, 2nd and 1st nucleotide respectively).
    gaplessSeqMutabilites[pos] <-
      tapply(c(
        getMutability(substr(windows, 1, 3), 3),
        getMutability(substr(windows, 2, 4), 2),
        getMutability(substr(windows, 3, 5), 1)
      ), rep(1:(gaplessSeqLen - 2), 3), mean, na.rm = TRUE
      )
    #Pos 1
    subSeq <- substr(gaplessSeq, 1, 3)
    gaplessSeqMutabilites[1] <- getMutability(subSeq, 1)
    #Pos 2
    subSeq <- substr(gaplessSeq, 1, 4)
    gaplessSeqMutabilites[2] <- mean(c(
      getMutability(substr(subSeq, 1, 3), 2),
      getMutability(substr(subSeq, 2, 4), 1)
    ), na.rm = TRUE
    )
  } else {
    gaplessSeqMutabilites[pos] <- sapply(windows, function(x) { getMutability5(x) }, simplify = TRUE)
  }

  seqMutabilites[which(s2c(paramSeq) != "-")] <- gaplessSeqMutabilites
  return(seqMutabilites)
}

# Returns the mutability of a triplet at a given position
# (row lookup by triplet name in the global `mutability` matrix).
getMutability <- function(codon, pos = 1:3) {
  triplets <- rownames(mutability)
  mutability[match(codon, triplets), pos]
}

# Returns the mutability of a 5-mer (lookup by name in the global `mutability`).
getMutability5 <- function(fivemer) {
  return(mutability[fivemer])
}

# Returns the substitution probability
# (the row of the global `substitution` matrix for `nuc`).
getTransistionProb <- function(nuc) {
  substitution[nuc, ]
}

# Returns the substitution column for a 5-mer, or four NAs when the 5-mer is
# not a column of the global `substitution` matrix.
getTransistionProb5 <- function(fivemer) {
  if (fivemer %in% colnames(substitution)) {
    return(substitution[, fivemer])
  } else {
    return(array(NA, 4))
  }
}

# Given a nuc, returns the other 3 nucs it can mutate to.
# Logical indexing is used on purpose: NUCLEOTIDES[-which(...)] returns an
# empty vector when `nuc` is not one of A/C/G/T (e.g. "N" or "-"), because a
# zero-length negative index selects nothing. Such input now yields all four
# nucleotides.
canMutateTo <- function(nuc) {
  NUCLEOTIDES[NUCLEOTIDES != nuc]
}

# Given a nucleotide, returns the probability of other nucleotide it can mutate to
canMutateToProb <- function(nuc) {
  substitution[nuc, canMutateTo(nuc)]
}

# Compute targeting, based on precomputed mutability & substitution.
#   param_strSeq:          germline sequence (string)
#   param_vecMutabilities: per-position mutabilities for that sequence
# Returns a 4 x length matrix (rows A, C, G, T): mutability times the
# substitution probability towards each nucleotide. The global
# substitutionModel selects the per-nucleotide or the 5-mer lookup.
computeTargeting <- function(param_strSeq, param_vecMutabilities) {

  if (substitutionModel != 5) {
    vecSeq <- s2c(param_strSeq)
    matTargeting <- sapply(1:length(vecSeq), function(x) { param_vecMutabilities[x] * getTransistionProb(vecSeq[x]) })
    #matTargeting = apply( rbind(vecSeq,param_vecMutabilities),2, function(x) { as.vector(as.numeric(x[2]) * getTransistionProb(x[1])) } )
    dimnames(matTargeting) <- list(NUCLEOTIDES, 1:(length(vecSeq)))
    return(matTargeting)
  } else {

    seqLen <- nchar(param_strSeq)
    seqsubstitution <- matrix(NA, ncol = seqLen, nrow = 4)
    gaplessSeq <- gsub("-", "", param_strSeq)
    gaplessSeqLen <- nchar(gaplessSeq)
    gaplessSeqSubstitution <- matrix(NA, ncol = gaplessSeqLen, nrow = 4)

    # 5-wide window centred on every gapless position from 3 onwards.
    pos <- 3:(gaplessSeqLen)
    subSeq <- substr(rep(gaplessSeq, gaplessSeqLen - 2), (pos - 2), (pos + 2))
    gaplessSeqSubstitution[, pos] <- sapply(subSeq, function(x) { getTransistionProb5(x) }, simplify = TRUE)
    seqsubstitution[, which(s2c(param_strSeq) != "-")] <- gaplessSeqSubstitution
    #matTargeting <- param_vecMutabilities %*% seqsubstitution
    matTargeting <- sweep(seqsubstitution, 2, param_vecMutabilities, `*`)
    dimnames(matTargeting) <- list(NUCLEOTIDES, 1:(seqLen))
    return(matTargeting)
  }
}

# Compute the mutation types.
# Splits the sequence into codons and lays the matching CODON_TABLE columns
# out as a 4 x length matrix (rows A, C, G, T; one column per nucleotide).
computeMutationTypes <- function(param_strSeq) {
  #cat(param_strSeq,"\n")
  #vecSeq = trimToLastCodon(param_strSeq)
  lenSeq <- nchar(param_strSeq)
  codonStarts <- (1:(lenSeq / 3)) * 3 - 2
  vecCodons <- sapply(codonStarts, function(x) { substr(param_strSeq, x, x + 2) })
  matMutationTypes <- matrix(unlist(CODON_TABLE[, vecCodons]), ncol = lenSeq, nrow = 4, byrow = FALSE)
  dimnames(matMutationTypes) <- list(NUCLEOTIDES, 1:(ncol(matMutationTypes)))
  return(matMutationTypes)
}

# Single-codon variant of computeMutationTypes: returns the 4 x 3 matrix for
# one codon, without dimnames.
computeMutationTypesFast <- function(param_strSeq) {
  matMutationTypes <- matrix(CODON_TABLE[, param_strSeq], ncol = 3, nrow = 4, byrow = FALSE)
  #dimnames( matMutationTypes ) = list(NUCLEOTIDES,1:(length(vecSeq)))
  return(matMutationTypes)
}

# Row-wise mutationType(): column 2 of matOfCodons is the "from" codon,
# column 1 the "to" codon.
mutationTypeOptimized <- function(matOfCodons) {
  apply(matOfCodons, 1, function(x) { mutationType(x[2], x[1]) })
}

# Returns a vector of codons 1 mutation away from the given codon.
# 12 entries: positions 1, 2 and 3 each replaced by A, C, G, T in turn (so
# the original codon itself appears once per position).
permutateAllCodon <- function(codon) {
  cCodon <- s2c(codon)
  matCodons <- matrix(cCodon, nrow = 12, ncol = 3, byrow = TRUE)
  matCodons[1:4, 1] <- NUCLEOTIDES
  matCodons[5:8, 2] <- NUCLEOTIDES
  matCodons[9:12, 3] <- NUCLEOTIDES
  apply(matCodons, 1, c2s)
}

# Given two codons, tells you if the mutation is R or S (based on your definition)
mutationType <- function(codonFrom,codonTo){
  if(testID==4){
    if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
      return(NA)
    }else{
      mutationType = "S"
      if( translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonFrom)) != translateAminoAcidToTraitChange(translateCodonToAminoAcid(codonTo)) ){
        mutationType = "R"
      }
      if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
        mutationType = "Stop"
      }
      return(mutationType)
    }
  }else if(testID==5){
    if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
      return(NA)
    }else{
      if(codonFrom==codonTo){
        mutationType = "S"
      }else{
        codonFrom = s2c(codonFrom)
        codonTo = s2c(codonTo)
        mutationType = "Stop"
# (continuation of mutationType(): the rest of the testID==5 branch and the
#  default branch; code reproduced unchanged, only re-broken onto lines)
        nucOfI = codonFrom[which(codonTo!=codonFrom)]
        if(nucOfI=="C"){
          mutationType = "R"
        }else if(nucOfI=="G"){
          mutationType = "S"
        }
      }
      return(mutationType)
    }
  }else{
    if( is.na(codonFrom) | is.na(codonTo) | is.na(translateCodonToAminoAcid(codonFrom)) | is.na(translateCodonToAminoAcid(codonTo)) ){
      return(NA)
    }else{
      mutationType = "S"
      if( translateCodonToAminoAcid(codonFrom) != translateCodonToAminoAcid(codonTo) ){
        mutationType = "R"
      }
      if(translateCodonToAminoAcid(codonTo)=="*" | translateCodonToAminoAcid(codonFrom)=="*"){
        mutationType = "Stop"
      }
      return(mutationType)
    }
  }
}


# Given a matrix of targeting values and the matching matrix of mutation
# types, returns the summed targeting per mutation type and region as
#   c(Exp_R_CDR, Exp_S_CDR, Exp_R_FWR, Exp_S_FWR)
# The region masks are read from the globals CDR_Nuc_Mat and FWR_Nuc_Mat and
# are indexed with the same linear positions as paramTargeting.
# NOTE(review): presumably both masks have the same dimensions as
# paramTargeting -- confirm against the code that builds them.
computeExpected <- function(paramTargeting,paramMutationTypes){
  # Sum the targeting of all cells of type `mutType` flagged TRUE in `mask`;
  # NA mask entries and NA targeting values are ignored.
  expectedIn <- function(mutType, mask){
    typePos <- which(paramMutationTypes == mutType)
    sum(paramTargeting[ typePos[which(mask[typePos] == TRUE)] ], na.rm = TRUE)
  }

  # Replacements
  Exp_R_FWR <- expectedIn("R", FWR_Nuc_Mat)
  Exp_R_CDR <- expectedIn("R", CDR_Nuc_Mat)
  # Silents
  Exp_S_FWR <- expectedIn("S", FWR_Nuc_Mat)
  Exp_S_CDR <- expectedIn("S", CDR_Nuc_Mat)

  return(c(Exp_R_CDR,Exp_S_CDR,Exp_R_FWR,Exp_S_FWR))
}

# Count the mutations in a sequence
# each mutation is treated independently
#
# Wrapper around analyzeMutations2NucUri(): takes a 2-row character matrix
# (one nucleotide per column), swaps its two rows and forwards the result.
analyzeMutations2NucUri_website <- function( rev_in_matrix ){
  paramGL = rev_in_matrix[2,]
  paramSeq = rev_in_matrix[1,]

  #Fill seq with GL seq if gapped
  #if( any(paramSeq=="-") ){
  #  gapPos_Seq = which(paramSeq=="-")
  #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "-"]
  #  paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
  #}


  #if( any(paramSeq=="N") ){
  #  gapPos_Seq = which(paramSeq=="N")
  #  gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"]
  #  paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace]
  #}

  # Row 2 of the input becomes row 1 of the forwarded matrix and vice versa.
  analyzeMutations2NucUri( matrix(c( paramGL, paramSeq ),2,length(paramGL),byrow=TRUE) )

}

# Classifies every point mutation between two aligned nucleotide vectors.
#
# in_matrix: 2-row character matrix, one nucleotide per column. The code reads
#            row 2 as the germline (paramGL) and row 1 as the observed
#            sequence (paramSeq).
#            NOTE(review): the original comment here said "1 = GL, 2 = Seq",
#            the opposite of what the code reads; confirm the intended
#            orientation with the callers.
# Returns a character vector of mutationType() results named by nucleotide
# position, with NA classifications dropped, or a single NA when there is no
# classifiable mutation. Each mutation is evaluated independently: the
# germline codon with only that one position substituted.
analyzeMutations2NucUri <- function( in_matrix=matrix(c(c("A","A","A","C","C","C"),c("A","G","G","C","C","A")),2,6,byrow=TRUE) ){
  paramGL = in_matrix[2,]
  paramSeq = in_matrix[1,]
  mutations_val = paramGL != paramSeq
  if(!any(mutations_val)){
    return(NA)
  }

  mutationPos = seq_along(mutations_val)[mutations_val]
  # Ignore mutations that fall in a codon containing an "N" in paramSeq.
  codonHasNoN = vapply(mutationPos, function(x){ !any(paramSeq[getCodonPos(x)]=="N") }, logical(1))
  mutationPos = mutationPos[codonHasNoN]
  length_mutations = length(mutationPos)
  mutationInfo = rep(NA,length_mutations)

  if(any(mutationPos)){
    # Germline codon around each mutated position (3 positions per mutation).
    pos_array = array(sapply(mutationPos,getCodonPos))
    codonGL = paramGL[pos_array]

    # Germline codon with only the mutated nucleotide swapped in.
    codonSeq = sapply(mutationPos,function(x){
      seqP = paramGL[getCodonPos(x)]
      muCodonPos = {x-1}%%3+1
      seqP[muCodonPos] = paramSeq[x]
      return(seqP)
    })
    GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s)
    Seqcodons = apply(codonSeq,2,c2s)
    mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))})
    names(mutationInfo) = mutationPos
  }

  if(any(!is.na(mutationInfo))){
    return(mutationInfo[!is.na(mutationInfo)])
  }
  return(NA)
}

# Tallies classified mutations by type and region.
#
# mu: result of analyzeMutations2NucUri(): either a single NA or a character
#     vector of "R"/"S"/"Stop" named by nucleotide position.
# Returns c(R_CDR, S_CDR, R_FWR, S_FWR); all zeros when mu is NA or empty.
# The region of each position is read from the globals CDR_Nuc and FWR_Nuc.
processNucMutations2 <- function(mu){
  # `if(!is.na(mu))` on a vector is an error since R 4.2.0 (and on empty input
  # in any version); test the first element explicitly, which is what older
  # R versions did implicitly.
  if(length(mu) == 0 || is.na(mu[1])){
    return(rep(0,4))
  }

  # c(CDR count, FWR count) for the mutations of one type.
  countByRegion <- function(type){
    hits = mu[which(mu == type)]
    if(length(hits) == 0){
      return(c(0,0))
    }
    nucNumbs = as.numeric(names(hits))
    c(sum(as.integer(CDR_Nuc[nucNumbs]),na.rm=TRUE),
      sum(as.integer(FWR_Nuc[nucNumbs]),na.rm=TRUE))
  }

  R_counts = countByRegion("R")
  S_counts = countByRegion("S")

  retVec = c(R_counts[1],S_counts[1],R_counts[2],S_counts[2])
  retVec[is.na(retVec)]=0
  return(retVec)
}


## Z-score
Test + computeZScore <- function(mat, test="Focused"){ + matRes <- matrix(NA,ncol=2,nrow=(nrow(mat))) + if(test=="Focused"){ + #Z_Focused_CDR + #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T ) + P = apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}) + R_mean = apply(cbind(mat[,c(1,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,1] = (mat[,1]-R_mean)/R_sd + + #Z_Focused_FWR + #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T ) + P = apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}) + R_mean = apply(cbind(mat[,c(3,2,4)],P),1,function(x){x[4]*(sum(x[1:3]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,2] = (mat[,3]-R_mean)/R_sd + } + + if(test=="Local"){ + #Z_Focused_CDR + #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T ) + P = apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}) + R_mean = apply(cbind(mat[,c(1,2)],P),1,function(x){x[3]*(sum(x[1:2]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,1] = (mat[,1]-R_mean)/R_sd + + #Z_Focused_FWR + #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T ) + P = apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}) + R_mean = apply(cbind(mat[,c(3,4)],P),1,function(x){x[3]*(sum(x[1:2]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,2] = (mat[,3]-R_mean)/R_sd + } + + if(test=="Imbalanced"){ + #Z_Focused_CDR + #P_Denom = sum( mat[1,c(5,6,8)], na.rm=T ) + P = apply(mat[,5:8],1,function(x){((x[1]+x[2])/sum(x))}) + R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,1] = (mat[,1]-R_mean)/R_sd + + #Z_Focused_FWR + #P_Denom = sum( mat[1,c(7,6,8)], na.rm=T ) + P = apply(mat[,5:8],1,function(x){((x[3]+x[4])/sum(x))}) + R_mean = apply(cbind(mat[,1:4],P),1,function(x){x[5]*(sum(x[1:4]))}) + R_sd=sqrt(R_mean*(1-P)) + matRes[,2] = (mat[,3]-R_mean)/R_sd + } + + matRes[is.nan(matRes)] = NA + return(matRes) + } + + # Return a p-value for a z-score + z2p <- function(z){ + p=NA + if( !is.nan(z) && !is.na(z)){ + if(z>0){ + p = (1 - pnorm(z,0,1)) + } else if(z<0){ + p = (-1 * pnorm(z,0,1)) + } else{ + p = 0.5 + } + }else{ + p = 
NA + } + return(p) + } + + + ## Bayesian Test + + # Fitted parameter for the bayesian framework +BAYESIAN_FITTED<-c(0.407277142798302, 0.554007336744485, 0.63777155771234, 0.693989162719009, 0.735450014674917, 0.767972534429806, 0.794557287143399, 0.816906816601605, 0.83606796225341, 0.852729446430296, 0.867370424541641, 0.880339760590323, 0.891900995024999, 0.902259181289864, 0.911577919359,0.919990301665853, 0.927606458124537, 0.934518806350661, 0.940805863754375, 0.946534836475715, 0.951763691199255, 0.95654428191308, 0.960920179487397, 0.964930893680829, 0.968611312149038, 0.971992459313836, 0.975102110004818, 0.977964943023096, 0.980603428208439, 0.983037660179428, 0.985285800977406, 0.987364285326685, 0.989288037855441, 0.991070478823525, 0.992723699729969, 0.994259575477392, 0.995687688867975, 0.997017365051493, 0.998257085153047, 0.999414558305388, 1.00049681357804, 1.00151036237481, 1.00246080204981, 1.00335370751909, 1.0041939329768, 1.0049859393417, 1.00573382091263, 1.00644127217376, 1.00711179729107, 1.00774845526417, 1.00835412715854, 1.00893143010366, 1.00948275846309, 1.01001030293661, 1.01051606798079, 1.01100188771288, 1.01146944044216, 1.01192026195449, 1.01235575766094, 1.01277721370986) + CONST_i <- sort(c(((2^(seq(-39,0,length.out=201)))/2)[1:200],(c(0:11,13:99)+0.5)/100,1-(2^(seq(-39,0,length.out=201)))/2)) + + # Given x, M & p, returns a pdf + calculate_bayes <- function ( x=3, N=10, p=0.33, + i=CONST_i, + max_sigma=20,length_sigma=4001 + ){ + if(!0%in%N){ + G <- max(length(x),length(N),length(p)) + x=array(x,dim=G) + N=array(N,dim=G) + p=array(p,dim=G) + sigma_s<-seq(-max_sigma,max_sigma,length.out=length_sigma) + sigma_1<-log({i/{1-i}}/{p/{1-p}}) + index<-min(N,60) + y<-dbeta(i,x+BAYESIAN_FITTED[index],N+BAYESIAN_FITTED[index]-x)*(1-p)*p*exp(sigma_1)/({1-p}^2+2*p*{1-p}*exp(sigma_1)+{p^2}*exp(2*sigma_1)) + if(!sum(is.na(y))){ + tmp<-approx(sigma_1,y,sigma_s)$y + tmp/sum(tmp)/{2*max_sigma/{length_sigma-1}} + }else{ + return(NA) + } + }else{ 
+ return(NA) + } + } + # Given a mat of observed & expected, return a list of CDR & FWR pdf for selection + computeBayesianScore <- function(mat, test="Focused", max_sigma=20,length_sigma=4001){ + flagOneSeq = F + if(nrow(mat)==1){ + mat=rbind(mat,mat) + flagOneSeq = T + } + if(test=="Focused"){ + #CDR + P = c(apply(mat[,c(5,6,8)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(1,2,4)],1,function(x){(sum(x))}),0) + X = c(mat[,1],0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(7,6,8)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(3,2,4)],1,function(x){(sum(x))}),0) + X = c(mat[,3],0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(test=="Local"){ + #CDR + P = c(apply(mat[,c(5,6)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(1,2)],1,function(x){(sum(x))}),0) + X = c(mat[,1],0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(7,8)],1,function(x){(x[1]/sum(x))}),0.5) + N = c(apply(mat[,c(3,4)],1,function(x){(sum(x))}),0) + X = c(mat[,3],0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(test=="Imbalanced"){ + #CDR + P = c(apply(mat[,c(5:8)],1,function(x){((x[1]+x[2])/sum(x))}),0.5) + N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(1:2)],1,function(x){(sum(x))}),0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR 
+ P = c(apply(mat[,c(5:8)],1,function(x){((x[3]+x[4])/sum(x))}),0.5) + N = c(apply(mat[,c(1:4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(3:4)],1,function(x){(sum(x))}),0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(test=="ImbalancedSilent"){ + #CDR + P = c(apply(mat[,c(6,8)],1,function(x){((x[1])/sum(x))}),0.5) + N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(2,4)],1,function(x){(x[1])}),0) + bayesCDR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesCDR = bayesCDR[-length(bayesCDR)] + + #FWR + P = c(apply(mat[,c(6,8)],1,function(x){((x[2])/sum(x))}),0.5) + N = c(apply(mat[,c(2,4)],1,function(x){(sum(x))}),0) + X = c(apply(mat[,c(2,4)],1,function(x){(x[2])}),0) + bayesFWR = apply(cbind(X,N,P),1,function(x){calculate_bayes(x=x[1],N=x[2],p=x[3],max_sigma=max_sigma,length_sigma=length_sigma)}) + bayesFWR = bayesFWR[-length(bayesFWR)] + } + + if(flagOneSeq==T){ + bayesCDR = bayesCDR[1] + bayesFWR = bayesFWR[1] + } + return( list("CDR"=bayesCDR, "FWR"=bayesFWR) ) + } + + ##Covolution + break2chunks<-function(G=1000){ + base<-2^round(log(sqrt(G),2),0) + return(c(rep(base,floor(G/base)-1),base+G-(floor(G/base)*base))) + } + + PowersOfTwo <- function(G=100){ + exponents <- array() + i = 0 + while(G > 0){ + i=i+1 + exponents[i] <- floor( log2(G) ) + G <- G-2^exponents[i] + } + return(exponents) + } + + convolutionPowersOfTwo <- function( cons, length_sigma=4001 ){ + G = ncol(cons) + if(G>1){ + for(gen in log(G,2):1){ + ll<-seq(from=2,to=2^gen,by=2) + sapply(ll,function(l){cons[,l/2]<<-weighted_conv(cons[,l],cons[,l-1],length_sigma=length_sigma)}) + } + } + return( cons[,1] ) + } + + convolutionPowersOfTwoByTwos <- function( cons, length_sigma=4001,G=1 ){ + if(length(ncol(cons))) G<-ncol(cons) + groups <- PowersOfTwo(G) + matG 
<- matrix(NA, ncol=length(groups), nrow=length(cons)/G ) + startIndex = 1 + for( i in 1:length(groups) ){ + stopIndex <- 2^groups[i] + startIndex - 1 + if(stopIndex!=startIndex){ + matG[,i] <- convolutionPowersOfTwo( cons[,startIndex:stopIndex], length_sigma=length_sigma ) + startIndex = stopIndex + 1 + } + else { + if(G>1) matG[,i] <- cons[,startIndex:stopIndex] + else matG[,i] <- cons + #startIndex = stopIndex + 1 + } + } + return( list( matG, groups ) ) + } + + weighted_conv<-function(x,y,w=1,m=100,length_sigma=4001){ + lx<-length(x) + ly<-length(y) + if({lx1){ + while( i1 & Length_Postrior<=Threshold){ + cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors)) + listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma) + y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma) + return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) + }else if(Length_Postrior==1) return(listPosteriors[[1]]) + else if(Length_Postrior==0) return(NA) + else { + cons = matrix(unlist(listPosteriors),length(listPosteriors[[1]]),length(listPosteriors)) + y = fastConv(cons,max_sigma=max_sigma, length_sigma=length_sigma ) + return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) + } + } + + fastConv<-function(cons, max_sigma=20, length_sigma=4001){ + chunks<-break2chunks(G=ncol(cons)) + if(ncol(cons)==3) chunks<-2:1 + index_chunks_end <- cumsum(chunks) + index_chunks_start <- c(1,index_chunks_end[-length(index_chunks_end)]+1) + index_chunks <- cbind(index_chunks_start,index_chunks_end) + + case <- sum(chunks!=chunks[1]) + if(case==1) End <- max(1,((length(index_chunks)/2)-1)) + else End <- max(1,((length(index_chunks)/2))) + + firsts <- sapply(1:End,function(i){ + indexes<-index_chunks[i,1]:index_chunks[i,2] + convolutionPowersOfTwoByTwos(cons[ ,indexes])[[1]] + }) + if(case==0){ + result<-calculate_bayesGHelper( convolutionPowersOfTwoByTwos(firsts) ) + }else if(case==1){ + last<-list(calculate_bayesGHelper( + convolutionPowersOfTwoByTwos( cons[ 
,index_chunks[length(index_chunks)/2,1]:index_chunks[length(index_chunks)/2,2]] )
    ),0)
    result_first<-calculate_bayesGHelper(convolutionPowersOfTwoByTwos(firsts))
    result<-calculate_bayesGHelper(
      list(
        cbind(
          result_first,last[[1]]),
        c(log(index_chunks_end[length(index_chunks)/2-1],2),log(index_chunks[length(index_chunks)/2,2]-index_chunks[length(index_chunks)/2,1]+1,2))
      )
    )
  }
  return(as.vector(result))
}

# Computes a credible interval (95% by default) for a pdf tabulated on the
# grid seq(-max_sigma, max_sigma, length.out=length_sigma).
#
# Returns c(lower, upper) taken from that grid, or a single NA when Pdf does
# not have length_sigma entries.
calcBayesCI <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
  if(length(Pdf)!=length_sigma) return(NA)
  sigma_s = seq(-max_sigma,max_sigma,length.out=length_sigma)
  cdf = cumsum(Pdf)
  cdf = cdf/cdf[length(cdf)]
  # findInterval() returns 0 when the quantile lies below cdf[1]. Clamp both
  # indices into 1..length_sigma so a 0 or negative index can never silently
  # change the length of the result.
  clampIndex <- function(i){ min(max(i, 1), length_sigma) }
  lowIdx = clampIndex(findInterval(low,cdf)-1)
  upIdx = clampIndex(findInterval(up,cdf))
  return( c(sigma_s[lowIdx] , sigma_s[upIdx]) )
}

# Computes the mean of a pdf tabulated on the sigma grid.
# Returns a 1x1 matrix (result of %*%), or NA when Pdf has the wrong length.
calcBayesMean <- function(Pdf,max_sigma=20,length_sigma=4001){
  if(length(Pdf)!=length_sigma) return(NA)
  sigma_s = seq(-max_sigma,max_sigma,length.out=length_sigma)
  # Number of grid steps per unit of sigma (inverse of the grid spacing).
  norm = {length_sigma-1}/2/max_sigma
  return( (Pdf%*%sigma_s/norm) )
}

# Returns c(mean, lower CI bound, upper CI bound) for a pdf, or three NAs
# when Pdf is the NA placeholder used for "no posterior available".
calcBayesOutputInfo <- function(Pdf,low=0.025,up=0.975,max_sigma=20, length_sigma=4001){
  # `if(is.na(Pdf))` on a full-length pdf is an error since R 4.2.0; test the
  # first element explicitly, which is what older R versions did implicitly.
  if(length(Pdf) == 0 || is.na(Pdf[1])){
    return(rep(NA,3))
  }
  bCI = calcBayesCI(Pdf=Pdf,low=low,up=up,max_sigma=max_sigma,length_sigma=length_sigma)
  bMean = calcBayesMean(Pdf=Pdf,max_sigma=max_sigma,length_sigma=length_sigma)
  return(c(bMean, bCI))
}

# Computes the signed p-value of a pdf: the probability mass below the grid
# midpoint (half of the midpoint cell included). Values above 0.5 are
# reported as pVal-1, i.e. as a negative number.
# Returns NA when Pdf is not a vector of more than one element.
computeSigmaP <- function(Pdf, length_sigma=4001, max_sigma=20){
  if(length(Pdf) <= 1){
    return(NA)
  }
  norm = {length_sigma-1}/2/max_sigma
  half = {length_sigma-1}/2
  pVal = {sum(Pdf[1:half]) + Pdf[half+1]/2}/norm
  if(pVal>0.5){
    pVal = pVal-1
  }
  return(pVal)
}

# Compute the signed p-value of two distributions tabulated on the same grid:
# sum_i dens1[i] * (cumulative dens2 up to i, counting half of cell i), after
# normalising both to sum to 1. Values above 0.5 are reported as value-1.
# sigma_S and N are unused by this implementation and are kept only for
# interface compatibility. Returns NA unless both densities have more than
# one element.
compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){
  if(!(length(dens1)>1 && length(dens2)>1)){
    return(NA)
  }
  dens1 <- dens1/sum(dens1)
  dens2 <- dens2/sum(dens2)
  cum2 <- cumsum(dens2)-dens2/2
  # Explicit indexing keeps the element-by-element pairing of the original
  # loop (no recycling if the two densities differ in length).
  tmp <- sum(dens1 * cum2[seq_along(dens1)])
  if(tmp>0.5) tmp <- tmp-1
  return( tmp )
}

# get number of sequences contributing to the sigma (i.e. sequences with mutations)
#
# matMutations: matrix with one row per sequence; columns 1-4 are summed per
#               row as selected below.
# test: 1 = focused (CDR sums columns 1,2,4; FWR sums columns 3,4,2),
#       2 = local (CDR sums columns 1,2; FWR sums columns 3,4);
#       4 is treated as 2. Any other value yields zero counts.
# Returns c(CDR=, FWR=): the number of rows with a positive sum.
numberOfSeqsWithMutations <- function(matMutations,test=1){
  if(test==4) test=2
  cdrSeqs <- 0
  fwrSeqs <- 0
  # Rows whose sum over `cols` is positive; rows with an NA sum are not
  # counted (previously a single NA row turned the whole count into NA).
  countPositive <- function(cols){
    rowTotals <- apply(matMutations, 1, function(x){ sum(x[cols]) })
    sum(rowTotals>0, na.rm=TRUE)
  }
  if(test==1){#focused
    cdrSeqs <- countPositive(c(1,2,4))
    fwrSeqs <- countPositive(c(3,4,2))
  }
  if(test==2){#local
    cdrSeqs <- countPositive(c(1,2))
    fwrSeqs <- countPositive(c(3,4))
  }
  return(c("CDR"=cdrSeqs, "FWR"=fwrSeqs))
}



# Maps a selection estimate and its p-value to an HTML colour.
#
# Green shades are used when sigmaVal < 0 and red shades otherwise, darker as
# |pVal| gets smaller (steps at 0.1, 0.05, 0.01, 0.005). White is returned
# when pVal is NA, 0 or 1; NA is returned when both arguments are NA. When
# only sigmaVal is NA, the sign of pVal is used in its place.
shadeColor <- function(sigmaVal=NA,pVal=NA){
  if(is.na(sigmaVal) && is.na(pVal)) return(NA)
  if(is.na(sigmaVal)) sigmaVal = sign(pVal)
  if(is.na(pVal) || pVal==1 || pVal==0){
    return("#FFFFFF")
  }

  # 1 = |p| > 0.1 ... 5 = |p| <= 0.005. Both colour ramps now share the same
  # inclusive thresholds (the red ramp used a strict `< 0.005` for its last
  # step while the green ramp used `<= 0.005`).
  level = 1 + sum(abs(pVal) <= c(0.1, 0.05, 0.01, 0.005))
  if(sigmaVal<0){
    shades = c("#CCFFCC", "#99FF99", "#66FF66", "#33FF33", "#00FF00")
  }else{
    shades = c("#FFCCCC", "#FF9999", "#FF6666", "#FF3333", "#FF0000")
  }
  return(shades[level])
}


+ +plotHelp <- function(xfrac=0.05,yfrac=0.05,log=FALSE){ + if(!log){ + x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac + y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac + }else { + if(log==2){ + x = par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac + y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac) + } + if(log==1){ + x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac) + y = par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac + } + if(log==3){ + x = 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac) + y = 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac) + } + } + return(c("x"=x,"y"=y)) +} + +# SHMulation + + # Based on targeting, introduce a single mutation & then update the targeting + oneMutation <- function(){ + # Pick a postion + mutation + posMutation = sample(1:(seqGermlineLen*4),1,replace=F,prob=as.vector(seqTargeting)) + posNucNumb = ceiling(posMutation/4) # Nucleotide number + posNucKind = 4 - ( (posNucNumb*4) - posMutation ) # Nuc the position mutates to + + #mutate the simulation sequence + seqSimVec <- s2c(seqSim) + seqSimVec[posNucNumb] <- NUCLEOTIDES[posNucKind] + seqSim <<- c2s(seqSimVec) + + #update Mutability, Targeting & MutationsTypes + updateMutabilityNTargeting(posNucNumb) + + #return(c(posNucNumb,NUCLEOTIDES[posNucKind])) + return(posNucNumb) + } + + updateMutabilityNTargeting <- function(position){ + min_i<-max((position-2),1) + max_i<-min((position+2),nchar(seqSim)) + min_ii<-min(min_i,3) + + #mutability - update locally + seqMutability[(min_i):(max_i)] <<- computeMutabilities(substr(seqSim,position-4,position+4))[(min_ii):(max_i-min_i+min_ii)] + + + #targeting - compute locally + seqTargeting[,min_i:max_i] <<- computeTargeting(substr(seqSim,min_i,max_i),seqMutability[min_i:max_i]) + seqTargeting[is.na(seqTargeting)] <<- 0 + #mutCodonPos = getCodonPos(position) + mutCodonPos = seq(getCodonPos(min_i)[1],getCodonPos(max_i)[3]) + #cat(mutCodonPos,"\n") + mutTypeCodon = getCodonPos(position) + 
seqMutationTypes[,mutTypeCodon] <<- computeMutationTypesFast( substr(seqSim,mutTypeCodon[1],mutTypeCodon[3]) ) + # Stop = 0 + if(any(seqMutationTypes[,mutCodonPos]=="Stop",na.rm=T )){ + seqTargeting[,mutCodonPos][seqMutationTypes[,mutCodonPos]=="Stop"] <<- 0 + } + + + #Selection + selectedPos = (min_i*4-4)+(which(seqMutationTypes[,min_i:max_i]=="R")) + # CDR + selectedCDR = selectedPos[which(matCDR[selectedPos]==T)] + seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR] * exp(selCDR) + seqTargeting[selectedCDR] <<- seqTargeting[selectedCDR]/baseLineCDR_K + + # FWR + selectedFWR = selectedPos[which(matFWR[selectedPos]==T)] + seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR] * exp(selFWR) + seqTargeting[selectedFWR] <<- seqTargeting[selectedFWR]/baseLineFWR_K + + } + + + + # Validate the mutation: if the mutation has not been sampled before validate it, else discard it. + validateMutation <- function(){ + if( !(mutatedPos%in%mutatedPositions) ){ # if it's a new mutation + uniqueMutationsIntroduced <<- uniqueMutationsIntroduced + 1 + mutatedPositions[uniqueMutationsIntroduced] <<- mutatedPos + }else{ + if(substr(seqSim,mutatedPos,mutatedPos)==substr(seqGermline,mutatedPos,mutatedPos)){ # back to germline mutation + mutatedPositions <<- mutatedPositions[-which(mutatedPositions==mutatedPos)] + uniqueMutationsIntroduced <<- uniqueMutationsIntroduced - 1 + } + } + } + + + + # Places text (labels) at normalized coordinates + myaxis <- function(xfrac=0.05,yfrac=0.05,log=FALSE,w="text",cex=1,adj=1,thecol="black"){ + par(xpd=TRUE) + if(!log) + text(par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac,par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac,w,cex=cex,adj=adj,col=thecol) + else { + if(log==2) + text( + par()$usr[1]-(par()$usr[2]-par()$usr[1])*xfrac, + 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac), + w,cex=cex,adj=adj,col=thecol) + if(log==1) + text( + 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac), + 
par()$usr[4]+(par()$usr[4]-par()$usr[3])*yfrac, + w,cex=cex,adj=adj,col=thecol) + if(log==3) + text( + 10^((par()$usr[1])-((par()$usr[2])-(par()$usr[1]))*xfrac), + 10^((par()$usr[4])+((par()$usr[4])-(par()$usr[3]))*yfrac), + w,cex=cex,adj=adj,col=thecol) + } + par(xpd=FALSE) + } + + + + # Count the mutations in a sequence + analyzeMutations <- function( inputMatrixIndex, model = 0 , multipleMutation=0, seqWithStops=0){ + + paramGL = s2c(matInput[inputMatrixIndex,2]) + paramSeq = s2c(matInput[inputMatrixIndex,1]) + + #if( any(paramSeq=="N") ){ + # gapPos_Seq = which(paramSeq=="N") + # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"] + # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] + #} + mutations_val = paramGL != paramSeq + + if(any(mutations_val)){ + mutationPos = which(mutations_val)#{1:length(mutations_val)}[mutations_val] + length_mutations =length(mutationPos) + mutationInfo = rep(NA,length_mutations) + + pos<- mutationPos + pos_array<-array(sapply(pos,getCodonPos)) + codonGL = paramGL[pos_array] + codonSeqWhole = paramSeq[pos_array] + codonSeq = sapply(pos,function(x){ + seqP = paramGL[getCodonPos(x)] + muCodonPos = {x-1}%%3+1 + seqP[muCodonPos] = paramSeq[x] + return(seqP) + }) + GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s) + SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s) + Seqcodons = apply(codonSeq,2,c2s) + + mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + names(mutationInfo) = mutationPos + + mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + names(mutationInfoWhole) = mutationPos + + mutationInfo <- mutationInfo[!is.na(mutationInfo)] + mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)] + + if(any(!is.na(mutationInfo))){ + + #Filter based on Stop (at the codon level) + if(seqWithStops==1){ + nucleotidesAtStopCodons = 
names(mutationInfoWhole[mutationInfoWhole!="Stop"]) + mutationInfo = mutationInfo[nucleotidesAtStopCodons] + mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons] + }else{ + countStops = sum(mutationInfoWhole=="Stop") + if(seqWithStops==2 & countStops==0) mutationInfo = NA + if(seqWithStops==3 & countStops>0) mutationInfo = NA + } + + if(any(!is.na(mutationInfo))){ + #Filter mutations based on multipleMutation + if(multipleMutation==1 & !is.na(mutationInfo)){ + mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole))) + tableMutationCodons <- table(mutationCodons) + codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1])) + if(any(codonsWithMultipleMutations)){ + #remove the nucleotide mutations in the codons with multiple mutations + mutationInfo <- mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)] + #replace those codons with Ns in the input sequence + paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N" + matInput[inputMatrixIndex,1] <<- c2s(paramSeq) + } + } + + #Filter mutations based on the model + if(any(mutationInfo)==T | is.na(any(mutationInfo))){ + + if(model==1 & !is.na(mutationInfo)){ + mutationInfo <- mutationInfo[mutationInfo=="S"] + } + if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(mutationInfo) + else return(NA) + }else{ + return(NA) + } + }else{ + return(NA) + } + + + }else{ + return(NA) + } + + + }else{ + return (NA) + } + } + + analyzeMutationsFixed <- function( inputArray, model = 0 , multipleMutation=0, seqWithStops=0){ + + paramGL = s2c(inputArray[2]) + paramSeq = s2c(inputArray[1]) + inputSeq <- inputArray[1] + #if( any(paramSeq=="N") ){ + # gapPos_Seq = which(paramSeq=="N") + # gapPos_Seq_ToReplace = gapPos_Seq[paramGL[gapPos_Seq] != "N"] + # paramSeq[gapPos_Seq_ToReplace] = paramGL[gapPos_Seq_ToReplace] + #} + mutations_val = paramGL != paramSeq + + if(any(mutations_val)){ + mutationPos = 
which(mutations_val)#{1:length(mutations_val)}[mutations_val] + length_mutations =length(mutationPos) + mutationInfo = rep(NA,length_mutations) + + pos<- mutationPos + pos_array<-array(sapply(pos,getCodonPos)) + codonGL = paramGL[pos_array] + codonSeqWhole = paramSeq[pos_array] + codonSeq = sapply(pos,function(x){ + seqP = paramGL[getCodonPos(x)] + muCodonPos = {x-1}%%3+1 + seqP[muCodonPos] = paramSeq[x] + return(seqP) + }) + GLcodons = apply(matrix(codonGL,length_mutations,3,byrow=TRUE),1,c2s) + SeqcodonsWhole = apply(matrix(codonSeqWhole,length_mutations,3,byrow=TRUE),1,c2s) + Seqcodons = apply(codonSeq,2,c2s) + + mutationInfo = apply(rbind(GLcodons , Seqcodons),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + names(mutationInfo) = mutationPos + + mutationInfoWhole = apply(rbind(GLcodons , SeqcodonsWhole),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + names(mutationInfoWhole) = mutationPos + + mutationInfo <- mutationInfo[!is.na(mutationInfo)] + mutationInfoWhole <- mutationInfoWhole[!is.na(mutationInfoWhole)] + + if(any(!is.na(mutationInfo))){ + + #Filter based on Stop (at the codon level) + if(seqWithStops==1){ + nucleotidesAtStopCodons = names(mutationInfoWhole[mutationInfoWhole!="Stop"]) + mutationInfo = mutationInfo[nucleotidesAtStopCodons] + mutationInfoWhole = mutationInfo[nucleotidesAtStopCodons] + }else{ + countStops = sum(mutationInfoWhole=="Stop") + if(seqWithStops==2 & countStops==0) mutationInfo = NA + if(seqWithStops==3 & countStops>0) mutationInfo = NA + } + + if(any(!is.na(mutationInfo))){ + #Filter mutations based on multipleMutation + if(multipleMutation==1 & !is.na(mutationInfo)){ + mutationCodons = getCodonNumb(as.numeric(names(mutationInfoWhole))) + tableMutationCodons <- table(mutationCodons) + codonsWithMultipleMutations <- as.numeric(names(tableMutationCodons[tableMutationCodons>1])) + if(any(codonsWithMultipleMutations)){ + #remove the nucleotide mutations in the codons with multiple mutations + mutationInfo <- 
mutationInfo[!(mutationCodons %in% codonsWithMultipleMutations)] + #replace those codons with Ns in the input sequence + paramSeq[unlist(lapply(codonsWithMultipleMutations, getCodonNucs))] = "N" + #matInput[inputMatrixIndex,1] <<- c2s(paramSeq) + inputSeq <- c2s(paramSeq) + } + } + + #Filter mutations based on the model + if(any(mutationInfo)==T | is.na(any(mutationInfo))){ + + if(model==1 & !is.na(mutationInfo)){ + mutationInfo <- mutationInfo[mutationInfo=="S"] + } + if(any(mutationInfo)==T | is.na(any(mutationInfo))) return(list(mutationInfo,inputSeq)) + else return(list(NA,inputSeq)) + }else{ + return(list(NA,inputSeq)) + } + }else{ + return(list(NA,inputSeq)) + } + + + }else{ + return(list(NA,inputSeq)) + } + + + }else{ + return (list(NA,inputSeq)) + } + } + + # triMutability Background Count + buildMutabilityModel <- function( inputMatrixIndex, model=0 , multipleMutation=0, seqWithStops=0, stopMutations=0){ + + #rowOrigMatInput = matInput[inputMatrixIndex,] + seqGL = gsub("-", "", matInput[inputMatrixIndex,2]) + seqInput = gsub("-", "", matInput[inputMatrixIndex,1]) + #matInput[inputMatrixIndex,] <<- cbind(seqInput,seqGL) + tempInput <- cbind(seqInput,seqGL) + seqLength = nchar(seqGL) + list_analyzeMutationsFixed<- analyzeMutationsFixed(tempInput, model, multipleMutation, seqWithStops) + mutationCount <- list_analyzeMutationsFixed[[1]] + seqInput <- list_analyzeMutationsFixed[[2]] + BackgroundMatrix = mutabilityMatrix + MutationMatrix = mutabilityMatrix + MutationCountMatrix = mutabilityMatrix + if(!is.na(mutationCount)){ + if((stopMutations==0 & model==0) | (stopMutations==1 & (sum(mutationCount=="Stop")0)) ){ + + fivermerStartPos = 1:(seqLength-4) + fivemerLength <- length(fivermerStartPos) + fivemerGL <- substr(rep(seqGL,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4)) + fivemerSeq <- substr(rep(seqInput,length(fivermerStartPos)),(fivermerStartPos),(fivermerStartPos+4)) + + #Background + for(fivemerIndex in 1:fivemerLength){ + fivemer = 
fivemerGL[fivemerIndex] + if(!any(grep("N",fivemer))){ + fivemerCodonPos = fivemerCodon(fivemerIndex) + fivemerReadingFrameCodon = substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) + fivemerReadingFrameCodonInputSeq = substr(fivemerSeq[fivemerIndex],fivemerCodonPos[1],fivemerCodonPos[3]) + + # All mutations model + #if(!any(grep("N",fivemerReadingFrameCodon))){ + if(model==0){ + if(stopMutations==0){ + if(!any(grep("N",fivemerReadingFrameCodonInputSeq))) + BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + 1) + }else{ + if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" ){ + positionWithinCodon = which(fivemerCodonPos==3)#positionsWithinCodon[(fivemerCodonPos[1]%%3)+1] + BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probNonStopMutations[fivemerReadingFrameCodon,positionWithinCodon]) + } + } + }else{ # Only silent mutations + if( !any(grep("N",fivemerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(fivemerReadingFrameCodon)!="*" & translateCodonToAminoAcid(fivemerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(fivemerReadingFrameCodon)){ + positionWithinCodon = which(fivemerCodonPos==3) + BackgroundMatrix[fivemer] <- (BackgroundMatrix[fivemer] + probSMutations[fivemerReadingFrameCodon,positionWithinCodon]) + } + } + #} + } + } + + #Mutations + if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"] + if(model==1) mutationCount = mutationCount[mutationCount=="S"] + mutationPositions = as.numeric(names(mutationCount)) + mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)] + mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)] + countMutations = 0 + for(mutationPosition in mutationPositions){ + fivemerIndex = mutationPosition-2 + fivemer = fivemerSeq[fivemerIndex] + GLfivemer = fivemerGL[fivemerIndex] + fivemerCodonPos = fivemerCodon(fivemerIndex) + fivemerReadingFrameCodon = 
substr(fivemer,fivemerCodonPos[1],fivemerCodonPos[3]) + fivemerReadingFrameCodonGL = substr(GLfivemer,fivemerCodonPos[1],fivemerCodonPos[3]) + if(!any(grep("N",fivemer)) & !any(grep("N",GLfivemer))){ + if(model==0){ + countMutations = countMutations + 1 + MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + 1) + MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1) + }else{ + if( translateCodonToAminoAcid(fivemerReadingFrameCodonGL)!="*" ){ + countMutations = countMutations + 1 + positionWithinCodon = which(fivemerCodonPos==3) + glNuc = substr(fivemerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon) + inputNuc = substr(fivemerReadingFrameCodon,positionWithinCodon,positionWithinCodon) + MutationMatrix[GLfivemer] <- (MutationMatrix[GLfivemer] + substitution[glNuc,inputNuc]) + MutationCountMatrix[GLfivemer] <- (MutationCountMatrix[GLfivemer] + 1) + } + } + } + } + + seqMutability = MutationMatrix/BackgroundMatrix + seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE) + #cat(inputMatrixIndex,"\t",countMutations,"\n") + return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix)) + + } + } + + } + + #Returns the codon position containing the middle nucleotide + fivemerCodon <- function(fivemerIndex){ + codonPos = list(2:4,1:3,3:5) + fivemerType = fivemerIndex%%3 + return(codonPos[[fivemerType+1]]) + } + + #returns probability values for one mutation in codons resulting in R, S or Stop + probMutations <- function(typeOfMutation){ + matMutationProb <- matrix(0,ncol=3,nrow=125,dimnames=list(words(alphabet = c(NUCLEOTIDES,"N"), length=3),c(1:3))) + for(codon in rownames(matMutationProb)){ + if( !any(grep("N",codon)) ){ + for(muPos in 1:3){ + matCodon = matrix(rep(s2c(codon),3),nrow=3,ncol=3,byrow=T) + glNuc = matCodon[1,muPos] + matCodon[,muPos] = canMutateTo(glNuc) + substitutionRate = substitution[glNuc,matCodon[,muPos]] + 
typeOfMutations = apply(rbind(rep(codon,3),apply(matCodon,1,c2s)),2,function(x){mutationType(c2s(x[1]),c2s(x[2]))}) + matMutationProb[codon,muPos] <- sum(substitutionRate[typeOfMutations==typeOfMutation]) + } + } + } + + return(matMutationProb) + } + + + + +#Mapping Trinucleotides to fivemers +mapTriToFivemer <- function(triMutability=triMutability_Literature_Human){ + rownames(triMutability) <- triMutability_Names + Fivemer<-rep(NA,1024) + names(Fivemer)<-words(alphabet=NUCLEOTIDES,length=5) + Fivemer<-sapply(names(Fivemer),function(Word)return(sum( c(triMutability[substring(Word,3,5),1],triMutability[substring(Word,2,4),2],triMutability[substring(Word,1,3),3]),na.rm=TRUE))) + Fivemer<-Fivemer/sum(Fivemer) + return(Fivemer) +} + +collapseFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){ + Indices<-substring(names(Fivemer),3,3)==NUC + Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position)) + tapply(which(Indices),Factors,function(i)weighted.mean(Fivemer[i],Weights[i],na.rm=TRUE)) +} + + + +CountFivemerToTri<-function(Fivemer,Weights=MutabilityWeights,position=1,NUC="A"){ + Indices<-substring(names(Fivemer),3,3)==NUC + Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position)) + tapply(which(Indices),Factors,function(i)sum(Weights[i],na.rm=TRUE)) +} + +#Uses the real counts of the mutated fivemers +CountFivemerToTri2<-function(Fivemer,Counts=MutabilityCounts,position=1,NUC="A"){ + Indices<-substring(names(Fivemer),3,3)==NUC + Factors<-substring(names(Fivemer[Indices]),(4-position),(6-position)) + tapply(which(Indices),Factors,function(i)sum(Counts[i],na.rm=TRUE)) +} + +bootstrap<-function(x=c(33,12,21),M=10000,alpha=0.05){ +N<-sum(x) +if(N){ +p<-x/N +k<-length(x)-1 +tmp<-rmultinom(M, size = N, prob=p) +tmp_p<-apply(tmp,2,function(y)y/N) +(apply(tmp_p,1,function(y)quantile(y,c(alpha/2/k,1-alpha/2/k)))) +} +else return(matrix(0,2,length(x))) +} + + + + +bootstrap2<-function(x=c(33,12,21),n=10,M=10000,alpha=0.05){ + 
+N<-sum(x) +k<-length(x) +y<-rep(1:k,x) +tmp<-sapply(1:M,function(i)sample(y,n)) +if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i)))/n +if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i)))/n +(apply(tmp_p,1,function(z)quantile(z,c(alpha/2/(k-1),1-alpha/2/(k-1))))) +} + + + +p_value<-function(x=c(33,12,21),M=100000,x_obs=c(2,5,3)){ +n=sum(x_obs) +N<-sum(x) +k<-length(x) +y<-rep(1:k,x) +tmp<-sapply(1:M,function(i)sample(y,n)) +if(n>1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[,j]==i))) +if(n==1)tmp_p<-sapply(1:M,function(j)sapply(1:k,function(i)sum(tmp[j]==i))) +tmp<-rbind(sapply(1:3,function(i)sum(tmp_p[i,]>=x_obs[i])/M), +sapply(1:3,function(i)sum(tmp_p[i,]<=x_obs[i])/M)) +sapply(1:3,function(i){if(tmp[1,i]>=tmp[2,i])return(-tmp[2,i])else return(tmp[1,i])}) +} + +#"D:\\Sequences\\IMGT Germlines\\Human_SNPless_IGHJ.FASTA" +# Remove SNPs from IMGT germline segment alleles +generateUnambiguousRepertoire <- function(repertoireInFile,repertoireOutFile){ + repertoireIn <- read.fasta(repertoireInFile, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F) + alleleNames <- sapply(names(repertoireIn),function(x)strsplit(x,"|",fixed=TRUE)[[1]][2]) + SNPs <- tapply(repertoireIn,sapply(alleleNames,function(x)strsplit(x,"*",fixed=TRUE)[[1]][1]),function(x){ + Indices<-NULL + for(i in 1:length(x)){ + firstSeq = s2c(x[[1]]) + iSeq = s2c(x[[i]]) + Indices<-c(Indices,which(firstSeq[1:320]!=iSeq[1:320] & firstSeq[1:320]!="." & iSeq[1:320]!="." 
)) + } + return(sort(unique(Indices))) + }) + repertoireOut <- repertoireIn + repertoireOut <- lapply(names(repertoireOut), function(repertoireName){ + alleleName <- strsplit(repertoireName,"|",fixed=TRUE)[[1]][2] + geneSegmentName <- strsplit(alleleName,"*",fixed=TRUE)[[1]][1] + alleleSeq <- s2c(repertoireOut[[repertoireName]]) + alleleSeq[as.numeric(unlist(SNPs[geneSegmentName]))] <- "N" + alleleSeq <- c2s(alleleSeq) + repertoireOut[[repertoireName]] <- alleleSeq + }) + names(repertoireOut) <- names(repertoireIn) + write.fasta(repertoireOut,names(repertoireOut),file.out=repertoireOutFile) + +} + + + + + + +############ +groupBayes2 = function(indexes, param_resultMat){ + + BayesGDist_Focused_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[4])})) + BayesGDist_Focused_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,2,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[2]+x[4])})) + #BayesGDist_Local_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2])})) + #BayesGDist_Local_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[3]+x[4])})) + #BayesGDist_Global_CDR = calculate_bayesG( x=param_resultMat[indexes,1], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[1]/(x[1]+x[2]+x[3]+x[4])})) + #BayesGDist_Global_FWR = calculate_bayesG( x=param_resultMat[indexes,3], N=apply(param_resultMat[indexes,c(1,2,3,4)],1,sum,na.rm=T), p=apply(param_resultMat[indexes,5:8],1,function(x){x[3]/(x[1]+x[2]+x[3]+x[4])})) + return ( list("BayesGDist_Focused_CDR"=BayesGDist_Focused_CDR, 
+ "BayesGDist_Focused_FWR"=BayesGDist_Focused_FWR) ) + #"BayesGDist_Local_CDR"=BayesGDist_Local_CDR, + #"BayesGDist_Local_FWR" = BayesGDist_Local_FWR)) +# "BayesGDist_Global_CDR" = BayesGDist_Global_CDR, +# "BayesGDist_Global_FWR" = BayesGDist_Global_FWR) ) + + +} + + +calculate_bayesG <- function( x=array(), N=array(), p=array(), max_sigma=20, length_sigma=4001){ + G <- max(length(x),length(N),length(p)) + x=array(x,dim=G) + N=array(N,dim=G) + p=array(p,dim=G) + + indexOfZero = N>0 & p>0 + N = N[indexOfZero] + x = x[indexOfZero] + p = p[indexOfZero] + G <- length(x) + + if(G){ + + cons<-array( dim=c(length_sigma,G) ) + if(G==1) { + return(calculate_bayes(x=x[G],N=N[G],p=p[G],max_sigma=max_sigma,length_sigma=length_sigma)) + } + else { + for(g in 1:G) cons[,g] <- calculate_bayes(x=x[g],N=N[g],p=p[g],max_sigma=max_sigma,length_sigma=length_sigma) + listMatG <- convolutionPowersOfTwoByTwos(cons,length_sigma=length_sigma) + y<-calculate_bayesGHelper(listMatG,length_sigma=length_sigma) + return( y/sum(y)/(2*max_sigma/(length_sigma-1)) ) + } + }else{ + return(NA) + } +} + + +calculate_bayesGHelper <- function( listMatG,length_sigma=4001 ){ + matG <- listMatG[[1]] + groups <- listMatG[[2]] + i = 1 + resConv <- matG[,i] + denom <- 2^groups[i] + if(length(groups)>1){ + while( i0)) ){ + +# ONEmerStartPos = 1:(seqLength) +# ONEmerLength <- length(ONEmerStartPos) + ONEmerGL <- s2c(seqGL) + ONEmerSeq <- s2c(seqInput) + + #Background + for(ONEmerIndex in 1:seqLength){ + ONEmer = ONEmerGL[ONEmerIndex] + if(ONEmer!="N"){ + ONEmerCodonPos = getCodonPos(ONEmerIndex) + ONEmerReadingFrameCodon = c2s(ONEmerGL[ONEmerCodonPos]) + ONEmerReadingFrameCodonInputSeq = c2s(ONEmerSeq[ONEmerCodonPos] ) + + # All mutations model + #if(!any(grep("N",ONEmerReadingFrameCodon))){ + if(model==0){ + if(stopMutations==0){ + if(!any(grep("N",ONEmerReadingFrameCodonInputSeq))) + BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + 1) + }else{ + if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & 
translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*"){ + positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex)#positionsWithinCodon[(ONEmerCodonPos[1]%%3)+1] + BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probNonStopMutations[ONEmerReadingFrameCodon,positionWithinCodon]) + } + } + }else{ # Only silent mutations + if( !any(grep("N",ONEmerReadingFrameCodonInputSeq)) & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)!="*" & translateCodonToAminoAcid(ONEmerReadingFrameCodonInputSeq)==translateCodonToAminoAcid(ONEmerReadingFrameCodon) ){ + positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex) + BackgroundMatrix[ONEmer] <- (BackgroundMatrix[ONEmer] + probSMutations[ONEmerReadingFrameCodon,positionWithinCodon]) + } + } + } + } + } + + #Mutations + if(stopMutations==1) mutationCount = mutationCount[mutationCount!="Stop"] + if(model==1) mutationCount = mutationCount[mutationCount=="S"] + mutationPositions = as.numeric(names(mutationCount)) + mutationCount = mutationCount[mutationPositions>2 & mutationPositions<(seqLength-1)] + mutationPositions = mutationPositions[mutationPositions>2 & mutationPositions<(seqLength-1)] + countMutations = 0 + for(mutationPosition in mutationPositions){ + ONEmerIndex = mutationPosition + ONEmer = ONEmerSeq[ONEmerIndex] + GLONEmer = ONEmerGL[ONEmerIndex] + ONEmerCodonPos = getCodonPos(ONEmerIndex) + ONEmerReadingFrameCodon = c2s(ONEmerSeq[ONEmerCodonPos]) + ONEmerReadingFrameCodonGL =c2s(ONEmerGL[ONEmerCodonPos]) + if(!any(grep("N",ONEmer)) & !any(grep("N",GLONEmer))){ + if(model==0){ + countMutations = countMutations + 1 + MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + 1) + MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1) + }else{ + if( translateCodonToAminoAcid(ONEmerReadingFrameCodonGL)!="*" ){ + countMutations = countMutations + 1 + positionWithinCodon = which(ONEmerCodonPos==ONEmerIndex) + glNuc = substr(ONEmerReadingFrameCodonGL,positionWithinCodon,positionWithinCodon) + 
inputNuc = substr(ONEmerReadingFrameCodon,positionWithinCodon,positionWithinCodon) + MutationMatrix[GLONEmer] <- (MutationMatrix[GLONEmer] + substitution[glNuc,inputNuc]) + MutationCountMatrix[GLONEmer] <- (MutationCountMatrix[GLONEmer] + 1) + } + } + } + } + + seqMutability = MutationMatrix/BackgroundMatrix + seqMutability = seqMutability/sum(seqMutability,na.rm=TRUE) + #cat(inputMatrixIndex,"\t",countMutations,"\n") + return(list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix, "BackgroundMatrix"=BackgroundMatrix)) +# tmp<-list("seqMutability" = seqMutability,"numbMutations" = countMutations,"seqMutabilityCount" = MutationCountMatrix) + } + } + +################ +# $Id: trim.R 989 2006-10-29 15:28:26Z ggorjan $ + +trim <- function(s, recode.factor=TRUE, ...) + UseMethod("trim", s) + +trim.default <- function(s, recode.factor=TRUE, ...) + s + +trim.character <- function(s, recode.factor=TRUE, ...) +{ + s <- sub(pattern="^ +", replacement="", x=s) + s <- sub(pattern=" +$", replacement="", x=s) + s +} + +trim.factor <- function(s, recode.factor=TRUE, ...) +{ + levels(s) <- trim(levels(s)) + if(recode.factor) { + dots <- list(x=s, ...) + if(is.null(dots$sort)) dots$sort <- sort + s <- do.call(what=reorder.factor, args=dots) + } + s +} + +trim.list <- function(s, recode.factor=TRUE, ...) + lapply(s, trim, recode.factor=recode.factor, ...) + +trim.data.frame <- function(s, recode.factor=TRUE, ...) +{ + s[] <- trim.list(s, recode.factor=recode.factor, ...) 
+ s +} +####################################### +# Compute the expected for each sequence-germline pair by codon +getExpectedIndividualByCodon <- function(matInput){ +if( any(grep("multicore",search())) ){ + facGL <- factor(matInput[,2]) + facLevels = levels(facGL) + LisGLs_MutabilityU = mclapply(1:length(facLevels), function(x){ + computeMutabilities(facLevels[x]) + }) + facIndex = match(facGL,facLevels) + + LisGLs_Mutability = mclapply(1:nrow(matInput), function(x){ + cInput = rep(NA,nchar(matInput[x,1])) + cInput[s2c(matInput[x,1])!="N"] = 1 + LisGLs_MutabilityU[[facIndex[x]]] * cInput + }) + + LisGLs_Targeting = mclapply(1:dim(matInput)[1], function(x){ + computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) + }) + + LisGLs_MutationTypes = mclapply(1:length(matInput[,2]),function(x){ + #print(x) + computeMutationTypes(matInput[x,2]) + }) + + LisGLs_R_Exp = mclapply(1:nrow(matInput), function(x){ + Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") + sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) + } + ) + }) + + LisGLs_S_Exp = mclapply(1:nrow(matInput), function(x){ + Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S") + sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T ) + } + ) + }) + + Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) ) + }else{ + facGL <- factor(matInput[,2]) + facLevels = levels(facGL) + LisGLs_MutabilityU = lapply(1:length(facLevels), function(x){ + computeMutabilities(facLevels[x]) + }) + facIndex = match(facGL,facLevels) + + LisGLs_Mutability = lapply(1:nrow(matInput), function(x){ + cInput = rep(NA,nchar(matInput[x,1])) + cInput[s2c(matInput[x,1])!="N"] = 1 + LisGLs_MutabilityU[[facIndex[x]]] * cInput + }) + 
+ LisGLs_Targeting = lapply(1:dim(matInput)[1], function(x){ + computeTargeting(matInput[x,2],LisGLs_Mutability[[x]]) + }) + + LisGLs_MutationTypes = lapply(1:length(matInput[,2]),function(x){ + #print(x) + computeMutationTypes(matInput[x,2]) + }) + + LisGLs_R_Exp = lapply(1:nrow(matInput), function(x){ + Exp_R <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + RPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="R") + sum( LisGLs_Targeting[[x]][,codonNucs][RPos], na.rm=T ) + } + ) + }) + + LisGLs_S_Exp = lapply(1:nrow(matInput), function(x){ + Exp_S <- rollapply(as.zoo(1:readEnd),width=3,by=3, + function(codonNucs){ + SPos = which(LisGLs_MutationTypes[[x]][,codonNucs]=="S") + sum( LisGLs_Targeting[[x]][,codonNucs][SPos], na.rm=T ) + } + ) + }) + + Exp_R = matrix(unlist(LisGLs_R_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + Exp_S = matrix(unlist(LisGLs_S_Exp),nrow=nrow(matInput),ncol=readEnd/3,T) + return( list( "Expected_R"=Exp_R, "Expected_S"=Exp_S) ) + } +} + +# getObservedMutationsByCodon <- function(listMutations){ +# numbSeqs <- length(listMutations) +# obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3)))) +# obsMu_S <- obsMu_R +# temp <- mclapply(1:length(listMutations), function(i){ +# arrMutations = listMutations[[i]] +# RPos = as.numeric(names(arrMutations)[arrMutations=="R"]) +# RPos <- sapply(RPos,getCodonNumb) +# if(any(RPos)){ +# tabR <- table(RPos) +# obsMu_R[i,as.numeric(names(tabR))] <<- tabR +# } +# +# SPos = as.numeric(names(arrMutations)[arrMutations=="S"]) +# SPos <- sapply(SPos,getCodonNumb) +# if(any(SPos)){ +# tabS <- table(SPos) +# obsMu_S[i,names(tabS)] <<- tabS +# } +# } +# ) +# return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) +# } + +getObservedMutationsByCodon <- function(listMutations){ + numbSeqs <- length(listMutations) + obsMu_R <- matrix(0,nrow=numbSeqs,ncol=readEnd/3,dimnames=list(c(1:numbSeqs),c(1:(readEnd/3)))) + obsMu_S <- obsMu_R + temp <- 
lapply(1:length(listMutations), function(i){ + arrMutations = listMutations[[i]] + RPos = as.numeric(names(arrMutations)[arrMutations=="R"]) + RPos <- sapply(RPos,getCodonNumb) + if(any(RPos)){ + tabR <- table(RPos) + obsMu_R[i,as.numeric(names(tabR))] <<- tabR + } + + SPos = as.numeric(names(arrMutations)[arrMutations=="S"]) + SPos <- sapply(SPos,getCodonNumb) + if(any(SPos)){ + tabS <- table(SPos) + obsMu_S[i,names(tabS)] <<- tabS + } + } + ) + return( list( "Observed_R"=obsMu_R, "Observed_S"=obsMu_S) ) +} + diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/Baseline_Main.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/baseline/Baseline_Main.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,388 @@ +######################################################################################### +# License Agreement +# +# THIS WORK IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE +# ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER +# APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE +# OR COPYRIGHT LAW IS PROHIBITED. +# +# BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE +# BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED +# TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN +# CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. 
+# +# BASELIne: Bayesian Estimation of Antigen-Driven Selection in Immunoglobulin Sequences +# Coded by: Mohamed Uduman & Gur Yaari +# Copyright 2012 Kleinstein Lab +# Version: 1.3 (01/23/2014) +######################################################################################### + +op <- options(); +options(showWarnCalls=FALSE, showErrorCalls=FALSE, warn=-1) +library('seqinr') +if( F & Sys.info()[1]=="Linux"){ + library("multicore") +} + +# Load functions and initialize global variables +source("Baseline_Functions.r") + +# Initialize parameters with user provided arguments + arg <- commandArgs(TRUE) + #arg = c(2,1,5,5,0,1,"1:26:38:55:65:104:116", "test.fasta","","sample") + #arg = c(1,1,5,5,0,1,"1:38:55:65:104:116:200", "test.fasta","","sample") + #arg = c(1,1,5,5,1,1,"1:26:38:55:65:104:116", "/home/mu37/Wu/Wu_Cloned_gapped_sequences_D-masked.fasta","/home/mu37/Wu/","Wu") + testID <- as.numeric(arg[1]) # 1 = Focused, 2 = Local + species <- as.numeric(arg[2]) # 1 = Human. 2 = Mouse + substitutionModel <- as.numeric(arg[3]) # 0 = Uniform substitution, 1 = Smith DS et al. 1996, 5 = FiveS + mutabilityModel <- as.numeric(arg[4]) # 0 = Uniform mutablity, 1 = Tri-nucleotide (Shapiro GS et al. 
2002) , 5 = FiveS + clonal <- as.numeric(arg[5]) # 0 = Independent sequences, 1 = Clonally related, 2 = Clonally related & only non-terminal mutations + fixIndels <- as.numeric(arg[6]) # 0 = Do nothing, 1 = Try and fix Indels + region <- as.numeric(strsplit(arg[7],":")[[1]]) # StartPos:LastNucleotideF1:C1:F2:C2:F3:C3 + inputFilePath <- arg[8] # Full path to input file + outputPath <- arg[9] # Full path to location of output files + outputID <- arg[10] # ID for session output + + + if(testID==5){ + traitChangeModel <- 1 + if( !is.na(any(arg[11])) ) traitChangeModel <- as.numeric(arg[11]) # 1 <- Chothia 1998 + initializeTraitChange(traitChangeModel) + } + +# Initialize other parameters/variables + + # Initialzie the codon table ( definitions of R/S ) + computeCodonTable(testID) + + # Initialize + # Test Name + testName<-"Focused" + if(testID==2) testName<-"Local" + if(testID==3) testName<-"Imbalanced" + if(testID==4) testName<-"ImbalancedSilent" + + # Indel placeholders initialization + indelPos <- NULL + delPos <- NULL + insPos <- NULL + + # Initialize in Tranistion & Mutability matrixes + substitution <- initializeSubstitutionMatrix(substitutionModel,species) + mutability <- initializeMutabilityMatrix(mutabilityModel,species) + + # FWR/CDR boundaries + flagTrim <- F + if( is.na(region[7])){ + flagTrim <- T + region[7]<-region[6] + } + readStart = min(region,na.rm=T) + readEnd = max(region,na.rm=T) + if(readStart>1){ + region = region - (readStart - 1) + } + region_Nuc = c( (region[1]*3-2) , (region[2:7]*3) ) + region_Cod = region + + readStart = (readStart*3)-2 + readEnd = (readEnd*3) + + FWR_Nuc <- c( rep(TRUE,(region_Nuc[2])), + rep(FALSE,(region_Nuc[3]-region_Nuc[2])), + rep(TRUE,(region_Nuc[4]-region_Nuc[3])), + rep(FALSE,(region_Nuc[5]-region_Nuc[4])), + rep(TRUE,(region_Nuc[6]-region_Nuc[5])), + rep(FALSE,(region_Nuc[7]-region_Nuc[6])) + ) + CDR_Nuc <- (1-FWR_Nuc) + CDR_Nuc <- as.logical(CDR_Nuc) + FWR_Nuc_Mat <- matrix( rep(FWR_Nuc,4), ncol=length(FWR_Nuc), 
nrow=4, byrow=T) + CDR_Nuc_Mat <- matrix( rep(CDR_Nuc,4), ncol=length(CDR_Nuc), nrow=4, byrow=T) + + FWR_Codon <- c( rep(TRUE,(region[2])), + rep(FALSE,(region[3]-region[2])), + rep(TRUE,(region[4]-region[3])), + rep(FALSE,(region[5]-region[4])), + rep(TRUE,(region[6]-region[5])), + rep(FALSE,(region[7]-region[6])) + ) + CDR_Codon <- (1-FWR_Codon) + CDR_Codon <- as.logical(CDR_Codon) + + +# Read input FASTA file + tryCatch( + inputFASTA <- baseline.read.fasta(inputFilePath, seqtype="DNA",as.string=T,set.attributes=F,forceDNAtolower=F) + , error = function(ex){ + cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n") + q() + } + ) + + if (length(inputFASTA)==1) { + cat("Error|Error reading input. Please enter or upload a valid FASTA file.\n") + q() + } + + # Process sequence IDs/names + names(inputFASTA) <- sapply(names(inputFASTA),function(x){trim(x)}) + + # Convert non nucleotide characters to N + inputFASTA[length(inputFASTA)] = gsub("\t","",inputFASTA[length(inputFASTA)]) + inputFASTA <- lapply(inputFASTA,replaceNonFASTAChars) + + # Process the FASTA file and conver to Matrix[inputSequence, germlineSequence] + processedInput <- processInputAdvanced(inputFASTA) + matInput <- processedInput[[1]] + germlines <- processedInput[[2]] + lenGermlines = length(unique(germlines)) + groups <- processedInput[[3]] + lenGroups = length(unique(groups)) + rm(processedInput) + rm(inputFASTA) + +# # remove clones with less than 2 seqeunces +# tableGL <- table(germlines) +# singletons <- which(tableGL<8) +# rowsToRemove <- match(singletons,germlines) +# if(any(rowsToRemove)){ +# matInput <- matInput[-rowsToRemove,] +# germlines <- germlines[-rowsToRemove] +# groups <- groups[-rowsToRemove] +# } +# +# # remove unproductive seqs +# nonFuctionalSeqs <- sapply(rownames(matInput),function(x){any(grep("unproductive",x))}) +# if(any(nonFuctionalSeqs)){ +# if(sum(nonFuctionalSeqs)==length(germlines)){ +# 
write.table("Unproductive",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) +# q() +# } +# matInput <- matInput[-which(nonFuctionalSeqs),] +# germlines <- germlines[-which(nonFuctionalSeqs)] +# germlines[1:length(germlines)] <- 1:length(germlines) +# groups <- groups[-which(nonFuctionalSeqs)] +# } +# +# if(class(matInput)=="character"){ +# write.table("All unproductive seqs",file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) +# q() +# } +# +# if(nrow(matInput)<10 | is.null(nrow(matInput))){ +# write.table(paste(nrow(matInput), "seqs only",sep=""),file=paste(outputPath,outputID,".txt",sep=""),quote=F,sep="\t",row.names=F,col.names=T) +# q() +# } + +# replace leading & trailing "-" with "N: + matInput <- t(apply(matInput,1,replaceLeadingTrailingDashes,readEnd)) + + # Trim (nucleotide) input sequences to the last codon + #matInput[,1] <- apply(matrix(matInput[,1]),1,trimToLastCodon) + +# # Check for Indels +# if(fixIndels){ +# delPos <- fixDeletions(matInput) +# insPos <- fixInsertions(matInput) +# }else{ +# # Check for indels +# indelPos <- checkForInDels(matInput) +# indelPos <- apply(cbind(indelPos[[1]],indelPos[[2]]),1,function(x){(x[1]==T & x[2]==T)}) +# } + + # If indels are present, remove mutations in the seqeunce & throw warning at end + #matInput[indelPos,] <- apply(matrix(matInput[indelPos,],nrow=sum(indelPos),ncol=2),1,function(x){x[1]=x[2]; return(x) }) + + colnames(matInput)=c("Input","Germline") + + # If seqeunces are clonal, create effective sequence for each clone & modify germline/group definitions + germlinesOriginal = NULL + if(clonal){ + germlinesOriginal <- germlines + collapseCloneResults <- tapply(1:nrow(matInput),germlines,function(i){ + collapseClone(matInput[i,1],matInput[i[1],2],readEnd,nonTerminalOnly=(clonal-1)) + }) + matInput = t(sapply(collapseCloneResults,function(x){return(x[[1]])})) + names_groups = tapply(groups,germlines,function(x){names(x[1])}) + groups 
= tapply(groups,germlines,function(x){array(x[1],dimnames=names(x[1]))}) + names(groups) = names_groups + + names_germlines = tapply(germlines,germlines,function(x){names(x[1])}) + germlines = tapply( germlines,germlines,function(x){array(x[1],dimnames=names(x[1]))} ) + names(germlines) = names_germlines + matInputErrors = sapply(collapseCloneResults,function(x){return(x[[2]])}) + } + + +# Selection Analysis + + +# if (length(germlines)>sequenceLimit) { +# # Code to parallelize processing goes here +# stop( paste("Error: Cannot process more than ", Upper_limit," sequences",sep="") ) +# } + +# if (length(germlines)1){ + groups <- c(groups,lenGroups+1) + names(groups)[length(groups)] = "All sequences combined" + bayesPDF_groups_cdr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_cdr,length_sigma=4001) + bayesPDF_groups_fwr[[lenGroups+1]] = groupPosteriors(bayesPDF_groups_fwr,length_sigma=4001) + } + + #Bayesian Outputs + bayes_cdr = t(sapply(bayesPDF_cdr,calcBayesOutputInfo)) + bayes_fwr = t(sapply(bayesPDF_fwr,calcBayesOutputInfo)) + bayes_germlines_cdr = t(sapply(bayesPDF_germlines_cdr,calcBayesOutputInfo)) + bayes_germlines_fwr = t(sapply(bayesPDF_germlines_fwr,calcBayesOutputInfo)) + bayes_groups_cdr = t(sapply(bayesPDF_groups_cdr,calcBayesOutputInfo)) + bayes_groups_fwr = t(sapply(bayesPDF_groups_fwr,calcBayesOutputInfo)) + + #P-values + simgaP_cdr = sapply(bayesPDF_cdr,computeSigmaP) + simgaP_fwr = sapply(bayesPDF_fwr,computeSigmaP) + + simgaP_germlines_cdr = sapply(bayesPDF_germlines_cdr,computeSigmaP) + simgaP_germlines_fwr = sapply(bayesPDF_germlines_fwr,computeSigmaP) + + simgaP_groups_cdr = sapply(bayesPDF_groups_cdr,computeSigmaP) + simgaP_groups_fwr = sapply(bayesPDF_groups_fwr,computeSigmaP) + + + #Format output + + # Round expected mutation frequencies to 3 decimal places + matMutationInfo[germlinesOriginal[indelPos],] = NA + if(nrow(matMutationInfo)==1){ + matMutationInfo[5:8] = round(matMutationInfo[,5:8]/sum(matMutationInfo[,5:8],na.rm=T),3) + 
}else{ + matMutationInfo[,5:8] = t(round(apply(matMutationInfo[,5:8],1,function(x){ return(x/sum(x,na.rm=T)) }),3)) + } + + listPDFs = list() + nRows = length(unique(groups)) + length(unique(germlines)) + length(groups) + + matOutput = matrix(NA,ncol=18,nrow=nRows) + rowNumb = 1 + for(G in unique(groups)){ + #print(G) + matOutput[rowNumb,c(1,2,11:18)] = c("Group",names(groups)[groups==G][1],bayes_groups_cdr[G,],bayes_groups_fwr[G,],simgaP_groups_cdr[G],simgaP_groups_fwr[G]) + listPDFs[[rowNumb]] = list("CDR"=bayesPDF_groups_cdr[[G]],"FWR"=bayesPDF_groups_fwr[[G]]) + names(listPDFs)[rowNumb] = names(groups[groups==paste(G)])[1] + #if(names(groups)[which(groups==G)[1]]!="All sequences combined"){ + gs = unique(germlines[groups==G]) + rowNumb = rowNumb+1 + if( !is.na(gs) ){ + for( g in gs ){ + matOutput[rowNumb,c(1,2,11:18)] = c("Germline",names(germlines)[germlines==g][1],bayes_germlines_cdr[g,],bayes_germlines_fwr[g,],simgaP_germlines_cdr[g],simgaP_germlines_fwr[g]) + listPDFs[[rowNumb]] = list("CDR"=bayesPDF_germlines_cdr[[g]],"FWR"=bayesPDF_germlines_fwr[[g]]) + names(listPDFs)[rowNumb] = names(germlines[germlines==paste(g)])[1] + rowNumb = rowNumb+1 + indexesOfInterest = which(germlines==g) + numbSeqsOfInterest = length(indexesOfInterest) + rowNumb = seq(rowNumb,rowNumb+(numbSeqsOfInterest-1)) + matOutput[rowNumb,] = matrix( c( rep("Sequence",numbSeqsOfInterest), + rownames(matInput)[indexesOfInterest], + c(matMutationInfo[indexesOfInterest,1:4]), + c(matMutationInfo[indexesOfInterest,5:8]), + c(bayes_cdr[indexesOfInterest,]), + c(bayes_fwr[indexesOfInterest,]), + c(simgaP_cdr[indexesOfInterest]), + c(simgaP_fwr[indexesOfInterest]) + ), ncol=18, nrow=numbSeqsOfInterest,byrow=F) + increment=0 + for( ioi in indexesOfInterest){ + listPDFs[[min(rowNumb)+increment]] = list("CDR"=bayesPDF_cdr[[ioi]] , "FWR"=bayesPDF_fwr[[ioi]]) + names(listPDFs)[min(rowNumb)+increment] = rownames(matInput)[ioi] + increment = increment + 1 + } + rowNumb=max(rowNumb)+1 + + } + } + } + 
colsToFormat = 11:18 + matOutput[,colsToFormat] = formatC( matrix(as.numeric(matOutput[,colsToFormat]), nrow=nrow(matOutput), ncol=length(colsToFormat)) , digits=3) + matOutput[matOutput== " NaN"] = NA + + + + colnames(matOutput) = c("Type", "ID", "Observed_CDR_R", "Observed_CDR_S", "Observed_FWR_R", "Observed_FWR_S", + "Expected_CDR_R", "Expected_CDR_S", "Expected_FWR_R", "Expected_FWR_S", + paste( rep(testName,6), rep(c("Sigma","CIlower","CIupper"),2),rep(c("CDR","FWR"),each=3), sep="_"), + paste( rep(testName,2), rep("P",2),c("CDR","FWR"), sep="_") + ) + fileName = paste(outputPath,outputID,".txt",sep="") + write.table(matOutput,file=fileName,quote=F,sep="\t",row.names=T,col.names=NA) + fileName = paste(outputPath,outputID,".RData",sep="") + save(listPDFs,file=fileName) + +indelWarning = FALSE +if(sum(indelPos)>0){ + indelWarning = "

Warning: The following sequences have either gaps and/or deletions, and have been ommited from the analysis."; + indelWarning = paste( indelWarning , "

    ", sep="" ) + for(indels in names(indelPos)[indelPos]){ + indelWarning = paste( indelWarning , "
  • ", indels, "
  • ", sep="" ) + } + indelWarning = paste( indelWarning , "

", sep="" ) +} + +cloneWarning = FALSE +if(clonal==1){ + if(sum(matInputErrors)>0){ + cloneWarning = "

Warning: The following clones have sequences of unequal length."; + cloneWarning = paste( cloneWarning , "

    ", sep="" ) + for(clone in names(matInputErrors)[matInputErrors]){ + cloneWarning = paste( cloneWarning , "
  • ", names(germlines)[as.numeric(clone)], "
  • ", sep="" ) + } + cloneWarning = paste( cloneWarning , "

", sep="" ) + } +} +cat(paste("Success",outputID,indelWarning,cloneWarning,sep="|")) diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/FiveS_Mutability.RData Binary file baseline/FiveS_Mutability.RData has changed diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/FiveS_Substitution.RData Binary file baseline/FiveS_Substitution.RData has changed diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,703 @@ +>IGHV1-18*01 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-18*02 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctaagatctgacgacacggcc +>IGHV1-18*03 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctatggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1-18*04 +caggttcagctggtgcagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctacggtatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcagcgcttac......aatggtaacacaaactatgcacagaagctccag...ggcagagtcaccatgaccacagacacatccacgagcacagcctacatggagctgaggagcctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*01 
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccagtaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga +>IGHV1-2*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcttggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcnacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*04 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggctgggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggccgtgtattactgtgcgagaga +>IGHV1-2*05 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accggctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagggtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcaggctgagatctgacgacacggtcgtgtattactgtgcgagaga +>IGHV1-24*01 +caggtccagctggtacagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggtttccggatacaccctc............actgaattatccatgcactgggtgcgacaggctcctggaaaagggcttgagtggatgggaggttttgatcctgaa......gatggtgaaacaatctacgcacagaagttccag...ggcagagtcaccatgaccgaggacacatctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga 
+>IGHV1-3*01 +caggtccagcttgtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtgcgagaga +>IGHV1-3*02 +caggttcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgcattgggtgcgccaggcccccggacaaaggcttgagtggatgggatggagcaacgctggc......aatggtaacacaaaatattcacaggagttccag...ggcagagtcaccattaccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaggacatggctgtgtattactgtgcgagaga +>IGHV1-38-4*01 +caggtccagctggtgcagtcttgggct...gaggtgaggaagtctggggcctcagtgaaagtctcctgtagtttttctgggtttaccatc............accagctacggtatacattgggtgcaacagtcccctggacaagggcttgagtggatgggatggatcaaccctggc......aatggtagcccaagctatgccaagaagtttcag...ggcagattcaccatgaccagggacatgtccacaaccacagcctacacagacctgagcagcctgacatctgaggacatggctgtgtattactatgcaagaca +>IGHV1-45*01 +cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattactagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagana +>IGHV1-45*02 +cagatgcagctggtgcagtctggggct...gaggtgaagaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccggacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaagata +>IGHV1-45*03 
+.....................................agaagactgggtcctcagtgaaggtttcctgcaaggcttccggatacaccttc............acctaccgctacctgcactgggtgcgacaggcccccagacaagcgcttgagtggatgggatggatcacacctttc......aatggtaacaccaactacgcacagaaattccag...gacagagtcaccattaccagggacaggtctatgagcacagcctacatggagctgagcagcctgagatctgaggacacagccatgtattactgtgcaaga +>IGHV1-46*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-46*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............aacagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-46*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtttcctgcaaggcatctggatacaccttc............accagctactatatgcactgggtgcgacaggcccctggacaagggcttgagtggatgggaataatcaaccctagt......ggtggtagcacaagctacgcacagaagttccag...ggcagagtcaccatgaccagggacacgtccacgagcacagtctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgctagaga +>IGHV1-58*01 +caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctgtgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga +>IGHV1-58*02 +caaatgcagctggtgcagtctgggcct...gaggtgaagaagcctgggacctcagtgaaggtctcctgcaaggcttctggattcaccttt............actagctctgctatgcagtgggtgcgacaggctcgtggacaacgccttgagtggataggatggatcgtcgttggc......agtggtaacacaaactacgcacagaagttccag...gaaagagtcaccattaccagggacatgtccacaagcacagcctacatggagctgagcagcctgagatccgaggacacggccgtgtattactgtgcggcaga 
+>IGHV1-68*01 +caggtgcagctggggcagtctgaggct...gaggtaaagaagcctggggcctcagtgaaggtctcctgcaaggcttccggatacaccttc............acttgctgctccttgcactggttgcaacaggcccctggacaagggcttgaaaggatgagatggatcacactttac......aatggtaacaccaactatgcaaagaagttccag...ggcagagtcaccattaccagggacatgtccctgaggacagcctacatagagctgagcagcctgagatctgaggactcggctgtgtattactgggcaagata +>IGHV1-69*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*02 +caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1-69*03 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgatgacacggc +>IGHV1-69*04 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*05 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccacggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga 
+>IGHV1-69*06 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*07 +.....................................agaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgag +>IGHV1-69*08 +caggtccagctggtgcaatctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatactatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*09 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*10 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......cttggtatagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*11 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggaaggatcatccctatc......cttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga 
+>IGHV1-69*12 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*13 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcagtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69*14 +caggtccagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacaaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1-69-2*01 +gaggtccagctggtacagtctggggct...gaggtgaagaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatacgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcaacaga +>IGHV1-69-2*02 +.....................................agaagcctggggctacagtgaaaatctcctgcaaggtttctggatacaccttc............accgactactacatgcactgggtgcaacaggcccctggaaaagggcttgagtggatgggacttgttgatcctgaa......gatggtgaaacaatatatgcagagaagttccag...ggcagagtcaccataaccgcggacacgtctacagacacagcctacatggagctgagcagcctgagatctgag +>IGHV1-69D*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctgggtcctcggtgaaggtctcctgcaaggcttctggaggcaccttc............agcagctatgctatcagctgggtgcgacaggcccctggacaagggcttgagtggatgggagggatcatccctatc......tttggtacagcaaactacgcacagaagttccag...ggcagagtcacgattaccgcggacgaatccacgagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga 
+>IGHV1-8*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagttatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg +>IGHV1-8*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctatgatatcaactgggtgcgacaggccactggacaagggcttgagtggatgggatggatgaaccctaac......agtggtaacacaggctatgcacagaagttccag...ggcagagtcaccatgaccaggaacacctccataagcacagcctacatggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagagg +>IGHV1-NL1*01 +caggttcagctgttgcagcctggggtc...caggtgaagaagcctgggtcctcagtgaaggtctcctgctaggcttccagatacaccttc............accaaatactttacacggtgggtgtgacaaagccctggacaagggcatnagtggatgggatgaatcaacccttac......aacgataacacacactacgcacagacgttctgg...ggcagagtcaccattaccagtgacaggtccatgagcacagcctacatggagctgagcngcctgagatccgaagacatggtcgtgtattactgtgtgagaga +>IGHV1/OR15-1*01 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgaga +>IGHV1/OR15-1*02 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctgcacggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga +>IGHV1/OR15-1*03 
+caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacacggagctgagcagcctgagatctgaggacacagccacgtattactgtgcgagaga +>IGHV1/OR15-1*04 +caggtgcagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacatcttc............accgactactatatgcactgggtgcgacaggcccctggacaagagcttgggtggatgggacggatcaaccctaac......agtggtggcacaaactatgcacagaagtttcag...ggcagagtcaccatgaccagggacacgtccatcagcacagcctacatggagctgagcagcctgagatctgaggacacggccacgtattactgtgcgagaga +>IGHV1/OR15-2*01 +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcaggctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1/OR15-2*02 +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctggagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1/OR15-2*03 +caggtgcagctggtgcagtctggagct...gaggtgaagaagcctagagcctcagtgaaggtctcctgcaaggcttctggttacaccttt............accagctactatatgcactgggtgtgacaggcccctgaacaagggcttgagtggatgggatggatcaacacttac......aatggtaacacaaactacccacagaagctccag...ggcagagtcaccatgaccagagacacatccacgagcacagcctacatggagctgagcagcctgagatctgacgacatggccgtgtattactgtgcgagaga +>IGHV1/OR15-3*01 
+caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatcttcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1/OR15-3*02 +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accgactactttatgaactggatgcgccaggcccctggacaaaggcttgagtggatgggatggatcaacgctggc......aatggtaacacaaaatattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgagaga +>IGHV1/OR15-3*03 +caggtccaactggtgtagtctggagct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactatatgaactggatgcgccaggcccctggacaaggcttcgagtggatgggatggatcaacgctggc......aatggtaacacaaagtattcacagaagctccag...ggcagagtcaccattaccagggacacatctgcgagcacagcctacatgcagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1/OR15-4*01 +caggaccagttggtgcagtctggggct...gaggtgaagaagcctctgtcctcagtgaaggtctccttcaaggcttctggatacaccttc............accaacaactttatgcactgggtgtgacaggcccctggacaaggacttgagtggatgggatggatcaatgctggc......aatggtaacacaacatatgcacagaagttccag...ggcagagtcaccataaccagggacacgtccatgagcacagcctacacggagctgagcagcctgagatctgaggacacggccgtgtattactgtgcgaga +>IGHV1/OR15-5*01 +.....................................agaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga +>IGHV1/OR15-5*02 
+caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccttc............accaactactgtatgcactgggtgcgccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacaaaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgaga +>IGHV1/OR15-9*01 +caggtacagctgatgcagtctggggct...gaggtgaagaagcctggggcctcagtgaggatctcctgcaaggcttctggatacaccttc............accagctactgtatgcactgggtgtgccaggcccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...ggcagagtcaccataaccagggacacatccatgggcacagcctacatggagctaagcagcctgagatctgaggacacggccatgtattactgtgtgagaga +>IGHV1/OR21-1*01 +caggtacagctggtgcagtctggggct...gaggtgaagaagcctggggcctcagtgaaggtctcctgcaaggcttctggatacaccatc............accagctactgtatgcactgggtgcaccaggtccatgcacaagggcttgagtggatgggattggtgtgccctagt......gatggcagcacaagctatgcacagaagttccag...gccagagtcaccataaccagggacacatccatgagcacagcctacatggagctaagcagtctgagatctgaggacacggccatgtattactgtgtgagaga +>IGHV2-10*01 +caggtcaccttgaaggagtctggtcct...gcactggtgaaacccacacagaccctcatgctgacctgcaccttctctgggttctcactcagc......acttctggaatgggtgtgggttagatctgtcagccctcagcaaaggccctggagtggcttgcacacatttattagaat.........gataataaatactacagcccatctctgaag...agtaggctcattatctccaaggacacctccaagaatgaagtggttctaacagtgatcaacatggacattgtggacacagccacacattactgtgcaaggagac +>IGHV2-26*01 +caggtcaccttgaaggagtctggtcct...gtgctggtgaaacccacagagaccctcacgctgacctgcaccgtctctgggttctcactcagc......aatgctagaatgggtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacacattttttcgaat.........gacgaaaaatcctacagcacatctctgaag...agcaggctcaccatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacatattactgtgcacggatac +>IGHV2-5*01 
+cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*02 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*03 +................................gctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccattaccaaggacacctccaaaaaccaggt +>IGHV2-5*04| +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattggaat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtac +>IGHV2-5*05 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*06 +cagatcaccttgaaggagtctggtcct...acgctggtaaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacaga +>IGHV2-5*08 
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacagcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-5*09 +caggtcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggagtgggtgtgggctggatccgtcagcccccaggaaaggccctggagtggcttgcactcatttattgggat.........gatgataagcgctacggcccatctctgaag...agcaggctcaccatcaccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-70*01 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70*02 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*03 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*04 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattac +>IGHV2-70*05 
+..........................t...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgcgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatgga +>IGHV2-70*06 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatccctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*07 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccggggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*08 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcgccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacggccgtgtattactg +>IGHV2-70*09 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacccgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaac...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacaggcacatattactgtgtacgg +>IGHV2-70*10 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggattgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70*11 
+cgggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70*12 +cagatcaccttgaaggagtctggtcct...acgctggtgaaacccacacagaccctcacgctgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacatattactgtgcacacagac +>IGHV2-70*13 +caggtcaccttgagggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgtgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcactcattgattgggat.........gatgataaatactacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattattgtgcacggatac +>IGHV2-70D*04 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccagggaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2-70D*14 +caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacacagaccctcacactgacctgcaccttctctgggttctcactcagc......actagtggaatgcgtgtgagctggatccgtcagcccccaggtaaggccctggagtggcttgcacgcattgattgggat.........gatgataaattctacagcacatctctgaag...accaggctcaccatctccaaggacacctccaaaaaccaggtggtccttacaatgaccaacatggaccctgtggacacagccacgtattactgtgcacggatac +>IGHV2/OR16-5*01 
+caggtcaccttgaaggagtctggtcct...gcgctggtgaaacccacagagaccctcacgctgacctgcactctctctgggttctcactcagc......acttctggaatgggtatgagctggatccgtcagcccccagggaaggccctggagtggcttgctcacatttttttgaat.........gacaaaaaatcctacagcacgtctctgaag...aacaggctcatcatctccaaggacacctccaaaagccaggtggtccttaccatgaccaacatggaccctgtggacacagccacgtattactgtgcatggagag +>IGHV3-11*01 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-11*03 +caggtgcagctgttggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgaga +>IGHV3-11*04 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-11*05 +caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-11*06 
+caggtgcagctggtggagtctggggga...ggcttggtcaagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctggatccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-13*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-13*02 +gaggtgcatctggtggagtctggggga...ggcttggtacagcctgggggggccctgagactctcctgtgcagcctctggattcaccttc............agtaactacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagccaatggtactgct.........ggtgacacatactatccaggctccgtgaag...gggcgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-13*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctgtggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccaattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaaga +>IGHV3-13*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggaatgggtctcagctattggtactgct.........ggtgacacatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga +>IGHV3-13*05 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctacgacatgcactgggtccgccaagctacaggaaaaggtctggagtgggtctcagctattggtactgct.........ggtgacccatactatccaggctccgtgaag...ggccgattcaccatctccagagaaaatgccaagaactccttgtatcttcaaatgaacagcctgagagccggggacacggctgtgtattactgtgcaagaga 
+>IGHV3-15*01 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*02 +gaggtgcagctggtggagtctggggga...gccttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*03 +gaggtgcagctggtggagtctgccgga...gccttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagttgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*04 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattgaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*05 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagtctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*06 
+gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggattcactttc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacaaactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*07 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtcccttagactctcctgtgcagcctctggtttcactttc............agtaacgcctggatgaactgggtccgccaggctccagggaaggggctggagtgggtcggccgtattaaaagcaaaactgatggtgggacaacagactacgctgcacccgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtaccacaga +>IGHV3-15*08 +gaggtgcagctggtggagtctgcggga...ggcttggtacagcctggggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg +>IGHV3-16*01 +gaggtacaactggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-16*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggcccgcaaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgtggactccgtgaag...cgccgattcatcatctccagagacaattccaggaactccctgtatctgcaaaagaacagacggagagccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-19*01 
+acagtgcagctggtggagtctggggga...ggcttggtagagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccgccaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacttcctgtatcagcaaatgaacagcctgaggcccgaggacatggctgtgtattactgtgtgagaaa +>IGHV3-20*01 +gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga +>IGHV3-20*02 +gaggtgcagctggtggagtctggggga...ggtgtggtacggcctggggggtccctgagactctcctttgcagcctctggattcaccttt............gatgattatggcatgagctgggtccgccaagctccagggaaggggctggagtgggtctctggtattaattggaat......ggtggtagcacaggttatgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagccgaggacacggccttgtatcactgtgcgagaga +>IGHV3-21*01 +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-21*02 +gaggtgcaactggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-21*03 
+gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacagctgtgtattactgtgcgagaga +>IGHV3-21*04 +gaggtgcagctggtggagtctggggga...ggcctggtcaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt......agtagttacatatactacgcagactcagtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-22*01 +gaggtgcatctggtggagtctggggga...gccttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga +>IGHV3-22*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agttactactacatgagcggggtccgccaggctcccgggaaggggctggaatgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaagagcctgaaaaccgaggacacggccgtgtattactgttccagaga +>IGHV3-23*01 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*02 
+gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacggagactccgtgaag...ggccggttcaccatctcaagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*03 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagataattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23*05 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctatttatagcagt......ggtagtagcacatactatgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaa +>IGHV3-23D*01 +gaggtgcagctgttggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-23D*02 
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agcagctatgccatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagctattagtggtagt......ggtggtagcacatactacgcagactccgtgaag...ggccggttcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggccgtatattactgtgcgaaaga +>IGHV3-25*01 +gagatgcagctggtggagtctggggga...ggcttgcaaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga +>IGHV3-25*02 +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggtttgacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga +>IGHV3-25*03 +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattagtgtaccaga +>IGHV3-25*04 +gagacgcagctggtggagtctggggga...ggcttggcaaagcctgggcggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctgtattactgtaccagaga +>IGHV3-25*05 +gagatgcagctggtggagtctggggga...ggcttggcaaagcctgcgtggtccccgagactctcctgtgcagcctctcaattcaccttc............agtagctactacatgaactgtgtccgccaggctccagggaatgggctggagttggttggacaagttaatcctaat......gggggtagcacatacctcatagactccggtaag...gaccgattcaatacctccagagataacgccaagaacacacttcatctgcaaatgaacagcctgaaaaccgaggacacggccctctattagtgtaccagaga 
+>IGHV3-29*01 +gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt +>IGHV3-30*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*02 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30*03 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*04 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*05 
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgagggcacggctgtgtattactgtgcgagaga +>IGHV3-30*06 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*07 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*08 +caggtgcagctggtggactctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctgcattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga +>IGHV3-30*09 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcgccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*10 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacacagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga 
+>IGHV3-30*11 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*12 +caggtgcagctggtggagtctgggggg...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*13 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacaggctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*14 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*15 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgagcagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*16 
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggccccaggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*17 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccgggcaaggggctagagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30*18 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30*19 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-2*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgaggca +>IGHV3-30-22*01 
+gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagagctgaggacatggacgtgtatggctgtacataaggtc +>IGHV3-30-3*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-3*02 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagcaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-3*03 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-30-33*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgagg +>IGHV3-30-42*01 
+gaggtggagctgatagagcccacagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagcccagttcaccagtctgcaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacagtcagagaactgaggacatggctgtgtatggctgtacataaggtt +>IGHV3-30-5*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-5*02 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcatttatacggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-30-52*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaaggaactcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcataatctttgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgctaatgaacagtctgagagcagcgggcacagctgtgtgttactgtatgtgagg +>IGHV3-32*01 +gaggtggagctgatagagtccatagag...gacctgagacaacctgggaagttcctgagactctcctgtgtagcctctagattcgccttc............agtagcttctgaatgagccgagttcaccagtctccaggcaaggggctggagtgagtaatagatataaaagatgat......ggaagtcagatacaccatgcagactctgtgaag...ggcagattctccatctccaaagacaatgctaagaactctctgtatctgcaaatgaacactcagagagctgaggacgtggccgtgtatggctatacataaggtc +>AIGHV3-33*01 
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*02 +caggtacagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgcgaag...ggccgattcaccatctccagagacaattccacgaacacgctgtttctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*03 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaactccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-33*04 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctagagtgggtggcagttatatggtatgac......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*05 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatcatatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-33*06 
+caggtgcagctggtggagtctggggga...ggcgtggtccagcctgggaggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtggcagttatatggtatgat......ggaagtaataaatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaaaga +>IGHV3-33-2*01 +gaggtacagctcgtggagtccggagag...gacccaagacaacctgggggatccttgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcggtttcccaggctccagggaaggggctggagtgagtagtagatatacagtgtgat......ggaagtcagatatgttatgcccaatctgtgaag...agcaaattcaccatctccaaagaaaatgccaagaactcactgtatttgcaaatgaacagtctgagagcagagggcacagctgtgtgttactgtatgtgaggca +>IGHV3-35*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctgggggatccctgagactctcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtccatcaggctccaggaaaggggctggagtgggtatcgggtgttagttggaat......ggcagtaggacgcactatgcagactctgtgaag...ggccgattcatcatctccagagacaattccaggaacaccctgtatctgcaaacgaatagcctgagggccgaggacacggctgtgtattactgtgtgagaaa +>IGHV3-38*01| +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgcgtattactgtgccagatata +>IGHV3-38*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaaggggctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata +>IGHV3-38*03 
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctagggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctggatccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacaacctgagagctgagggcacggccgtgtattactgtgccagatata +>IGHV3-38-3*01 +gaggtgcagctggtggagtctcgggga...gtcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaatgagatgagctgggtccgccaggctccagggaagggtctggagtgggtctcatccattagtggt............ggtagcacatactacgcagactccaggaag...ggcagattcaccatctccagagacaattccaagaacacgctgcatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtaagaaaga +>IGHV3-43*01 +gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattataccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-43*02 +gaagtgcagctggtggagtctggggga...ggcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccagggaagggtctggagtgggtctctcttattagtggggat......ggtggtagcacatactatgcagactctgtgaag...ggccgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagaactgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-43D*01 +gaagtgcagctggtggagtctggggga...gtcgtggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccgtcaagctccggggaagggtctggagtgggtctctcttattagttgggat......ggtggtagcacctactatgcagactctgtgaag...ggtcgattcaccatctccagagacaacagcaaaaactccctgtatctgcaaatgaacagtctgagagctgaggacaccgccttgtattactgtgcaaaagata +>IGHV3-47*01 
+gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgcgaccctcctgtgcagcctctggattcgccttc............agtagctatgctctgcactgggttcgccgggctccagggaagggtctggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcatatgaacagcctgatagctgaggacatggctgtgtattattgtgcaaga +>IGHV3-47*02 +gaggatcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagaccctcctgtgcagcctctggattcgccttc............agtagctatgttctgcactgggttcgccgggctccagggaagggtccggagtgggtatcagctattggtactggt.........ggtgatacatactatgcagactccgtgatg...ggccgattcaccatctccagagacaacgccaagaagtccttgtatcttcaaatgaacagcctgatagctgaggacatggctgtgtattattgtgcaagaga +>IGHV3-48*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-48*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaatgccaagaactcactgtatctgcaaatgaacagcctgagagacgaggacacggctgtgtattactgtgcgagaga +>IGHV3-48*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagttatgaaatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......ggtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga +>IGHV3-48*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatagcatgaactgggtccgccaggctccagggaaggggctggagtgggtttcatacattagtagtagt......agtagtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga 
+>IGHV3-49*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacaccgcgtctgtgaaa...ggcagattcaccatctcaagagatggttccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggccgtccctgagactctcctgtacagcttctggattcaccttt............gggtattatcctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*03 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*04 +gaggtgcagctggtggagtctggggga...ggcttggtacagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctgggtccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-49*05 +gaggtgcagctggtggagtctggggga...ggcttggtaaagccagggcggtccctgagactctcctgtacagcttctggattcaccttt............ggtgattatgctatgagctggttccgccaggctccagggaaggggctggagtgggtaggtttcattagaagcaaagcttatggtgggacaacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattccaaaagcatcgcctatctgcaaatgaacagcctgaaaaccgaggacacagccgtgtattactgtactagaga +>IGHV3-52*01 
+gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgagagg +>IGHV3-52*02 +gaggtgcagctggtggagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggcaggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga +>IGHV3-52*03 +gaggtgcagctggtcgagtctgggtga...ggcttggtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctcctggatgcactgggtctgccaggctccggagaaggggctggagtgggtggccgacataaagtgtgac......ggaagtgagaaatactatgtagactctgtgaag...ggccgattgaccatctccagagacaatgccaagaactccctctatctgcaagtgaacagcctgagagctgaggacatgaccgtgtattactgtgtgaga +>IGHV3-53*01 +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-53*02 +gaggtgcagctggtggagactggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-53*03 +gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccagcctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactctgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgctaggga 
+>IGHV3-53*04 +gaggtgcagctggtggagtctggagga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagacacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggccgtgtattactgtgcgagaga +>IGHV3-54*01 +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaagctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt +>IGHV3-54*02 +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtacgat......agaagtcagatatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactccgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagg +>IGHV3-54*04 +gaggtacagctggtggagtctgaagaa...aaccaaagacaacttgggggatccctgagactctcctgtgcagactctggattaaccttc............agtagctactgaatgagctcagattcccaggctccagggaaggggctggagtgagtagtagatatatagtaggat......agaagtcagctatgttatgcacaatctgtgaag...agcagattcaccatctccaaagaaaatgccaagaactcactctgtttgcaaatgaacagtctgagagcagagggcacggccgtgtattactgtatgtgagt +>IGHV3-62*01 +gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctctgctatgcactgggtccgccaggctccaagaaagggtttgtagtgggtctcagttattagtacaagt......ggtgataccgtactctacacagactctgtgaag...ggccgattcaccatctccagagacaatgcccagaattcactgtctctgcaaatgaacagcctgagagccgagggcacagttgtgtactactgtgtgaaaga +>IGHV3-63*01 
+gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctccaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataaggtt +>IGHV3-63*02 +gaggtggagctgatagagtccatagag...ggcctgagacaacttgggaagttcctgagactctcctgtgtagcctctggattcaccttc............agtagctactgaatgagctgggtcaatgagactctagggaaggggctggagggagtaatagatgtaaaatatgat......ggaagtcagatataccatgcagactctgtgaag...ggcagattcaccatctccaaagacaatgctaagaactcaccgtatctgcaaacgaacagtctgagagctgaggacatgaccatgcatggctgtacataa +>IGHV3-64*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcaaactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga +>IGHV3-64*02 +gaggtgcagctggtggagtctggggaa...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatattatgcagactctgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgggcagcctgagagctgaggacatggctgtgtattactgtgcgagaga +>IGHV3-64*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgtccaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-64*04 
+caggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-64*05 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactcagtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatgttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-64D*06 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgttcagcctctggattcaccttc............agtagctatgctatgcactgggtccgccaggctccagggaagggactggaatatgtttcagctattagtagtaat......gggggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgagcagtctgagagctgaggacacggctgtgtattactgtgtgaaaga +>IGHV3-66*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-66*02 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaga +>IGHV3-66*03 
+gaggtgcagctggtggagtctggagga...ggcttgatccagcctggggggtccctgagactctcctgtgcagcctctgggttcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagctgt.........ggtagcacatactacgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgagaga +>IGHV3-66*04 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccgtc............agtagcaactacatgagctgggtccgccaggctccagggaaggggctggagtgggtctcagttatttatagcggt.........ggtagcacatactacgcagactccgtgaag...ggcagattcaccatctccagagacaattccaagaacacgctgtatcttcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaca +>IGHV3-69-1*01 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-69-1*02 +gaggtgcagctggtggagtctggggga...ggcttggtaaagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgaactgggtccgccaggctccagggaaggggctggagtgggtctcatccattagtagtagt.........agtaccatatactacgcagactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtttattactgtgcgagaga +>IGHV3-7*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-7*02 
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaagggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgaga +>IGHV3-7*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttt............agtagctattggatgagctgggtccgccaggctccagggaaggggctggagtgggtggccaacataaagcaagat......ggaagtgagaaatactatgtggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-71*01 +gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggccgtgtattactgtgcgagaga +>IGHV3-71*02 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcgagaga +>IGHV3-71*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctggtttcaccttc............agtgactactacatgagctgggtccgccaggctcccgggaaggggctggagtgggtaggtttcattagaaacaaagctaatggtgggacaacagaatagaccacgtctgtgaaa...ggcagattcacaatctcaagagatgattccaaaagcatcacctatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgcgagaga +>IGHV3-72*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagttacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtgctagaga 
+>IGHV3-72*02 +....................................................................................accttc............agtgaccactacatggactgggtccgccaggctccagggaaggggctggagtgggttggccgtactagaaacaaagctaacagctacaccacagaatacgccgcgtctgtgaaa...ggcagattcaccatctcaagagatgattcaaagaactcactgtat +>IGHV3-73*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca +>IGHV3-73*02 +gaggtgcagctggtggagtccggggga...ggcttggtccagcctggggggtccctgaaactctcctgtgcagcctctgggttcaccttc............agtggctctgctatgcactgggtccgccaggcttccgggaaagggctggagtgggttggccgtattagaagcaaagctaacagttacgcgacagcatatgctgcgtcggtgaaa...ggcaggttcaccatctccagagatgattcaaagaacacggcgtatctgcaaatgaacagcctgaaaaccgaggacacggccgtgtattactgtactagaca +>IGHV3-74*01 +gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga +>IGHV3-74*02 +gaggtgcagctggtggagtctggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaaga +>IGHV3-74*03 +gaggtgcagctggtggagtccggggga...ggcttagttcagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaacgtacgcggactccgtgaag...ggccgattcaccatctccagagacaacgccaagaacacgctgtatctgcaaatgaacagtctgagagccgaggacacggctgtgtattactgtgcaagaga +>IGHV3-9*01 
+gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata +>IGHV3-9*02 +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcacctct............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacacggccttgtattactgtgcaaaagata +>IGHV3-9*03 +gaagtgcagctggtggagtctggggga...ggcttggtacagcctggcaggtccctgagactctcctgtgcagcctctggattcaccttt............gatgattatgccatgcactgggtccggcaagctccagggaagggcctggagtgggtctcaggtattagttggaat......agtggtagcataggctatgcggactctgtgaag...ggccgattcaccatctccagagacaacgccaagaactccctgtatctgcaaatgaacagtctgagagctgaggacatggccttgtattactgtgcaaaagata +>IGHV3-NL1*01 +caggtgcagctggtggagtctggggga...ggcgtggtccagcctggggggtccctgagactctcctgtgcagcgtctggattcaccttc............agtagctatggcatgcactgggtccgccaggctccaggcaaggggctggagtgggtctcagttatttatagcggt......ggtagtagcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaattccaagaacacgctgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgcgaaaga +>IGHV3/OR15-7*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgatgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctaga +>IGHV3/OR15-7*02 
+gaggtgcagctgttggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgctgcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga +>IGHV3/OR15-7*03 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcagcctgaaaaccgaggacttggccgtgtattactgtgctaga +>IGHV3/OR15-7*05 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctgggggttctctgagactctcatgtgcagcctctggattcaccttc............agtgaccactacatgagctgggtccgccaggctcaagggaaagggctagagttggtaggtttaataagaaacaaagctaacagttacacgacagaatatgctgcgtctgtgaaa...ggcagacttaccatctcaagagaggattcaaagaacacgctgtatctgcaaatgagcaacctgaaaaccgaggacttggccgtgtattactgtgctagaga +>IGHV3/OR16-10*01 +gaggttcagctggtgcagtctggggga...ggcttggtacatcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga +>IGHV3/OR16-10*02 +gaggttcagctggtgcagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaaga +>IGHV3/OR16-10*03 
+gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactctcctgtgcaggctctggattcaccttc............agtagctatgctatgcactgggttcgccaggctccaggaaaaggtctggagtgggtatcagctattggtactggt.........ggtggcacatactatgcagactccgtgaag...ggccgattcaccatctccagagacaatgccaagaactccttgtatcttcaaatgaacagcctgagagccgaggacatggctgtgtattactgtgcaagaga +>IGHV3/OR16-12*01 +gaggtgcagctggtagagtctgggaga...ggcttggcccagcctggggggtacctaaaactctccggtgcagcctctggattcaccgtc............ggtagctggtacatgagctggatccaccaggctccagggaagggtctggagtgggtctcatacattagtagtagt......ggttgtagcacaaactacgcagactctgtgaag...ggcagattcaccatctccacagacaactcaaagaacacgctctacctgcaaatgaacagcctgagagtggaggacacggccgtgtattactgtgcaaga +>IGHV3/OR16-13*01 +gaggtgcagctggtggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaagctccagggaaggggctggtgtgggtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccatgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga +>IGHV3/OR16-14*01 +gaggtgcagctggaggagtctggggga...ggcttagtacagcctggagggtccctgagactctcctgtgcagcctctggattcaccttc............agtagctactggatgcactgggtccgccaatctccagggaaggggctggtgtgagtctcacgtattaatagtgat......gggagtagcacaagctacgcagactccttgaag...ggccaattcaccatctccagagacaatgctaagaacacgctgtatctgcaaatgaacagtctgagagctgaggacatggctgtgtattactgtactaga +>IGHV3/OR16-15*01 +gaagtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagactctcctgtgcagcctctgtattcaccttc............agtaacagtgacataaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaattttccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgagaaa +>IGHV3/OR16-15*02 
+gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcgggtattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaagacatggccgtgtattactgtgtgaga +>IGHV3/OR16-16*01 +gaggtgcagctggtggagtctggggga...ggcttggtccagcctggggggtccctgagacactcctgtgcagcctctggattcaccttc............agtaacagtgacatgaactgggtcctctaggctccaggaaaggggctggagtgggtctcggatattagttggaat......ggcggtaagacgcactatgtggactccgtgaag...ggccaatttaccatctccagagacaattccagcaagtccctgtatctgcaaaagaacagacagagagccaaggacatggccgtgtattactgtgtgaga +>IGHV3/OR16-6*02 +gaggtgcagctggtggagtctgcggga...ggccttggtacagcctgggggtcccttagactctcctgtgcagcctctggattcacttgc............agtaacgcctggatgagctgggtccgccaggctccagggaaggggctggagtgggttggctgtattaaaagcaaagctaatggtgggacaacagactacgctgcacctgtgaaa...ggcagattcaccatctcaagagatgattcaaaaaacacgctgtatctgcaaatgatcagcctgaaaaccgaggacacggccgtgtattactgtaccacagg +>IGHV3/OR16-8*01 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagcctctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtgggtttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcctgagagctgaggacacggctgtgtattactgtgtgaaa +>IGHV3/OR16-8*02 +gaggtgcagctggtggagtctggggga...ggcttggtacagcctggggggtccctgagactgtcctgtccagactctggattcaccttc............agtaaccactacatgagctgggtccgccaggctccagggaagggactggagtggatttcatacattagtggtgat......agtggttacacaaactacgcagactctgtgaag...ggccgattcaccatctccagggacaacgccaataactcaccgtatctgcaaatgaacagcttgagagctgaggacacggctgtgtattactgtgtgaaaca +>IGHV3/OR16-9*01 
+gaggtgcagctggtggagtctggagga...ggcttggtacagcctggggggtccctgagactctcctgtgcagcctctggattcaccttc............agtaaccactacacgagctgggtccgccaggctccagggaagggactggagtgggtttcatacagtagtggtaat......agtggttacacaaactacgcagactctgtgaaa...ggccgattcaccatctccagggacaacgccaagaactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgtgaaa +>IGHV4-28*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaga +>IGHV4-28*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacaccggcgtgtattactgtgcgaga +>IGHV4-28*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcatctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa 
+>IGHV4-28*06 +caggtgcagctacaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccttggacacggccgtgtattactgtgcgagaaa +>IGHV4-28*07 +caggtacagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtagtaactggtggggctggatccggcagcccccagggaagggactggagtggattgggtacatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-30-2*01 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-30-2*02 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg +>IGHV4-30-2*03 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcagacacggctgtgtattactgtgcgagaca +>IGHV4-30-2*04 
+...........................................................................tctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-30-2*05 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-2*06 +cagctgcagctgcaggagtccggctca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagtcaccagggaagggcctggagtggattgggtacatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaggtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-30-4*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgcagcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*03 
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>XIGHV4-30-4*04 +caggtgcagctgcaggactcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcccccagggaagggcctggagtggattgggtacttctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactg +>IGHV4-30-4*05 +..........................................................................ctctggtggctccatcagc......agtggtgattactactggagttggatccgccagcncccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*06 +...........................................................................tctggtggctccatcagc......agtggtgattactactggagttggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcagacacggccgtgtattactgtgccagaga +>IGHV4-30-4*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctctggtggctccatcagc......agtggtggttactcctggagctggatccggcagccaccagggaagggactggagtggattgggtatatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga +>IGHV4-31*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtctagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga 
+>IGHV4-31*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgtactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-31*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-31*04 +caggtgcggctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg +>IGHV4-31*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgacc...gcggacgcggccgtgtattactgtgcg +>IGHV4-31*06 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>IGHV4-31*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggatccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg 
+>IGHV4-31*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactg +>IGHV4-31*09 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-31*10 +caggtgcagctgcaggagtcgggccca...ggactgttgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtggttactactggagctggatccgccagcacccagggaagggcctggagtggattgggtgcatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacccgtccaagaaccagttctccctgaagccgagctctgtgactgccgcggacacggccgtggattactgtgcgagaga +>IGHV4-34*01 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*02 +caggtgcagctacaacagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*03 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg 
+>IGHV4-34*04 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*05 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggtgctggatccgccagcccctagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaacaacaacccgtccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-34*06 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgggctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-34*07 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaaccatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-34*08 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggaccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcg +>IGHV4-34*09 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcgagaga 
+>IGHV4-34*10 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaagggactggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-34*11 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccgtc............agtggttactactggagctggatccggcagcccccagggaaggggctggagtggattgggtatatctattatagt.........gggagcaccaacaacaacccctccctcaag...agtcgagccaccatatcagtagacacgtccaagaaccagttctccctgaacctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga +>IGHV4-34*12 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcattcatagt.........ggaagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgaga +>IGHV4-34*13 +...........................................................................tatggtgggtccttc............agtggttactactggagctggatccgccagcccccagggaaggggctggagtggattggggaaatcaatcatagt.........ggaagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggctgtgtattactgtgcgagagg +>IGHV4-38-2*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgctgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgaga +>IGHV4-38-2*02 
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggttactccatcagc.........agtggttactactggggctggatccggcagcccccagggaaggggctggagtggattgggagtatctatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-39*01 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaca +>IGHV4-39*02 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcgagaga +>IGHV4-39*03 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg +>IGHV4-39*04 +..................................................................................gctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacac +>IGHV4-39*05 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccccgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg +>IGHV4-39*06 
+cggctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttccccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-39*07 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccgccagcccccagggaaggggctggagtggattgggagtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattgctgtgcgagaga +>IGHV4-4*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-4*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctatctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-4*05 
+caggtgcagctgcaggagttgggccca...ggactggtgaagcctccggggaccctgtccctcacctgcgctgtctctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-4*06 +............................................................ +...............tctggtggctccatcagc.........agtagtaactggtggagttgggtccgccagcccccagggannnggctggagtggattggggaaatctatcatagt.........gggagcaccaactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-4*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-55*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-55*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata 
+>IGHV4-55*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-55*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-55*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagctttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactg +>IGHV4-55*06 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaagcagttctacctgaagctgagctctgtgaccgctgcggacacggccgtgtattactg +>IGHV4-55*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaggaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactg +>IGHV4-55*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtcagtagacacgtccaagaaccagttctacctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4-55*09 
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcatctgcgctgtctctggtgactccatcagc.........agtggtaactggtgaatctgggtccgccagcccccagggaaggggctggagtggattggggaaatccatcatagt.........gggagcacctactacaacccgtccctcaag...agtcgaatcaccatgtccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgtggacacggccgtgtattactgtgcgagaaa +>IGHV4-59*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-59*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-59*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccaattctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcg +>IGHV4-59*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg +>IGHV4-59*05 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagccgccggggaagggactggagtggattgggcgtatctattatagt.........gggagcacctactacaacccgtccctcaag...agtcgagtcaccatatccgtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggctgtgtattactgtgcg 
+>IGHV4-59*06 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtcactggtggctccatc............agtagttactactggagctggatccggcagcccgctgggaagggcctggagtggattgggtacatctattacagt.........gggagcacctactacaacccgtccctcaag...agtcgagttaccatatcagtagacacgtctaagaaccagttctccctgaagctgagctctgtgactgccgcggacacggccgtgtattactgtgcg +>IGHV4-59*07 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggacaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgaga +>IGHV4-59*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatc............agtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaca +>IGHV4-59*09 +...........................................................................tctggtggctccatc............agtagttactactggagctggatccggcagcccccaggnannngactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagagg +>IGHV4-59*10 +caggtgcagctacagcagtggggcgca...ggactgttgaagccttcggagaccctgtccctcacctgcgctgtctatggtggctccatc............agtagttactactggagctggatccggcagcccgccgggaaggggctggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatgtcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagata +>IGHV4-61*01 
+caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-61*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcacagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtggtagttactactggagctggatccggcagcccgccgggaagggactggagtggattgggcgtatctataccagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcagacacggccgtgtattactgtgcgagaga +>IGHV4-61*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccacttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4-61*04 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattggatatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgct...gacacggccgtgtattactg +>IGHV4-61*05 +cagctgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccatcagc......agtagtagttactactggggctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgaga +>IGHV4-61*06 +...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgccagaga 
+>IGHV4-61*07 +...........................................................................tctggtggctccgtcagc......agtggtagttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaca +>IGHV4-61*08 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcactgtctctggtggctccgtcagc......agtggtggttactactggagctggatccggcagcccccagggaagggactggagtggattgggtatatctattacagt.........gggagcaccaactacaacccctccctcaag...agtcgagtcaccatatcagtagacacgtccaagaaccagttctccctgaagctgagctctgtgaccgctgcggacacggccgtgtattactgtgcgagaga +>IGHV4/OR15-8*01 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccgtccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4/OR15-8*02 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggaaccccaactacaacccgtccctcaag...agtcgagtcaccatatcaatagacaagtccaagaaccaattctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV4/OR15-8*03 +caggtgcagctgcaggagtcgggccca...ggactggtgaagccttcggagaccctgtccctcacctgcgttgtctctggtggctccatcagc.........agtagtaactggtggagctgggtccgccagcccccagggaaggggctggagtggattggggaaatctatcatagt.........gggagccccaactacaacccatccctcaag...agtcgagtcaccatatcagtagacaagtccaagaaccagttctccctgaagctgagctctgtgaccgccgcggacacggccgtgtattactgtgcgagaga +>IGHV5-10-1*01 
+gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-10-1*02 +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcttggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggc.tcggacaccgccatgtattactgtgcgagaca +>IGHV5-10-1*03 +gaagtgcagctggtgcagtccggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccacgtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-10-1*04 +gaagtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaggatctcctgtaagggttctggatacagcttt............accagctactggatcagctgggtgcgccagatgcccgggaaaggcctggagtggatggggaggattgatcctagt......gactcttataccaactacagcccgtccttccaa...ggccaggtcaccatctcagctgacaagtccatcagcactgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-51*01 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca +>IGHV5-51*02 
+gaggtgcagctggtgcagtctggagca...gaggtgaaaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggaccggctgggtgcgccagatgcccgggaaaggcttggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgagaca +>IGHV5-51*03 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-51*04 +gaggtgcagctggtgcagtctggagca...gaggtgaaaaagccgggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccgggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagcccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatgtattactgtgcgaga +>IGHV5-51*05 +.....................................aaaagcccggggagtctctgaagatctcctgtaagggttctggatacagcttt............accagctactggatcggctgggtgcgccagatgcccaggaaaggcctggagtggatggggatcatctatcctggt......gactctgataccagatacagcccgtccttccaa...ggccaggtcaccatctcagccgacaagtccatcagcaccgcctacctgcagtggagcagcctgaaggcctcggacaccgccatg +>IGHV5-78*01 +gaggtgcagctgttgcagtctgcagca...gaggtgaaaagacccggggagtctctgaggatctcctgtaagacttctggatacagcttt............accagctactggatccactgggtgcgccagatgcccgggaaagaactggagtggatggggagcatctatcctggg......aactctgataccagatacagcccatccttccaa...ggccacgtcaccatctcagccgacagctccagcagcaccgcctacctgcagtggagcagcctgaaggcctcggacgccgccatgtattattgtgtgaga +>IGHV6-1*01 +caggtacagctgcagcagtcaggtcca...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga +>IGHV6-1*02 
+caggtacagctgcagcagtcaggtccg...ggactggtgaagccctcgcagaccctctcactcacctgtgccatctccggggacagtgtctct......agcaacagtgctgcttggaactggatcaggcagtccccatcgagaggccttgagtggctgggaaggacatactacaggtcc...aagtggtataatgattatgcagtatctgtgaaa...agtcgaataaccatcaacccagacacatccaagaaccagttctccctgcagctgaactctgtgactcccgaggacacggctgtgtattactgtgcaagaga +>IGHV7-34-1*01 +...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......actgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta +>IGHV7-34-1*02 +...ctgcagctggtgcagtctgggcct...gaggtgaagaagcctggggcctcagtgaaggtctcctataagtcttctggttacaccttc............accatctatggtatgaattgggtatgatagacccctggacagggctttgagtggatgtgatggatcatcacctac......aatgggaacccaacgtatacccacggcttcaca...ggatggtttgtcttctccatggacacgtctgtcagcacggcgtgtcttcagatcagcagcctaaaggctgaggacacggccgagtattactgtgcgaagta +>IGHV7-4-1*01 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatctgcagcctaaaggctgaggacactgccgtgtattactgtgcgaga +>IGHV7-4-1*02 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga +>IGHV7-4-1*03 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcacggcatatctgcagatcagcacgctaaaggctgaggacactg +>IGHV7-4-1*04 
+caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtattactgtgcgagaga +>IGHV7-4-1*05 +caggtgcagctggtgcaatctgggtct...gagttgaagaagcctggggcctcagtgaaggtttcctgcaaggcttctggatacaccttc............actagctatgctatgaattgggtgcgacaggcccctggacaagggcttgagtggatgggatggatcaacaccaac......actgggaacccaacgtatgcccagggcttcaca...ggacggtttgtcttctccttggacacctctgtcagcatggcatatctgcagatcagcagcctaaaggctgaggacactgccgtgtgttactgtgcgagaga +>AIGHV7-40*03| +ttttcaatagaaaagtcaaataatcta...agtgtcaatcagtggatgattagataaaatatgatatatgtaaatcatggaatactatgc............agccagtatggtatgaattcagtgtgaccagcccctggacaagggcttgagtggatgggatggatcatcacctac......actgggaacccaacatataccaacggcttcaca...ggacggtttctattctccatggacacctctgtcagcatggcgtatctgcagatcagcagcctaaaggctgaggacacggccgtgtatgactgtatgagaga +>IGHV7-81*01 +caggtgcagctggtgcagtctggccat...gaggtgaagcagcctggggcctcagtgaaggtctcctgcaaggcttctggttacagtttc............accacctatggtatgaattgggtgccacaggcccctggacaagggcttgagtggatgggatggttcaacacctac......actgggaacccaacatatgcccagggcttcaca...ggacggtttgtcttctccatggacacctctgccagcacagcatacctgcagatcagcagcctaaaggctgaggacatggccatgtattactgtgcgagata diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/comparePDFs.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/baseline/comparePDFs.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,225 @@ +options("warn"=-1) + +#from http://selection.med.yale.edu/baseline/Archive/Baseline%20Version%201.3/Baseline_Functions_Version1.3.r +# Compute p-value of two distributions +compareTwoDistsFaster <-function(sigma_S=seq(-20,20,length.out=4001), N=10000, dens1=runif(4001,0,1), dens2=runif(4001,0,1)){ +#print(c(length(dens1),length(dens2))) +if(length(dens1)>1 & length(dens2)>1 ){ + dens1<-dens1/sum(dens1) + dens2<-dens2/sum(dens2) + cum2 <- cumsum(dens2)-dens2/2 + tmp<- 
sum(sapply(1:length(dens1),function(i)return(dens1[i]*cum2[i]))) + #print(tmp) + if(tmp>0.5)tmp<-tmp-1 + return( tmp ) + } + else { + return(NA) + } + #return (sum(sapply(1:N,function(i)(sample(sigma_S,1,prob=dens1)>sample(sigma_S,1,prob=dens2))))/N) +} + + +require("grid") +arg <- commandArgs(TRUE) +#arg <- c("300143","4","5") +arg[!arg=="clonal"] +input <- arg[1] +output <- arg[2] +rowIDs <- as.numeric( sapply(arg[3:(max(3,length(arg)))],function(x){ gsub("chkbx","",x) } ) ) + +numbSeqs = length(rowIDs) + +if ( is.na(rowIDs[1]) | numbSeqs>10 ) { + stop( paste("Error: Please select between one and 10 seqeunces to compare.") ) +} + +#load( paste("output/",sessionID,".RData",sep="") ) +load( input ) +#input + +xMarks = seq(-20,20,length.out=4001) + +plot_grid_s<-function(pdf1,pdf2,Sample=100,cex=1,xlim=NULL,xMarks = seq(-20,20,length.out=4001)){ + yMax = max(c(abs(as.numeric(unlist(listPDFs[pdf1]))),abs(as.numeric(unlist(listPDFs[pdf2]))),0),na.rm=T) * 1.1 + + if(length(xlim==2)){ + xMin=xlim[1] + xMax=xlim[2] + } else { + xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1] + xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1] + xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])] + xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])] + + xMin_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][1] + xMin_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][1] + xMax_CDR2 = xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["CDR"]]>0.001])] + xMax_FWR2 = xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf2][[1]][["FWR"]]>0.001])] + + xMin=min(c(xMin_CDR,xMin_FWR,xMin_CDR2,xMin_FWR2,0),na.rm=TRUE) + xMax=max(c(xMax_CDR,xMax_FWR,xMax_CDR2,xMax_FWR2,0),na.rm=TRUE) + } + + sigma<-approx(xMarks,xout=seq(xMin,xMax,length.out=Sample))$x + grid.rect(gp = gpar(col=gray(0.6),fill="white",cex=cex)) + x <- sigma + 
pushViewport(viewport(x=0.175,y=0.175,width=0.825,height=0.825,just=c("left","bottom"),default.units="npc")) + #pushViewport(plotViewport(c(1.8, 1.8, 0.25, 0.25)*cex)) + pushViewport(dataViewport(x, c(yMax,-yMax),gp = gpar(cex=cex),extension=c(0.05))) + grid.polygon(c(0,0,1,1),c(0,0.5,0.5,0),gp=gpar(col=grey(0.95),fill=grey(0.95)),default.units="npc") + grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.9),fill=grey(0.9)),default.units="npc") + grid.rect() + grid.xaxis(gp = gpar(cex=cex/1.1)) + yticks = pretty(c(-yMax,yMax),8) + yticks = yticks[yticks>(-yMax) & yticks<(yMax)] + grid.yaxis(at=yticks,label=abs(yticks),gp = gpar(cex=cex/1.1)) + if(length(listPDFs[pdf1][[1]][["CDR"]])>1){ + ycdr<-approx(xMarks,listPDFs[pdf1][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y + grid.lines(unit(x,"native"), unit(ycdr,"native"),gp=gpar(col=2,lwd=2)) + } + if(length(listPDFs[pdf1][[1]][["FWR"]])>1){ + yfwr<-approx(xMarks,listPDFs[pdf1][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y + grid.lines(unit(x,"native"), unit(-yfwr,"native"),gp=gpar(col=4,lwd=2)) + } + + if(length(listPDFs[pdf2][[1]][["CDR"]])>1){ + ycdr2<-approx(xMarks,listPDFs[pdf2][[1]][["CDR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y + grid.lines(unit(x,"native"), unit(ycdr2,"native"),gp=gpar(col=2,lwd=2,lty=2)) + } + if(length(listPDFs[pdf2][[1]][["FWR"]])>1){ + yfwr2<-approx(xMarks,listPDFs[pdf2][[1]][["FWR"]],xout=seq(xMin,xMax,length.out=Sample),yleft=0,yright=0)$y + grid.lines(unit(x,"native"), unit(-yfwr2,"native"),gp=gpar(col=4,lwd=2,lty=2)) + } + + grid.lines(unit(c(0,1),"npc"), unit(c(0.5,0.5),"npc"),gp=gpar(col=1)) + grid.lines(unit(c(0,0),"native"), unit(c(0,1),"npc"),gp=gpar(col=1,lwd=1,lty=3)) + + grid.text("Density", x = unit(-2.5, "lines"), rot = 90,gp = gpar(cex=cex)) + grid.text( expression(paste("Selection Strength (", Sigma, ")", sep="")) , y = unit(-2.5, "lines"),gp = gpar(cex=cex)) + + if(pdf1==pdf2 & 
length(listPDFs[pdf2][[1]][["FWR"]])>1 & length(listPDFs[pdf2][[1]][["CDR"]])>1 ){ + pCDRFWR = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf1]][["FWR"]]) + pval = formatC(as.numeric(pCDRFWR),digits=3) + grid.text( substitute(expression(paste(P[CDR/FWR], "=", x, sep="")),list(x=pval))[[2]] , x = unit(0.02, "npc"),y = unit(0.98, "npc"),just=c("left", "top"),gp = gpar(cex=cex*1.2)) + } + grid.text(paste("CDR"), x = unit(0.98, "npc"),y = unit(0.98, "npc"),just=c("right", "top"),gp = gpar(cex=cex*1.5)) + grid.text(paste("FWR"), x = unit(0.98, "npc"),y = unit(0.02, "npc"),just=c("right", "bottom"),gp = gpar(cex=cex*1.5)) + popViewport(2) +} +#plot_grid_s(1) + + +p2col<-function(p=0.01){ + breaks=c(-.51,-0.1,-.05,-0.01,-0.005,0,0.005,0.01,0.05,0.1,0.51) + i<-findInterval(p,breaks) + cols = c( rgb(0.8,1,0.8), rgb(0.6,1,0.6), rgb(0.4,1,0.4), rgb(0.2,1,0.2) , rgb(0,1,0), + rgb(1,0,0), rgb(1,.2,.2), rgb(1,.4,.4), rgb(1,.6,.6) , rgb(1,.8,.8) ) + return(cols[i]) +} + + +plot_pvals<-function(pdf1,pdf2,cex=1,upper=TRUE){ + if(upper){ + pCDR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["CDR"]], dens2=listPDFs[[pdf2]][["FWR"]]) + pFWR1FWR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens1=listPDFs[[pdf1]][["FWR"]], dens2=listPDFs[[pdf2]][["FWR"]]) + pFWR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["FWR"]]) + pCDR1CDR2 = compareTwoDistsFaster(sigma_S=xMarks, N=10000, dens2=listPDFs[[pdf2]][["CDR"]], dens1=listPDFs[[pdf1]][["CDR"]]) + grid.polygon(c(0.5,0.5,1,1),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1FWR2),fill=p2col(pFWR1FWR2)),default.units="npc") + grid.polygon(c(0.5,0.5,1,1),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1FWR2),fill=p2col(pCDR1FWR2)),default.units="npc") + grid.polygon(c(0.5,0.5,0,0),c(1,0.5,0.5,1),gp=gpar(col=p2col(pCDR1CDR2),fill=p2col(pCDR1CDR2)),default.units="npc") + 
grid.polygon(c(0.5,0.5,0,0),c(0,0.5,0.5,0),gp=gpar(col=p2col(pFWR1CDR2),fill=p2col(pFWR1CDR2)),default.units="npc") + + grid.lines(c(0,1),0.5,gp=gpar(lty=2,col=gray(0.925))) + grid.lines(0.5,c(0,1),gp=gpar(lty=2,col=gray(0.925))) + + grid.text(formatC(as.numeric(pFWR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) + grid.text(formatC(as.numeric(pCDR1FWR2),digits=3), x = unit(0.75, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) + grid.text(formatC(as.numeric(pCDR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.75, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) + grid.text(formatC(as.numeric(pFWR1CDR2),digits=3), x = unit(0.25, "npc"),y = unit(0.25, "npc"),just=c("center", "center"),gp = gpar(cex=cex)) + + + # grid.text(paste("P = ",formatC(pCDRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.98, "npc"),just=c("center", "top"),gp = gpar(cex=cex)) + # grid.text(paste("P = ",formatC(pFWRFWR,digits=3)), x = unit(0.5, "npc"),y = unit(0.02, "npc"),just=c("center", "bottom"),gp = gpar(cex=cex)) + } + else{ + } +} + + +################################################################################## +################## The whole OCD's matrix ######################################## +################################################################################## + +#pdf(width=4*numbSeqs+1/3,height=4*numbSeqs+1/3) +pdf( output ,width=4*numbSeqs+1/3,height=4*numbSeqs+1/3) + +pushViewport(viewport(x=0.02,y=0.02,just = c("left", "bottom"),w =0.96,height=0.96,layout = grid.layout(numbSeqs+1,numbSeqs+1,widths=unit.c(unit(rep(1,numbSeqs),"null"),unit(4,"lines")),heights=unit.c(unit(4,"lines"),unit(rep(1,numbSeqs),"null"))))) + +for( seqOne in 1:numbSeqs+1){ + pushViewport(viewport(layout.pos.col = seqOne-1, layout.pos.row = 1)) + if(seqOne>2){ + grid.polygon(c(0,0,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc") + 
grid.polygon(c(1,1,0.5,0.5),c(0,0.5,0.5,0),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc") + grid.polygon(c(0,0,1,1),c(1,0.5,0.5,1),gp=gpar(col=grey(0.5)),default.units="npc") + + grid.text(y=.25,x=0.75,"FWR",gp = gpar(cex=1.5),just="center") + grid.text(y=.25,x=0.25,"CDR",gp = gpar(cex=1.5),just="center") + } + grid.rect(gp = gpar(col=grey(0.9))) + grid.text(y=.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),just="center") + popViewport(1) +} + +for( seqOne in 1:numbSeqs+1){ + pushViewport(viewport(layout.pos.row = seqOne, layout.pos.col = numbSeqs+1)) + if(seqOne<=numbSeqs){ + grid.polygon(c(0,0.5,0.5,0),c(0,0,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.95)),default.units="npc") + grid.polygon(c(0,0.5,0.5,0),c(1,1,0.5,0.5),gp=gpar(col=grey(0.5),fill=grey(0.9)),default.units="npc") + grid.polygon(c(1,0.5,0.5,1),c(0,0,1,1),gp=gpar(col=grey(0.5)),default.units="npc") + grid.text(x=.25,y=0.75,"CDR",gp = gpar(cex=1.5),just="center",rot=270) + grid.text(x=.25,y=0.25,"FWR",gp = gpar(cex=1.5),just="center",rot=270) + } + grid.rect(gp = gpar(col=grey(0.9))) + grid.text(x=0.75,substr(paste(names(listPDFs)[rowIDs[seqOne-1]]),1,16),gp = gpar(cex=2),rot=270,just="center") + popViewport(1) +} + +for( seqOne in 1:numbSeqs+1){ + for(seqTwo in 1:numbSeqs+1){ + pushViewport(viewport(layout.pos.col = seqTwo-1, layout.pos.row = seqOne)) + if(seqTwo>seqOne){ + plot_pvals(rowIDs[seqOne-1],rowIDs[seqTwo-1],cex=2) + grid.rect() + } + popViewport(1) + } +} + + +xMin=0 +xMax=0.01 +for(pdf1 in rowIDs){ + xMin_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][1] + xMin_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][1] + xMax_CDR = xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["CDR"]]>0.001])] + xMax_FWR = xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001][length(xMarks[listPDFs[pdf1][[1]][["FWR"]]>0.001])] + xMin=min(c(xMin_CDR,xMin_FWR,xMin),na.rm=TRUE) + xMax=max(c(xMax_CDR,xMax_FWR,xMax),na.rm=TRUE) +} + + + +for(i in 
1:numbSeqs+1){ + for(j in (i-1):numbSeqs){ + pushViewport(viewport(layout.pos.col = i-1, layout.pos.row = j+1)) + grid.rect() + plot_grid_s(rowIDs[i-1],rowIDs[j],cex=1) + popViewport(1) + } +} + +dev.off() + +cat("Success", paste(rowIDs,collapse="_"),sep=":") + diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/filter.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/baseline/filter.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,35 @@ +arg = commandArgs(TRUE) +summaryfile = arg[1] +gappedfile = arg[2] +selection = arg[3] +output = arg[4] +print(paste("selection = ", selection)) + + +summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F) +gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F) + +#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T)) + +dat = cbind(gappeddat, summarydat$AA.JUNCTION) + +colnames(dat)[length(dat)] = "AA.JUNCTION" + +dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) +dat$VGene = gsub("[*].*", "", dat$VGene) + +dat$DGene = gsub("^Homsap ", "", dat$D.GENE.and.allele) +dat$DGene = gsub("[*].*", "", dat$DGene) + +dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) +dat$JGene = gsub("[*].*", "", dat$JGene) + +#print(str(dat)) + +dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":")) + +dat = dat[!duplicated(dat$past), ] + +dat = dat[dat$Functionality != "No results" & dat$Functionality != "unproductive",] + +write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T) diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/script_imgt.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/baseline/script_imgt.py Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,79 @@ +#import xlrd #avoid dep +import argparse +import re + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence") 
+parser.add_argument("--ref", help="Reference file") +parser.add_argument("--output", help="Output file") +parser.add_argument("--id", help="ID to be used at the '>>>' line in the output") + +args = parser.parse_args() + +refdic = dict() +with open(args.ref, 'r') as ref: + currentSeq = "" + currentId = "" + for line in ref: + if line[0] is ">": + if currentSeq is not "" and currentId is not "": + refdic[currentId[1:]] = currentSeq + currentId = line.rstrip() + currentSeq = "" + else: + currentSeq += line.rstrip() + refdic[currentId[1:]] = currentSeq + + +vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#, +# r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", +# r"(IGKV[0-3]D?-[0-9]{1,2})", +# r"(IGLV[0-9]-[0-9]{1,2})", +# r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)", +# r"(TRGV[234589])", +# r"(TRDV[1-3])"] + +#vPattern = re.compile(r"|".join(vPattern)) +vPattern = re.compile("|".join(vPattern)) + +def filterGene(s, pattern): + if type(s) is not str: + return None + res = pattern.search(s) + if res: + return res.group(0) + return None + + + +currentSeq = "" +currentId = "" +first=True +with open(args.input, 'r') as i: + with open(args.output, 'a') as o: + o.write(">>>" + args.id + "\n") + outputdic = dict() + for line in i: + if first: + first = False + continue + linesplt = line.split("\t") + ref = filterGene(linesplt[1], vPattern) + if not ref or not linesplt[2].rstrip(): + continue + if ref in outputdic: + outputdic[ref] += [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] + else: + outputdic[ref] = [(linesplt[0].replace(">", ""), linesplt[2].replace(">", "").rstrip())] + #print outputdic + + for k in outputdic.keys(): + if k in refdic: + o.write(">>" + k + "\n") + o.write(refdic[k] + "\n") + for seq in outputdic[k]: + #print seq + o.write(">" + seq[0] + "\n") + o.write(seq[1] + "\n") + else: + print k + " not in reference, skipping " + k diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/script_xlsx.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/baseline/script_xlsx.py Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,58 @@ +import xlrd +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="Excel input file containing one or more sheets where column G has the gene annotation, H has the sequence id and J has the sequence") +parser.add_argument("--ref", help="Reference file") +parser.add_argument("--output", help="Output file") + +args = parser.parse_args() + +gene_column = 6 +id_column = 7 +seq_column = 8 +LETTERS = [x for x in "ABCDEFGHIJKLMNOPQRSTUVWXYZ"] + + +refdic = dict() +with open(args.ref, 'r') as ref: + currentSeq = "" + currentId = "" + for line in ref.readlines(): + if line[0] is ">": + if currentSeq is not "" and currentId is not "": + refdic[currentId[1:]] = currentSeq + currentId = line.rstrip() + currentSeq = "" + else: + currentSeq += line.rstrip() + refdic[currentId[1:]] = currentSeq + +currentSeq = "" +currentId = "" +with xlrd.open_workbook(args.input, 'r') as wb: + with open(args.output, 'a') as o: + for sheet in wb.sheets(): + if sheet.cell(1,gene_column).value.find("IGHV") < 0: + print "Genes not in column " + LETTERS[gene_column] + ", skipping sheet " + sheet.name + continue + o.write(">>>" + sheet.name + "\n") + outputdic = dict() + for rowindex in range(1, sheet.nrows): + ref = sheet.cell(rowindex, gene_column).value.replace(">", "") + if ref in outputdic: + outputdic[ref] += [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)] + else: + outputdic[ref] = [(sheet.cell(rowindex, id_column).value.replace(">", ""), sheet.cell(rowindex, seq_column).value)] + #print outputdic + + for k in outputdic.keys(): + if k in refdic: + o.write(">>" + k + "\n") + o.write(refdic[k] + "\n") + for seq in outputdic[k]: + #print seq + o.write(">" + seq[0] + "\n") + o.write(seq[1] + "\n") + else: + print k + " not in reference, skipping " + k diff -r beaa487ecf43 -r 5ffd52fc35c4 baseline/wrapper.sh --- /dev/null Thu Jan 01 
00:00:00 1970 +0000
+++ b/baseline/wrapper.sh Mon Dec 12 05:22:37 2016 -0500
@@ -0,0 +1,117 @@
+#!/bin/bash
+# Wrapper around the Baseline selection analysis.
+#
+# Converts every input (IMGT zip/xz archive or Excel workbook) into one combined
+# fasta file, runs Baseline_Main.r on it and comparePDFs.r on the result.
+#
+# Positional arguments:
+#   $1-$7  testID species substitutionModel mutabilityModel clonal fixIndels
+#          region (passed through to Baseline_Main.r)
+#   $8     whitespace-separated list of input files
+#   $9     whitespace-separated list of IDs, matched to the inputs by position
+#   ${10}  reference file for script_imgt.py / script_xlsx.py
+#   ${11}  output argument for comparePDFs.r
+#   ${12}  selection argument for filter.r
+#   ${13}  destination of the result table
+dir="$(cd "$(dirname "$0")" && pwd)"
+
+testID=$1
+species=$2
+substitutionModel=$3
+mutabilityModel=$4
+clonal=$5
+fixIndels=$6
+region=$7
+inputs=$8
+inputs=($inputs)   # deliberate word splitting of the list
+IDs=$9
+IDs=($IDs)
+ref=${10}
+output=${11}
+selection=${12}
+output_table=${13}
+outID="result"
+
+echo "$PWD"
+
+echo "testID = $testID"
+echo "species = $species"
+echo "substitutionModel = $substitutionModel"
+echo "mutabilityModel = $mutabilityModel"
+echo "clonal = $clonal"
+echo "fixIndels = $fixIndels"
+echo "region = $region"
+echo "inputs = ${inputs[*]}"
+echo "IDs = ${IDs[*]}"
+echo "ref = $ref"
+echo "output = $output"
+echo "outID = $outID"
+
+fasta="$PWD/baseline.fasta"
+
+count=0
+for current in "${inputs[@]}"
+do
+    f=$(file "$current")
+    if [[ "$f" == *"Zip archive"* ]] || [[ "$f" == *"XZ compressed data"* ]]
+    then
+        id=${IDs[$count]}
+        echo "id=$id"
+        if [[ "$f" == *"Zip archive"* ]] ; then
+            echo "Zip archive"
+            echo "unzip $current -d $PWD/$id/"
+            unzip "$current" -d "$PWD/$id/"
+        elif [[ "$f" == *"XZ compressed data"* ]] ; then
+            echo "XZ archive"
+            echo "tar -xJf $current -C $PWD/$id/files/"
+            mkdir -p "$PWD/$id/files"
+            tar -xJf "$current" -C "$PWD/$id/files/"
+        fi
+        summaryfile="$PWD/summary_${id}.txt"
+        gappedfile="$PWD/gappednt_${id}.txt"
+        filtered="$PWD/filtered_${id}.txt"
+        # `ls -l` prints a "total" line first, so two lines mean the archive
+        # unpacked into a single entry; the files are then looked up one level down.
+        filecount=$(ls -l "$PWD/$id/" | wc -l)
+        if [[ "$filecount" -eq "2" ]]
+        then
+            cat "$PWD/$id"/*/1_* > "$summaryfile"
+            cat "$PWD/$id"/*/2_* > "$gappedfile"
+        else
+            cat "$PWD/$id"/1_* > "$summaryfile"
+            cat "$PWD/$id"/2_* > "$gappedfile"
+        fi
+        Rscript "$dir/filter.r" "$summaryfile" "$gappedfile" "$selection" "$filtered" 2>&1
+
+        final="$PWD/final_${id}.txt"
+        cut -f2,4,7 "$filtered" > "$final"
+        python "$dir/script_imgt.py" --input "$final" --ref "$ref" --output "$fasta" --id "$id"
+    else
+        python "$dir/script_xlsx.py" --input "$current" --ref "$ref" --output "$fasta"
+    fi
+    count=$((count+1))
+done
+
+# A missing file or a single line (just a ">>>" header) means that no
+# sequences were written.
+if [[ ! -s "$fasta" ]] || [[ $(wc -l < "$fasta") -le 1 ]]; then
+    echo "No sequences in the fasta file, exiting"
+    exit 0
+fi
+
+workdir="$PWD"
+cd "$dir" || exit 1
+echo "file: ${inputs[0]}"
+#Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region ${inputs[0]} $workdir/ $outID 2>&1
+Rscript --verbose "$dir/Baseline_Main.r" "$testID" "$species" "$substitutionModel" "$mutabilityModel" "$clonal" "$fixIndels" "$region" "$fasta" "$workdir/" "$outID" 2>&1
+
+echo "$workdir/${outID}.txt"
+
+# Line numbers of the rows containing "Group", counted after dropping the
+# header line and the "All sequences combined" rows.
+rows=$(tail -n +2 "$workdir/${outID}.txt" | grep -v "All sequences combined" | grep -n 'Group' | grep -Eoh '^[0-9]+' | tr '\n' ' ')
+rows=($rows)
+#unset rows[${#rows[@]}-1]
+
+Rscript --verbose "$dir/comparePDFs.r" "$workdir/${outID}.RData" "$output" "${rows[@]}" 2>&1
+cp "$workdir/${outID}.txt" "${output_table}"
diff -r beaa487ecf43 -r 5ffd52fc35c4 change_o/DefineClones.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/change_o/DefineClones.py Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,1052 @@ +#!/usr/bin/env python3 +""" +Assign Ig sequences into clones +""" +# Info +__author__ = 'Namita Gupta, Jason Anthony Vander Heiden, Gur Yaari, Mohamed Uduman' +from changeo import __version__, __date__ + +# Imports +import os +import re +import sys +import numpy as np +from argparse import ArgumentParser +from collections import OrderedDict +from itertools import chain +from textwrap import dedent +from time import time +from Bio import pairwise2 +from Bio.Seq import translate + +# Presto and changeo imports +from presto.Defaults import default_out_args +from presto.IO import getFileType, getOutputHandle, printLog, printProgress +from presto.Multiprocessing import manageProcesses +from presto.Sequence import getDNAScoreDict +from changeo.Commandline import CommonHelpFormatter, getCommonArgParser, parseCommonArgs +from changeo.Distance import getDNADistMatrix, getAADistMatrix, \ + hs1f_model, m1n_model, hs5f_model, \ + calcDistances, formClusters +from changeo.IO import getDbWriter, readDbFile,
countDbFile +from changeo.Multiprocessing import DbData, DbResult + +# Defaults +default_translate = False +default_distance = 0.0 +default_bygroup_model = 'hs1f' +default_hclust_model = 'chen2010' +default_seq_field = 'JUNCTION' +default_norm = 'len' +default_sym = 'avg' +default_linkage = 'single' + +# TODO: should be in Distance, but need to be after function definitions +# Amino acid Hamming distance +aa_model = getAADistMatrix(mask_dist=1, gap_dist=0) + +# DNA Hamming distance +ham_model = getDNADistMatrix(mask_dist=0, gap_dist=0) + + +# TODO: this function is an abstraction to facilitate later cleanup +def getModelMatrix(model): + """ + Simple wrapper to get distance matrix from model name + + Arguments: + model = model name + + Return: + a pandas.DataFrame containing the character distance matrix + """ + if model == 'aa': + return(aa_model) + elif model == 'ham': + return(ham_model) + elif model == 'm1n': + return(m1n_model) + elif model == 'hs1f': + return(hs1f_model) + elif model == 'hs5f': + return(hs5f_model) + else: + sys.stderr.write('Unrecognized distance model: %s.\n' % model) + + +def indexJunctions(db_iter, fields=None, mode='gene', action='first'): + """ + Identifies preclonal groups by V, J and junction length + + Arguments: + db_iter = an iterator of IgRecords defined by readDbFile + fields = additional annotation fields to use to group preclones; + if None use only V, J and junction length + mode = specificity of alignment call to use for assigning preclones; + one of ('allele', 'gene') + action = how to handle multiple value fields when assigning preclones; + one of ('first', 'set') + + Returns: + a dictionary of {(V, J, junction length):[IgRecords]} + """ + # Define functions for grouping keys + if mode == 'allele' and fields is None: + def _get_key(rec, act): + return (rec.getVAllele(act), rec.getJAllele(act), + None if rec.junction is None else len(rec.junction)) + elif mode == 'gene' and fields is None: + def _get_key(rec, act): + return 
(rec.getVGene(act), rec.getJGene(act), + None if rec.junction is None else len(rec.junction)) + elif mode == 'allele' and fields is not None: + def _get_key(rec, act): + vdj = [rec.getVAllele(act), rec.getJAllele(act), + None if rec.junction is None else len(rec.junction)] + ann = [rec.toDict().get(k, None) for k in fields] + return tuple(chain(vdj, ann)) + elif mode == 'gene' and fields is not None: + def _get_key(rec, act): + vdj = [rec.getVGene(act), rec.getJGene(act), + None if rec.junction is None else len(rec.junction)] + ann = [rec.toDict().get(k, None) for k in fields] + return tuple(chain(vdj, ann)) + + start_time = time() + clone_index = {} + rec_count = 0 + for rec in db_iter: + key = _get_key(rec, action) + + # Print progress + if rec_count == 0: + print('PROGRESS> Grouping sequences') + + printProgress(rec_count, step=1000, start_time=start_time) + rec_count += 1 + + # Assigned passed preclone records to key and failed to index None + if all([k is not None and k != '' for k in key]): + #print key + # TODO: Has much slow. Should have less slow. 
+ if action == 'set': + + f_range = list(range(2, 3 + (len(fields) if fields else 0))) + vdj_range = list(range(2)) + + # Check for any keys that have matching columns and junction length and overlapping genes/alleles + to_remove = [] + if len(clone_index) > (1 if None in clone_index else 0) and key not in clone_index: + key = list(key) + for k in clone_index: + if k is not None and all([key[i] == k[i] for i in f_range]): + if all([not set(key[i]).isdisjoint(set(k[i])) for i in vdj_range]): + for i in vdj_range: key[i] = tuple(set(key[i]).union(set(k[i]))) + to_remove.append(k) + + # Remove original keys, replace with union of all genes/alleles and append values to new key + val = [rec] + val += list(chain(*(clone_index.pop(k) for k in to_remove))) + clone_index[tuple(key)] = clone_index.get(tuple(key),[]) + val + + elif action == 'first': + clone_index.setdefault(key, []).append(rec) + else: + clone_index.setdefault(None, []).append(rec) + + printProgress(rec_count, step=1000, start_time=start_time, end=True) + + return clone_index + + +def distanceClones(records, model=default_bygroup_model, distance=default_distance, + dist_mat=None, norm=default_norm, sym=default_sym, + linkage=default_linkage, seq_field=default_seq_field): + """ + Separates a set of IgRecords into clones + + Arguments: + records = an iterator of IgRecords + model = substitution model used to calculate distance + distance = the distance threshold to assign clonal groups + dist_mat = pandas DataFrame of pairwise nucleotide or amino acid distances + norm = normalization method + sym = symmetry method + linkage = type of linkage + seq_field = sequence field used to calculate distance between records + + Returns: + a dictionary of lists defining {clone number: [IgRecords clonal group]} + """ + # Get distance matrix if not provided + if dist_mat is None: dist_mat = getModelMatrix(model) + + # Determine length of n-mers + if model in ['hs1f', 'm1n', 'aa', 'ham']: + nmer_len = 1 + elif model in 
['hs5f']: + nmer_len = 5 + else: + sys.stderr.write('Unrecognized distance model: %s.\n' % model) + + # Define unique junction mapping + seq_map = {} + for ig in records: + seq = ig.getSeqField(seq_field) + # Check if sequence length is 0 + if len(seq) == 0: + return None + + seq = re.sub('[\.-]','N', str(seq)) + if model == 'aa': seq = translate(seq) + + seq_map.setdefault(seq, []).append(ig) + + # Process records + if len(seq_map) == 1: + return {1:records} + + # Define sequences + seqs = list(seq_map.keys()) + + # Calculate pairwise distance matrix + dists = calcDistances(seqs, nmer_len, dist_mat, norm, sym) + + # Perform hierarchical clustering + clusters = formClusters(dists, linkage, distance) + + # Turn clusters into clone dictionary + clone_dict = {} + for i, c in enumerate(clusters): + clone_dict.setdefault(c, []).extend(seq_map[seqs[i]]) + + return clone_dict + + +def distChen2010(records): + """ + Calculate pairwise distances as defined in Chen 2010 + + Arguments: + records = list of IgRecords where first is query to be compared to others in list + + Returns: + list of distances + """ + # Pull out query sequence and V/J information + query = records.popitem(last=False) + query_cdr3 = query.junction[3:-3] + query_v_allele = query.getVAllele() + query_v_gene = query.getVGene() + query_v_family = query.getVFamily() + query_j_allele = query.getJAllele() + query_j_gene = query.getJGene() + # Create alignment scoring dictionary + score_dict = getDNAScoreDict() + + scores = [0]*len(records) + for i in range(len(records)): + ld = pairwise2.align.globalds(query_cdr3, records[i].junction[3:-3], + score_dict, -1, -1, one_alignment_only=True) + # Check V similarity + if records[i].getVAllele() == query_v_allele: ld += 0 + elif records[i].getVGene() == query_v_gene: ld += 1 + elif records[i].getVFamily() == query_v_family: ld += 3 + else: ld += 5 + # Check J similarity + if records[i].getJAllele() == query_j_allele: ld += 0 + elif records[i].getJGene() == 
query_j_gene: ld += 1 + else: ld += 3 + # Divide by length + scores[i] = ld/max(len(records[i].junction[3:-3]), query_cdr3) + + return scores + + +def distAdemokun2011(records): + """ + Calculate pairwise distances as defined in Ademokun 2011 + + Arguments: + records = list of IgRecords where first is query to be compared to others in list + + Returns: + list of distances + """ + # Pull out query sequence and V family information + query = records.popitem(last=False) + query_cdr3 = query.junction[3:-3] + query_v_family = query.getVFamily() + # Create alignment scoring dictionary + score_dict = getDNAScoreDict() + + scores = [0]*len(records) + for i in range(len(records)): + + if abs(len(query_cdr3) - len(records[i].junction[3:-3])) > 10: + scores[i] = 1 + elif query_v_family != records[i].getVFamily(): + scores[i] = 1 + else: + ld = pairwise2.align.globalds(query_cdr3, records[i].junction[3:-3], + score_dict, -1, -1, one_alignment_only=True) + scores[i] = ld/min(len(records[i].junction[3:-3]), query_cdr3) + + return scores + + +def hierClust(dist_mat, method='chen2010'): + """ + Calculate hierarchical clustering + + Arguments: + dist_mat = square-formed distance matrix of pairwise CDR3 comparisons + + Returns: + list of cluster ids + """ + if method == 'chen2010': + clusters = formClusters(dist_mat, 'average', 0.32) + elif method == 'ademokun2011': + clusters = formClusters(dist_mat, 'complete', 0.25) + else: clusters = np.ones(dist_mat.shape[0]) + + return clusters + +# TODO: Merge duplicate feed, process and collect functions. 
+def feedQueue(alive, data_queue, db_file, group_func, group_args={}): + """ + Feeds the data queue with Ig records + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + data_queue = a multiprocessing.Queue to hold data for processing + db_file = the Ig record database file + group_func = the function to use for assigning preclones + group_args = a dictionary of arguments to pass to group_func + + Returns: + None + """ + # Open input file and perform grouping + try: + # Iterate over Ig records and assign groups + db_iter = readDbFile(db_file) + clone_dict = group_func(db_iter, **group_args) + except: + #sys.stderr.write('Exception in feeder grouping step\n') + alive.value = False + raise + + # Add groups to data queue + try: + #print 'START FEED', alive.value + # Iterate over groups and feed data queue + clone_iter = iter(clone_dict.items()) + while alive.value: + # Get data from queue + if data_queue.full(): continue + else: data = next(clone_iter, None) + # Exit upon reaching end of iterator + if data is None: break + #print "FEED", alive.value, k + + # Feed queue + data_queue.put(DbData(*data)) + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + except: + #sys.stderr.write('Exception in feeder queue feeding step\n') + alive.value = False + raise + + return None + + +def feedQueueClust(alive, data_queue, db_file, group_func=None, group_args={}): + """ + Feeds the data queue with Ig records + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + data_queue = a multiprocessing.Queue to hold data for processing + db_file = the Ig record database file + + Returns: + None + """ + # Open input file and perform grouping + try: + # Iterate over Ig records and order by junction length + records = {} + db_iter = readDbFile(db_file) + for rec in db_iter: + records[rec.id] = rec + records = OrderedDict(sorted(list(records.items()), key=lambda i: i[1].junction_length)) + dist_dict = {} + for __ in range(len(records)): + k,v = records.popitem(last=False) + dist_dict[k] = [v].append(list(records.values())) + except: + #sys.stderr.write('Exception in feeder grouping step\n') + alive.value = False + raise + + # Add groups to data queue + try: + # print 'START FEED', alive.value + # Iterate over groups and feed data queue + dist_iter = iter(dist_dict.items()) + while alive.value: + # Get data from queue + if data_queue.full(): continue + else: data = next(dist_iter, None) + # Exit upon reaching end of iterator + if data is None: break + #print "FEED", alive.value, k + + # Feed queue + data_queue.put(DbData(*data)) + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + except: + #sys.stderr.write('Exception in feeder queue feeding step\n') + alive.value = False + raise + + return None + + +def processQueue(alive, data_queue, result_queue, clone_func, clone_args): + """ + Pulls from data queue, performs calculations, and feeds results queue + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + data_queue = a multiprocessing.Queue holding data to process + result_queue = a multiprocessing.Queue to hold processed results + clone_func = the function to call for clonal assignment + clone_args = a dictionary of arguments to pass to clone_func + + Returns: + None + """ + try: + # Iterator over data queue until sentinel object reached + while alive.value: + # Get data from queue + if data_queue.empty(): continue + else: data = data_queue.get() + # Exit upon reaching sentinel + if data is None: break + + # Define result object for iteration and get data records + records = data.data + result = DbResult(data.id, records) + + # Check for invalid data (due to failed indexing) and add failed result + if not data: + result_queue.put(result) + continue + + # Add V(D)J to log + result.log['ID'] = ','.join([str(x) for x in data.id]) + result.log['VALLELE'] = ','.join(set([(r.getVAllele() or '') for r in records])) + result.log['DALLELE'] = ','.join(set([(r.getDAllele() or '') for r in records])) + result.log['JALLELE'] = ','.join(set([(r.getJAllele() or '') for r in records])) + result.log['JUNCLEN'] = ','.join(set([(str(len(r.junction)) or '0') for r in records])) + result.log['SEQUENCES'] = len(records) + + # Checking for preclone failure and assign clones + clones = clone_func(records, **clone_args) if data else None + + # import cProfile + # prof = cProfile.Profile() + # clones = prof.runcall(clone_func, records, **clone_args) + # prof.dump_stats('worker-%d.prof' % os.getpid()) + + if clones is not None: + result.results = 
clones + result.valid = True + result.log['CLONES'] = len(clones) + else: + result.log['CLONES'] = 0 + + # Feed results to result queue + result_queue.put(result) + else: + sys.stderr.write('PID %s: Error in sibling process detected. Cleaning up.\n' \ + % os.getpid()) + return None + except: + #sys.stderr.write('Exception in worker\n') + alive.value = False + raise + + return None + + +def processQueueClust(alive, data_queue, result_queue, clone_func, clone_args): + """ + Pulls from data queue, performs calculations, and feeds results queue + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + data_queue = a multiprocessing.Queue holding data to process + result_queue = a multiprocessing.Queue to hold processed results + clone_func = the function to call for calculating pairwise distances between sequences + clone_args = a dictionary of arguments to pass to clone_func + + Returns: + None + """ + + try: + # print 'START WORK', alive.value + # Iterator over data queue until sentinel object reached + while alive.value: + # Get data from queue + if data_queue.empty(): continue + else: data = data_queue.get() + # Exit upon reaching sentinel + if data is None: break + # print "WORK", alive.value, data['id'] + + # Define result object for iteration and get data records + records = data.data + result = DbResult(data.id, records) + + # Create row of distance matrix and check for error + dist_row = clone_func(records, **clone_args) if data else None + if dist_row is not None: + result.results = dist_row + result.valid = True + + # Feed results to result queue + result_queue.put(result) + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + except: + #sys.stderr.write('Exception in worker\n') + alive.value = False + raise + + return None + + +def collectQueue(alive, result_queue, collect_queue, db_file, out_args, cluster_func=None, cluster_args={}): + """ + Assembles results from a queue of individual sequence results and manages log/file I/O + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + result_queue = a multiprocessing.Queue holding processQueue results + collect_queue = a multiprocessing.Queue to store collector return values + db_file = the input database file name + out_args = common output argument dictionary from parseCommonArgs + cluster_func = the function to call for carrying out clustering on distance matrix + cluster_args = a dictionary of arguments to pass to cluster_func + + Returns: + None + (adds 'log' and 'out_files' to collect_dict) + """ + # Open output files + try: + # Count records and define output format + out_type = getFileType(db_file) if out_args['out_type'] is None \ + else out_args['out_type'] + result_count = countDbFile(db_file) + + # Defined successful output handle + pass_handle = getOutputHandle(db_file, + out_label='clone-pass', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_type) + pass_writer = getDbWriter(pass_handle, db_file, add_fields='CLONE') + + # Defined failed alignment output handle + if out_args['failed']: + fail_handle = getOutputHandle(db_file, + out_label='clone-fail', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_type) + fail_writer = getDbWriter(fail_handle, db_file) + else: + fail_handle = None + fail_writer = None + + # Define log handle + if out_args['log_file'] is None: + log_handle = None + else: + log_handle = open(out_args['log_file'], 'w') + except: + #sys.stderr.write('Exception in collector file opening step\n') + alive.value = False + raise + + # Get 
results from queue and write to files + try: + #print 'START COLLECT', alive.value + # Iterator over results queue until sentinel object reached + start_time = time() + rec_count = clone_count = pass_count = fail_count = 0 + while alive.value: + # Get result from queue + if result_queue.empty(): continue + else: result = result_queue.get() + # Exit upon reaching sentinel + if result is None: break + #print "COLLECT", alive.value, result['id'] + + # Print progress for previous iteration and update record count + if rec_count == 0: + print('PROGRESS> Assigning clones') + printProgress(rec_count, result_count, 0.05, start_time) + rec_count += len(result.data) + + # Write passed and failed records + if result: + for clone in result.results.values(): + clone_count += 1 + for i, rec in enumerate(clone): + rec.annotations['CLONE'] = clone_count + pass_writer.writerow(rec.toDict()) + pass_count += 1 + result.log['CLONE%i-%i' % (clone_count, i + 1)] = str(rec.junction) + + else: + for i, rec in enumerate(result.data): + if fail_writer is not None: fail_writer.writerow(rec.toDict()) + fail_count += 1 + result.log['CLONE0-%i' % (i + 1)] = str(rec.junction) + + # Write log + printLog(result.log, handle=log_handle) + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + + # Print total counts + printProgress(rec_count, result_count, 0.05, start_time) + + # Close file handles + pass_handle.close() + if fail_handle is not None: fail_handle.close() + if log_handle is not None: log_handle.close() + + # Update return list + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['CLONES'] = clone_count + log['RECORDS'] = rec_count + log['PASS'] = pass_count + log['FAIL'] = fail_count + collect_dict = {'log':log, 'out_files': [pass_handle.name]} + collect_queue.put(collect_dict) + except: + #sys.stderr.write('Exception in collector result processing step\n') + alive.value = False + raise + + return None + + +def collectQueueClust(alive, result_queue, collect_queue, db_file, out_args, cluster_func, cluster_args): + """ + Assembles results from a queue of individual sequence results and manages log/file I/O + + Arguments: + alive = a multiprocessing.Value boolean controlling whether processing continues + if False exit process + result_queue = a multiprocessing.Queue holding processQueue results + collect_queue = a multiprocessing.Queue to store collector return values + db_file = the input database file name + out_args = common output argument dictionary from parseCommonArgs + cluster_func = the function to call for carrying out clustering on distance matrix + cluster_args = a dictionary of arguments to pass to cluster_func + + Returns: + None + (adds 'log' and 'out_files' to collect_dict) + """ + # Open output files + try: + + # Iterate over Ig records to count and order by junction length + result_count = 0 + records = {} + # print 'Reading file...' 
+ db_iter = readDbFile(db_file) + for rec in db_iter: + records[rec.id] = rec + result_count += 1 + records = OrderedDict(sorted(list(records.items()), key=lambda i: i[1].junction_length)) + + # Define empty matrix to store assembled results + dist_mat = np.zeros((result_count,result_count)) + + # Count records and define output format + out_type = getFileType(db_file) if out_args['out_type'] is None \ + else out_args['out_type'] + + # Defined successful output handle + pass_handle = getOutputHandle(db_file, + out_label='clone-pass', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_type) + pass_writer = getDbWriter(pass_handle, db_file, add_fields='CLONE') + + # Defined failed cloning output handle + if out_args['failed']: + fail_handle = getOutputHandle(db_file, + out_label='clone-fail', + out_dir=out_args['out_dir'], + out_name=out_args['out_name'], + out_type=out_type) + fail_writer = getDbWriter(fail_handle, db_file) + else: + fail_handle = None + fail_writer = None + + # Open log file + if out_args['log_file'] is None: + log_handle = None + else: + log_handle = open(out_args['log_file'], 'w') + except: + alive.value = False + raise + + try: + # Iterator over results queue until sentinel object reached + start_time = time() + row_count = rec_count = 0 + while alive.value: + # Get result from queue + if result_queue.empty(): continue + else: result = result_queue.get() + # Exit upon reaching sentinel + if result is None: break + + # Print progress for previous iteration + if row_count == 0: + print('PROGRESS> Assigning clones') + printProgress(row_count, result_count, 0.05, start_time) + + # Update counts for iteration + row_count += 1 + rec_count += len(result) + + # Add result row to distance matrix + if result: + dist_mat[list(range(result_count-len(result),result_count)),result_count-len(result)] = result.results + + else: + sys.stderr.write('PID %s: Error in sibling process detected. 
Cleaning up.\n' \ + % os.getpid()) + return None + + # Calculate linkage and carry out clustering + # print dist_mat + clusters = cluster_func(dist_mat, **cluster_args) if dist_mat is not None else None + clones = {} + # print clusters + for i, c in enumerate(clusters): + clones.setdefault(c, []).append(records[list(records.keys())[i]]) + + # Write passed and failed records + clone_count = pass_count = fail_count = 0 + if clones: + for clone in clones.values(): + clone_count += 1 + for i, rec in enumerate(clone): + rec.annotations['CLONE'] = clone_count + pass_writer.writerow(rec.toDict()) + pass_count += 1 + #result.log['CLONE%i-%i' % (clone_count, i + 1)] = str(rec.junction) + + else: + for i, rec in enumerate(result.data): + fail_writer.writerow(rec.toDict()) + fail_count += 1 + #result.log['CLONE0-%i' % (i + 1)] = str(rec.junction) + + # Print final progress + printProgress(row_count, result_count, 0.05, start_time) + + # Close file handles + pass_handle.close() + if fail_handle is not None: fail_handle.close() + if log_handle is not None: log_handle.close() + + # Update return list + log = OrderedDict() + log['OUTPUT'] = os.path.basename(pass_handle.name) + log['CLONES'] = clone_count + log['RECORDS'] = rec_count + log['PASS'] = pass_count + log['FAIL'] = fail_count + collect_dict = {'log':log, 'out_files': [pass_handle.name]} + collect_queue.put(collect_dict) + except: + alive.value = False + raise + + return None + + +def defineClones(db_file, feed_func, work_func, collect_func, clone_func, cluster_func=None, + group_func=None, group_args={}, clone_args={}, cluster_args={}, + out_args=default_out_args, nproc=None, queue_size=None): + """ + Define clonally related sequences + + Arguments: + db_file = filename of input database + feed_func = the function that feeds the queue + work_func = the worker function that will run on each CPU + collect_func = the function that collects results from the workers + group_func = the function to use for assigning preclones 
+ clone_func = the function to use for determining clones within preclonal groups + group_args = a dictionary of arguments to pass to group_func + clone_args = a dictionary of arguments to pass to clone_func + out_args = common output argument dictionary from parseCommonArgs + nproc = the number of processQueue processes; + if None defaults to the number of CPUs + queue_size = maximum size of the argument queue; + if None defaults to 2*nproc + + Returns: + a list of successful output file names + """ + # Print parameter info + log = OrderedDict() + log['START'] = 'DefineClones' + log['DB_FILE'] = os.path.basename(db_file) + if group_func is not None: + log['GROUP_FUNC'] = group_func.__name__ + log['GROUP_ARGS'] = group_args + log['CLONE_FUNC'] = clone_func.__name__ + + # TODO: this is yucky, but can be fixed by using a model class + clone_log = clone_args.copy() + if 'dist_mat' in clone_log: del clone_log['dist_mat'] + log['CLONE_ARGS'] = clone_log + + if cluster_func is not None: + log['CLUSTER_FUNC'] = cluster_func.__name__ + log['CLUSTER_ARGS'] = cluster_args + log['NPROC'] = nproc + printLog(log) + + # Define feeder function and arguments + feed_args = {'db_file': db_file, + 'group_func': group_func, + 'group_args': group_args} + # Define worker function and arguments + work_args = {'clone_func': clone_func, + 'clone_args': clone_args} + # Define collector function and arguments + collect_args = {'db_file': db_file, + 'out_args': out_args, + 'cluster_func': cluster_func, + 'cluster_args': cluster_args} + + # Call process manager + result = manageProcesses(feed_func, work_func, collect_func, + feed_args, work_args, collect_args, + nproc, queue_size) + + # Print log + result['log']['END'] = 'DefineClones' + printLog(result['log']) + + return result['out_files'] + + +def getArgParser(): + """ + Defines the ArgumentParser + + Arguments: + None + + Returns: + an ArgumentParser object + """ + # Define input and output fields + fields = dedent( + ''' + output files: + 
clone-pass + database with assigned clonal group numbers. + clone-fail + database with records failing clonal grouping. + + required fields: + SEQUENCE_ID, V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, JUNCTION_LENGTH + + + sequence field specified by the --sf parameter + + output fields: + CLONE + ''') + + # Define ArgumentParser + parser = ArgumentParser(description=__doc__, epilog=fields, + formatter_class=CommonHelpFormatter) + parser.add_argument('--version', action='version', + version='%(prog)s:' + ' %s-%s' %(__version__, __date__)) + subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='', + help='Cloning method') + # TODO: This is a temporary fix for Python issue 9253 + subparsers.required = True + + # Parent parser + parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True, + multiproc=True) + + # Distance cloning method + parser_bygroup = subparsers.add_parser('bygroup', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='''Defines clones as having same V assignment, + J assignment, and junction length with + specified substitution distance model.''') + parser_bygroup.add_argument('-f', nargs='+', action='store', dest='fields', default=None, + help='Additional fields to use for grouping clones (non VDJ)') + parser_bygroup.add_argument('--mode', action='store', dest='mode', + choices=('allele', 'gene'), default='gene', + help='''Specifies whether to use the V(D)J allele or gene for + initial grouping.''') + parser_bygroup.add_argument('--act', action='store', dest='action', default='set', + choices=('first', 'set'), + help='''Specifies how to handle multiple V(D)J assignments + for initial grouping.''') + parser_bygroup.add_argument('--model', action='store', dest='model', + choices=('aa', 'ham', 'm1n', 'hs1f', 'hs5f'), + default=default_bygroup_model, + help='''Specifies which substitution model to use for + calculating distance between sequences. 
Where m1n is the + mouse single nucleotide transition/trasversion model + of Smith et al, 1996; hs1f is the human single + nucleotide model derived from Yaari et al, 2013; hs5f + is the human S5F model of Yaari et al, 2013; ham is + nucleotide Hamming distance; and aa is amino acid + Hamming distance. The hs5f data should be + considered experimental.''') + parser_bygroup.add_argument('--dist', action='store', dest='distance', type=float, + default=default_distance, + help='The distance threshold for clonal grouping') + parser_bygroup.add_argument('--norm', action='store', dest='norm', + choices=('len', 'mut', 'none'), default=default_norm, + help='''Specifies how to normalize distances. One of none + (do not normalize), len (normalize by length), + or mut (normalize by number of mutations between sequences).''') + parser_bygroup.add_argument('--sym', action='store', dest='sym', + choices=('avg', 'min'), default=default_sym, + help='''Specifies how to combine asymmetric distances. One of avg + (average of A->B and B->A) or min (minimum of A->B and B->A).''') + parser_bygroup.add_argument('--link', action='store', dest='linkage', + choices=('single', 'average', 'complete'), default=default_linkage, + help='''Type of linkage to use for hierarchical clustering.''') + parser_bygroup.add_argument('--sf', action='store', dest='seq_field', + default=default_seq_field, + help='''The name of the field to be used to calculate + distance between records''') + parser_bygroup.set_defaults(feed_func=feedQueue) + parser_bygroup.set_defaults(work_func=processQueue) + parser_bygroup.set_defaults(collect_func=collectQueue) + parser_bygroup.set_defaults(group_func=indexJunctions) + parser_bygroup.set_defaults(clone_func=distanceClones) + + + # Hierarchical clustering cloning method + parser_hclust = subparsers.add_parser('hclust', parents=[parser_parent], + formatter_class=CommonHelpFormatter, + help='Defines clones by specified distance metric on CDR3s and \ + cutting of 
hierarchical clustering tree') +# parser_hclust.add_argument('-f', nargs='+', action='store', dest='fields', default=None, +# help='Fields to use for grouping clones (non VDJ)') + parser_hclust.add_argument('--method', action='store', dest='method', + choices=('chen2010', 'ademokun2011'), default=default_hclust_model, + help='Specifies which cloning method to use for calculating distance \ + between CDR3s, computing linkage, and cutting clusters') + parser_hclust.set_defaults(feed_func=feedQueueClust) + parser_hclust.set_defaults(work_func=processQueueClust) + parser_hclust.set_defaults(collect_func=collectQueueClust) + parser_hclust.set_defaults(cluster_func=hierClust) + + return parser + + +if __name__ == '__main__': + """ + Parses command line arguments and calls main function + """ + # Parse arguments + parser = getArgParser() + args = parser.parse_args() + args_dict = parseCommonArgs(args) + # Convert case of fields + if 'seq_field' in args_dict: + args_dict['seq_field'] = args_dict['seq_field'].upper() + if 'fields' in args_dict and args_dict['fields'] is not None: + args_dict['fields'] = [f.upper() for f in args_dict['fields']] + + # Define clone_args + if args.command == 'bygroup': + args_dict['group_args'] = {'fields': args_dict['fields'], + 'action': args_dict['action'], + 'mode':args_dict['mode']} + args_dict['clone_args'] = {'model': args_dict['model'], + 'distance': args_dict['distance'], + 'norm': args_dict['norm'], + 'sym': args_dict['sym'], + 'linkage': args_dict['linkage'], + 'seq_field': args_dict['seq_field']} + + # TODO: can be cleaned up with abstract model class + args_dict['clone_args']['dist_mat'] = getModelMatrix(args_dict['model']) + + del args_dict['fields'] + del args_dict['action'] + del args_dict['mode'] + del args_dict['model'] + del args_dict['distance'] + del args_dict['norm'] + del args_dict['sym'] + del args_dict['linkage'] + del args_dict['seq_field'] + + # Define clone_args + if args.command == 'hclust': + dist_funcs = 
{'chen2010':distChen2010, 'ademokun2011':distAdemokun2011} + args_dict['clone_func'] = dist_funcs[args_dict['method']] + args_dict['cluster_args'] = {'method': args_dict['method']} + #del args_dict['fields'] + del args_dict['method'] + + # Call defineClones + del args_dict['command'] + del args_dict['db_files'] + for f in args.__dict__['db_files']: + args_dict['db_file'] = f + defineClones(**args_dict) \ No newline at end of file diff -r beaa487ecf43 -r 5ffd52fc35c4 change_o/MakeDb.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/change_o/MakeDb.py Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,1025 @@ +#!/usr/bin/env python3 +""" +Create tab-delimited database file to store sequence alignment information +""" +# Info +__author__ = 'Namita Gupta, Jason Anthony Vander Heiden' +from changeo import __version__, __date__ + +# Imports +import csv +import os +import re +import sys +import pandas as pd +import tarfile +import zipfile +from argparse import ArgumentParser +from collections import OrderedDict +from itertools import groupby +from shutil import rmtree +from tempfile import mkdtemp +from textwrap import dedent +from time import time +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.Alphabet import IUPAC + +# Presto and changeo imports +from presto.Defaults import default_out_args +from presto.Annotation import parseAnnotation +from presto.IO import countSeqFile, printLog, printProgress +from changeo.Commandline import CommonHelpFormatter, getCommonArgParser, parseCommonArgs +from changeo.IO import getDbWriter, countDbFile, getRepo +from changeo.Receptor import IgRecord, parseAllele, v_allele_regex, d_allele_regex, \ + j_allele_regex + +# Default parameters +default_delimiter = ('\t', ',', '-') + + +def gapV(ig_dict, repo_dict): + """ + Insert gaps into V region and update alignment information + + Arguments: + ig_dict : Dictionary of parsed IgBlast output + repo_dict : Dictionary of IMGT gapped germline sequences + + Returns: + dict : Updated with 
SEQUENCE_IMGT, V_GERM_START_IMGT, and V_GERM_LENGTH_IMGT fields + """ + + seq_imgt = '.' * (int(ig_dict['V_GERM_START_VDJ'])-1) + ig_dict['SEQUENCE_VDJ'] + + # Find gapped germline V segment + vgene = parseAllele(ig_dict['V_CALL'], v_allele_regex, 'first') + vkey = (vgene, ) + #TODO: Figure out else case + if vkey in repo_dict: + vgap = repo_dict[vkey] + # Iterate over gaps in the germline segment + gaps = re.finditer(r'\.', vgap) + gapcount = int(ig_dict['V_GERM_START_VDJ'])-1 + for gap in gaps: + i = gap.start() + # Break if gap begins after V region + if i >= ig_dict['V_GERM_LENGTH_VDJ'] + gapcount: + break + # Insert gap into IMGT sequence + seq_imgt = seq_imgt[:i] + '.' + seq_imgt[i:] + # Update gap counter + gapcount += 1 + ig_dict['SEQUENCE_IMGT'] = seq_imgt + # Update IMGT positioning information for V + ig_dict['V_GERM_START_IMGT'] = 1 + ig_dict['V_GERM_LENGTH_IMGT'] = ig_dict['V_GERM_LENGTH_VDJ'] + gapcount + + return ig_dict + + +def getIMGTJunc(ig_dict, repo_dict): + """ + Identify junction region by IMGT definition + + Arguments: + ig_dict : Dictionary of parsed IgBlast output + repo_dict : Dictionary of IMGT gapped germline sequences + + Returns: + dict : Updated with JUNCTION_LENGTH_IMGT and JUNCTION_IMGT fields + """ + # Find germline J segment + jgene = parseAllele(ig_dict['J_CALL'], j_allele_regex, 'first') + jkey = (jgene, ) + #TODO: Figure out else case + if jkey in repo_dict: + # Get germline J sequence + jgerm = repo_dict[jkey] + jgerm = jgerm[:ig_dict['J_GERM_START']+ig_dict['J_GERM_LENGTH']-1] + # Look for (F|W)GXG aa motif in nt sequence + motif = re.search(r'T(TT|TC|GG)GG[ACGT]{4}GG[AGCT]', jgerm) + aa_end = len(ig_dict['SEQUENCE_IMGT']) + #TODO: Figure out else case + if motif: + # print('\n', motif.group()) + aa_end = motif.start() - len(jgerm) + 3 + # Add fields to dict + ig_dict['JUNCTION'] = ig_dict['SEQUENCE_IMGT'][309:aa_end] + ig_dict['JUNCTION_LENGTH'] = len(ig_dict['JUNCTION']) + + return ig_dict + + +def getRegions(ig_dict): + 
""" + Identify FWR and CDR regions by IMGT definition + + Arguments: + ig_dict : Dictionary of parsed alignment output + + Returns: + dict : Updated with FWR1_IMGT, FWR2_IMGT, FWR3_IMGT, FWR4_IMGT, + CDR1_IMGT, CDR2_IMGT, and CDR3_IMGT fields + """ + try: + seq_len = len(ig_dict['SEQUENCE_IMGT']) + ig_dict['FWR1_IMGT'] = ig_dict['SEQUENCE_IMGT'][0:min(78,seq_len)] + except (KeyError, IndexError): + return ig_dict + + try: ig_dict['CDR1_IMGT'] = ig_dict['SEQUENCE_IMGT'][78:min(114, seq_len)] + except (IndexError): return ig_dict + + try: ig_dict['FWR2_IMGT'] = ig_dict['SEQUENCE_IMGT'][114:min(165, seq_len)] + except (IndexError): return ig_dict + + try: ig_dict['CDR2_IMGT'] = ig_dict['SEQUENCE_IMGT'][165:min(195, seq_len)] + except (IndexError): return ig_dict + + try: ig_dict['FWR3_IMGT'] = ig_dict['SEQUENCE_IMGT'][195:min(312, seq_len)] + except (IndexError): return ig_dict + + try: + cdr3_end = 306 + ig_dict['JUNCTION_LENGTH'] + ig_dict['CDR3_IMGT'] = ig_dict['SEQUENCE_IMGT'][312:cdr3_end] + ig_dict['FWR4_IMGT'] = ig_dict['SEQUENCE_IMGT'][cdr3_end:] + except (KeyError, IndexError): + return ig_dict + + return ig_dict + + +def getSeqforIgBlast(seq_file): + """ + Fetch input sequences for IgBlast queries + + Arguments: + seq_file = a fasta file of sequences input to IgBlast + + Returns: + a dictionary of {ID:Seq} + """ + + seq_dict = SeqIO.index(seq_file, "fasta", IUPAC.ambiguous_dna) + + # Create a seq_dict ID translation using IDs truncate up to space or 50 chars + seqs = {} + for seq in seq_dict.values(): + seqs.update({seq.description:str(seq.seq)}) + + return seqs + + +def findLine(handle, query): + """ + Finds line with query string in file + + Arguments: + handle = file handle in which to search for line + query = query string for which to search in file + + Returns: + line from handle in which query string was found + """ + for line in handle: + if(re.match(query, line)): + return line + + +def extractIMGT(imgt_output): + """ + Extract necessary files from 
IMGT results, zipped or unzipped + + Arguments: + imgt_output = zipped file or unzipped folder output by IMGT + + Returns: + sorted list of filenames from which information will be read + """ + #file_ext = os.path.splitext(imgt_output)[1].lower() + imgt_flags = ('1_Summary', '2_IMGT-gapped', '3_Nt-sequences', '6_Junction') + temp_dir = mkdtemp() + if zipfile.is_zipfile(imgt_output): + # Open zip file + imgt_zip = zipfile.ZipFile(imgt_output, 'r') + # Extract required files + imgt_files = sorted([n for n in imgt_zip.namelist() \ + if os.path.basename(n).startswith(imgt_flags)]) + imgt_zip.extractall(temp_dir, imgt_files) + # Define file list + imgt_files = [os.path.join(temp_dir, f) for f in imgt_files] + elif os.path.isdir(imgt_output): + # Find required files in folder + folder_files = [] + for root, dirs, files in os.walk(imgt_output): + folder_files.extend([os.path.join(os.path.abspath(root), f) for f in files]) + # Define file list + imgt_files = sorted([n for n in folder_files \ + if os.path.basename(n).startswith(imgt_flags)]) + elif tarfile.is_tarfile(imgt_output): + # Open zip file + imgt_tar = tarfile.open(imgt_output, 'r') + # Extract required files + imgt_files = sorted([n for n in imgt_tar.getnames() \ + if os.path.basename(n).startswith(imgt_flags)]) + imgt_tar.extractall(temp_dir, [imgt_tar.getmember(n) for n in imgt_files]) + # Define file list + imgt_files = [os.path.join(temp_dir, f) for f in imgt_files] + else: + sys.exit('ERROR: Unsupported IGMT output file. Must be either a zipped file (.zip), LZMA compressed tarfile (.txz) or a folder.') + + if len(imgt_files) > len(imgt_flags): # e.g. multiple 1_Summary files + sys.exit('ERROR: Wrong files in IMGT output %s.' % imgt_output) + elif len(imgt_files) < len(imgt_flags): + sys.exit('ERROR: Missing necessary file IMGT output %s.' 
% imgt_output) + + return temp_dir, imgt_files + + +# TODO: return a dictionary with keys determined by the comment strings in the blocks, thus avoiding problems with missing blocks +def readOneIgBlastResult(block): + """ + Parse a single IgBLAST query result + + Arguments: + block = itertools groupby object of single result + + Returns: + None if no results, otherwise list of DataFrames for each result block + """ + results = list() + i = 0 + for match, subblock in groupby(block, lambda l: l=='\n'): + if not match: + # Strip whitespace and comments + sub = [s.strip() for s in subblock if not s.startswith('#')] + + # Continue on empty block + if not sub: continue + else: i += 1 + + # Split by tabs + sub = [s.split('\t') for s in sub] + + # Append list for "V-(D)-J rearrangement summary" (i == 1) + # And "V-(D)-J junction details" (i == 2) + # Otherwise append DataFrame of subblock + if i == 1 or i == 2: + results.append(sub[0]) + else: + df = pd.DataFrame(sub) + if not df.empty: results.append(df) + + return results if results else None + + +# TODO: needs more speeds. pandas is probably to blame. +def readIgBlast(igblast_output, seq_dict, repo_dict, + score_fields=False, region_fields=False): + """ + Reads IgBlast output + + Arguments: + igblast_output = IgBlast output file (format 7) + seq_dict = a dictionary of {ID:Seq} from input fasta file + repo_dict = dictionary of IMGT gapped germline sequences + score_fields = if True parse alignment scores + region_fields = if True add FWR and CDR region fields + + Returns: + a generator of dictionaries containing alignment data + """ + + # Open IgBlast output file + with open(igblast_output) as f: + # Iterate over individual results (separated by # IGBLASTN) + for k1, block in groupby(f, lambda x: re.match('# IGBLASTN', x)): + block = list(block) + if not k1: + # TODO: move query name extraction into block parser readOneIgBlastResult(). 
+ # Extract sequence ID + query_name = ' '.join(block[0].strip().split(' ')[2:]) + # Initialize db_gen to have ID and input sequence + db_gen = {'SEQUENCE_ID': query_name, + 'SEQUENCE_INPUT': seq_dict[query_name]} + + # Parse further sub-blocks + block_list = readOneIgBlastResult(block) + + # TODO: this is indented pretty far. should be a separate function. or several functions. + # If results exist, parse further to obtain full db_gen + if block_list is not None: + # Parse quality information + db_gen['STOP'] = 'T' if block_list[0][-4] == 'Yes' else 'F' + db_gen['IN_FRAME'] = 'T' if block_list[0][-3] == 'In-frame' else 'F' + db_gen['FUNCTIONAL'] = 'T' if block_list[0][-2] == 'Yes' else 'F' + if block_list[0][-1] == '-': + db_gen['SEQUENCE_INPUT'] = str(Seq(db_gen['SEQUENCE_INPUT'], + IUPAC.ambiguous_dna).reverse_complement()) + + # Parse V, D, and J calls + call_str = ' '.join(block_list[0]) + v_call = parseAllele(call_str, v_allele_regex, action='list') + d_call = parseAllele(call_str, d_allele_regex, action='list') + j_call = parseAllele(call_str, j_allele_regex, action='list') + db_gen['V_CALL'] = ','.join(v_call) if v_call is not None else 'None' + db_gen['D_CALL'] = ','.join(d_call) if d_call is not None else 'None' + db_gen['J_CALL'] = ','.join(j_call) if j_call is not None else 'None' + + # Parse junction sequence + # db_gen['JUNCTION_VDJ'] = re.sub('(N/A)|\[|\(|\)|\]', '', ''.join(block_list[1])) + # db_gen['JUNCTION_LENGTH_VDJ'] = len(db_gen['JUNCTION_VDJ']) + + # TODO: IgBLAST does a stupid and doesn't output block #3 sometimes. why? + # TODO: maybe we should fail these. they look craptastic. 
+ #pd.set_option('display.width', 500) + #print query_name, len(block_list), hit_idx + #for i, x in enumerate(block_list): + # print '[%i]' % i + # print x + + # Parse segment start and stop positions + hit_df = block_list[-1] + + # Alignment info block + # 0: segment + # 1: query id + # 2: subject id + # 3: % identity + # 4: alignment length + # 5: mismatches + # 6: gap opens + # 7: gaps + # 8: q. start + # 9: q. end + # 10: s. start + # 11: s. end + # 12: evalue + # 13: bit score + # 14: query seq + # 15: subject seq + # 16: btop + + # If V call exists, parse V alignment information + seq_vdj = '' + if v_call is not None: + v_align = hit_df[hit_df[0] == 'V'].iloc[0] + # Germline positions + db_gen['V_GERM_START_VDJ'] = int(v_align[10]) + db_gen['V_GERM_LENGTH_VDJ'] = int(v_align[11]) - db_gen['V_GERM_START_VDJ'] + 1 + # Query sequence positions + db_gen['V_SEQ_START'] = int(v_align[8]) + db_gen['V_SEQ_LENGTH'] = int(v_align[9]) - db_gen['V_SEQ_START'] + 1 + + if int(v_align[6]) == 0: + db_gen['INDELS'] = 'F' + else: + db_gen['INDELS'] = 'T' + # Set functional to none so record gets tossed (junction will be wrong) + # db_gen['FUNCTIONAL'] = None + + # V alignment scores + if score_fields: + try: db_gen['V_SCORE'] = float(v_align[13]) + except (TypeError, ValueError): db_gen['V_SCORE'] = 'None' + + try: db_gen['V_IDENTITY'] = float(v_align[3]) / 100.0 + except (TypeError, ValueError): db_gen['V_IDENTITY'] = 'None' + + try: db_gen['V_EVALUE'] = float(v_align[12]) + except (TypeError, ValueError): db_gen['V_EVALUE'] = 'None' + + try: db_gen['V_BTOP'] = v_align[16] + except (TypeError, ValueError): db_gen['V_BTOP'] = 'None' + + # Update VDJ sequence, removing insertions + start = 0 + for m in re.finditer(r'-', v_align[15]): + ins = m.start() + seq_vdj += v_align[14][start:ins] + start = ins + 1 + seq_vdj += v_align[14][start:] + + # TODO: needs to check that the V results are present before trying to determine N1_LENGTH from them. 
+ # If D call exists, parse D alignment information + if d_call is not None: + d_align = hit_df[hit_df[0] == 'D'].iloc[0] + + # TODO: this is kinda gross. not sure how else to fix the alignment overlap problem though. + # Determine N-region length and amount of J overlap with V or D alignment + overlap = 0 + if v_call is not None: + n1_len = int(d_align[8]) - (db_gen['V_SEQ_START'] + db_gen['V_SEQ_LENGTH']) + if n1_len < 0: + db_gen['N1_LENGTH'] = 0 + overlap = abs(n1_len) + else: + db_gen['N1_LENGTH'] = n1_len + n1_start = (db_gen['V_SEQ_START'] + db_gen['V_SEQ_LENGTH']-1) + n1_end = int(d_align[8])-1 + seq_vdj += db_gen['SEQUENCE_INPUT'][n1_start:n1_end] + + # Query sequence positions + db_gen['D_SEQ_START'] = int(d_align[8]) + overlap + db_gen['D_SEQ_LENGTH'] = max(int(d_align[9]) - db_gen['D_SEQ_START'] + 1, 0) + + # Germline positions + db_gen['D_GERM_START'] = int(d_align[10]) + overlap + db_gen['D_GERM_LENGTH'] = max(int(d_align[11]) - db_gen['D_GERM_START'] + 1, 0) + + # Update VDJ sequence, removing insertions + start = overlap + for m in re.finditer(r'-', d_align[15]): + ins = m.start() + seq_vdj += d_align[14][start:ins] + start = ins + 1 + seq_vdj += d_align[14][start:] + + # TODO: needs to check that the V results are present before trying to determine N1_LENGTH from them. + # If J call exists, parse J alignment information + if j_call is not None: + j_align = hit_df[hit_df[0] == 'J'].iloc[0] + + # TODO: this is kinda gross. not sure how else to fix the alignment overlap problem though. 
+ # Determine N-region length and amount of J overlap with V or D alignment + overlap = 0 + if d_call is not None: + n2_len = int(j_align[8]) - (db_gen['D_SEQ_START'] + db_gen['D_SEQ_LENGTH']) + if n2_len < 0: + db_gen['N2_LENGTH'] = 0 + overlap = abs(n2_len) + else: + db_gen['N2_LENGTH'] = n2_len + n2_start = (db_gen['D_SEQ_START']+db_gen['D_SEQ_LENGTH']-1) + n2_end = int(j_align[8])-1 + seq_vdj += db_gen['SEQUENCE_INPUT'][n2_start:n2_end] + elif v_call is not None: + n1_len = int(j_align[8]) - (db_gen['V_SEQ_START'] + db_gen['V_SEQ_LENGTH']) + if n1_len < 0: + db_gen['N1_LENGTH'] = 0 + overlap = abs(n1_len) + else: + db_gen['N1_LENGTH'] = n1_len + n1_start = (db_gen['V_SEQ_START']+db_gen['V_SEQ_LENGTH']-1) + n1_end = int(j_align[8])-1 + seq_vdj += db_gen['SEQUENCE_INPUT'][n1_start:n1_end] + else: + db_gen['N1_LENGTH'] = 0 + + # Query positions + db_gen['J_SEQ_START'] = int(j_align[8]) + overlap + db_gen['J_SEQ_LENGTH'] = max(int(j_align[9]) - db_gen['J_SEQ_START'] + 1, 0) + + # Germline positions + db_gen['J_GERM_START'] = int(j_align[10]) + overlap + db_gen['J_GERM_LENGTH'] = max(int(j_align[11]) - db_gen['J_GERM_START'] + 1, 0) + + # J alignment scores + if score_fields: + try: db_gen['J_SCORE'] = float(j_align[13]) + except (TypeError, ValueError): db_gen['J_SCORE'] = 'None' + + try: db_gen['J_IDENTITY'] = float(j_align[3]) / 100.0 + except (TypeError, ValueError): db_gen['J_IDENTITY'] = 'None' + + try: db_gen['J_EVALUE'] = float(j_align[12]) + except (TypeError, ValueError): db_gen['J_EVALUE'] = 'None' + + try: db_gen['J_BTOP'] = j_align[16] + except (TypeError, ValueError): db_gen['J_BTOP'] = 'None' + + # Update VDJ sequence, removing insertions + start = overlap + for m in re.finditer(r'-', j_align[15]): + ins = m.start() + seq_vdj += j_align[14][start:ins] + start = ins + 1 + seq_vdj += j_align[14][start:] + + db_gen['SEQUENCE_VDJ'] = seq_vdj + + # Create IMGT-gapped sequence and infer IMGT junction + if v_call is not None: + db_gen = gapV(db_gen, 
repo_dict) + if j_call is not None: + db_gen = getIMGTJunc(db_gen, repo_dict) + + # FWR and CDR regions + if region_fields: getRegions(db_gen) + + yield IgRecord(db_gen) + + +# TODO: should be more readable +def readIMGT(imgt_files, score_fields=False, region_fields=False): + """ + Reads IMGT/HighV-Quest output + + Arguments: + imgt_files = IMGT/HighV-Quest output files 1, 2, 3, and 6 + score_fields = if True parse alignment scores + region_fields = if True add FWR and CDR region fields + + Returns: + a generator of dictionaries containing alignment data + """ + imgt_iters = [csv.DictReader(open(f, 'rU'), delimiter='\t') for f in imgt_files] + # Create a dictionary for each sequence alignment and yield its generator + for sm, gp, nt, jn in zip(*imgt_iters): + if len(set([sm['Sequence ID'], + gp['Sequence ID'], + nt['Sequence ID'], + jn['Sequence ID']])) != 1: + sys.exit('Error: IMGT files are corrupt starting with Summary file record %s' \ + % sm['Sequence ID']) + + db_gen = {'SEQUENCE_ID': sm['Sequence ID'], + 'SEQUENCE_INPUT': sm['Sequence']} + + if 'No results' not in sm['Functionality']: + db_gen['FUNCTIONAL'] = ['?','T','F'][('productive' in sm['Functionality']) + + ('unprod' in sm['Functionality'])] + db_gen['IN_FRAME'] = ['?','T','F'][('in-frame' in sm['JUNCTION frame']) + + ('out-of-frame' in sm['JUNCTION frame'])], + db_gen['STOP'] = ['F','?','T'][('stop codon' in sm['Functionality comment']) + + ('unprod' in sm['Functionality'])] + db_gen['MUTATED_INVARIANT'] = ['F','?','T'][(any(('missing' in sm['Functionality comment'], + 'missing' in sm['V-REGION potential ins/del']))) + + ('unprod' in sm['Functionality'])] + db_gen['INDELS'] = ['F','T'][any((sm['V-REGION potential ins/del'], + sm['V-REGION insertions'], + sm['V-REGION deletions']))] + + db_gen['SEQUENCE_VDJ'] = nt['V-D-J-REGION'] if nt['V-D-J-REGION'] else nt['V-J-REGION'] + db_gen['SEQUENCE_IMGT'] = gp['V-D-J-REGION'] if gp['V-D-J-REGION'] else gp['V-J-REGION'] + + db_gen['V_CALL'] = re.sub('\sor\s', 
',', re.sub(',', '', gp['V-GENE and allele'])) + db_gen['D_CALL'] = re.sub('\sor\s', ',', re.sub(',', '', gp['D-GENE and allele'])) + db_gen['J_CALL'] = re.sub('\sor\s', ',', re.sub(',', '', gp['J-GENE and allele'])) + + v_seq_length = len(nt['V-REGION']) if nt['V-REGION'] else 0 + db_gen['V_SEQ_START'] = nt['V-REGION start'] + db_gen['V_SEQ_LENGTH'] = v_seq_length + db_gen['V_GERM_START_IMGT'] = 1 + db_gen['V_GERM_LENGTH_IMGT'] = len(gp['V-REGION']) if gp['V-REGION'] else 0 + + db_gen['N1_LENGTH'] = sum(int(i) for i in [jn["P3'V-nt nb"], + jn['N-REGION-nt nb'], + jn['N1-REGION-nt nb'], + jn["P5'D-nt nb"]] if i) + db_gen['D_SEQ_START'] = sum(int(i) for i in [1, v_seq_length, + jn["P3'V-nt nb"], + jn['N-REGION-nt nb'], + jn['N1-REGION-nt nb'], + jn["P5'D-nt nb"]] if i) + db_gen['D_SEQ_LENGTH'] = int(jn["D-REGION-nt nb"] or 0) + db_gen['D_GERM_START'] = int(jn["5'D-REGION trimmed-nt nb"] or 0) + 1 + db_gen['D_GERM_LENGTH'] = int(jn["D-REGION-nt nb"] or 0) + db_gen['N2_LENGTH'] = sum(int(i) for i in [jn["P3'D-nt nb"], + jn['N2-REGION-nt nb'], + jn["P5'J-nt nb"]] if i) + + db_gen['J_SEQ_START_IMGT'] = sum(int(i) for i in [1, v_seq_length, + jn["P3'V-nt nb"], + jn['N-REGION-nt nb'], + jn['N1-REGION-nt nb'], + jn["P5'D-nt nb"], + jn["D-REGION-nt nb"], + jn["P3'D-nt nb"], + jn['N2-REGION-nt nb'], + jn["P5'J-nt nb"]] if i) + db_gen['J_SEQ_LENGTH'] = len(nt['J-REGION']) if nt['J-REGION'] else 0 + db_gen['J_GERM_START'] = int(jn["5'J-REGION trimmed-nt nb"] or 0) + 1 + db_gen['J_GERM_LENGTH'] = len(gp['J-REGION']) if gp['J-REGION'] else 0 + + db_gen['JUNCTION_LENGTH'] = len(jn['JUNCTION']) if jn['JUNCTION'] else 0 + db_gen['JUNCTION'] = jn['JUNCTION'] + + # Alignment scores + if score_fields: + try: db_gen['V_SCORE'] = float(sm['V-REGION score']) + except (TypeError, ValueError): db_gen['V_SCORE'] = 'None' + + try: db_gen['V_IDENTITY'] = float(sm['V-REGION identity %']) / 100.0 + except (TypeError, ValueError): db_gen['V_IDENTITY'] = 'None' + + try: db_gen['J_SCORE'] = 
float(sm['J-REGION score']) + except (TypeError, ValueError): db_gen['J_SCORE'] = 'None' + + try: db_gen['J_IDENTITY'] = float(sm['J-REGION identity %']) / 100.0 + except (TypeError, ValueError): db_gen['J_IDENTITY'] = 'None' + + # FWR and CDR regions + if region_fields: getRegions(db_gen) + else: + db_gen['V_CALL'] = 'None' + db_gen['D_CALL'] = 'None' + db_gen['J_CALL'] = 'None' + + yield IgRecord(db_gen) + + +def getIDforIMGT(seq_file): + """ + Create a sequence ID translation using IMGT truncation + + Arguments: + seq_file = a fasta file of sequences input to IMGT + + Returns: + a dictionary of {truncated ID: full seq description} + """ + + # Create a seq_dict ID translation using IDs truncate up to space or 50 chars + ids = {} + for i, rec in enumerate(SeqIO.parse(seq_file, 'fasta', IUPAC.ambiguous_dna)): + if len(rec.description) <= 50: + id_key = rec.description + else: + id_key = re.sub('\||\s|!|&|\*|<|>|\?','_',rec.description[:50]) + ids.update({id_key:rec.description}) + + return ids + + +def writeDb(db_gen, file_prefix, total_count, id_dict={}, no_parse=True, + score_fields=False, region_fields=False, out_args=default_out_args): + """ + Writes tab-delimited database file in output directory + + Arguments: + db_gen = a generator of IgRecord objects containing alignment data + file_prefix = directory and prefix for CLIP tab-delim file + total_count = number of records (for progress bar) + id_dict = a dictionary of {IMGT ID: full seq description} + no_parse = if ID is to be parsed for pRESTO output with default delimiters + score_fields = if True add alignment score fields to output file + region_fields = if True add FWR and CDR region fields to output file + out_args = common output argument dictionary from parseCommonArgs + + Returns: + None + """ + pass_file = "%s_db-pass.tab" % file_prefix + fail_file = "%s_db-fail.tab" % file_prefix + ordered_fields = ['SEQUENCE_ID', + 'SEQUENCE_INPUT', + 'FUNCTIONAL', + 'IN_FRAME', + 'STOP', + 'MUTATED_INVARIANT', + 
'INDELS', + 'V_CALL', + 'D_CALL', + 'J_CALL', + 'SEQUENCE_VDJ', + 'SEQUENCE_IMGT', + 'V_SEQ_START', + 'V_SEQ_LENGTH', + 'V_GERM_START_VDJ', + 'V_GERM_LENGTH_VDJ', + 'V_GERM_START_IMGT', + 'V_GERM_LENGTH_IMGT', + 'N1_LENGTH', + 'D_SEQ_START', + 'D_SEQ_LENGTH', + 'D_GERM_START', + 'D_GERM_LENGTH', + 'N2_LENGTH', + 'J_SEQ_START', + 'J_SEQ_LENGTH', + 'J_GERM_START', + 'J_GERM_LENGTH', + 'JUNCTION_LENGTH', + 'JUNCTION'] + + if score_fields: + ordered_fields.extend(['V_SCORE', + 'V_IDENTITY', + 'V_EVALUE', + 'V_BTOP', + 'J_SCORE', + 'J_IDENTITY', + 'J_EVALUE', + 'J_BTOP']) + + if region_fields: + ordered_fields.extend(['FWR1_IMGT', 'FWR2_IMGT', 'FWR3_IMGT', 'FWR4_IMGT', + 'CDR1_IMGT', 'CDR2_IMGT', 'CDR3_IMGT']) + + + # TODO: This is not the best approach. should pass in output fields. + # Initiate passed handle + pass_handle = None + + # Open failed file + if out_args['failed']: + fail_handle = open(fail_file, 'wt') + fail_writer = getDbWriter(fail_handle, add_fields=['SEQUENCE_ID', 'SEQUENCE_INPUT']) + else: + fail_handle = None + fail_writer = None + + # Initialize counters and file + pass_writer = None + start_time = time() + rec_count = pass_count = fail_count = 0 + for record in db_gen: + #printProgress(i + (total_count/2 if id_dict else 0), total_count, 0.05, start_time) + printProgress(rec_count, total_count, 0.05, start_time) + rec_count += 1 + + # Count pass or fail + if (record.v_call == 'None' and record.j_call == 'None') or \ + record.functional is None or \ + not record.seq_vdj or \ + not record.junction: + # print(record.v_call, record.j_call, record.functional, record.junction) + fail_count += 1 + if fail_writer is not None: fail_writer.writerow(record.toDict()) + continue + else: + pass_count += 1 + + # Build sample sequence description + if record.id in id_dict: + record.id = id_dict[record.id] + + # Parse sequence description into new columns + if not no_parse: + record.annotations = parseAnnotation(record.id, delimiter=out_args['delimiter']) + 
record.id = record.annotations['ID'] + del record.annotations['ID'] + + # TODO: This is not the best approach. should pass in output fields. + # If first sequence, use parsed description to create new columns and initialize writer + if pass_writer is None: + if not no_parse: ordered_fields.extend(list(record.annotations.keys())) + pass_handle = open(pass_file, 'wt') + pass_writer = getDbWriter(pass_handle, add_fields=ordered_fields) + + # Write row to tab-delim CLIP file + pass_writer.writerow(record.toDict()) + + # Print log + #printProgress(i+1 + (total_count/2 if id_dict else 0), total_count, 0.05, start_time) + printProgress(rec_count, total_count, 0.05, start_time) + + log = OrderedDict() + log['OUTPUT'] = pass_file + log['PASS'] = pass_count + log['FAIL'] = fail_count + log['END'] = 'MakeDb' + printLog(log) + + if pass_handle is not None: pass_handle.close() + if fail_handle is not None: fail_handle.close() + + +# TODO: may be able to merge with parseIMGT +def parseIgBlast(igblast_output, seq_file, repo, no_parse=True, score_fields=False, + region_fields=False, out_args=default_out_args): + """ + Main for IgBlast aligned sample sequences + + Arguments: + igblast_output = IgBlast output file to process + seq_file = fasta file input to IgBlast (from which to get sequence) + repo = folder with germline repertoire files + no_parse = if ID is to be parsed for pRESTO output with default delimiters + score_fields = if True add alignment score fields to output file + region_fields = if True add FWR and CDR region fields to output file + out_args = common output argument dictionary from parseCommonArgs + + Returns: + None + """ + # Print parameter info + log = OrderedDict() + log['START'] = 'MakeDB' + log['ALIGNER'] = 'IgBlast' + log['ALIGN_RESULTS'] = os.path.basename(igblast_output) + log['SEQ_FILE'] = os.path.basename(seq_file) + log['NO_PARSE'] = no_parse + log['SCORE_FIELDS'] = score_fields + log['REGION_FIELDS'] = region_fields + printLog(log) + + # Get input 
sequence dictionary + seq_dict = getSeqforIgBlast(seq_file) + + # Formalize out_dir and file-prefix + if not out_args['out_dir']: + out_dir = os.path.split(igblast_output)[0] + else: + out_dir = os.path.abspath(out_args['out_dir']) + if not os.path.exists(out_dir): os.mkdir(out_dir) + if out_args['out_name']: + file_prefix = out_args['out_name'] + else: + file_prefix = os.path.basename(os.path.splitext(igblast_output)[0]) + file_prefix = os.path.join(out_dir, file_prefix) + + total_count = countSeqFile(seq_file) + + # Create + repo_dict = getRepo(repo) + igblast_dict = readIgBlast(igblast_output, seq_dict, repo_dict, + score_fields=score_fields, region_fields=region_fields) + writeDb(igblast_dict, file_prefix, total_count, no_parse=no_parse, + score_fields=score_fields, region_fields=region_fields, out_args=out_args) + + +# TODO: may be able to merge with parseIgBlast +def parseIMGT(imgt_output, seq_file=None, no_parse=True, score_fields=False, + region_fields=False, out_args=default_out_args): + """ + Main for IMGT aligned sample sequences + + Arguments: + imgt_output = zipped file or unzipped folder output by IMGT + seq_file = FASTA file input to IMGT (from which to get seqID) + no_parse = if ID is to be parsed for pRESTO output with default delimiters + score_fields = if True add alignment score fields to output file + region_fields = if True add FWR and CDR region fields to output file + out_args = common output argument dictionary from parseCommonArgs + + Returns: + None + """ + # Print parameter info + log = OrderedDict() + log['START'] = 'MakeDb' + log['ALIGNER'] = 'IMGT' + log['ALIGN_RESULTS'] = imgt_output + log['SEQ_FILE'] = os.path.basename(seq_file) if seq_file else '' + log['NO_PARSE'] = no_parse + log['SCORE_FIELDS'] = score_fields + log['REGION_FIELDS'] = region_fields + printLog(log) + + # Get individual IMGT result files + temp_dir, imgt_files = extractIMGT(imgt_output) + + # Formalize out_dir and file-prefix + if not out_args['out_dir']: + out_dir 
= os.path.dirname(os.path.abspath(imgt_output)) + else: + out_dir = os.path.abspath(out_args['out_dir']) + if not os.path.exists(out_dir): os.mkdir(out_dir) + if out_args['out_name']: + file_prefix = out_args['out_name'] + else: + file_prefix = os.path.splitext(os.path.split(os.path.abspath(imgt_output))[1])[0] + file_prefix = os.path.join(out_dir, file_prefix) + + total_count = countDbFile(imgt_files[0]) + + # Get (parsed) IDs from fasta file submitted to IMGT + id_dict = getIDforIMGT(seq_file) if seq_file else {} + + # Create + imgt_dict = readIMGT(imgt_files, score_fields=score_fields, + region_fields=region_fields) + writeDb(imgt_dict, file_prefix, total_count, id_dict=id_dict, no_parse=no_parse, + score_fields=score_fields, region_fields=region_fields, out_args=out_args) + + # Delete temp directory + rmtree(temp_dir) + + +def getArgParser(): + """ + Defines the ArgumentParser + + Arguments: + None + + Returns: + an ArgumentParser object + """ + fields = dedent( + ''' + output files: + db-pass + database of parsed alignment records. + db-fail + database with records failing alignment. 
+ + output fields: + SEQUENCE_ID, SEQUENCE_INPUT, FUNCTIONAL, IN_FRAME, STOP, MUTATED_INVARIANT, + INDELS, V_CALL, D_CALL, J_CALL, SEQUENCE_VDJ and/or SEQUENCE_IMGT, + V_SEQ_START, V_SEQ_LENGTH, V_GERM_START_VDJ and/or V_GERM_START_IMGT, + V_GERM_LENGTH_VDJ and/or V_GERM_LENGTH_IMGT, N1_LENGTH, + D_SEQ_START, D_SEQ_LENGTH, D_GERM_START, D_GERM_LENGTH, N2_LENGTH, + J_SEQ_START, J_SEQ_LENGTH, J_GERM_START, J_GERM_LENGTH, + JUNCTION_LENGTH, JUNCTION, V_SCORE, V_IDENTITY, V_EVALUE, V_BTOP, + J_SCORE, J_IDENTITY, J_EVALUE, J_BTOP, FWR1_IMGT, FWR2_IMGT, FWR3_IMGT, + FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, CDR3_IMGT + ''') + + # Define ArgumentParser + parser = ArgumentParser(description=__doc__, epilog=fields, + formatter_class=CommonHelpFormatter) + parser.add_argument('--version', action='version', + version='%(prog)s:' + ' %s-%s' %(__version__, __date__)) + subparsers = parser.add_subparsers(title='subcommands', dest='command', + help='Aligner used', metavar='') + # TODO: This is a temporary fix for Python issue 9253 + subparsers.required = True + + # Parent parser + parser_parent = getCommonArgParser(seq_in=False, seq_out=False, log=False) + + # IgBlast Aligner + parser_igblast = subparsers.add_parser('igblast', help='Process IgBlast output', + parents=[parser_parent], + formatter_class=CommonHelpFormatter) + parser_igblast.set_defaults(func=parseIgBlast) + parser_igblast.add_argument('-i', nargs='+', action='store', dest='aligner_files', + required=True, + help='''IgBLAST output files in format 7 with query sequence + (IgBLAST argument \'-outfmt "7 std qseq sseq btop"\').''') + parser_igblast.add_argument('-r', nargs='+', action='store', dest='repo', required=True, + help='''List of folders and/or fasta files containing + IMGT-gapped germline sequences corresponding to the + set of germlines used in the IgBLAST alignment.''') + parser_igblast.add_argument('-s', action='store', nargs='+', dest='seq_files', + required=True, + help='List of input FASTA files containing 
sequences') + parser_igblast.add_argument('--noparse', action='store_true', dest='no_parse', + help='''Specify if input IDs should not be parsed to add + new columns to database.''') + parser_igblast.add_argument('--scores', action='store_true', dest='score_fields', + help='''Specify if alignment score metrics should be + included in the output. Adds the V_SCORE, V_IDENTITY, + V_EVALUE, V_BTOP, J_SCORE, J_IDENTITY, + J_BTOP, and J_EVALUE columns.''') + parser_igblast.add_argument('--regions', action='store_true', dest='region_fields', + help='''Specify if IMGT framework and CDR regions should be + included in the output. Adds the FWR1_IMGT, FWR2_IMGT, + FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and + CDR3_IMGT columns.''') + + # IMGT aligner + parser_imgt = subparsers.add_parser('imgt', help='Process IMGT/HighV-Quest output', + parents=[parser_parent], + formatter_class=CommonHelpFormatter) + imgt_arg_group = parser_imgt.add_mutually_exclusive_group(required=True) + imgt_arg_group.add_argument('-i', nargs='+', action='store', dest='aligner_files', + help='''Either zipped IMGT output files (.zip) or a folder + containing unzipped IMGT output files (which must + include 1_Summary, 2_IMGT-gapped, 3_Nt-sequences, + and 6_Junction).''') + parser_imgt.add_argument('-s', nargs='*', action='store', dest='seq_files', + required=False, + help='List of input FASTA files containing sequences') + parser_imgt.add_argument('--noparse', action='store_true', dest='no_parse', + help='''Specify if input IDs should not be parsed to add new + columns to database.''') + parser_imgt.add_argument('--scores', action='store_true', dest='score_fields', + help='''Specify if alignment score metrics should be + included in the output. Adds the V_SCORE, V_IDENTITY, + J_SCORE and J_IDENTITY. 
Note, this will also add + the columns V_EVALUE, V_BTOP, J_EVALUE and J_BTOP, + but they will be empty for IMGT output.''') + parser_imgt.add_argument('--regions', action='store_true', dest='region_fields', + help='''Specify if IMGT framework and CDR regions should be + included in the output. Adds the FWR1_IMGT, FWR2_IMGT, + FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and + CDR3_IMGT columns.''') + parser_imgt.set_defaults(func=parseIMGT) + + return parser + + +if __name__ == "__main__": + """ + Parses command line arguments and calls main + """ + parser = getArgParser() + args = parser.parse_args() + args_dict = parseCommonArgs(args, in_arg='aligner_files') + + # Set no ID parsing if sequence files are not provided + if 'seq_files' in args_dict and not args_dict['seq_files']: + args_dict['no_parse'] = True + + # Delete + if 'seq_files' in args_dict: del args_dict['seq_files'] + if 'aligner_files' in args_dict: del args_dict['aligner_files'] + if 'command' in args_dict: del args_dict['command'] + if 'func' in args_dict: del args_dict['func'] + + if args.command == 'imgt': + for i in range(len(args.__dict__['aligner_files'])): + args_dict['imgt_output'] = args.__dict__['aligner_files'][i] + args_dict['seq_file'] = args.__dict__['seq_files'][i] \ + if args.__dict__['seq_files'] else None + args.func(**args_dict) + elif args.command == 'igblast': + for i in range(len(args.__dict__['aligner_files'])): + args_dict['igblast_output'] = args.__dict__['aligner_files'][i] + args_dict['seq_file'] = args.__dict__['seq_files'][i] + args.func(**args_dict) diff -r beaa487ecf43 -r 5ffd52fc35c4 change_o/define_clones.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/change_o/define_clones.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,15 @@ +args <- commandArgs(trailingOnly = TRUE) + +input=args[1] +output=args[2] + +change.o = read.table(input, header=T, sep="\t", quote="", stringsAsFactors=F) + +freq = data.frame(table(change.o$CLONE)) +freq2 = data.frame(table(freq$Freq)) + 
+freq2$final = as.numeric(freq2$Freq) * as.numeric(as.character(freq2$Var1)) + +names(freq2) = c("Clone size", "Nr of clones", "Nr of sequences") + +write.table(x=freq2, file=output, sep="\t",quote=F,row.names=F,col.names=T) diff -r beaa487ecf43 -r 5ffd52fc35c4 change_o/define_clones.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/change_o/define_clones.sh Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,43 @@ +#!/bin/bash +dir="$(cd "$(dirname "$0")" && pwd)" + +#define_clones.sh $input $noparse $scores $regions $out_file + +type=$1 +input=$2 + +mkdir -p $PWD/outdir + +cp $input $PWD/input.tab #file has to have a ".tab" extension + +if [ "bygroup" == "$type" ] ; then + mode=$3 + act=$4 + model=$5 + norm=$6 + sym=$7 + link=$8 + dist=$9 + output=${10} + output2=${11} + + python3 $dir/DefineClones.py bygroup -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link + #/data/users/david/anaconda3/bin/python $dir/DefineClones.py bygroup -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link + #/home/galaxy/anaconda3/bin/python $dir/DefineClones.py bygroup -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --mode $mode --act $act --model $model --dist $dist --norm $norm --sym $sym --link $link + + Rscript $dir/define_clones.r $PWD/outdir/output_clone-pass.tab $output2 2>&1 +else + method=$3 + output=$4 + output2=$5 + + python3 $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method + #/data/users/david/anaconda3/bin/python $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method + #/home/galaxy/anaconda3/bin/python $dir/DefineClones.py hclust -d $PWD/input.tab --nproc 4 --outdir $PWD/outdir --outname output --method $method + + Rscript $dir/define_clones.r 
$PWD/outdir/output_clone-pass.tab $output2 2>&1 +fi + +cp $PWD/outdir/output_clone-pass.tab $output + +rm -rf $PWD/outdir/ diff -r beaa487ecf43 -r 5ffd52fc35c4 change_o/makedb.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/change_o/makedb.sh Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,38 @@ +#!/bin/bash +dir="$(cd "$(dirname "$0")" && pwd)" + +input=$1 +noparse=$2 +scores=$3 +regions=$4 +output=$5 + +if [ "true" == "$noparse" ] ; then + noparse="--noparse" +else + noparse="" +fi + +if [ "true" == "$scores" ] ; then + scores="--scores" +else + scores="" +fi + +if [ "true" == "$regions" ] ; then + regions="--regions" +else + regions="" +fi + +mkdir $PWD/outdir + +echo "makedb: $PWD/outdir" + +python3 $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions +#/data/users/david/anaconda3/bin/python $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions +#/home/galaxy/anaconda3/bin/python $dir/MakeDb.py imgt -i $input --outdir $PWD/outdir --outname output $noparse $scores $regions + +mv $PWD/outdir/output_db-pass.tab $output + +rm -rf $PWD/outdir/ diff -r beaa487ecf43 -r 5ffd52fc35c4 complete.sh --- a/complete.sh Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -#!/bin/bash -set -e -inputFiles=($1) -outputDir=$3 -outputFile=$3/index.html #$1 -clonalType=$4 -species=$5 -locus=$6 -filterproductive=$7 -clonality_method=$8 - -html=$2 -dir="$(cd "$(dirname "$0")" && pwd)" -array=("$@") -echo "

Progress

" > $html -echo "" >> $html - -#mkdir $PWD/igblastdatabase -#unzip $dir/database.zip -d $PWD/igblastdatabase/ -#export IGDATA=$PWD/igblastdatabase/ - -id="" -forwardSlash="/" -mergerInput=() -echo "Before loop" -count=1 -for current in "${inputFiles[@]}" -do - if [[ "$current" != *"$forwardSlash"* ]]; then - id="$current" - mergerInput+=($id) - count=1 - continue - fi - echo "working on $current" - fileName=$(basename $current) - fileName="${fileName%.*}" - parsedFileName="$PWD/$fileName.parsed" - f=$(file $current) - zipType="Zip archive" - zxType="XZ compressed data" - if [[ "$f" == *"$zipType"* ]] || [[ "$f" == *"$zxType"* ]] - then - echo "" >> $html - fileName=$(basename $current) - bash ${dir}/imgt_loader/imgt_loader.sh $current $parsedFileName "${fileName}" - else - echo "" >> $html - bash ${dir}/igblast/igblast.sh $current $species $locus $parsedFileName - fi - mergerInput+=($parsedFileName) - count=$((count+1)) -done - -echo "" >> $html -echo "" >> $html - -bash $dir/experimental_design/experimental_design.sh ${mergerInput[*]} $PWD/merged.txt - -echo "" >> $html -echo "" >> $html -echo "" >> $html - -echo "after ED" - -bash $dir/report_clonality/r_wrapper.sh $PWD/merged.txt $2 $outputDir $clonalType "$species" "$locus" $filterproductive $clonality_method - diff -r beaa487ecf43 -r 5ffd52fc35c4 complete_immunerepertoire.xml --- a/complete_immunerepertoire.xml Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,203 +0,0 @@ - - - -complete.sh " -#for $i, $f in enumerate($patients) - "${f.id}" - #for $j, $g in enumerate($f.samples) - ${g.sample} - #end for -#end for -" $out_file $out_file.files_path "$clonaltype" -#if $gene_selection.source == "imgtdb" - "${gene_selection.species}" "${gene_selection.locus}" $filterproductive ${clonality_method} -#else - "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method -#end if - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - igblastwrp - weblogo - - - - The entire Immune Repertoire pipeline as a single tool, input several FASTA files or IMGT zip/txz files, give them an ID and it will BLAST/parse, merge and plot them. - - .. class:: warningmark - -Custom gene ordering based on position on genome: - -**Human** - -IGH:: - - V: - IGHV7-81,IGHV3-74,IGHV3-73,IGHV3-72,IGHV3-71,IGHV2-70,IGHV1-69,IGHV3-66,IGHV3-64,IGHV4-61,IGHV4-59,IGHV1-58,IGHV3-53,IGHV3-52,IGHV5-a,IGHV5-51,IGHV3-49,IGHV3-48,IGHV3-47,IGHV1-46,IGHV1-45,IGHV3-43,IGHV4-39,IGHV3-35,IGHV4-34,IGHV3-33,IGHV4-31,IGHV4-30-4,IGHV4-30-2,IGHV3-30-3,IGHV3-30,IGHV4-28,IGHV2-26,IGHV1-24,IGHV3-23,IGHV3-22,IGHV3-21,IGHV3-20,IGHV3-19,IGHV1-18,IGHV3-15,IGHV3-13,IGHV3-11,IGHV3-9,IGHV1-8,IGHV3-7,IGHV2-5,IGHV7-4-1,IGHV4-4,IGHV4-b,IGHV1-3,IGHV1-2,IGHV6-1 - D: - IGHD1-1,IGHD2-2,IGHD3-3,IGHD6-6,IGHD1-7,IGHD2-8,IGHD3-9,IGHD3-10,IGHD4-11,IGHD5-12,IGHD6-13,IGHD1-14,IGHD2-15,IGHD3-16,IGHD4-17,IGHD5-18,IGHD6-19,IGHD1-20,IGHD2-21,IGHD3-22,IGHD4-23,IGHD5-24,IGHD6-25,IGHD1-26,IGHD7-27 - J: - IGHJ1,IGHJ2,IGHJ3,IGHJ4,IGHJ5,IGHJ6 - - -IGK:: - - V: - IGKV3D-7,IGKV1D-8,IGKV1D-43,IGKV3D-11,IGKV1D-12,IGKV1D-13,IGKV3D-15,IGKV1D-16,IGKV1D-17,IGKV3D-20,IGKV2D-26,IGKV2D-28,IGKV2D-29,IGKV2D-30,IGKV1D-33,IGKV1D-39,IGKV2D-40,IGKV2-40,IGKV1-39,IGKV1-33,IGKV2-30,IGKV2-29,IGKV2-28,IGKV1-27,IGKV2-24,IGKV3-20,IGKV1-17,IGKV1-16,IGKV3-15,IGKV1-13,IGKV1-12,IGKV3-11,IGKV1-9,IGKV1-8,IGKV1-6,IGKV1-5,IGKV5-2,IGKV4-1 - J: - IGKJ1,IGKJ2,IGKJ3,IGKJ4,IGKJ5 - - -IGL:: - - V: - IGLV4-69,IGLV8-61,IGLV4-60,IGLV6-57,IGLV5-52,IGLV1-51,IGLV9-49,IGLV1-47,IGLV7-46,IGLV5-45,IGLV1-44,IGLV7-43,IGLV1-41,IGLV1-40,IGLV5-39,IGLV5-37,IGLV1-36,IGLV3-27,IGLV3-25,IGLV2-23,IGLV3-22,IGLV3-21,IGLV3-19,IGLV2-18,IGLV3-16,IGLV2-14,IGLV3-12,IGLV2-11,IGLV3-10,IGLV3-9,IGLV2-8,IGLV4-3,IGLV3-1 - J: - IGLJ1,IGLJ2,IGLJ3,IGLJ6,IGLJ7 - - -TRB:: - - V: - 
TRBV2,TRBV3-1,TRBV4-1,TRBV5-1,TRBV6-1,TRBV4-2,TRBV6-2,TRBV4-3,TRBV6-3,TRBV7-2,TRBV6-4,TRBV7-3,TRBV9,TRBV10-1,TRBV11-1,TRBV10-2,TRBV11-2,TRBV6-5,TRBV7-4,TRBV5-4,TRBV6-6,TRBV5-5,TRBV7-6,TRBV5-6,TRBV6-8,TRBV7-7,TRBV6-9,TRBV7-8,TRBV5-8,TRBV7-9,TRBV13,TRBV10-3,TRBV11-3,TRBV12-3,TRBV12-4,TRBV12-5,TRBV14,TRBV15,TRBV16,TRBV18,TRBV19,TRBV20-1,TRBV24-1,TRBV25-1,TRBV27,TRBV28,TRBV29-1,TRBV30 - D: - TRBD1,TRBD2 - J: - TRBJ1-1,TRBJ1-2,TRBJ1-3,TRBJ1-4,TRBJ1-5,TRBJ1-6,TRBJ2-1,TRBJ2-2,TRBJ2-3,TRBJ2-4,TRBJ2-5,TRBJ2-6,TRBJ2-7 - - -TRA:: - - V: - TRAV1-1,TRAV1-2,TRAV2,TRAV3,TRAV4,TRAV5,TRAV6,TRAV7,TRAV8-1,TRAV9-1,TRAV10,TRAV12-1,TRAV8-2,TRAV8-3,TRAV13-1,TRAV12-2,TRAV8-4,TRAV13-2,TRAV14/DV4,TRAV9-2,TRAV12-3,TRAV8-6,TRAV16,TRAV17,TRAV18,TRAV19,TRAV20,TRAV21,TRAV22,TRAV23/DV6,TRAV24,TRAV25,TRAV26-1,TRAV27,TRAV29/DV5,TRAV30,TRAV26-2,TRAV34,TRAV35,TRAV36/DV7,TRAV38-1,TRAV38-2/DV8,TRAV39,TRAV40,TRAV41 - J: - TRAJ57,TRAJ56,TRAJ54,TRAJ53,TRAJ52,TRAJ50,TRAJ49,TRAJ48,TRAJ47,TRAJ46,TRAJ45,TRAJ44,TRAJ43,TRAJ42,TRAJ41,TRAJ40,TRAJ39,TRAJ38,TRAJ37,TRAJ36,TRAJ34,TRAJ33,TRAJ32,TRAJ31,TRAJ30,TRAJ29,TRAJ28,TRAJ27,TRAJ26,TRAJ24,TRAJ23,TRAJ22,TRAJ21,TRAJ20,TRAJ18,TRAJ17,TRAJ16,TRAJ15,TRAJ14,TRAJ13,TRAJ12,TRAJ11,TRAJ10,TRAJ9,TRAJ8,TRAJ7,TRAJ6,TRAJ5,TRAJ4,TRAJ3 - - -TRG:: - - V: - TRGV9,TRGV8,TRGV5,TRGV4,TRGV3,TRGV2 - J: - TRGJ2,TRGJP2,TRGJ1,TRGJP1 - - -TRD:: - - V: - TRDV1,TRDV2,TRDV3 - D: - TRDD1,TRDD2,TRDD3 - J: - TRDJ1,TRDJ4,TRDJ2,TRDJ3 - - -**Mouse** - -TRB:: - - V: - TRBV1,TRBV2,TRBV3,TRBV4,TRBV5,TRBV12-1,TRBV13-1,TRBV12-2,TRBV13-2,TRBV13-3,TRBV14,TRBV15,TRBV16,TRBV17,TRBV19,TRBV20,TRBV23,TRBV24,TRBV26,TRBV29,TRBV30,TRBV31 - D: - TRBD1,TRBD2 - J: - TRBJ1-1,TRBJ1-2,TRBJ1-3,TRBJ1-4,TRBJ1-5,TRBJ2-1,TRBJ2-2,TRBJ2-3,TRBJ2-4,TRBJ2-5,TRBJ2-6,TRBJ2-7 - - - - diff -r beaa487ecf43 -r 5ffd52fc35c4 datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,6 @@ + + + + + + diff -r beaa487ecf43 -r 5ffd52fc35c4 experimental_design.xml --- 
a/experimental_design.xml Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ - - - - experimental_design/experimental_design.sh - #for $i, $f in enumerate($patients) - "$f.id" - #for $j, $g in enumerate($f.samples) - ${g.sample} - #end for - #end for - $out_file - - - - - - - - - - - - - -Takes the ARGalaxy proprietary format and merges several samples and/or patients together. - - - - 10.1093/bioinformatics/btq281 - - - @ARTICLE{Kim07aninterior-point, - author = {Seung-jean Kim and Kwangmoo Koh and Michael Lustig and Stephen Boyd and Dimitry Gorinevsky}, - title = {An interior-point method for large-scale l1-regularized logistic regression}, - journal = {Journal of Machine Learning Research}, - year = {2007}, - volume = {8}, - pages = {1519-1555} - } - - - - - - - - - - - - - - - - - - diff -r beaa487ecf43 -r 5ffd52fc35c4 experimental_design/experimental_design.py --- a/experimental_design/experimental_design.py Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -import sys -import pandas as pd - -def main(): - patients = {} - files = [] - sample_id = sys.argv[1] - imgt_files = 0 - blast_files = 0 - #organize files - for arg in sys.argv[2:-2]: - if arg.find("/") is -1: - patients[sample_id] = files - files = [] - sample_id = arg - else: - df = pd.read_csv(arg, sep="\t", dtype=object, error_bad_lines=False) - if "Functionality" in list(df.columns.values): - df["VDJ Frame"][df["Functionality"] != "productive"] = "In-frame with stop codon" - imgt_files += 1 - else: - blast_files += 1 - files.append(df) - patients[sample_id] = files - columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', - u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Functionality', 'V-REGION identity %', - 'V-REGION identity nt', 'D-REGION reading frame', 'AA JUNCTION', 'Functionality 
comment', 'Sequence', 'FR1-IMGT', 'FR2-IMGT', - 'FR3-IMGT', 'CDR3-IMGT', 'JUNCTION', 'J-REGION', 'FR4-IMGT', 'P3V-nt nb', 'N1-REGION-nt nb', 'P5D-nt nb', 'P3D-nt nb', 'N2-REGION-nt nb', - 'P5J-nt nb', '3V-REGION trimmed-nt nb', '5D-REGION trimmed-nt nb', '3D-REGION trimmed-nt nb', '5J-REGION trimmed-nt nb', u'Sample', u'Replicate'] - if "N-REGION-nt nb" in files[0].columns: - columns.insert(30, "N-REGION-nt nb") - if blast_files is not 0: - print "Has a parsed blastn file, using limited columns." - columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Sample', u'Replicate'] - - result = None - for patient_id, samples in patients.iteritems(): - count = 1 - for sample in samples: - sample['Sample'] = patient_id - sample['Replicate'] = str(count) - count += 1 - if result is None: - result = sample[columns] - else: - result = result.append(sample[columns]) - result.to_csv(sys.argv[-1], sep="\t", index=False, index_label="index") - -if __name__ == "__main__": - main() diff -r beaa487ecf43 -r 5ffd52fc35c4 experimental_design/experimental_design.r --- a/experimental_design/experimental_design.r Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - -print(args) - -inputs = args[1:(length(args) - 1)] -output = args[length(args)] - -current.id = "" -counter = 1 - -result = NULL - -for(current in inputs){ - if(grepl("/", current)){ #its a path to a file - print(paste("Adding file", counter, "to", current.id)) - dat = read.table(current, sep="\t", header=T, quote="", fill=T) - - #IMGT check - - dat$Sample = current.id - dat$Replicate = counter - - if(is.null(result)){ - result = dat[NULL,] - } - - result = rbind(result, dat) - - counter = counter + 1 - - } else { #its an ID of a patient - print(paste("New patient", 
current)) - current.id = current - counter = 1 - } -} - -write.table(result, output, sep="\t", quote=F, row.names=F, col.names=T) diff -r beaa487ecf43 -r 5ffd52fc35c4 experimental_design/experimental_design.sh --- a/experimental_design/experimental_design.sh Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ - -dir="$(cd "$(dirname "$0")" && pwd)" - -Rscript --verbose $dir/experimental_design.r $@ 2>&1 diff -r beaa487ecf43 -r 5ffd52fc35c4 gene_identification.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gene_identification.py Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,226 @@ +import re +import argparse +import time +starttime= int(time.time() * 1000) + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="The 1_Summary file from an IMGT zip file") +parser.add_argument("--output", help="The annotated output file to be merged back with the summary file") + +args = parser.parse_args() + +infile = args.input +#infile = "test_VH-Ca_Cg_25nt/1_Summary_test_VH-Ca_Cg_25nt_241013.txt" +output = args.output +#outfile = "identified.txt" + +dic = dict() +total = 0 + + +first = True +IDIndex = 0 +seqIndex = 0 + +with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence + for line in f: + total += 1 + linesplt = line.split("\t") + if first: + print "linesplt", linesplt + IDIndex = linesplt.index("Sequence ID") + seqIndex = linesplt.index("Sequence") + first = False + continue + + ID = linesplt[IDIndex] + if len(linesplt) < 28: #weird rows without a sequence + dic[ID] = "" + else: + dic[ID] = linesplt[seqIndex] + +print "Number of input sequences:", len(dic) + +#old cm sequence: gggagtgcatccgccccaacccttttccccctcgtctcctgtgagaattccc +#old cg sequence: ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggccctgggctgcctggtcaaggactacttccccgaaccggtgacggtgtcgtggaactcaggcgccctgaccag + +#lambda/kappa reference sequence +searchstrings = {"ca": 
"catccccgaccagccccaaggtcttcccgctgagcctctgcagcacccagccagatgggaacgtggtcatcgcctgcctgg", + "cg": "ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggcc", + "ce": "gcctccacacagagcccatccgtcttccccttgacccgctgctgcaaaaacattccctcc", + "cm": "gggagtgcatccgccccaacc"} #new (shorter) cm sequence + +compiledregex = {"ca": [], + "cg": [], + "ce": [], + "cm": []} + +#lambda/kappa reference sequence variable nucleotides +ca1 = {38: 't', 39: 'g', 48: 'a', 49: 'g', 51: 'c', 68: 'a', 73: 'c'} +ca2 = {38: 'g', 39: 'a', 48: 'c', 49: 'c', 51: 'a', 68: 'g', 73: 'a'} +cg1 = {0: 'c', 33: 'a', 38: 'c', 44: 'a', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'} +cg2 = {0: 'c', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'g', 132: 't'} +cg3 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 't', 56: 'g', 58: 'g', 66: 'g', 132: 'c'} +cg4 = {0: 't', 33: 'g', 38: 'g', 44: 'g', 54: 'c', 56: 'a', 58: 'a', 66: 'c', 132: 'c'} + +#remove last snp for shorter cg sequence --- note, also change varsInCG +del cg1[132] +del cg2[132] +del cg3[132] +del cg4[132] + +#reference sequences are cut into smaller parts of 'chunklength' length, and with 'chunklength' / 2 overlap +chunklength = 8 + +#create the chunks of the reference sequence with regular expressions for the variable nucleotides +for i in range(0, len(searchstrings["ca"]) - chunklength, chunklength / 2): + pos = i + chunk = searchstrings["ca"][i:i+chunklength] + result = "" + varsInResult = 0 + for c in chunk: + if pos in ca1.keys(): + varsInResult += 1 + result += "[" + ca1[pos] + ca2[pos] + "]" + else: + result += c + pos += 1 + compiledregex["ca"].append((re.compile(result), varsInResult)) + +for i in range(0, len(searchstrings["cg"]) - chunklength, chunklength / 2): + pos = i + chunk = searchstrings["cg"][i:i+chunklength] + result = "" + varsInResult = 0 + for c in chunk: + if pos in cg1.keys(): + varsInResult += 1 + result += "[" + "".join(set([cg1[pos], cg2[pos], cg3[pos], cg4[pos]])) + "]" + else: + result += c + pos += 1 + 
compiledregex["cg"].append((re.compile(result), varsInResult)) + +for i in range(0, len(searchstrings["cm"]) - chunklength, chunklength / 2): + compiledregex["cm"].append((re.compile(searchstrings["cm"][i:i+chunklength]), False)) + +for i in range(0, len(searchstrings["ce"]) - chunklength + 1, chunklength / 2): + compiledregex["ce"].append((re.compile(searchstrings["ce"][i:i+chunklength]), False)) + +def removeAndReturnMaxIndex(x): #simplifies a list comprehension + m = max(x) + index = x.index(m) + x[index] = 0 + return index + + +start_location = dict() +hits = dict() +alltotal = 0 +for key in compiledregex.keys(): #for ca/cg/cm/ce + regularexpressions = compiledregex[key] #get the compiled regular expressions + for ID in dic.keys()[0:]: #for every ID + if ID not in hits.keys(): #ensure that the dictionairy that keeps track of the hits for every gene exists + hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ce_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0} + currentIDHits = hits[ID] + seq = dic[ID] + lastindex = 0 + start_zero = len(searchstrings[key]) #allows the reference sequence to start before search sequence (start_locations of < 0) + start = [0] * (len(seq) + start_zero) + for i, regexp in enumerate(regularexpressions): #for every regular expression + relativeStartLocation = lastindex - (chunklength / 2) * i + if relativeStartLocation >= len(seq): + break + regex, hasVar = regexp + matches = regex.finditer(seq[lastindex:]) + for match in matches: #for every match with the current regex, only uses the first hit because of the break at the end of this loop + lastindex += match.start() + start[relativeStartLocation + start_zero] += 1 + if hasVar: #if the regex has a variable nt in it + chunkstart = chunklength / 2 * i #where in the reference does this chunk start + chunkend = chunklength / 2 * i + chunklength #where in the reference does this chunk end + if key == "ca": #just calculate the variable nt score for 'ca', cheaper + 
currentIDHits["ca1"] += len([1 for x in ca1 if chunkstart <= x < chunkend and ca1[x] == seq[lastindex + x - chunkstart]]) + currentIDHits["ca2"] += len([1 for x in ca2 if chunkstart <= x < chunkend and ca2[x] == seq[lastindex + x - chunkstart]]) + elif key == "cg": #just calculate the variable nt score for 'cg', cheaper + currentIDHits["cg1"] += len([1 for x in cg1 if chunkstart <= x < chunkend and cg1[x] == seq[lastindex + x - chunkstart]]) + currentIDHits["cg2"] += len([1 for x in cg2 if chunkstart <= x < chunkend and cg2[x] == seq[lastindex + x - chunkstart]]) + currentIDHits["cg3"] += len([1 for x in cg3 if chunkstart <= x < chunkend and cg3[x] == seq[lastindex + x - chunkstart]]) + currentIDHits["cg4"] += len([1 for x in cg4 if chunkstart <= x < chunkend and cg4[x] == seq[lastindex + x - chunkstart]]) + else: #key == "cm" #no variable regions in 'cm' or 'ce' + pass + break #this only breaks when there was a match with the regex, breaking means the 'else:' clause is skipped + else: #only runs if there were no hits + continue + #print "found ", regex.pattern , "at", lastindex, "adding one to", (lastindex - chunklength / 2 * i), "to the start array of", ID, "gene", key, "it's now:", start[lastindex - chunklength / 2 * i] + currentIDHits[key + "_hits"] += 1 + start_location[ID + "_" + key] = str([(removeAndReturnMaxIndex(start) + 1 - start_zero) for x in range(5) if len(start) > 0 and max(start) > 1]) + #start_location[ID + "_" + key] = str(start.index(max(start))) + + +varsInCA = float(len(ca1.keys()) * 2) +varsInCG = float(len(cg1.keys()) * 2) - 2 # -2 because the sliding window doesn't hit the first and last nt twice +varsInCM = 0 +varsInCE = 0 + +def round_int(val): + return int(round(val)) + +first = True +seq_write_count=0 +with open(infile, 'r') as f: #read all sequences into a dictionary as key = ID, value = sequence + with open(output, 'w') as o: + for line in f: + total += 1 + if first: + o.write("Sequence 
ID\tbest_match\tnt_hit_percentage\tchunk_hit_percentage\tstart_locations\n") + first = False + continue + linesplt = line.split("\t") + if linesplt[2] == "No results": + pass + ID = linesplt[1] + currentIDHits = hits[ID] + possibleca = float(len(compiledregex["ca"])) + possiblecg = float(len(compiledregex["cg"])) + possiblecm = float(len(compiledregex["cm"])) + possiblece = float(len(compiledregex["ce"])) + cahits = currentIDHits["ca_hits"] + cghits = currentIDHits["cg_hits"] + cmhits = currentIDHits["cm_hits"] + cehits = currentIDHits["ce_hits"] + if cahits >= cghits and cahits >= cmhits and cahits >= cehits: #its a ca gene + ca1hits = currentIDHits["ca1"] + ca2hits = currentIDHits["ca2"] + if ca1hits >= ca2hits: + o.write(ID + "\tIGA1\t" + str(round_int(ca1hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") + else: + o.write(ID + "\tIGA2\t" + str(round_int(ca2hits / varsInCA * 100)) + "\t" + str(round_int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") + elif cghits >= cahits and cghits >= cmhits and cghits >= cehits: #its a cg gene + cg1hits = currentIDHits["cg1"] + cg2hits = currentIDHits["cg2"] + cg3hits = currentIDHits["cg3"] + cg4hits = currentIDHits["cg4"] + if cg1hits >= cg2hits and cg1hits >= cg3hits and cg1hits >= cg4hits: #cg1 gene + o.write(ID + "\tIGG1\t" + str(round_int(cg1hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") + elif cg2hits >= cg1hits and cg2hits >= cg3hits and cg2hits >= cg4hits: #cg2 gene + o.write(ID + "\tIGG2\t" + str(round_int(cg2hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") + elif cg3hits >= cg1hits and cg3hits >= cg2hits and cg3hits >= cg4hits: #cg3 gene + o.write(ID + "\tIGG3\t" + str(round_int(cg3hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + 
"_cg"] + "\n") + else: #cg4 gene + o.write(ID + "\tIGG4\t" + str(round_int(cg4hits / varsInCG * 100)) + "\t" + str(round_int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") + else: #its a cm or ce gene + if cmhits >= cehits: + o.write(ID + "\tIGM\t100\t" + str(round_int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cm"] + "\n") + else: + o.write(ID + "\tIGE\t100\t" + str(round_int(cehits / possiblece * 100)) + "\t" + start_location[ID + "_ce"] + "\n") + seq_write_count += 1 + +print "Time: %i" % (int(time.time() * 1000) - starttime) + +print "Number of sequences written to file:", seq_write_count + + + + + diff -r beaa487ecf43 -r 5ffd52fc35c4 igblast/igblast.r --- a/igblast/igblast.r Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - -infile=args[1] -outfile=args[2] - -blasted = read.table(infile, header=T, sep="\t", fill=T, stringsAsFactors=F, comment.char="") - -blasted$ID = 1:nrow(blasted) -blasted$VDJ.Frame = "Out-of-frame" - -search = blasted$inFrame == "true" & blasted$noStop == "false" -if(sum(search) > 0){ - blasted[search ,]$VDJ.Frame = "In-frame with stop codon" -} - -search = blasted$inFrame == "true" & blasted$noStop == "true" -if(sum(search) > 0){ - blasted[search ,]$VDJ.Frame = "In-frame" -} - -blasted$Top.V.Gene = blasted$vSegment -blasted$Top.D.Gene = blasted$dSegment -blasted$Top.J.Gene = blasted$jSegment -blasted$CDR1.Seq = blasted$cdr1aa -blasted$CDR1.Length = nchar(blasted$CDR1.Seq) -blasted$CDR2.Seq = blasted$cdr2aa -blasted$CDR2.Length = nchar(blasted$CDR2.Seq) -blasted$CDR3.Seq = blasted$cdr3aa -blasted$CDR3.Length = nchar(blasted$CDR3.Seq) -blasted$CDR3.Seq.DNA = blasted$cdr3nt -blasted$CDR3.Length.DNA = nchar(blasted$CDR3.Seq.DNA) -blasted$Strand = "+/-" -blasted$CDR3.Found.How = "found" - -search = blasted$cdr3nt == "" -if(sum(search) > 0){ - blasted[search,]$CDR3.Found.How = "NOT_FOUND" -} - -blasted$AA.JUNCTION = 
blasted$CDR3.Seq - -n = c("X.reads_count", "ID", "VDJ.Frame", "Top.V.Gene", "Top.D.Gene", "Top.J.Gene", "CDR1.Seq", "CDR1.Length", "CDR2.Seq", "CDR2.Length", "CDR3.Seq", "CDR3.Length", "CDR3.Seq.DNA", "CDR3.Length.DNA", "Strand", "CDR3.Found.How", "Functionality", "AA.JUNCTION") - -n[!(n %in% names(blasted))] - -blasted = blasted[,c("X.reads_count", "ID", "VDJ.Frame", "Top.V.Gene", "Top.D.Gene", "Top.J.Gene", "CDR1.Seq", "CDR1.Length", "CDR2.Seq", "CDR2.Length", "CDR3.Seq", "CDR3.Length", "CDR3.Seq.DNA", "CDR3.Length.DNA", "Strand", "CDR3.Found.How", "AA.JUNCTION")] - -names(blasted) = c("frequency.count", "ID", "VDJ Frame", "Top V Gene", "Top D Gene", "Top J Gene", "CDR1 Seq", "CDR1 Length", "CDR2 Seq", "CDR2 Length", "CDR3 Seq", "CDR3 Length", "CDR3 Seq DNA", "CDR3 Length DNA", "Strand", "CDR3 Found How", "AA JUNCTION") - -#duplicate rows based on frequency.count -blasted = blasted[rep(seq_len(nrow(blasted)), blasted$frequency.count),] -blasted$ID = 1:nrow(blasted) - -blasted = blasted[,c("ID", "VDJ Frame", "Top V Gene", "Top D Gene", "Top J Gene", "CDR1 Seq", "CDR1 Length", "CDR2 Seq", "CDR2 Length", "CDR3 Seq", "CDR3 Length", "CDR3 Seq DNA", "CDR3 Length DNA", "Strand", "CDR3 Found How", "AA JUNCTION")] - -write.table(blasted, outfile, quote=F, sep="\t", row.names=F, col.names=T) diff -r beaa487ecf43 -r 5ffd52fc35c4 igblast/igblast.sh --- a/igblast/igblast.sh Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ -set -e - -dir="$(cd "$(dirname "$0")" && pwd)" - -input=$1 -species=$2 -locus=$3 -output=$4 - - -echo "$input $species $locus $output" - -java -Xmx64G -jar $IGBLASTWRP/igblastwrp.jar -p 4 -S $species -R $locus ${input} $PWD/blasted_output 2>&1 - -Rscript --verbose $dir/igblast.r "$PWD/blasted_output.L2.txt" "$output" 2>&1 diff -r beaa487ecf43 -r 5ffd52fc35c4 igblastn.xml --- a/igblastn.xml Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ - - - - igblast/igblast.sh $input 
$species $locus $output - - - - - - - - - - - - - - - - - - - - - - - - - - igblastwrp - - -============ -iReport -============ - -This tool uses the online igBLAST website hosted by NCBI to blast a FASTA file, it retrieves the result and generates a convenient tabular format for further processing. - -**NOTE** - -.. class:: warningmark - -- Everything goes through the servers of NCBI, so if you have sensitive data that that isn't allowed to leave your local network, this isn't the tool the use. - -**USAGE** - -.. class:: infomark - -- This tool uses a free service provided by NCBI, and although there doesn't seem to be any restrictions on usage, avoid unnecessary usage to lighten the load on NCBI's servers. - - -**INPUT** - -This tool accepts FASTA files as input: - -:: - - >lcl|FLN1FA002RWEZA.1| - ggctggagtgggtttcatacattagtagtaatagtggtgccatatactacgcagactctgtgaagggccgattcaccatc - tccagaaacaatgccaaggactcactgtatctgcaaatgaacagcctgagagccgaggacacggctgtgtattactgtgc - gagagcgatcccccggtattactatgatactagtggcccaaacgactactggggccagggaaccctggtcaccgtctcct - cag - >lcl|FLN1FA001BLION.1| - aggcttgagtggatgggatggatcaacgctggcaatggtaacacaaaatattcacagaagttccagggcagagtcaccat - taccagggacacatccgcgagcacagcctacatggagctgagcagcctgagatctgaagacacggctgtgtattactgtg - cgagagtgggcagcagctggtctgatgcttttgattatctggggccaagggacaatggtcaccgtctcctcag - -**OUTPUT** - -The following data is used for ARGalaxy - -+-----------------+----------------------------------------------+ -| Column name | Column contents | -+-----------------+----------------------------------------------+ -| ID | The Sequence ID provided by the sequencer. | -+-----------------+----------------------------------------------+ -| VDJ Frame | In-frame/Out-frame | -+-----------------+----------------------------------------------+ -| Top V Gene | The best matching V gene found. | -+-----------------+----------------------------------------------+ -| Top D Gene | The best matching D gene found. 
| -+-----------------+----------------------------------------------+ -| Top J Gene | The best matching J gene found. | -+-----------------+----------------------------------------------+ -| CDR3 Seq | The CDR3 region. | -+-----------------+----------------------------------------------+ -| CDR3 Length | The length of the CDR3 region. | -+-----------------+----------------------------------------------+ -| CDR3 Seq DNA | The CDR3 sequence region. | -+-----------------+----------------------------------------------+ -| CDR3 Length DNA | The length of the CDR3 sequence region. | -+-----------------+----------------------------------------------+ -| Functionality | If sequence is productive/unproductive | -+-----------------+----------------------------------------------+ - - - - diff -r beaa487ecf43 -r 5ffd52fc35c4 igblastparser/igparse.pl --- a/igblastparser/igparse.pl Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1252 +0,0 @@ -#!/usr/bin/perl -=head1 IGBLAST_simple.pl - -This version (1.4) has been heavily adapted since the original program was first created back in October 2012. -Bas Horsman (EMC, Rotterdam, The Netherlands) has contributed with minor - though important - code changes. - -From V 1.2 onwards a 'Change Log' is included at the end of the program - -=head2 Usage - -Requires no modules in general use; the Data::Dumper (supplied as part of the Perl Core module set) might be useful for debugging/adjustment -as it allows inspection of the data stores. - -The program takes a text file of the - - ./IGBLAST_simple.pl igBLASTOutput.txt <-optional: index of record to process-> - -Supply the text version of the igBLAST report in the format as in the example below. -The extra command line arugment is the record number (aka. BLAST report) to process. -If 0 or absent all are processed, if supplied that record (base 1) is processed and the program dies afterwards. 
- -=head2 Example Input - -A standard igBLAST record or set of them in a file; this being typical: - - BLASTN 2.2.27+ - - -Reference: Stephen F. Altschul, Thomas L. Madden, Alejandro A. -Schaffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. -Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of -protein database search programs", Nucleic Acids Res. 25:3389-3402. - - - -Database: human_gl_V; human_gl_D; human_gl_J - 674 sequences; 179,480 total letters - - - -Query= HL67IUI01D26LR length=433 xy=1559_1437 region=1 -run=R_2012_04_10_11_57_56_ - -Length=433 - Score E -Sequences producing significant alignments: (Bits) Value - -lcl|IGHV3-30*04 330 2e-92 -lcl|IGHV3-30-3*01 330 2e-92 -lcl|IGHV3-30*01 327 2e-91 -lcl|IGHD3-16*01 14.4 11 -lcl|IGHD3-16*02 14.4 11 -lcl|IGHD1-14*01 12.4 43 -lcl|IGHJ4*02 78.3 1e-18 -lcl|IGHJ5*02 70.3 4e-16 -lcl|IGHJ4*01 68.3 2e-15 - - -Domain classification requested: imgt - - -V(D)J rearrangement summary for query sequence (Top V gene match, Top D gene match, Top J gene match, Chain type, V-J Frame, Strand): -IGHV3-30*04 IGHD3-16*01 IGHJ4*02 VH In-frame + - -V(D)J junction details (V end, V-D junction, D region, D-J junction, J start). 
Note that possible overlapping nucleotides at VDJ junction (i.e, nucleotides that could be assigned to either joining gene segment) are indicated in parentheses (i.e., (TACT)) but are not included under V, D, or J gene itself -AGAGA TATGAGCCCCATCATGACA ACGTTTG CCGGAA ACTAC - -Alignment summary between query and top germline V gene hit (from, to, length, matches, mismatches, gaps, percent identity) -FWR1 27 38 12 11 1 0 91.7 -CDR1 39 62 24 22 2 0 91.7 -FWR2 63 113 51 50 1 0 98 -CDR2 114 137 24 23 1 0 95.8 -FWR3 138 251 114 109 5 0 95.6 -CDR3 (V region only) 252 259 8 7 1 0 87.5 -Total N/A N/A 233 222 11 0 95.3 - - -Alignments - - <----FWR1--><----------CDR1--------><-----------------------FWR2------ - W A A S G F T F N T Y A V H W V R Q A P G K G - Query_1 27 TGGGCAGCCTCTGGATTCACCTTCAATACCTATGCTGTGCACTGGGTCCGCCAGGCTCCAGGCAAGGGGC 96 -V 95.3% (222/233) IGHV3-30*04 64 ..T......................G..G.......A................................. 133 - C A A S G F T F S S Y A M H W V R Q A P G K G -V 95.7% (221/231) IGHV3-30-3*01 64 ..T......................G..G.......A................................. 133 -V 94.8% (221/233) IGHV3-30*01 64 ..T......................G..G.......A................................. 133 - - ----------------><----------CDR2--------><---------------------------- - L E W V A V I S Y D G S N K N Y A D S V K G R F - Query_1 97 TGGAGTGGGTGGCAGTTATATCATATGATGGAAGCAATAAAAACTACGCAGACTCCGTGAAGGGCCGATT 166 -V 95.3% (222/233) IGHV3-30*04 134 ..................................T......T............................ 203 - L E W V A V I S Y D G S N K Y Y A D S V K G R F -V 95.7% (221/231) IGHV3-30-3*01 134 .........................................T............................ 203 -V 94.8% (221/233) IGHV3-30*01 134 .A................................T......T............................ 
203 - - ---------------------------FWR3--------------------------------------- - T I S R D N S K N T L Y L Q M N S L R V E D T - Query_1 167 CACCATCTCCAGAGACAATTCCAAGAACACGTTATATCTGCAAATGAACAGCCTGAGAGTTGAGGACACG 236 -V 95.3% (222/233) IGHV3-30*04 204 ...............................C.G.........................C.......... 273 - T I S R D N S K N T L Y L Q M N S L R A E D T -V 95.7% (221/231) IGHV3-30-3*01 204 ...............................C.G.........................C.......... 273 -V 94.8% (221/233) IGHV3-30*01 204 ...............................C.G.........................C.......... 273 - - --------------> - A V Y Y C T R D M S P I M T T F A G N Y W G Q - Query_1 237 GCTGTTTATTACTGTACGAGAGATATGAGCCCCATCATGACAACGTTTGCCGGAAACTACTGGGGCCAGG 306 -V 95.3% (222/233) IGHV3-30*04 274 .....G.........G.......----------------------------------------------- 296 - A V Y Y C A R -V 95.7% (221/231) IGHV3-30-3*01 274 .....G.........G.....------------------------------------------------- 294 -V 94.8% (221/233) IGHV3-30*01 274 .....G.........G.......----------------------------------------------- 296 -D 100.0% (7/7) IGHD3-16*01 12 ------------------------------------------.......--------------------- 18 -D 100.0% (7/7) IGHD3-16*02 12 ------------------------------------------.......--------------------- 18 -D 100.0% (6/6) IGHD1-14*01 8 -------------------------------------------------......--------------- 13 -J 100.0% (39/39) IGHJ4*02 10 -------------------------------------------------------............... 24 -J 100.0% (35/35) IGHJ5*02 17 -----------------------------------------------------------........... 27 -J 97.4% (38/39) IGHJ4*01 10 -------------------------------------------------------.............A. 24 - - - G T L V T V S S - Query_1 307 GAACCCTGGTCACCGTCTCCTCAG 330 -J 100.0% (39/39) IGHJ4*02 25 ........................ 48 -J 100.0% (35/35) IGHJ5*02 28 ........................ 51 -J 97.4% (38/39) IGHJ4*01 25 ........................ 
48 - - -Lambda K H - 1.10 0.333 0.549 - -Gapped -Lambda K H - 1.08 0.280 0.540 - -Effective search space used: 64847385 - - -Query= HL67IUI01EQMLY length=609 xy=1826_1636 region=1 -run=R_2012_04_10_11_57_56_ - - -...etc... - -=head2 Example Output - - -Example output from the data above sent: - $ ./IGBLAST_simple.pl igBLASTOutput.txt 1 - D: Request to process just record '1' received - D: printOUTPUTData: Running - D: printOUTPUTData: HEADER Printout requested 'ID VDJ Frame Top V Gene Top D Gene Top J Gene CDR1 Seq CDR1 Length CDR2 Seq CDR2 Length CDR3 Seq CDR3 Length CDR3 Found How' - OUTPUT: # ID VDJ Frame Top V Gene Top D Gene Top J Gene CDR1 Seq CDR1 Length CDR2 Seq CDR2 Length CDR3 Seq CDR3 Length CDR3 Found How - D: ID is: 'HL67IUI01D26LR' - D: Minimum base marked-up (27) - aka. $AlignmentStart; maximum: (259) - D: Starting Search for CDR3 - D: markUpCDR3: Passed Parameters '251, 27, TGGGG....GG., WG.G' (& AA & DNA sequence) - D: markUpCDR3: returning: 223, 282, MOTIF_FOUND_IN_BOTH, (3) [NB: offset of :'+ 27' - D: CDR3 was found by pattern matching: 'MOTIF_FOUND_IN_BOTH' (250, 309) - D: Top Hits (raw)= 'IGHV3-30*04 IGHD3-16*01 IGHJ4*02 VH In-frame +' - D: Top Hits (parsed)= 'IGHV3-30*04, IGHD3-16*01, IGHJ4*02, VH, In-frame, +' - D: printOUTPUTData: Running - OUTPUT: HL67IUI01D26LR In-frame IGHV3-30*04 IGHD3-16*01 IGHJ4*02 GFTFNTYA 23 ISYDGSNK 23 CTRDMSPIMTTFAGNYWGQG 59 MOTIF_FOUND_IN_BOTH - -=head4 Usage notes: - -Designed to be easy to "grep -v D:" or "grep OUTPUT:" for to select the parts you need: - - ./IGBLAST_simple.pl igBLASTOutput.txt 1 | grep OUTPUT: - - OUTPUT: # ID VDJ Frame Top V Gene Top D Gene Top J Gene CDR1 Seq CDR1 Length CDR2 Seq CDR2 Length CDR3 Seq CDR3 Length CDR3 Found How - OUTPUT: HL67IUI01D26LR In-frame IGHV3-30*04 IGHD3-16*01 IGHJ4*02 GFTFNTYA 23 ISYDGSNK 23 CTRDMSPIMTTFAGNYWGQG 59 MOTIF_FOUND_IN_BOTH - OUTPUT: HL67IUI01EQMLY In-frame IGHV4-39*01 IGHD2-8*01 IGHJ3*02 GGSISSSSYY 29 IYHSGST 20 CARDATYYSNGFDIWGQG 53 MOTIF_FOUND_IN_BOTH - 
OUTPUT: HL67IUI01CDCLP Out-of-frame IGHV3-23*01 IGHD3-3*01 IGHJ4*02 FSNYAM 16 SGSGDRTY 23 AKAD*FLEWLFRIGDGERLLGPGN 72 MOTIF_FOUND_IN_DNA - OUTPUT: HL67IUI01AHRNH N/A IGHV3-33*01 N/A N/A WIHLQ*LW 23 YGMMEVI 23 NOT_FOUND - OUTPUT: HL67IUI01DZZ1V Out-of-frame IGHV3-23*01 IGHD5-12*01 IGHJ4*02 GFTFDKYA 23 ILASG 20 LYCASEGDIVASELLSTGARV 62 MOTIF_FOUND_IN_DNA - OUTPUT: HL67IUI01DTR2Y Out-of-frame IGHV3-23*01 IGHD5-12*01 IGHJ4*02 LDSPLTNM 23 LYLPVV 20 TVRVRGT*WLRSF*VLGPG 59 MOTIF_FOUND_IN_DNA - OUTPUT: HL67IUI01EQL3S In-frame IGHV7-4-1*02 IGHD6-19*01 IGHJ6*02 GYTFRTFT 23 INTNTGTP 23 CAKESGTGSAHFFYGMDVWGQG 65 MOTIF_FOUND_IN_BOTH - OUTPUT: HL67IUI01AFG46 In-frame IGLV2-34*01 N/A IGHJ4*02 NOT_FOUND - OUTPUT: HL67IUI01EFFKO In-frame IGHV3-11*01 IGHD6-6*01 IGHJ4*02 GFTFSDYY 23 ISYSGGTI 23 CARASGAARHRPLDYWGQG 56 MOTIF_FOUND_IN_BOTH - OUTPUT: HL67IUI01B18SG In-frame IGHV3-33*01 IGHD5-12*01 IGHJ4*02 VRQA 11 KYYANSVK 23 RLGGFDYWGQGTLVTVSS 53 MOTIF_FOUND_IN_BOTH - OUTPUT: HL67IUI01D6LER In-frame IGHV1-24*01 IGHD3-22*01 IGHJ4*02 GYSLNELS 23 PDPEDDE 23 TVQPSRITMMAVVITRIHWGASGARE 76 MOTIF_FOUND_IN_DNA - OUTPUT: HL67IUI01CYCLF N/A IGHV4-39*01 N/A N/A GGSISSSSYY 29 IYYSGST 20 NOT_FOUND - OUTPUT: HL67IUI01B4LEE In-frame IGHV7-4-1*02 IGHD6-19*01 IGHJ6*02 GYTFRTFT 23 INTNTGTP 23 CAKESGTGSAHFFYGMDVWGQG 65 MOTIF_FOUND_IN_BOTH - OUTPUT: HL67IUI01A4KW4 Out-of-frame IGHV3-23*01 IGHD5-12*01 IGHJ4*02 LDSPLTNM 23 LYLPVV 20 TVRVRGT*WLRSF*IWGQG 58 MOTIF_FOUND_IN_BOTH - OUTPUT: HL67IUI01E05BV In-frame IGHV1-24*01 IGHD3-22*01 IGHJ2*01 GYSLNELS 23 PDPEDDE 23 NOT_FOUND - OUTPUT: HL67IUI01CVVKY In-frame IGHV1-3*01 IGHD2-15*01 IGHJ1*01 NOT_FOUND - OUTPUT: HL67IUI01CN5P2 In-frame IGHV7-4-1*02 IGHD2-21*02 IGHJ5*02 GYSITDYG 23 LNTRTGNP 23 CAVKDARDFVSWGQG 44 MOTIF_FOUND_IN_BOTH - OUTPUT: HL67IUI01DUUJ5 In-frame IGHV3-21*01 IGHD1-7*01 IGHJ4*02 GYTFSTYS 23 ISSSSAYR 23 CARDIRLELRDWGQG 44 MOTIF_FOUND_IN_BOTH - OUTPUT: HL67IUI01E1AIR Out-of-frame IGHV4-39*01 N/A IGHJ3*01 WGLHRRW**L 29 FVS*RAPR 23 NOT_FOUND - 
OUTPUT: HL67IUI01CCZ8D Out-of-frame IGHV3-23*01 IGHD5-12*01 IGHJ4*02 GFTFDKYA 23 ILASGR 20 YCASEGDIVASELLSTGARE 58 MOTIF_FOUND_IN_DNA - OUTPUT: HL67IUI01BT9IR N/A IGHV3-21*02 N/A N/A NOT_FOUND - OUTPUT: HL67IUI01COTO0 Out-of-frame IGHV4-39*01 N/A IGHJ3*01 GGFIGGGDNF 29 LYHDGRPA 23 NOT_FOUND - OUTPUT: HL67IUI01D994O In-frame IGHV7-4-1*02 IGHD2-21*02 IGHJ5*02 GYSITDYG 23 LNTRTGNP 23 CAVKDARDFVSWGQG 44 MOTIF_FOUND_IN_BOTH - OUTPUT: HL67IUI01A08CJ In-frame IGHV4-39*01 IGHD6-13*01 IGHJ5*02 GGSISSSSYY 29 IYYTWEH 21 CERARRGSSWGQLVRPLGPG 62 MOTIF_FOUN - - - - OUTPUT: # ID VDJ Frame Top V Gene Top D Gene Top J Gene CDR1 Seq CDR1 Length CDR2 Seq CDR2 Length CDR3 Seq CDR3 Length CDR3 Found How - OUTPUT: HL67IUI01D26LR In-frame IGHV3-30*04 IGHD3-16*01 IGHJ4*02 GFTFNTYA 23 ISYDGSNK 23 CTRDMSPIMTTFAGNYWGQG 59 MOTIF_FOUND_IN_BOTH - ...etc... - -=head4 Also, combined grep & sed: - - $ ./IGBLAST_simple.pl igBLASTOutput.txt | grep OUTPUT: | sed 's/OUTPUT:\t//' - -=cut - -=head3 CDR3 Patterns: - -We use these two variables to try to identify the end of the CDR3 region if igBLAST doesn't report it directly: - - my $DNACDR3_Pat = "TGGGG....GG."; - my $AASequenceMotifPattern = "WG.G"; - -They are treated as regex's when tested (so use "." to mean any DNA base, rather than 'N' or 'X'). - -[NB: These are original patterns used for testing, check the code for the current ones.] - -=cut - -my $DNACDR3_Pat = "TGGGG....GG."; -my $AACDR3_Pat = "WG.G"; - -use strict; -use Data::Dumper; -# Set this as to number of the result (aka "record") you want to process or 0 for all: -my $ProcessRecord =0; -if (defined $ARGV[1]) { $ProcessRecord = pop @ARGV; } #Also accept from the command line: -if ($ProcessRecord != 0) { print "D: Request to process just record '$ProcessRecord' received\n"; } - -#Adjust the record separator: -$/="Query= "; -my $Record=0; # A simple counter, that we might not use. 
-#Force-loaded header / version information: -my $Header = <>; -#At the moment we don't use this - so dump it immediately: -$Header = undef; -#print "D: Force-loaded header / version information: '$Header'\n"; - -#Print the Header for the output line (we need this once, at the start) -print &printOUTPUTData ({"HEADER" => 1})."\n"; - -while (<>) - { -=head4 First check - should we be processing this record at all? - -=cut - $Record++; #Increment the record counter: - #Do we process this record - or all records? - if ($ProcessRecord != $Record && $ProcessRecord != 0) - { next; } #We need to increment the record counter before we increment - -=head4 Setup the output line storage and print the header: - -We enter this initially and work to change it: - - $DomainBoundaries{"CDR3"}{"FoundHow"} = "NOT_FOUND"; - -=cut - - my %OUTPUT_Data; #To collect data for the output line in - #Assume the first and work to find better: - $OUTPUT_Data{"CDR3 Found How"} = "NOT_FOUND"; - #The whole record - one per read - is now stored in $_ - my @Lines =split (/[\r\n]+/,$_); # split on windows/linux/mac new lines - - #If you are interested enable either of the next lines depending on how curious you are as to how the splitting went: - #print "D: Record #$Record\n"; print $_; print "\n---------\n"; - print "D: ''$Lines[0]'\nD: ...etc...'\nD: ############\n"; - -=head3 Get the ID - -Quite easy: the first field on the first line: - - Query= HL67IUI01DTR2Y length=577 xy=1452_0984 region=1 - -=cut - - (my $ID) = $Lines[0]=~ m/^(\S+)/; - unless (defined $ID && $ID ne "") - { # So a near total failure...? 
- $OUTPUT_Data{"ID"} = "Unknown"; - print &printOUTPUTData (\%OUTPUT_Data)."\n"; - next; #No ID is terminal for this record - } - else - { - print "D: ID is: '$ID'\n"; - $OUTPUT_Data{"ID"} = $ID; - } -=head3 Declare the variables we will need here in the next few sections to store data - -=cut - - my $CurrentRegion; - my $RegionMarkup; - - #So we can sync the coordinated of the alignment up to the domains found: - my $Query_Start = -1; my $Query_End = -1; - - #Where on the Query Sequence (i.e. the 454 read) does the alignment start & stop? - my $ThisQueryStart =-1; my $ThisQueryEnd =-1; #Think $ThisQueryEnd isn't used at the moment. - my $DNAQuerySequence =""; #The actual DNA Query sequence... - my $AAQuerySequence = ""; - - #As this changes with the alleles identified: - my $CurrentAASequence; - #The main storage variables - - my %Alginments; my %Alleles; - my %DomainBoundaries; - -=head2 Stanza 1: Get the general structure of the sequence identified - -=head3 Method 1: Use the table supplied - -Technically this valid for the top hit...realistically this is the only information we have reported to us -so we use this or nothing. This is fine for the top hit which is likely what we are interested in....but for the 2nd or 3rd? Who knows! 
- -Targets this block: - - Alignment summary between query and top germline V gene hit (from, to, length, matches, mismatches, gaps, percent identity) - FWR1 167 240 75 72 2 1 96 - CDR1 241 264 24 20 4 0 83.3 - FWR2 265 315 51 48 3 0 94.1 - CDR2 316 336 24 15 6 3 62.5 - FWR3 337 450 114 106 8 0 93 - CDR3 (V region only) 451 454 4 4 0 0 100 - Total N/A N/A 292 265 23 4 90.8 - -Then we split out the lines inside it in a second scanning step - less optimal but easier to read: - - FWR1 167 240 75 72 2 1 96 - CDR1 241 264 24 20 4 0 83.3 - FWR2 265 315 51 48 3 0 94.1 - CDR2 316 336 24 15 6 3 62.5 - FWR3 337 450 114 106 8 0 93 - CDR3 (V region only) 451 454 4 4 0 0 100 - -into: - - (Section, from, to, length, matches, mismatches, gaps, percent identity) - -=head3 Method 2: Use the table supplied - -The other way to do this is to split the graphical markup out of the alignment. -This works for _any_ reported alignment, not just the top hits: - -In the main alignment table processing section collect the information, collect the information: - - #Is region mark-up: - if ($#InfoColumns == -1 && $#AlignmentColumns ==0) - { -# print ": Region Markup detected\n"; - $RegionMarkup = $RegionMarkup.$AlignmentPanel; #Collect the information, then re-synthesise it at the end of record - next; - } - -Then afterwards when all the region was collected, process it like this: -#Pad the CDER3 region: - - #Remove the trailing spaces: - $RegionMarkup =~ s/ *$//g; - #Calculate the length of the CDR3 region so we can add it in: - my $CDR3PaddingNeeded = ($Query_End-$Query_Start)-length ($RegionMarkup) -length ("<-CDR3>")+1; - #Build up the CDR3 region, the 'x' operator is very helpful here (implict foreach loop): - $RegionMarkup = $RegionMarkup."<-CDR3"."-" x $CDR3PaddingNeeded. 
">"; - #print "D: Need to pad with:'$CDR3PaddingNeeded' characters\n"; - - #Now really process it: - my $C_Pos = 0; - my @Domains = split (/(<*-*...[123]-*>*)/,$RegionMarkup); # - foreach my $C_Domain (@Domains) - { - if (length ($C_Domain) <=0) {next;} - my $DomainStart= $C_Pos; - my $DomainEnd = $DomainStart + length ($C_Domain)-1; - my ($DomainType) = $C_Domain =~ m/(...[123])/; -# print "D: $DomainType \t($DomainStart-$DomainEnd=",$DomainEnd-$DomainStart,"):\t$C_Domain\n"; - $DomainBoundaries{$DomainType}{"Start"} = $DomainStart; - $DomainBoundaries{$DomainType}{"End"} = $DomainEnd; - $C_Pos = $DomainEnd+1; - } - -The two pieces of code are interchangable; the table version as used below, is neater, easier to understand and works nicely. -Why stress? - - -=head3 The end of the FWR3 is the start of CDR3? - -This is an assumption made. Hence the two variables: - - my $MaxDomainReported =0 ; # In nts / bps - my $FWR3_Found_Flag = 0; # Did we find the end of the FWR3 - which is the start of the CDR3. Set to 'false' initially. - - $MaxDomainBaseFound - -=cut - my $MaxDomainBaseFound =0 ; # In nts / bps - my $AlignmentStart ; # In nts /bp #Alternative name would be: '$MinDomainBaseFound'; set to null until primed -# my $FWR3_Found_Flag = 0; # Did we find the end of the FWR3 - which is the start of the CDR3. Set to 'false' initially. 
- - (my @StructureSummaryTable) = returnLinesBetween (\@Lines, "Alignment summary", "Total" ); -#Enable the next line if you want the raw data we are going to parse in this section: - #print Dumper @StructureSummaryTable; - foreach my $C_Section (@StructureSummaryTable) - { - my ($DomainType, $DomainStart, $DomainEnd, $SectLength, $Matches, $Mismatches, $Gaps, $PID) = split (/\t+/,$C_Section); - #print "D: Domain type: '$DomainType'\n"; - #$DomainType =~ s/ .*$//g; - $DomainBoundaries{$DomainType}{"Start"} = $DomainStart; - $DomainBoundaries{$DomainType}{"End"} = $DomainEnd; - -#So we can do a reality check on the length / start of the CDR3 if we have to go looking: - if ($MaxDomainBaseFound <= $DomainEnd) - { $MaxDomainBaseFound = $DomainEnd; } #Store the maximum base found - if ($AlignmentStart eq undef or $AlignmentStart >= $DomainStart) - { $AlignmentStart = $DomainStart; } - } -#print Dumper %DomainBoundaries; -#die "HIT BLOCK\n"; - -=head3 Did we find the CDR3 region specifically? - -If we did fine; otherwise try to find it using the FWR3 region if we found that; otherwise give up. - -=cut - print "D: Minimum base marked-up ($AlignmentStart) - aka. \$AlignmentStart; maximum: ($MaxDomainBaseFound)\n"; - -#my @WantedSections = qw (V D J); - -=head2 Second Stanza: Parse the main Alignment Table - -=head3 Get the table, then determine the character at which to split the 'Info' & 'Alignment' panels. - -As this is a little involved and comparamentalises nicely we sub-contract this to two functions"" - - (my @Table) = returnLinesBetween (\@Lines, "Alignment", "Lambda" ); - my $PanelSplitPoint = findSplitPoint (\@Table); #Why can't they just use a fixed field width or a tab as a delimiter? - -=cut - (my @Table) = returnLinesBetween (\@Lines, "Alignment", "Lambda" ); - my $PanelSplitPoint = findSplitPoint (\@Table); #Why can't they just use a fixed field width or a tab as a delimiter? 
-#If you are interested, enable this line: -# print "D: The info panel was detected at: '$splitPoint'\n"; - -=head3 - -=cut - - -foreach my $C_Line (0..$#Table) - { - -=head3 Call the line type we find: There are 4: - -These are distinguished by the number of fields (one or mores spacer is a field separator) in the Info & Alignment Panels (see values in brackets) - - | <- This split is ~40 chars. from the start of the line - * InfoPanel * | * Alignment Panel * - : is a "Blank" line (-1,-1) - <----FWR1--><----------CDR1--------><-----------------------FWR2------ : is "Region Markup" (-1,0) - W A A S G F T F N T Y A V H W V R Q A P G K G : is "AA Sequence" (-1, >=0) - Query_1 27 TGGGCAGCCTCTGGATTCACCTTCAATACCTATGCTGTGCACTGGGTCCGCCAGGCTCCAGGCAAGGGGC 96 : is "DNA Sequence" (2,1) - V 95.3% (222/233) IGHV3-30*04 64 ..T......................G..G.......A................................. 133 : is "" " - -So we split 40 chars in and then the two parts on spaces. - - -=cut - -# print "D: (sub) Line in parsed table: '$C_Line': \n"; - - my ($InfoPanel, $AlignmentPanel) = $Table[$C_Line] =~ /^(.{$PanelSplitPoint})(.*)$/; - - my @InfoColumns = split (/\s+/,$InfoPanel); - my @AlignmentColumns = split (/\s+/,$AlignmentPanel); - -#If you want to see how the line is being split enable either of these next two lines; the 2nd is more detailed than the first -# print "D: Line: $C_Line/t Number of Columns (Info, Alignment): \t$#InfoColumns \t $#AlignmentColumns\n"; -# print "D: For '$C_Line' \t line in the table there are parts: '$InfoPanel' [$#InfoColumns], '$AlignmentPanel [$#AlignmentColumns]'\n"; - -#Populate this so we can step through it - -=head4 Is a blank line: -=cut - if ($#InfoColumns == -1 && $#AlignmentColumns == -1) - { -# print ": Blank\n"; - next; - } #For now I think we just skip - is not needed (though might be implict mark-up) - -=head4 Is region mark-up: -=cut - if ($#InfoColumns == -1 && $#AlignmentColumns ==0) - { -# print ": Region Markup detected\n"; - 
$RegionMarkup = $RegionMarkup.$AlignmentPanel; #Collect the information, then re-synthesise it at the end of record - next; - } -=head4 Is query DNA Sequence: -=cut - if ($#InfoColumns == 2 && $#AlignmentColumns ==1) - { -# print ": DNA Query Sequence\n"; - #Detect the two coordinatates of alignment against the query sequence: (last two numbers of the two 'panels') - ($ThisQueryStart) = $InfoPanel =~ / (\d+) *$/; - ($ThisQueryEnd) = $AlignmentPanel =~ / (\d+) *$/; - my ($ThisDNASeq) = $AlignmentPanel =~ /^(.*?) /; - #If you want to know what we just found: - #print "D: This DNA Sequence: '$ThisDNASeq'\n"; - $DNAQuerySequence = $DNAQuerySequence. $ThisDNASeq; #Add it on to whatever we already have. - #Move the needle if there are smaller / greater; otherwise prime the 'needles': - if ($ThisQueryStart < $Query_Start or $Query_Start == -1) - { $Query_Start = $ThisQueryStart; } - if ($ThisQueryEnd > $Query_End or $Query_End == -1) - { $Query_End = $ThisQueryEnd; } -# print ": Query DNA Sequence detected This line: ($ThisQueryStart, $ThisQueryEnd) & Maximally: ($Query_Start, $Query_End)\n"; - next; - } -=head4 Is AA Sequence: - -This is complicated as it Need to decide whether this is the sequence of the read or that of the original V / D / J regions: - --------------> - A V Y Y C T R D M S P I M T T F A G N Y W G Q << Want this - Query_1 237 GCTGTTTATTACTGTACGAGAGATATGAGCCCCATCATGACAACGTTTGCCGGAAACTACTGGGGCCAGG 306 - V 95.3% (222/233) IGHV3-30*04 274 .....G.........G.......----------------------------------------------- 296 - A V Y Y C A R - V 95.7% (221/231) IGHV3-30-3*01 274 .....G.........G.....------------------------------------------------- 294 - - ...etc... - G T L V T V S S << Want this - Query_1 307 GAACCCTGGTCACCGTCTCCTCAG 330 - -To solve this we peak at the next line that it has the tag "Query" in it (we assume the line exists...) 
- -=cut - - if ($#InfoColumns == -1 && $#AlignmentColumns >=-1) - { - unless ($Table[$C_Line+1] =~ /Query/) { next; } #Is the next line the DNA sequence ? - # -# print ": AA sequence\n"; - - - $CurrentAASequence = $AlignmentPanel; - #print "D: Panel Split Point = $PanelSplitPoint, '$AlignmentPanel'\n"; - $CurrentAASequence =~ s/^ {$PanelSplitPoint}//; - #print "D: '$AAQuerySequence'\n"; -# print "D: Current AA Sequence: \t'$CurrentAASequence'\n"; - $AAQuerySequence = $AAQuerySequence.$CurrentAASequence; #Store the elongating AA Sequence as well - next; - } -=head4 Is Alignment: -=cut - if ($#InfoColumns == 4 && $#AlignmentColumns ==1) - { - #Not acutally interesting to us for this version of the parser. Delete ultimately? - next; - } - -#Is weird! Don't recognise it! - - warn "Weird! Don't recongnise this: '$ID' [$#InfoColumns,$#AlignmentColumns]// '",$Lines[$C_Line],15,"...'\n"; - } #End main iteration loop for alignment parsing. - - -=head2 The CDR3 is noted as problematic. Can we identify it? - -=cut - print "D: Starting Search for CDR3\n"; - #Do have the end of the FWR3 but not the CDR3? If so then it is worth trying to find the CDR3, otherwise...nothing we can do at this point - if (exists ($DomainBoundaries{"FWR3"}{"End"}) - && $AlignmentStart !=0 - && not (exists $DomainBoundaries{"CDR3"}{"End"}) ) #Guess we need to go looking for the end then... 
- { - #print "D: Placing call to markUpCDR3\n"; - my ($CDR3_Start, my $CDR3_End, my $CDR3_Found_Tag) = markUpCDR3 ($DNAQuerySequence, $AAQuerySequence, - $DomainBoundaries{"FWR3"}{"End"}, $AlignmentStart, - $DNACDR3_Pat, $AACDR3_Pat); - if ($CDR3_Start !=0 && $CDR3_End !=0) - { - $DomainBoundaries{"CDR3"}{"Start"} = $CDR3_Start; - $DomainBoundaries{"CDR3"}{"End"} = $CDR3_End ; - $DomainBoundaries{"CDR3"}{"FoundHow"} = $CDR3_Found_Tag; - print "D: CDR3 was found by pattern matching: '$CDR3_Found_Tag' ($CDR3_Start, $CDR3_End)\n"; - } - else - { print "D: CDR3 was not found [either by igBLAST or by pattern matching]\n"; - $DomainBoundaries{"CDR3"}{"FoundHow"} = "NOT_FOUND"; - } - } - else - { #Was reported by igBLAST - print "D: Found the FWR3 from the Domain Boundary Table\n"; - $DomainBoundaries{"CDR3"}{"FoundHow"} = "IGBLAST_NATIVE"; - } - -#print Dumper %DomainBoundaries; - -=head2 Get the top VDJ regions: - -=cut - -=head2 Extract General Features: - -=cut - (my $TopHit) = $_ =~ m/V-J Frame, Strand\):\n(.*?)\n/s; - print "D: Top Hits (raw)= '$TopHit' \n"; - my ($Top_V_gene_match, $Top_D_gene_match, $Top_J_gene_match, $Chain, $VJFrame, $Strand) = split (/\t/,$TopHit); - print "D: Top Hits (parsed)= '$Top_V_gene_match, $Top_D_gene_match, $Top_J_gene_match, $Chain, $VJFrame, $Strand'\n"; - -=head2 Store the V / D / J Genes used - -=cut - - if (defined $Top_V_gene_match && $Top_V_gene_match ne "") - { $OUTPUT_Data{"Top V Gene"} = $Top_V_gene_match; } - - if (defined $Top_D_gene_match && $Top_D_gene_match ne "") - { $OUTPUT_Data{"Top D Gene"} = $Top_D_gene_match; } - - if (defined $Top_J_gene_match && $Top_J_gene_match ne "") - { $OUTPUT_Data{"Top J Gene"} = $Top_J_gene_match; } - - if (defined $Strand && $Strand ne "") - { $OUTPUT_Data{"Strand"} = $Strand;} - -=head4 Preamble: ID, Frame, and V / D / J used: - -=cut - #Do a reality check: if we didn't get an ID, then skip: - unless (defined (defined $ID) && $ID ne "" && - defined $VJFrame && $VJFrame ne "") - { - 
print &printOUTPUTData (\%OUTPUT_Data)."\n"; - next; - } - -#Ok, so we have data...most likely: - #print "OUTPUT:\t",join ("\t", $ID, $VJFrame, $Top_V_gene_match, $Top_D_gene_match, $Top_J_gene_match); - - if (defined $VJFrame && defined $ID && $VJFrame ne "" && $ID ne "") - { $OUTPUT_Data{"VDJ Frame"} = $VJFrame;} - else - { - print &printOUTPUTData (\%OUTPUT_Data)."\n"; - next; - }#REALLY? We didn't find anything? Oh well, move to next record - -=head4 CDR1 - -=cut - #Remember that the alignment starts at the FWR1 start, not nt =0 on the read, hence we substract this off all future AA (& DNA coordinates) - - my $AlignmentOffset = $DomainBoundaries{"FWR1"}{"Start"}; - -# print "D: AA Seqeunce is: '$AAQuerySequence'\n"; - if (exists $DomainBoundaries{"CDR1"}{"Start"}) #It is very possible that it doesn't; assume the End does though if we find the Start - { -# my $VRegion = $Alginments{"V"}{$C_VRegion}; #Convenience.... - my $CDR1Start = $DomainBoundaries{"CDR1"}{"Start"}; - my $CDR1End = $DomainBoundaries{"CDR1"}{"End"}; - my $CDR1_Length = $CDR1End - $CDR1Start; -# print "D: CDR1 $CDR1Start $CDR1End = $CDR1_Length\n"; - #Remember that the alignment starts at the FWR1 start, not nt =0 on the read - my $CDR1_Seq_AA = substr ($AAQuerySequence, $CDR1Start - $AlignmentOffset, $CDR1_Length); -# print "D: '$CDR1_Seq_AA'\n"; - $CDR1_Seq_AA =~ s/ //g; - my $CDR1_Seq_AA_Length = length ($CDR1_Seq_AA); - #Add this data to the output store specifically: - $OUTPUT_Data{"CDR1 Seq"} = $CDR1_Seq_AA; - $OUTPUT_Data{"CDR1 Length"} = $CDR1_Length; - } - #What happens if there is no CDR1 found? Leave blank - the output routine can handle this - -=head4 CDR2 - -=cut - - if (exists $DomainBoundaries{"CDR2"}{"Start"}) #It is very possible that it doesn't; assume the End does though if we find the Start - { -# my $VRegion = $Alginments{"V"}{$C_VRegion}; #Convenience.... 
- my $CDR2Start = $DomainBoundaries{"CDR2"}{"Start"}; - my $CDR2End = $DomainBoundaries{"CDR2"}{"End"}; - my $CDR2_Length = $CDR2End - $CDR2Start; - my $CDR2_Seq_AA = substr ($AAQuerySequence, $CDR2Start - $AlignmentOffset , $CDR2_Length); - $CDR2_Seq_AA =~ s/ //g; - my $CDR2_Seq_AA_Length = length ($CDR2_Seq_AA); - #Add this data to the output store specifically: - $OUTPUT_Data{"CDR2 Seq"} = $CDR2_Seq_AA; - $OUTPUT_Data{"CDR2 Length"} = $CDR2_Length; - } - #What happens if there is no CDR2 found? Leave blank - the output routine can handle this. - -=head4 CDR3 - -=cut - if (exists $DomainBoundaries{"CDR3"}{"Start"}) #It is very possible that it doesn't; assume the End does though if we find the Start - { -# my $VRegion = $Alginments{"V"}{$C_VRegion}; #Convenience.... - my $CDR3Start = $DomainBoundaries{"CDR3"}{"Start"}; - my $CDR3End = $DomainBoundaries{"CDR3"}{"End"}; - my $CDR3_Length = $CDR3End - $CDR3Start; # This variable isn't used - delete it when safe to do so - my $CDR3_Seq_AA = substr ($AAQuerySequence, $CDR3Start - $AlignmentOffset, $CDR3_Length); - my $CDR3_Seq_DNA = substr ($DNAQuerySequence, $CDR3Start - $AlignmentOffset, $CDR3_Length); - $CDR3_Seq_AA =~ s/ //g; - $CDR3_Seq_DNA =~ s/ //g; - my $CDR3_Seq_AA_Length = length ($CDR3_Seq_AA); - my $CDR3_Seq_DNA_Length = length ($CDR3_Seq_DNA); - #Add this data to the output store specifically: - $OUTPUT_Data{"CDR3 Seq"} = $CDR3_Seq_AA; - $OUTPUT_Data{"CDR3 Length"} = $CDR3_Seq_AA_Length; - $OUTPUT_Data{"CDR3 Seq DNA"} = $CDR3_Seq_DNA; - $OUTPUT_Data{"CDR3 Length DNA"} = $CDR3_Seq_DNA_Length; - #And in the case of the CDR3 how we found it: - $OUTPUT_Data{"CDR3 Found How"} = $DomainBoundaries{"CDR3"}{"FoundHow"}; - } - #What happens if there is no CDR3 found? Leave blank - the output routine can handle this. -#die "HIT BLOCK\n"; -#End of the record; output the data we have collected and move on. 
-print &printOUTPUTData (\%OUTPUT_Data)."\n"; -} - - - -############ -sub returnLinesBetween { -=head3 SUB: returnLinesBetween ({reference to array Index array}, {regex for top of section}, {regex for bottom of section}) - -When passed a reference to an array and two strings - interpreted as REGEX's - will return the lines of the Array -that are bounded by these tags. - -If either of the tags are not found - or are found in the wrong order - then a null list is returned. - -=cut - -my ($Text_ref, $TopTag, $BotTag) = @_; - -my @Table; -#The two boundary conditions at which we will cut the table: -#print "D: [returnLinesBetween]: '$TopTag, $BotTag'\n"; -#How we record these: -my $AlignmentLine_Top=0; my $AlignmentLine_Bot=0; - -my $LineIndex=-1; #-1 As the loop increments this line counter first, then does its checks. -#If you care: -#print "D: Lines of text passed: $$#Lines\n"; - -#Iterate through until we find what we are looking for or run out of text to search: -while (($AlignmentLine_Bot ==0 or $AlignmentLine_Top==0) && $LineIndex <=$#{$Text_ref}) - { - $LineIndex++; - #Enable if you need to care: -# print "D: Line Index = $LineIndex\n"; - - if ($$Text_ref[$LineIndex] =~ m/$TopTag/) - { - $AlignmentLine_Top = $LineIndex; -# print "D: [returnLinesBetween]: TopTag found in Line: '$$Text_ref[$LineIndex]'\n"; #Enable if you are interested - } - if ($$Text_ref[$LineIndex] =~ m/$BotTag/) - { - $AlignmentLine_Bot = $LineIndex; -# print "D: [returnLinesBetween]: Bottom Tag found in Line: '$$Text_ref[$LineIndex]'\n"; #Enable if you are interested - } - } -#Reality check: did we find anything? If not then we return null. -if ($AlignmentLine_Top ==0 && $AlignmentLine_Bot ==0) - { return; } -#Again, enable if you care: -#print "D: [returnLinesBetween] Lines for section table: '$AlignmentLine_Top to $AlignmentLine_Bot'\n"; - -#We want the lines one down and one up - so polish these. 
-$AlignmentLine_Top++; $AlignmentLine_Bot--; - -#Return as an array slice: -return (@$Text_ref[$AlignmentLine_Top .. $AlignmentLine_Bot]); -} -############ - -sub findSplitPoint -{ -=head2 sub: $PanelBoundaryCahracter = findSplitPoint (\@Table) - -When passed a table with the alignment in it makes an educated guess as to the precise split point to -spearate the 'info' and 'alignment' panels. -This is a right olde faff because the field / panel boundaries change. - - ' Query_6 167 GAGGTGCAGTTGTTGGAGTCTGGGGGAGGCTTGGCACAGCC-GGGGGGTCCCTGAGACTCTCCTGTGCAG 235' - ' Query_6 236 CCTCTGGATTCACCTTTGACAAATATGCCATGACCTGGGTCCGCCAGGCTCCAGGGAAGGGTCTGGAGTG 305' - ' Query_6 306 GGTCTCAACTATACTTGCCAGTGGTCG---CACAGACGACGCAGACTCCGTGAAGGGCCGGTTTGCCATC 372' - ' Query_6 373 TCCAGAGACAATTCCAAGAACACTCTGTATCTGCAAATGAACAGCCTGAGAGTCGAGGACACGGCCCTTT 442' - ' Query_6 443 ATTACTGTGCGAGTGAGGGGGACATAGTGGCTTCGGAGCTTTTGAGTACTGGGGCCAGGGAAACCTGGTC 512' -MOTIF_FOUND_IN_AA -i.e to contain just ATGC + "X" bases & the gap "-" character but not the "." character (found in the alingment proper) and have 4 fields in total - -Returns either -1 or the location of the panel boundary, issues a warning and returns -1 if is the most frequent boundary -because the pattern match has been failing more often that it suceeded. 
- -=cut -#A rough guess is 38 for normal sequences, 48 for reversed ones: - -my $SplitPos = 0; - -(my $Table_ref) = @_; #Get the reference to the table -my @DNALines; #We populate this for mining in the next section -foreach my $C_Line (@{$Table_ref}) - { - #print "D: $C_Line\n"; -# (my $SplitLine) = $C_Line; - #Split on consecutive tabs or spaces: - my @LineFields = split (/[\t\s]+/,$C_Line); - #print "D: Split Line: '",join (",",@LineFields),"' : $#LineFields\n"; - unless ( $LineFields[3] =~ m/[^\.]/ - && $LineFields[3] =~ m/[ATGCX]{20,}/ - && $#LineFields==4) - { next; } -#Enable if you want to know the lines we think are the DNA Query strings: - #print "D: DNA Line: '$C_Line'\n"; - push @DNALines, $C_Line; #Note it down - } - -my %PanelBounds; #Will contain the positions of the panel boundaries - -foreach my $C_DNALine (@DNALines) - { - #print "D: '$C_DNALine'\n"; - $C_DNALine =~ m/[ATGC-]+ \d+$/; #Match the DNA string and the indexingMOTIF_FOUND_IN_AA numbers afterwards, allow gap characters. - my $MatchPos = $-[0]; #This is the position of the start of the last match because we can't get the index() function to work - #(my $MatchPos) = index ($C_DNALine, / [ATGCX-]{20}/,0); - #print "D: '$C_DNALine' DNA panel starts at:'$MatchPos'\n"; - $PanelBounds{$MatchPos}++; - } -#Sort the hash values in order and then return the most frequent (will offer some resistance to the occasion pattern failure) -#The brackets around "($SplitPos)" are really necessary it seems. 
-($SplitPos) = (sort { $a <=> $b } keys %PanelBounds); -#If you want -#print Dumper %PanelBounds; -#Tell people if we are having difficultlty: -if ($SplitPos == -1) { warn "Couldn't identify the panel boundaries\n"; } -#print "D: $SplitPos: Returning the split position of: '$SplitPos'\n"; -return $SplitPos; -} - - -## -# -# -### - - - - - -##### -# -# -##### -sub markUpCDR3 -{ -=head3 Sub: (Start, End, Found How) = markUpCDR3 (DNASeq, AASeq, FWR3 End, FWR1 Offset, DNA Regex, AA Regex) - -Tries to identify the end of the CDR3 using the DNA and RNA Sequence patterns MOTIF_FOUND_IN_AAsupplied. The CDR3 is assumed to start -at the end of the FWR3. -To reduce FP matches only the sequences (DNA & AA) after the FWR3 are tested with the pattern. -The position of the first matching pattern is reported. - -=head4 Fuller Usage: - -my ($CDR3_Start, my $CDR3_End) = markUpCDR3 ($DNAQuerySequence, $AAQuerySequence, - $DomainBoundaries{"FWR3"}{"End"}, $DomainBoundaries{"FWR1"}{"Start"}, - $DNACDR3_Pat, $AACDR3_Pat); - - - -=head4 Returned Values - -If the CDR3 was found then we we signal like this: - - $MotifFound ==0 : Nope, didn't find either motif - $MotifFound ==1 : Found at the DNA level, not the AA level - $MotifFound ==2 : Found at the the AA level, not the DNA level - $MotifFound ==3 : Found at the the AA level & the DNA level - -(Also remember that if the FWR3 region couldn't be identified in the sequence there is a 4th option: not tested; this routine isn't called therefore) - -The Start and Ends returned are from the first sucessful match (MotifFound==3): though hopefully they are the same. -Formally the test order is: - - 1) DNA - 2) AA - -i.e. DNA bp locations have priority. - -Technically the locations are determined by a regex match then the $+[0] array (i.e. the end of the pattern match). -See pages like this: http://stackoverflow.com/questions/87380/how-can-i-find-the-location-of-a-regex-match-in-perl for an explanation. 
- -=head3 Manipulation of AA patternsMOTIF_FOUND_IN_AA - -Note that patterns are assumed to require white space inserting in them between the letters. -This could be a serious limitation - - -=cut - -#Get the parameters passed: -my ($DNA, $AA, $FWR3_End, $FWR1_Start, $DNAPat, $AAPat) = @_; -print "D: markUpCDR3: Passed Parameters '$FWR3_End, $FWR1_Start, $DNAPat, $AAPat' (& AA & DNA sequence)\n"; - - -#Setup our return values: -my $Start = 0; my $End =0; my $MotifFound = 0; -my $How; #Literally How the motif was found (or not if blank) - - -=head4 Prepare the sequences and the patterns for use - -Specifically: trim off the start of the AA & DNA string already allocated to other CDRs or FWRs - -Add in spaces into the AA regex pattern because we can't get regex-ex freespacing mode i.e. "$Var =~ m/$AAPat/x" working. - - -We take the "-1" as the CropPoint position to include the previous 3 nucleotides / AAs; remember to add this back on -in position calculations. - - -=cut - -#Because igBLAST doesn't always report from the start of the read (primers and things are upstream): - -my $CropPoint = $FWR3_End - $FWR1_Start - 1 ; -#print "D: markUpCDR3: Crop point is: '$CropPoint'\n"; - -#print "D: markUpCDR3: Cropping point is: '$CropPoint' characters from start\n"; -#We trim off the parts we expect to find the CDR3 motifs in leaving at extra 3nts on to allow for base miss-calling: - -my $AA_Trimmed = substr ($AA, $CropPoint); -my $DNA_Trimmed = substr ($DNA ,$CropPoint); -#print "D: markUpCDR3: AA = '$AA' (untrimmed)\nD: markUpCDR3: TR = '$AA_Trimmed' (Trimmed) ", length ($AA_Trimmed)," nts long\n"; -#print "D: markUpCDR3: Testing: AA = '$AA_Trimmed', DNA = '$DNA_Trimmed'\n"; - -#This lovely hack is to account for the spaces in the AA sequence and we can't get the "$Var =~ m/$AAPat/x" working -my $AAPat_Spaced; -foreach my $C_Char (0..length($AAPat)-1) #The -1 is because we don't want trailing spaces until the next nt -> AA translation. 
- { $AAPat_Spaced = $AAPat_Spaced.'\s+'.substr ($AAPat,$C_Char,1); } -#And write this back into the main pattern we were passed: -$AAPat = $AAPat_Spaced; - -#temp hack: -#$AA_Trimmed = $AA; -my $MotifFound=0; #So we can record which patterns we found -my $MotifPositionDNA =-1; -my $MotifPositionAA =-1; - -#print "D: markUpCDR3: Pattern: '$AAPat_Spaced'\n"; -=head4 At DNA level: "TGG GGx xxx GGx" [+1] - -=cut - -#print "D: markUpCDR3: '$DNA_Trimmed' (Trimmed DNA string)\n"; - -if ($DNA_Trimmed =~ m/$DNAPat/) - { - $MotifPositionDNA = $+[0]; #Just the easiest way to do this in Perl -# print "D: markUpCDR3:: Found Motif match on DNA at bp: '$MotifPositionDNA'\n"; - $MotifFound = $MotifFound + 1; - #Any more matches further on? - my $LaterString = substr ($DNA_Trimmed, $MotifPositionDNA); -# print "D: markUpCDR3: '$AA_Trimmed' (AA Trimmed string)\n"; -# print "D: markUpCDR3: '", substr ($DNA_Trimmed,0, $MotifPositionDNA)," (DNA until pattern match string)\n"; -# print "D: markUpCDR3: '$DNA_Trimmed' (Trimmed DNA string)\n"; -# print "D: markUpCDR3: '$LaterString' (Later part of DNA string)\n"; - if ($LaterString =~ m/$DNAPat/) - { print "D: markUPCDR3: Also got a match further down the DNA String: at ", $-[0] ," to ", $+ [0], " - which might be worrying\n"; } - } - -=head4 At AA level: "WGxG" [+2] - -=cut - -if ($AA_Trimmed=~ m/$AAPat/) - { - $MotifPositionAA = $+[0]; #Just the easiest way to do this in Perl - $MotifFound = $MotifFound + 2; -# print "D: markUpCDR3: Found Motif match on AA at position (on DNA remember): '$MotifPositionAA' (ie.)\n"; - (my $CDR3_seq) = substr ($AA_Trimmed, 0, $MotifPositionAA); -# print "D: markUpCDR3: Seq ='$CDR3_seq' - as detected\n"; - - } - -=head4 Assess the results of motif position finding - -=cut - -#print "D: markUpCDR3: MotifFound = '$MotifFound'\n"; - -if ($MotifFound ==0) - { return ($Start, $End, $MotifFound); } #The easy one really: return we didn't find the CDR3 - -# -$Start = $FWR3_End; #We assume the end of the FWR3 is the 
start of CDR3: -#Just found in DNA: -if ($MotifFound ==1) - { - $Start = $FWR3_End; #We assume the end of the FWR3 is the start of CDR3: - $End = $MotifPositionDNA; - $How = "MOTIF_FOUND_IN_DNA"; - } -#Just found in AA: -if ($MotifFound ==2) - { - $End = $MotifPositionAA; - $How = "MOTIF_FOUND_IN_AA"; - } - -#Found in both, DNA has priority: -if ($MotifFound ==3) - { - $Start = $FWR3_End ; #We assume the end of the FWR3 is the start of CDR3: - $End = $MotifPositionDNA; - $How = "MOTIF_FOUND_IN_BOTH"; - } - -#print "D: markUpCDR3: Motif found = $MotifFound\n"; - -=head4 These next few lines are for testing / diagnostics only - disable for general use - -If you are interested in getting the CDR3 directly then remember the main coordinate system is defined such that -the start of FWR1 is unlikely to be at nt 1. - -=cut - -$Start = $FWR3_End - $FWR1_Start -1; -$End = $End + $CropPoint; -my $CDR3_RegionLength = $End - $Start; -#print "D: markUpCDR3: CDR3 Length= $Start - $End = '$CDR3_RegionLength'\n"; -(my $CDR3_seq) = substr ($AA, $Start, $CDR3_RegionLength); - -#Add onto the coordinates what we trimmed off: - - -#print "D: markUpCDR3: Seq ='$CDR3_seq'\n"; - -print "D: markUpCDR3: returning: $Start, $End, $How, ($MotifFound) [NB: offset of :'+ $FWR1_Start'\n"; -#die "HIT BLOCK\n"; -return ($Start + $FWR1_Start, $End + $FWR1_Start, $How); -} - - -sub printOUTPUTData { -=head2 sub: $OutputDataString = printOUTPUTData {\%OutputData} - -When passed an array containing the appropriate CDR, Top V / D/ J genes and the seqeunce ID. -This prepared and then returned as a text string that can then be printed to STDOUT: - - print (printOUTPUTData (\%OutputData)); - -Any missing data in the Hash array it polietly ignored and a null string printed in place. -The text field is tab delimited; there are no extra trailing tabs or carriage returns in place. - -Actually the fields printed out are stored in an index array. 
- -=head3 Header output - -If the routine is passed a key 'HEADER' then the header columns are returned as that string. -This is tested first - so don't add this unless you mean to. - -=cut - -my @HeaderFields = ("ID", "VDJ Frame", "Top V Gene", "Top D Gene", "Top J Gene", - "CDR1 Seq", "CDR1 Length", - "CDR2 Seq", "CDR2 Length", - "CDR3 Seq", "CDR3 Length", "CDR3 Seq DNA", "CDR3 Length DNA", "Strand", - "CDR3 Found How"); - -my $OutputString = "OUTPUT:"; #What we are going to build the output into. - -=head4 Print Header & Exit? - -=cut - -my ($Data_ref) = @_; -#print "D: printOUTPUTData: Running\n"; - -if (exists $$Data_ref {"HEADER"}) - { - $OutputString .= "\t"; - for(my $n = 0; $n <= $#HeaderFields; $n++) - { - $OutputString .= $HeaderFields[$n]; - $OutputString .= "\t" if($n < $#HeaderFields); - } - - # foreach my $C_Header (@HeaderFields) - # { $OutputString .= "$C_Header"; } # - - print "D: printOUTPUTData: HEADER Printout requested '@HeaderFields'\n"; - return ($OutputString); - } - -=head3 Assemble whatever data we have - and tab delimit the null fields - -=cut -#print "D: printOUTPUTData: Will pretty print this:\n", Dumper $Data_ref; -foreach my $C_Header (@HeaderFields) - { - - if (exists ($$Data_ref {$C_Header})) - { $OutputString .= "\t". 
$$Data_ref{$C_Header}; } #We have data to print out - else - { $OutputString .="\t"; } #Add a trailing space - } # - -return ($OutputString); -} - - -######################################### Code Junk ######################## - - -=head2 Code Junk Attic - -=head3 Demonstrates how to reverse translate an amino acid sequence into DNA: - -use Bio::Tools::CodonTable; -use Bio::Seq; - -# print possible codon tables - my $tables = Bio::Tools::CodonTable->tables; - while ( (my $id, my $name) = each %{$tables} ) { - print "$id = $name\n"; - } - my $CodonTable = Bio::Tools::CodonTable->new(); - - my $ExampleSeq = Bio::PrimarySeq->new(-seq=>"WGxG", -alphabet => 'protein') or die "Cannot create sequence object\n"; - - -my $rvSeq = $CodonTable->reverse_translate_all($ExampleSeq); -print "D: '$rvSeq'\n"; -die "TEST OVER\n"; - -=cut - - -=head3 For processing the 'Alignment lines' section of the alginment table - - #If we are ever interested; then enable the code below: -# print ": Alignment\n"; -# $InfoPanel =~ s/^ +//; $InfoPanel =~ s/ +$//; #Clean off trailing spaces -# my ($Germclass, $PID, $PID_Counts, $Allele) = split (/\s+/,$InfoPanel); #Split on spaces -##Enable if you need to know what we just found: -# #print "D: Fields are (Germclass, PID, PID_Counts, Allele) \t$Germclass, $PID, $PID_Counts, $Allele\n"; -# #A reality check: we should have an Allele - or some text here. -# unless (defined $Allele && $Allele ne "") -# { warn "Cannot get Allele for Line '$C_Line' - implies improper parsing: '",substr ($Lines[$C_Line],0,15),"...'\n"; } -# if (exists ($Alginments {$Germclass}{$Allele})) -# { $Alginments {$Germclass}{$Allele} = $Alginments {$Germclass}{$Allele}.$CurrentAASequence; } #Carry on adding -# else #more work needed as we need to 'pad' the sequence with fake gap characters) -# { -##Do we still need this padding? 
I don't think so -# -# -# my $PaddingChars = ($ThisQueryStart-$Query_Start); -# print "D: New gene found: need to pad it with ($ThisQueryStart-$Query_Start) i.e. '$PaddingChars' characters\n"; -# #To help testing, calculate this first: -# my $PaddingString = " "x $PaddingChars; -# $Alginments {$Germclass}{$Allele} = $CurrentAASequence; -# } -# next - -=head3 Demonstration of Pattern match positions - -my $Text = "12345TTT TTAAAAA"; -my $TestPat = "TTT\\s+TT"; -(my $Result)= $Text =~ m/$TestPat/; -print "D: Two vars are: - = ",$-[0], " & + =", $+[0]," for test pattern '$TestPat'\n"; - -sub printCDR3 { - -=head3 Subroutine: printCDR3 ($CDR3_Start, $CDR3_End, "SUMMARY_TABLE", $AAQuerySequence, $DNAQuerySequence); - -???? IS THIS FUNCTION IN USE ????? - -Handles the printing of the output when passed information about the CDR3 region. - - -The result is sent returned as a text string in this version hence use it like this if you want to send it to STDOUT: - - print printCDR3 ($CDR3_Start, $CDR3_End, "SUMMARY_TABLE", $AAQuerySequence, $DNAQuerySequence), "\n"; - -#=cut - -#Despite the similarity in names, these are all local copies passed to us: - -my ($Start, $End, $Tag, $FullAAQuerySequence, $FullDNAQuerySequence) = @_; - -#For DNA: -my ($CDR_DNA_Seq) = substr ($FullDNAQuerySequence, $Start, $Start+$End); -my ($CDR_DNA_Length) = length ($CDR_DNA_Seq); - -#For AA: -my ($CDR_AA_Seq) = substr ($FullAAQuerySequence, $Start, $Start+$End); -my ($CDR_AA_Length) = length ($CDR_AA_Seq); - -my $ReturnString = join ("\t", $CDR_DNA_Seq, $CDR_DNA_Length, $CDR_AA_Seq, $CDR_AA_Length, $Tag); #Create here so we can inspect it / post process it if needed: -print "D: SUB: printCDR3: As returned: '$ReturnString'\n"; -return ($ReturnString); - -} - -=cut - - - -=head2 Change Log - -=head3 Version 1.2 - - 1) Fixed the 'Process recrod request' feature' [was failed increment in $Record] - 2) Deleted / Deactivated the function 'printCDR3' [wasn't in used; kept if useful for parts]. 
- This function is replaced by the more general printOUTPUTData() - 3) A tag for the CDR3 status is now output for every record / read. - Initially this is set to "NOT_FOUND" and changed if evidence for the CDR3 is found. - -=head4 Version 1.3 - - 1) The tophit line was split on whitespace, however sometimes the VJFrame is something like “In-frame with stop codon”, - which means the line is also split on the spaces therein. It now splits on tabs only, and this seems to work properly. - - found by Bas Horsman. - -=head4 Version 1.3a - - 1) "MOTIF_FOUND_IN_AA" reported correctly (was impossible previously due to addition error to the $MotifFound var (never could == 3) - -=cut - -=head4 Version 1.4 - - 1) Now processes files using Mac/Unix/MS-DOS newline characters: - - $_ =~ s/\r\n/\n/g; #In case line ends are MS-DOS - $_ =~ s/\r/\n/g; #In case line ends are Mac - #The whole record - one per read - is now stored in $_ - my @Lines =split (/\R/,$_); #Split on new lines - -=head4 Version 1.4a - -1) Fixed the length of the CDR3 AA string being reported correctly: - - $OUTPUT_Data{"CDR3 Length"} = $CDR3_Length; - to: - $OUTPUT_Data{"CDR3 Length"} = $CDR3_Seq_AA_Length; - \ No newline at end of file diff -r beaa487ecf43 -r 5ffd52fc35c4 igparse.xml --- a/igparse.xml Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ - - - - igblastparser/igparse.pl $input 0 2>/dev/null | grep -v "D:" | cut -f2- > $output - - - - - - - - - Step 2 of the Immune Repertoire tools, extracts the relevant information needed from the reports generated by igblast (Step 1) - - diff -r beaa487ecf43 -r 5ffd52fc35c4 imgt_loader.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/imgt_loader.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,82 @@ +args <- commandArgs(trailingOnly = TRUE) + +summ.file = args[1] +aa.file = args[2] +junction.file = args[3] +out.file = args[4] + +summ = read.table(summ.file, sep="\t", header=T, quote="", fill=T) +aa = read.table(aa.file, 
sep="\t", header=T, quote="", fill=T) +junction = read.table(junction.file, sep="\t", header=T, quote="", fill=T) + +old_summary_columns=c('Sequence.ID','JUNCTION.frame','V.GENE.and.allele','D.GENE.and.allele','J.GENE.and.allele','CDR1.IMGT.length','CDR2.IMGT.length','CDR3.IMGT.length','Orientation') +old_sequence_columns=c('CDR1.IMGT','CDR2.IMGT','CDR3.IMGT') +old_junction_columns=c('JUNCTION') + +added_summary_columns=c('Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence') +added_sequence_columns=c('FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT') + +added_junction_columns=c('P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION') +added_junction_columns=c(added_junction_columns, 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb') + +out=summ[,c("Sequence.ID","JUNCTION.frame","V.GENE.and.allele","D.GENE.and.allele","J.GENE.and.allele")] + +out[,"CDR1.Seq"] = aa[,"CDR1.IMGT"] +out[,"CDR1.Length"] = summ[,"CDR1.IMGT.length"] + +out[,"CDR2.Seq"] = aa[,"CDR2.IMGT"] +out[,"CDR2.Length"] = summ[,"CDR2.IMGT.length"] + +out[,"CDR3.Seq"] = aa[,"CDR3.IMGT"] +out[,"CDR3.Length"] = summ[,"CDR3.IMGT.length"] + +out[,"CDR3.Seq.DNA"] = junction[,"JUNCTION"] +out[,"CDR3.Length.DNA"] = nchar(as.character(junction[,"JUNCTION"])) +out[,"Strand"] = summ[,"Orientation"] +out[,"CDR3.Found.How"] = "a" + +out[,added_summary_columns] = 
summ[,added_summary_columns] + +out[,added_sequence_columns] = aa[,added_sequence_columns] + +out[,added_junction_columns] = junction[,added_junction_columns] + +out[,"Top V Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"V.GENE.and.allele"])) +out[,"Top D Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"D.GENE.and.allele"])) +out[,"Top J Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"J.GENE.and.allele"])) + +out = out[,c('Sequence.ID','JUNCTION.frame','Top V Gene','Top D Gene','Top J Gene','CDR1.Seq','CDR1.Length','CDR2.Seq','CDR2.Length','CDR3.Seq','CDR3.Length','CDR3.Seq.DNA','CDR3.Length.DNA','Strand','CDR3.Found.How','Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence','FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT','P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb')] + +names(out) = c('ID','VDJ Frame','Top V Gene','Top D Gene','Top J Gene','CDR1 Seq','CDR1 Length','CDR2 Seq','CDR2 Length','CDR3 Seq','CDR3 Length','CDR3 Seq DNA','CDR3 Length DNA','Strand','CDR3 Found How','Functionality','V-REGION identity %','V-REGION identity nt','D-REGION reading frame','AA JUNCTION','Functionality comment','Sequence','FR1-IMGT','FR2-IMGT','FR3-IMGT','CDR3-IMGT','JUNCTION','J-REGION','FR4-IMGT','P3V-nt nb','N-REGION-nt nb','N1-REGION-nt nb','P5D-nt nb','P3D-nt 
nb','N2-REGION-nt nb','P5J-nt nb','3V-REGION trimmed-nt nb','5D-REGION trimmed-nt nb','3D-REGION trimmed-nt nb','5J-REGION trimmed-nt nb','N-REGION','N1-REGION','N2-REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb') + +out[,"VDJ Frame"] = as.character(out[,"VDJ Frame"]) + +fltr = out[,"VDJ Frame"] == "in-frame" +if(any(fltr)){ + out[fltr, "VDJ Frame"] = "In-frame" +} + +fltr = out[,"VDJ Frame"] == "null" +if(any(fltr)){ + out[fltr, "VDJ Frame"] = "Out-of-frame" +} + +fltr = out[,"VDJ Frame"] == "out-of-frame" +if(any(fltr)){ + out[fltr, "VDJ Frame"] = "Out-of-frame" +} + +fltr = out[,"VDJ Frame"] == "" +if(any(fltr)){ + out[fltr, "VDJ Frame"] = "Out-of-frame" +} + +for(col in c('Top V Gene','Top D Gene','Top J Gene')){ + out[,col] = as.character(out[,col]) + fltr = out[,col] == "" + if(any(fltr)){ + out[fltr,col] = "NA" + } +} + +write.table(out, out.file, sep="\t", quote=F, row.names=F, col.names=T) diff -r beaa487ecf43 -r 5ffd52fc35c4 imgt_loader.xml --- a/imgt_loader.xml Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ - - - - imgt_loader/imgt_loader.sh $in_file $out_file "tmp" - - - - - - - - -**INPUT** - -This tool accepts an IMGT/HIGHV-QUEST ZIP file - -**OUTPUT** - -The following data is used for ARGalaxy - -+-----------------+----------------------------------------------+ -| Column name | Column contents | -+-----------------+----------------------------------------------+ -| ID | The Sequence ID provided by the sequencer. 
| -+-----------------+----------------------------------------------+ -| VDJ Frame | In-frame/Out-frame | -+-----------------+----------------------------------------------+ -| Top V Gene | The best matching V gene found. | -+-----------------+----------------------------------------------+ -| Top D Gene | The best matching D gene found. | -+-----------------+----------------------------------------------+ -| Top J Gene | The best matching J gene found. | -+-----------------+----------------------------------------------+ -| CDR3 Seq | The CDR3 region. | -+-----------------+----------------------------------------------+ -| CDR3 Length | The length of the CDR3 region. | -+-----------------+----------------------------------------------+ -| CDR3 Seq DNA | The CDR3 sequence region. | -+-----------------+----------------------------------------------+ -| CDR3 Length DNA | The length of the CDR3 sequence region. | -+-----------------+----------------------------------------------+ -| Functionality | If sequence is productive/unproductive | -+-----------------+----------------------------------------------+ - - - - - diff -r beaa487ecf43 -r 5ffd52fc35c4 imgt_loader/imgt_loader.py --- a/imgt_loader/imgt_loader.py Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,147 +0,0 @@ -import pandas as pd -try: - pd.options.mode.chained_assignment = None # default='warn' -except: - pass -import re -import argparse -import os - -def stop_err( msg, ret=1 ): - sys.stderr.write( msg ) - sys.exit( ret ) - -#docs.python.org/dev/library/argparse.html -parser = argparse.ArgumentParser() -parser.add_argument("--summ", help="The 1_Summary file from the imgt output") -parser.add_argument("--aa", help="The 5_AA-Sequence file from the imgt output") -parser.add_argument("--junction", help="The 6_Junction file from the imgt output") -parser.add_argument("--output", help="Output file") - -args = parser.parse_args() - -old_summary_columns = [u'Sequence ID', 
u'JUNCTION frame', u'V-GENE and allele', u'D-GENE and allele', u'J-GENE and allele', u'CDR1-IMGT length', u'CDR2-IMGT length', u'CDR3-IMGT length', u'Orientation'] -old_sequence_columns = [u'CDR1-IMGT', u'CDR2-IMGT', u'CDR3-IMGT'] -old_junction_columns = [u'JUNCTION'] - -added_summary_columns = [u'Functionality', u'V-REGION identity %', u'V-REGION identity nt', u'D-REGION reading frame', u'AA JUNCTION', u'Functionality comment', u'Sequence'] -added_sequence_columns = [u'FR1-IMGT', u'FR2-IMGT', u'FR3-IMGT', u'CDR3-IMGT', u'JUNCTION', u'J-REGION', u'FR4-IMGT'] -added_junction_columns = [u"P3'V-nt nb", u'N-REGION-nt nb', u'N1-REGION-nt nb', u"P5'D-nt nb", u"P3'D-nt nb", u'N2-REGION-nt nb', u"P5'J-nt nb", u"3'V-REGION trimmed-nt nb", - u"5'D-REGION trimmed-nt nb", u"3'D-REGION trimmed-nt nb", u"5'J-REGION trimmed-nt nb", u"N-REGION", u"N1-REGION", u"N2-REGION"] - -outFile = args.output - -#fSummary = pd.read_csv(triplets[0][0], sep="\t", low_memory=False) -fSummary = pd.read_csv(args.summ, sep="\t", dtype=object) -#fSequence = pd.read_csv(triplets[0][1], sep="\t", low_memory=False) -fSequence = pd.read_csv(args.aa, sep="\t", dtype=object) -#fJunction = pd.read_csv(triplets[0][2], sep="\t", low_memory=False) -fJunction = pd.read_csv(args.junction, sep="\t", dtype=object) -tmp = fSummary[["Sequence ID", "JUNCTION frame", "V-GENE and allele", "D-GENE and allele", "J-GENE and allele"]] - -tmp["CDR1 Seq"] = fSequence["CDR1-IMGT"] -tmp["CDR1 Length"] = fSummary["CDR1-IMGT length"] - -tmp["CDR2 Seq"] = fSequence["CDR2-IMGT"] -tmp["CDR2 Length"] = fSummary["CDR2-IMGT length"] - -tmp["CDR3 Seq"] = fSequence["CDR3-IMGT"] -tmp["CDR3 Length"] = fSummary["CDR3-IMGT length"] - -tmp["CDR3 Seq DNA"] = fJunction["JUNCTION"] -tmp["CDR3 Length DNA"] = '1' -tmp["Strand"] = fSummary["Orientation"] -tmp["CDR3 Found How"] = 'a' - -for col in added_summary_columns: - tmp[col] = fSummary[col] - -for col in added_sequence_columns: - tmp[col] = fSequence[col] - -for col in 
added_junction_columns: - tmp[col] = fJunction[col] - -outFrame = tmp - -outFrame.columns = [u'ID', u'VDJ Frame', u'Top V Gene', u'Top D Gene', u'Top J Gene', u'CDR1 Seq', u'CDR1 Length', u'CDR2 Seq', u'CDR2 Length', u'CDR3 Seq', u'CDR3 Length', - u'CDR3 Seq DNA', u'CDR3 Length DNA', u'Strand', u'CDR3 Found How', u'Functionality', 'V-REGION identity %', 'V-REGION identity nt', 'D-REGION reading frame', - 'AA JUNCTION', 'Functionality comment', 'Sequence', 'FR1-IMGT', 'FR2-IMGT', 'FR3-IMGT', 'CDR3-IMGT', 'JUNCTION', 'J-REGION', 'FR4-IMGT', 'P3V-nt nb', - 'N-REGION-nt nb', 'N1-REGION-nt nb', 'P5D-nt nb', 'P3D-nt nb', 'N2-REGION-nt nb', 'P5J-nt nb', '3V-REGION trimmed-nt nb', '5D-REGION trimmed-nt nb', '3D-REGION trimmed-nt nb', - '5J-REGION trimmed-nt nb', "N-REGION", "N1-REGION", "N2-REGION"] - -""" -IGHV[0-9]-[0-9ab]+-?[0-9]?D? -TRBV[0-9]{1,2}-?[0-9]?-?[123]? -IGKV[0-3]D?-[0-9]{1,2} -IGLV[0-9]-[0-9]{1,2} -TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])? -TRGV[234589] -TRDV[1-3] - -IGHD[0-9]-[0-9ab]+ -TRBD[12] -TRDD[1-3] - -IGHJ[1-6] -TRBJ[12]-[1-7] -IGKJ[1-5] -IGLJ[12367] -TRAJ[0-9]{1,2} -TRGJP?[12] -TRDJ[1-4] -""" - -vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?)", - r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)", - r"(IGKV[0-3]D?-[0-9]{1,2})", - r"(IGLV[0-9]-[0-9]{1,2})", - r"(TRAV[0-9]{1,2}(-[1-46])?(/DV[45678])?)", - r"(TRGV[234589])", - r"(TRDV[1-3])", - r"(IGHV[0-9]S[0-9]+)"] - -dPattern = [r"(IGHD[0-9]-[0-9ab]+)", - r"(TRBD[12])", - r"(TRDD[1-3])"] - -jPattern = [r"(IGHJ[1-6])", - r"(TRBJ[12]-[1-7])", - r"(IGKJ[1-5])", - r"(IGLJ[12367])", - r"(TRAJ[0-9]{1,2})", - r"(TRGJP?[12])", - r"(TRDJ[1-4])"] - -vPattern = re.compile(r"|".join(vPattern)) - -dPattern = re.compile(r"|".join(dPattern)) - -jPattern = re.compile(r"|".join(jPattern)) - - -def filterGenes(s, pattern): - if type(s) is not str: - return "NA" - res = pattern.search(s) - if res: - return res.group(0) - return "NA" - - - -outFrame["Top V Gene"] = outFrame["Top V Gene"].apply(lambda x: filterGenes(x, vPattern)) 
-outFrame["Top D Gene"] = outFrame["Top D Gene"].apply(lambda x: filterGenes(x, dPattern)) -outFrame["Top J Gene"] = outFrame["Top J Gene"].apply(lambda x: filterGenes(x, jPattern)) - - -tmp = outFrame["VDJ Frame"] -tmp = tmp.replace("in-frame", "In-frame") -tmp = tmp.replace("null", "Out-of-frame") -tmp = tmp.replace("out-of-frame", "Out-of-frame") -outFrame["VDJ Frame"] = tmp -outFrame["CDR3 Length DNA"] = outFrame["CDR3 Seq DNA"].map(str).map(len) -safeLength = lambda x: len(x) if type(x) == str else 0 -#outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? -#outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? -outFrame.to_csv(outFile, sep="\t", index=False, index_label="index") diff -r beaa487ecf43 -r 5ffd52fc35c4 imgt_loader/imgt_loader.r --- a/imgt_loader/imgt_loader.r Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -args <- commandArgs(trailingOnly = TRUE) - -summ.file = args[1] -aa.file = args[2] -junction.file = args[3] -out.file = args[4] - -summ = read.table(summ.file, sep="\t", header=T, quote="", fill=T) -aa = read.table(aa.file, sep="\t", header=T, quote="", fill=T) -junction = read.table(junction.file, sep="\t", header=T, quote="", fill=T) - -old_summary_columns=c('Sequence.ID','JUNCTION.frame','V.GENE.and.allele','D.GENE.and.allele','J.GENE.and.allele','CDR1.IMGT.length','CDR2.IMGT.length','CDR3.IMGT.length','Orientation') -old_sequence_columns=c('CDR1.IMGT','CDR2.IMGT','CDR3.IMGT') -old_junction_columns=c('JUNCTION') - -added_summary_columns=c('Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence') 
-added_sequence_columns=c('FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT') - -added_junction_columns=c('P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION') -added_junction_columns=c(added_junction_columns, 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb') - -out=summ[,c("Sequence.ID","JUNCTION.frame","V.GENE.and.allele","D.GENE.and.allele","J.GENE.and.allele")] - -out[,"CDR1.Seq"] = aa[,"CDR1.IMGT"] -out[,"CDR1.Length"] = summ[,"CDR1.IMGT.length"] - -out[,"CDR2.Seq"] = aa[,"CDR2.IMGT"] -out[,"CDR2.Length"] = summ[,"CDR2.IMGT.length"] - -out[,"CDR3.Seq"] = aa[,"CDR3.IMGT"] -out[,"CDR3.Length"] = summ[,"CDR3.IMGT.length"] - -out[,"CDR3.Seq.DNA"] = junction[,"JUNCTION"] -out[,"CDR3.Length.DNA"] = nchar(as.character(junction[,"JUNCTION"])) -out[,"Strand"] = summ[,"Orientation"] -out[,"CDR3.Found.How"] = "a" - -out[,added_summary_columns] = summ[,added_summary_columns] - -out[,added_sequence_columns] = aa[,added_sequence_columns] - -out[,added_junction_columns] = junction[,added_junction_columns] - -out[,"Top V Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"V.GENE.and.allele"])) -out[,"Top D Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"D.GENE.and.allele"])) -out[,"Top J Gene"] = gsub(".* ", "", gsub("\\*.*", "", summ[,"J.GENE.and.allele"])) - -out = out[!grepl("Less than", summ[,"V.GENE.and.allele"]),] -out = out[!grepl("Less than", summ[,"D.GENE.and.allele"]),] -out = out[!grepl("Less 
than", summ[,"J.GENE.and.allele"]),] - -out = out[,c('Sequence.ID','JUNCTION.frame','Top V Gene','Top D Gene','Top J Gene','CDR1.Seq','CDR1.Length','CDR2.Seq','CDR2.Length','CDR3.Seq','CDR3.Length','CDR3.Seq.DNA','CDR3.Length.DNA','Strand','CDR3.Found.How','Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence','FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT','P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb')] - -names(out) = c('ID','VDJ Frame','Top V Gene','Top D Gene','Top J Gene','CDR1 Seq','CDR1 Length','CDR2 Seq','CDR2 Length','CDR3 Seq','CDR3 Length','CDR3 Seq DNA','CDR3 Length DNA','Strand','CDR3 Found How','Functionality','V-REGION identity %','V-REGION identity nt','D-REGION reading frame','AA JUNCTION','Functionality comment','Sequence','FR1-IMGT','FR2-IMGT','FR3-IMGT','CDR3-IMGT','JUNCTION','J-REGION','FR4-IMGT','P3V-nt nb','N-REGION-nt nb','N1-REGION-nt nb','P5D-nt nb','P3D-nt nb','N2-REGION-nt nb','P5J-nt nb','3V-REGION trimmed-nt nb','5D-REGION trimmed-nt nb','3D-REGION trimmed-nt nb','5J-REGION trimmed-nt nb','N-REGION','N1-REGION','N2-REGION', 'P5.D1.nt.nb', 'P3.D1.nt.nb', 'N2.REGION.nt.nb', 'P5.D2.nt.nb', 'P3.D2.nt.nb', 'N3.REGION.nt.nb', 'P5.D3.nt.nb', 'P3.D2.nt.nb', 'N4.REGION.nt.nb', 'X5.D1.REGION.trimmed.nt.nb', 'X3.D1.REGION.trimmed.nt.nb', 
'X5.D2.REGION.trimmed.nt.nb', 'X3.D2.REGION.trimmed.nt.nb', 'X5.D3.REGION.trimmed.nt.nb', 'X3.D3.REGION.trimmed.nt.nb', 'D.REGION.nt.nb', 'D1.REGION.nt.nb', 'D2.REGION.nt.nb', 'D3.REGION.nt.nb') - -out[,"VDJ Frame"] = as.character(out[,"VDJ Frame"]) - -fltr = out[,"VDJ Frame"] == "in-frame" -if(any(fltr)){ - out[fltr, "VDJ Frame"] = "In-frame" -} - -fltr = out[,"VDJ Frame"] == "null" -if(any(fltr)){ - out[fltr, "VDJ Frame"] = "Out-of-frame" -} - -fltr = out[,"VDJ Frame"] == "out-of-frame" -if(any(fltr)){ - out[fltr, "VDJ Frame"] = "Out-of-frame" -} - -fltr = out[,"VDJ Frame"] == "" -if(any(fltr)){ - out[fltr, "VDJ Frame"] = "Out-of-frame" -} - -for(col in c('Top V Gene','Top D Gene','Top J Gene')){ - out[,col] = as.character(out[,col]) - fltr = out[,col] == "" - fltr[is.na(fltr)] = T - if(any(fltr)){ - out[fltr,col] = "NA" - } -} - -write.table(out, out.file, sep="\t", quote=F, row.names=F, col.names=T) diff -r beaa487ecf43 -r 5ffd52fc35c4 imgt_loader/imgt_loader.sh --- a/imgt_loader/imgt_loader.sh Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -#!/bin/bash -input=$1 -output=$2 -name=$3 -dir="$(cd "$(dirname "$0")" && pwd)" -mkdir -p $PWD/$name/files -f=$(file $input) -zip7Type="7-zip archive" -tarType="tar archive" -bzip2Type="bzip2 compressed" -gzipType="gzip compressed" -zipType="Zip archive" -rarType="RAR archive" -zxType="XZ compressed data" - -if [[ "$f" == *"$zip7Type"* ]]; then - echo "7-zip" - echo "Trying: 7za e $input -o$PWD/files/" - 7za e $input -o$PWD/$name/files -fi - -if [[ "$f" == *"$tarType"* ]] -then - echo "tar archive" - echo "Trying: tar xvf $input -C $PWD/files/" - tar -xvf $input -C $PWD/$name/files -fi - -if [[ "$f" == *"$bzip2Type"* ]] -then - echo "bzip2 compressed data" - echo "Trying: tar jxf $input -C $PWD/files/" - tar -jxf $input -C $PWD/$name/files -fi - -if [[ "$f" == *"$gzipType"* ]] -then - echo "gzip compressed data" - echo "Trying: tar xvzf $input -C $PWD/files/" - tar -xvzf $input 
-C $PWD/$name/files -fi - -if [[ "$f" == *"$zipType"* ]] -then - echo "Zip archive" - echo "Trying: unzip $input -d $PWD/files/" - unzip $input -d $PWD/$name/files > $PWD/unziplog.log -fi - -if [[ "$f" == *"$rarType"* ]] -then - echo "RAR archive" - echo "Trying: unrar e $input $PWD/files/" - unrar e $input $PWD/$name/files -fi - -if [[ "$f" == *"$zxType"* ]] -then - echo "xz compressed data" - echo "Trying: tar -xJf $input -C $PWD/files/" - tar xJf $input -C $PWD/$name/files -fi -find $PWD/$name/files -iname "1_*" -exec cat {} + > $PWD/$name/summ.txt -find $PWD/$name/files -iname "5_*" -exec cat {} + > $PWD/$name/aa.txt -find $PWD/$name/files -iname "6_*" -exec cat {} + > $PWD/$name/junction.txt - -#python $dir/imgt_loader.py --summ $PWD/$name/summ.txt --aa $PWD/$name/aa.txt --junction $PWD/$name/junction.txt --output $output - -Rscript --verbose $dir/imgt_loader.r $PWD/$name/summ.txt $PWD/$name/aa.txt $PWD/$name/junction.txt $output 2>&1 diff -r beaa487ecf43 -r 5ffd52fc35c4 merge.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,27 @@ +args <- commandArgs(trailingOnly = TRUE) + +input.1 = args[1] +input.2 = args[2] + +fields.1 = args[3] +fields.2 = args[4] + +field.1 = args[5] +field.2 = args[6] + +output = args[7] + +dat1 = read.table(input.1, header=T, sep="\t", quote="", stringsAsFactors=F, fill=T, row.names=NULL) +if(fields.1 != "all"){ + fields.1 = unlist(strsplit(fields.1, ",")) + dat1 = dat1[,fields.1] +} +dat2 = read.table(input.2, header=T, sep="\t", quote="", stringsAsFactors=F, fill=T, row.names=NULL) +if(fields.2 != "all"){ + fields.2 = unlist(strsplit(fields.2, ",")) + dat2 = dat2[,fields.2] +} + +dat3 = merge(dat1, dat2, by.x=field.1, by.y=field.2) + +write.table(dat3, output, sep="\t",quote=F,row.names=F,col.names=T) diff -r beaa487ecf43 -r 5ffd52fc35c4 merge_and_filter.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_and_filter.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,229 @@ +args 
<- commandArgs(trailingOnly = TRUE) + + +summaryfile = args[1] +sequencesfile = args[2] +mutationanalysisfile = args[3] +mutationstatsfile = args[4] +hotspotsfile = args[5] +aafile = args[6] +gene_identification_file= args[7] +output = args[8] +before.unique.file = args[9] +unmatchedfile = args[10] +method=args[11] +functionality=args[12] +unique.type=args[13] +filter.unique=args[14] +class.filter=args[15] +empty.region.filter=args[16] + +summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +mutationstats = read.table(mutationstatsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +hotspots = read.table(hotspotsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +AAs = read.table(aafile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +gene_identification = read.table(gene_identification_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") + +if(method == "blastn"){ + #"qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" + gene_identification = gene_identification[!duplicated(gene_identification$qseqid),] + ref_length = data.frame(sseqid=c("ca1", "ca2", "cg1", "cg2", "cg3", "cg4", "cm"), ref.length=c(81,81,141,141,141,141,52)) + gene_identification = merge(gene_identification, ref_length, by="sseqid", all.x=T) + gene_identification$chunk_hit_percentage = (gene_identification$length / gene_identification$ref.length) * 100 + gene_identification = gene_identification[,c("qseqid", "chunk_hit_percentage", "pident", "qstart", "sseqid")] + colnames(gene_identification) = c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match") +} + +input.sequence.count = nrow(summ) +print(paste("Number of sequences in 
summary file:", input.sequence.count)) + +filtering.steps = data.frame(character(0), numeric(0)) + +filtering.steps = rbind(filtering.steps, c("Input", input.sequence.count)) + +filtering.steps[,1] = as.character(filtering.steps[,1]) +filtering.steps[,2] = as.character(filtering.steps[,2]) +#filtering.steps[,3] = as.numeric(filtering.steps[,3]) + +summ = merge(summ, gene_identification, by="Sequence.ID") + +summ = summ[summ$Functionality != "No results",] + +print(paste("Number of sequences after 'No results' filter:", nrow(summ))) + +filtering.steps = rbind(filtering.steps, c("After 'No results' filter", nrow(summ))) + +if(functionality == "productive"){ + summ = summ[summ$Functionality == "productive (see comment)" | summ$Functionality == "productive",] +} else if (functionality == "unproductive"){ + summ = summ[summ$Functionality == "unproductive (see comment)" | summ$Functionality == "unproductive",] +} else if (functionality == "remove_unknown"){ + summ = summ[summ$Functionality != "No results" & summ$Functionality != "unknown (see comment)" & summ$Functionality != "unknown",] +} + +print(paste("Number of sequences after functionality filter:", nrow(summ))) + +filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ))) + +result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID") + +print(paste("Number of sequences after merging with mutation analysis file:", nrow(result))) + +result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID") + +print(paste("Number of sequences after merging with mutation stats file:", nrow(result))) + +result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID") + +print(paste("Number of sequences after merging with hotspots file:", nrow(result))) + +sequences = sequences[,c("Sequence.ID", "FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")] +names(sequences) 
= c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq") +result = merge(result, sequences, by="Sequence.ID", all.x=T) + +AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")] +names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA") +result = merge(result, AAs, by="Sequence.ID", all.x=T) + +print(paste("Number of sequences in result after merging with sequences:", nrow(result))) + +result$VGene = gsub("^Homsap ", "", result$V.GENE.and.allele) +result$VGene = gsub("[*].*", "", result$VGene) +result$DGene = gsub("^Homsap ", "", result$D.GENE.and.allele) +result$DGene = gsub("[*].*", "", result$DGene) +result$JGene = gsub("^Homsap ", "", result$J.GENE.and.allele) +result$JGene = gsub("[*].*", "", result$JGene) + +splt = strsplit(class.filter, "_")[[1]] +chunk_hit_threshold = as.numeric(splt[1]) +nt_hit_threshold = as.numeric(splt[2]) + +higher_than=(result$chunk_hit_percentage >= chunk_hit_threshold & result$nt_hit_percentage >= nt_hit_threshold) + +if(!all(higher_than, na.rm=T)){ #check for no unmatched + result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"]) +} + +if(class.filter == "101_101"){ + result$best_match = "all" +} + +write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T) + +print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == ""))) +print(paste("Number of empty FR2 sequences:", sum(result$FR2.IMGT.seq == ""))) +print(paste("Number of empty CDR2 sequences:", sum(result$CDR2.IMGT.seq == ""))) +print(paste("Number of empty FR3 sequences:", sum(result$FR3.IMGT.seq == ""))) + +if(empty.region.filter == "leader"){ + result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] +} else if(empty.region.filter == "FR1"){ + result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & 
result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] +} else if(empty.region.filter == "CDR1"){ + result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] +} else if(empty.region.filter == "FR2"){ + result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] +} + +print(paste("After removal sequences that are missing a gene region:", nrow(result))) +filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result))) + +if(empty.region.filter == "leader"){ + result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] +} else if(empty.region.filter == "FR1"){ + result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] +} else if(empty.region.filter == "CDR1"){ + result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] +} else if(empty.region.filter == "FR2"){ + result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] +} + +print(paste("Number of sequences in result after n filtering:", nrow(result))) +filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result))) + +cleanup_columns = c("FR1.IMGT.Nb.of.mutations", + "CDR1.IMGT.Nb.of.mutations", + "FR2.IMGT.Nb.of.mutations", + "CDR2.IMGT.Nb.of.mutations", + "FR3.IMGT.Nb.of.mutations") + +for(col in cleanup_columns){ + result[,col] = gsub("\\(.*\\)", "", result[,col]) + result[,col] = as.numeric(result[,col]) + result[is.na(result[,col]),] = 0 +} + +write.table(result, before.unique.file, sep="\t", 
quote=F,row.names=F,col.names=T) + +if(filter.unique != "no"){ + clmns = names(result) + + if(empty.region.filter == "leader"){ + result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "FR1"){ + result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "CDR1"){ + result$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "FR2"){ + result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } + + if(filter.unique == "remove"){ + result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] + } + result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it + + result = result[!duplicated(result$unique.def),] +} + +write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T) + +filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) + +if(nrow(summ) == 0){ + stop("No data remaining after filter") +} + +result$best_match_class = gsub(",.*", "", result$best_match) #gsub so the unmatched don't have a class after it + +result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":")) + + + + +result.matched = result[!grepl("unmatched", result$best_match),] +result.unmatched = result[grepl("unmatched", result$best_match),] + +result = rbind(result.matched, result.unmatched) + +result = result[!(duplicated(result$past)), ] + +result = result[,!(names(result) %in% c("past", "best_match_class"))] + +print(paste("Number of 
sequences in result after", unique.type, "filtering:", nrow(result))) + +filtering.steps = rbind(filtering.steps, c("After remove duplicates based on filter", nrow(result))) + +unmatched = result[grepl("^unmatched", result$best_match),c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")] + +print(paste("Number of rows in result:", nrow(result))) +print(paste("Number of rows in unmatched:", nrow(unmatched))) + +matched.sequences = result[!grepl("^unmatched", result$best_match),] + +write.table(x=matched.sequences, file=gsub("merged.txt$", "filtered.txt", output), sep="\t",quote=F,row.names=F,col.names=T) + +matched.sequences.count = nrow(matched.sequences) +unmatched.sequences.count = sum(grepl("^unmatched", result$best_match)) + +filtering.steps = rbind(filtering.steps, c("Number of matched sequences", matched.sequences.count)) +filtering.steps = rbind(filtering.steps, c("Number of unmatched sequences", unmatched.sequences.count)) +filtering.steps[,2] = as.numeric(filtering.steps[,2]) +filtering.steps$perc = round(filtering.steps[,2] / input.sequence.count * 100, 2) + +write.table(x=filtering.steps, file=gsub("unmatched", "filtering_steps", unmatchedfile), sep="\t",quote=F,row.names=F,col.names=F) + +write.table(x=result, file=output, sep="\t",quote=F,row.names=F,col.names=T) +write.table(x=unmatched, file=unmatchedfile, sep="\t",quote=F,row.names=F,col.names=T) diff -r beaa487ecf43 -r 5ffd52fc35c4 naive_output.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/naive_output.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,45 @@ +args <- commandArgs(trailingOnly = TRUE) + +naive.file = args[1] +shm.file = args[2] +output.file.ca = args[3] +output.file.cg = args[4] +output.file.cm = args[5] + +naive = read.table(naive.file, sep="\t", header=T, quote="", fill=T) +shm.merge = read.table(shm.file, sep="\t", header=T, quote="", fill=T) + + +final = merge(naive, shm.merge[,c("Sequence.ID", "best_match")], by.x="ID", 
by.y="Sequence.ID") +print(paste("nrow final:", nrow(final))) +names(final)[names(final) == "best_match"] = "Sample" +final.numeric = final[,sapply(final, is.numeric)] +final.numeric[is.na(final.numeric)] = 0 +final[,sapply(final, is.numeric)] = final.numeric + +final.ca = final[grepl("^ca", final$Sample),] +final.cg = final[grepl("^cg", final$Sample),] +final.cm = final[grepl("^cm", final$Sample),] + +if(nrow(final.ca) > 0){ + final.ca$Replicate = 1 +} + +if(nrow(final.cg) > 0){ + final.cg$Replicate = 1 +} + +if(nrow(final.cm) > 0){ + final.cm$Replicate = 1 +} + +#print(paste("nrow final:", nrow(final))) +#final2 = final +#final2$Sample = gsub("[0-9]", "", final2$Sample) +#final = rbind(final, final2) +#final$Replicate = 1 + +write.table(final.ca, output.file.ca, quote=F, sep="\t", row.names=F, col.names=T) +write.table(final.cg, output.file.cg, quote=F, sep="\t", row.names=F, col.names=T) +write.table(final.cm, output.file.cm, quote=F, sep="\t", row.names=F, col.names=T) + diff -r beaa487ecf43 -r 5ffd52fc35c4 new_imgt.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/new_imgt.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,29 @@ +args <- commandArgs(trailingOnly = TRUE) + +imgt.dir = args[1] +merged.file = args[2] +gene = args[3] + +merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F) + +if(gene != "-"){ + merged = merged[grepl(paste("^", gene, sep=""), merged$best_match),] +} else { + merged = merged[!grepl("unmatched", merged$best_match),] +} + +merged = merged[!grepl("unmatched", merged$best_match),] + +for(f in list.files(imgt.dir, pattern="*.txt$")){ + #print(paste("filtering", f)) + path = paste(imgt.dir, f, sep="") + dat = read.table(path, header=T, sep="\t", fill=T, quote="", stringsAsFactors=F, check.names=FALSE) + + dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,] + + if(nrow(dat) > 0 & grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." 
file + dat[,grepl("^FR1", names(dat))] = 0 + } + + write.table(dat, path, quote=F, sep="\t", row.names=F, col.names=T, na="") +} diff -r beaa487ecf43 -r 5ffd52fc35c4 pattern_plots.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pattern_plots.r Mon Dec 12 05:22:37 2016 -0500 @@ -0,0 +1,159 @@ +library(ggplot2) +library(reshape2) +library(scales) + +args <- commandArgs(trailingOnly = TRUE) + +input.file = args[1] #the data that's get turned into the "SHM overview" table in the html report "data_sum.txt" + +plot1.path = args[2] +plot1.png = paste(plot1.path, ".png", sep="") +plot1.txt = paste(plot1.path, ".txt", sep="") + +plot2.path = args[3] +plot2.png = paste(plot2.path, ".png", sep="") +plot2.txt = paste(plot2.path, ".txt", sep="") + +plot3.path = args[4] +plot3.png = paste(plot3.path, ".png", sep="") +plot3.txt = paste(plot3.path, ".txt", sep="") + +clean.output = args[5] + +dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1) + + + +classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE") +xyz = c("x", "y", "z") +new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep=".")) + +names(dat) = new.names + +clean.dat = dat +clean.dat = clean.dat[,c(paste(rep(classes, each=3), xyz, sep="."), paste("all", xyz, sep="."), paste("un", xyz, sep="."))] + +write.table(clean.dat, clean.output, quote=F, sep="\t", na="", row.names=T, col.names=NA) + +dat["RGYW.WRCY",] = colSums(dat[c(13,14),], na.rm=T) +dat["TW.WA",] = colSums(dat[c(15,16),], na.rm=T) + +data1 = dat[c("RGYW.WRCY", "TW.WA"),] + +data1 = data1[,names(data1)[grepl(".z", names(data1))]] +names(data1) = gsub("\\..*", "", names(data1)) + +data1 = melt(t(data1)) + +names(data1) = c("Class", "Type", "value") + +data1 = data1[order(data1$Type),] + +write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) + +p = ggplot(data1, aes(Class, value)) + 
geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) +p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4")) +#p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4")) +png(filename=plot1.png, width=480, height=300) +print(p) +dev.off() + +data2 = dat[c(1, 5:8),] + +data2 = data2[,names(data2)[grepl("\\.x", names(data2))]] +names(data2) = gsub(".x", "", names(data2)) + +data2["A/T",] = dat["Targeting of A T (%)",names(dat)[grepl("\\.z", names(dat))]] + +data2["G/C transitions",] = round(data2["Transitions at G C (%)",] / data2["Number of Mutations (%)",] * 100, 1) + +data2["mutation.at.gc",] = dat["Transitions at G C (%)",names(dat)[grepl("\\.y", names(dat))]] +data2["G/C transversions",] = round((data2["mutation.at.gc",] - data2["Transitions at G C (%)",]) / data2["Number of Mutations (%)",] * 100, 1) + +data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0 +data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0 +data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0 +data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0 + +data2 = melt(t(data2[c("A/T","G/C transitions","G/C transversions"),])) + +names(data2) = c("Class", "Type", "value") + +data2 = data2[order(data2$Type),] + +write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) + +p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") +p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = 
element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) +#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) +png(filename=plot2.png, width=480, height=300) +print(p) +dev.off() + +data3 = dat[c(5, 6, 8, 17:20),] +data3 = data3[,names(data3)[grepl("\\.x", names(data3))]] +names(data3) = gsub(".x", "", names(data3)) + +data3[is.na(data3)] = 0 +#data3[is.infinite(data3)] = 0 + +data3["G/C transitions",] = round(data3["Transitions at G C (%)",] / (data3["C",] + data3["G",]) * 100, 1) + +data3["G/C transversions",] = round((data3["Targeting of G C (%)",] - data3["Transitions at G C (%)",]) / (data3["C",] + data3["G",]) * 100, 1) + +data3["A/T",] = round(data3["Targeting of A T (%)",] / (data3["A",] + data3["T",]) * 100, 1) + +data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0 +data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0 + +data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0 +data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0 + +data3["A/T",is.nan(unlist(data3["A/T",]))] = 0 +data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0 + +data3 = melt(t(data3[8:10,])) +names(data3) = c("Class", "Type", "value") + +data3 = data3[order(data3$Type),] + +write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) + +p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) +p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C 
transitions" = "white")) +#p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) +png(filename=plot3.png, width=480, height=300) +print(p) +dev.off() + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/RScript.r --- a/report_clonality/RScript.r Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,845 +0,0 @@ -# ---------------------- load/install packages ---------------------- - -if (!("gridExtra" %in% rownames(installed.packages()))) { - install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") -} -library(gridExtra) -if (!("ggplot2" %in% rownames(installed.packages()))) { - install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") -} -library(ggplot2) -if (!("plyr" %in% rownames(installed.packages()))) { - install.packages("plyr", repos="http://cran.xl-mirror.nl/") -} -library(plyr) - -if (!("data.table" %in% rownames(installed.packages()))) { - install.packages("data.table", repos="http://cran.xl-mirror.nl/") -} -library(data.table) - -if (!("reshape2" %in% rownames(installed.packages()))) { - install.packages("reshape2", repos="http://cran.xl-mirror.nl/") -} -library(reshape2) - -if (!("lymphclon" %in% rownames(installed.packages()))) { - install.packages("lymphclon", repos="http://cran.xl-mirror.nl/") -} -library(lymphclon) - -# ---------------------- parameters ---------------------- - -args <- commandArgs(trailingOnly = TRUE) - -infile = args[1] #path to input file -outfile = args[2] #path to output file -outdir = args[3] #path to output folder (html/images/data) -clonaltype = args[4] #clonaltype definition, or 'none' for no unique filtering -ct = unlist(strsplit(clonaltype, ",")) -species = args[5] #human or mouse -locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD -filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? 
(yes/no) -clonality_method = args[8] - - -# ---------------------- Data preperation ---------------------- - -print("Report Clonality - Data preperation") - -inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") - -print(paste("nrows: ", nrow(inputdata))) - -setwd(outdir) - -# remove weird rows -inputdata = inputdata[inputdata$Sample != "",] - -print(paste("nrows: ", nrow(inputdata))) - -#remove the allele from the V,D and J genes -inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) -inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) -inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) - -print(paste("nrows: ", nrow(inputdata))) - -#filter uniques -inputdata.removed = inputdata[NULL,] - -print(paste("nrows: ", nrow(inputdata))) - -inputdata$clonaltype = 1:nrow(inputdata) - -#keep track of the count of sequences in samples or samples/replicates for the front page overview -input.sample.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample")]) -input.rep.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample", "Replicate")]) - -PRODF = inputdata -UNPROD = inputdata -if(filterproductive){ - if("Functionality" %in% colnames(inputdata)) { # "Functionality" is an IMGT column - #PRODF = inputdata[inputdata$Functionality == "productive" | inputdata$Functionality == "productive (see comment)", ] - PRODF = inputdata[inputdata$Functionality %in% c("productive (see comment)","productive"),] - - PRODF.count = data.frame(data.table(PRODF)[, list(count=.N), by=c("Sample")]) - - UNPROD = inputdata[inputdata$Functionality %in% c("unproductive (see comment)","unproductive"), ] - } else { - PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] - UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & 
inputdata$CDR3.Found.How != "NOT_FOUND" ), ] - } -} - -prod.sample.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample")]) -prod.rep.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample", "Replicate")]) - -unprod.sample.count = data.frame(data.table(UNPROD)[, list(Unproductive=.N), by=c("Sample")]) -unprod.rep.count = data.frame(data.table(UNPROD)[, list(Unproductive=.N), by=c("Sample", "Replicate")]) - -clonalityFrame = PRODF - -#remove duplicates based on the clonaltype -if(clonaltype != "none"){ - clonaltype = paste(clonaltype, ",Sample", sep="") #add sample column to clonaltype, unique within samples - PRODF$clonaltype = do.call(paste, c(PRODF[unlist(strsplit(clonaltype, ","))], sep = ":")) - PRODF = PRODF[!duplicated(PRODF$clonaltype), ] - - UNPROD$clonaltype = do.call(paste, c(UNPROD[unlist(strsplit(clonaltype, ","))], sep = ":")) - UNPROD = UNPROD[!duplicated(UNPROD$clonaltype), ] - - #again for clonalityFrame but with sample+replicate - clonalityFrame$clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(clonaltype, ","))], sep = ":")) - clonalityFrame$clonality_clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(paste(clonaltype, ",Replicate", sep=""), ","))], sep = ":")) - clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$clonality_clonaltype), ] -} - -print("SAMPLE TABLE:") -print(table(PRODF$Sample)) - -prod.unique.sample.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample")]) -prod.unique.rep.count = data.frame(data.table(PRODF)[, list(Productive_unique=.N), by=c("Sample", "Replicate")]) - -unprod.unique.sample.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample")]) -unprod.unique.rep.count = data.frame(data.table(UNPROD)[, list(Unproductive_unique=.N), by=c("Sample", "Replicate")]) - -PRODF$freq = 1 - -if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*" - PRODF$freq = 
gsub("^[0-9]+_", "", PRODF$ID) - PRODF$freq = gsub("_.*", "", PRODF$freq) - PRODF$freq = as.numeric(PRODF$freq) - if(any(is.na(PRODF$freq))){ #if there was an "_" in the ID, but not the frequency, go back to frequency of 1 for every sequence - PRODF$freq = 1 - } -} - - - -#write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive -write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) -write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) -write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) - -#write the samples to a file -sampleFile <- file("samples.txt") -un = unique(inputdata$Sample) -un = paste(un, sep="\n") -writeLines(un, sampleFile) -close(sampleFile) - -# ---------------------- Counting the productive/unproductive and unique sequences ---------------------- - -print("Report Clonality - counting productive/unproductive/unique") - -#create the table on the overview page with the productive/unique counts per sample/replicate -#first for sample -sample.count = merge(input.sample.count, prod.sample.count, by="Sample", all.x=T) -sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100) -sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T) -sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100) - -sample.count = merge(sample.count , unprod.sample.count, by="Sample", all.x=T) -sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100) -sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T) -sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100) - -#then sample/replicate -rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T) -rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100) -rep.count = 
merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T) -rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100) - -rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate"), all.x=T) -rep.count$perc_unprod = round(rep.count$Unproductive / rep.count$All * 100) -rep.count = merge(rep.count, unprod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T) -rep.count$perc_unprod_un = round(rep.count$Unproductive_unique / rep.count$All * 100) - -rep.count$Sample = paste(rep.count$Sample, rep.count$Replicate, sep="_") -rep.count = rep.count[,names(rep.count) != "Replicate"] - -count = rbind(sample.count, rep.count) - - - -write.table(x=count, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F) - -# ---------------------- V+J+CDR3 sequence count ---------------------- - -VJCDR3.count = data.frame(table(clonalityFrame$Top.V.Gene, clonalityFrame$Top.J.Gene, clonalityFrame$CDR3.Seq.DNA)) -names(VJCDR3.count) = c("Top.V.Gene", "Top.J.Gene", "CDR3.Seq.DNA", "Count") - -VJCDR3.count = VJCDR3.count[VJCDR3.count$Count > 0,] -VJCDR3.count = VJCDR3.count[order(-VJCDR3.count$Count),] - -write.table(x=VJCDR3.count, file="VJCDR3_count.txt", sep="\t",quote=F,row.names=F,col.names=T) - -# ---------------------- Frequency calculation for V, D and J ---------------------- - -print("Report Clonality - frequency calculation V, D and J") - -PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) -Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) -PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE) -PRODFV = ddply(PRODFV, c("Sample", "Top.V.Gene"), summarise, relFreq= (Length*100 / Total)) - -PRODFD = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.D.Gene")]) -Total = ddply(PRODFD, .(Sample), function(x) data.frame(Total = sum(x$Length))) -PRODFD = merge(PRODFD, Total, by.x='Sample', 
by.y='Sample', all.x=TRUE) -PRODFD = ddply(PRODFD, c("Sample", "Top.D.Gene"), summarise, relFreq= (Length*100 / Total)) - -PRODFJ = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.J.Gene")]) -Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) -PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) -PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) - -# ---------------------- Setting up the gene names for the different species/loci ---------------------- - -print("Report Clonality - getting genes for species/loci") - -Vchain = "" -Dchain = "" -Jchain = "" - -if(species == "custom"){ - print("Custom genes: ") - splt = unlist(strsplit(locus, ";")) - print(paste("V:", splt[1])) - print(paste("D:", splt[2])) - print(paste("J:", splt[3])) - - Vchain = unlist(strsplit(splt[1], ",")) - Vchain = data.frame(v.name = Vchain, chr.orderV = 1:length(Vchain)) - - Dchain = unlist(strsplit(splt[2], ",")) - if(length(Dchain) > 0){ - Dchain = data.frame(v.name = Dchain, chr.orderD = 1:length(Dchain)) - } else { - Dchain = data.frame(v.name = character(0), chr.orderD = numeric(0)) - } - - Jchain = unlist(strsplit(splt[3], ",")) - Jchain = data.frame(v.name = Jchain, chr.orderJ = 1:length(Jchain)) - -} else { - genes = read.table("genes.txt", sep="\t", header=TRUE, fill=T, comment.char="") - - Vchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "V",c("IMGT.GENE.DB", "chr.order")] - colnames(Vchain) = c("v.name", "chr.orderV") - Dchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "D",c("IMGT.GENE.DB", "chr.order")] - colnames(Dchain) = c("v.name", "chr.orderD") - Jchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "J",c("IMGT.GENE.DB", "chr.order")] - colnames(Jchain) = c("v.name", "chr.orderJ") -} -useD = TRUE -if(nrow(Dchain) == 0){ - useD = FALSE - cat("No D Genes in 
this species/locus") -} -print(paste(nrow(Vchain), "genes in V")) -print(paste(nrow(Dchain), "genes in D")) -print(paste(nrow(Jchain), "genes in J")) - -# ---------------------- merge with the frequency count ---------------------- - -PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) - -PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE) - -PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) - -# ---------------------- Create the V, D and J frequency plots and write the data.frame for every plot to a file ---------------------- - -print("Report Clonality - V, D and J frequency plots") - -pV = ggplot(PRODFV) -pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) -pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") -write.table(x=PRODFV, file="VFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) - -png("VPlot.png",width = 1280, height = 720) -pV -dev.off(); - -if(useD){ - pD = ggplot(PRODFD) - pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) - pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") - write.table(x=PRODFD, file="DFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) - - png("DPlot.png",width = 800, height = 600) - print(pD) - dev.off(); -} - -pJ = ggplot(PRODFJ) -pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) -pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") -write.table(x=PRODFJ, file="JFrequency.csv", 
sep=",",quote=F,row.names=F,col.names=T) - -png("JPlot.png",width = 800, height = 600) -pJ -dev.off(); - -pJ = ggplot(PRODFJ) -pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) -pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") -write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) - -png("JPlot.png",width = 800, height = 600) -pJ -dev.off(); - -# ---------------------- Now the frequency plots of the V, D and J families ---------------------- - -print("Report Clonality - V, D and J family plots") - -VGenes = PRODF[,c("Sample", "Top.V.Gene")] -VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene) -VGenes = data.frame(data.table(VGenes)[, list(Count=.N), by=c("Sample", "Top.V.Gene")]) -TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample]) -VGenes = merge(VGenes, TotalPerSample, by="Sample") -VGenes$Frequency = VGenes$Count * 100 / VGenes$total -VPlot = ggplot(VGenes) -VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + - ggtitle("Distribution of V gene families") + - ylab("Percentage of sequences") -png("VFPlot.png") -VPlot -dev.off(); -write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) - -if(useD){ - DGenes = PRODF[,c("Sample", "Top.D.Gene")] - DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) - DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) - TotalPerSample = data.frame(data.table(DGenes)[, list(total=sum(.SD$Count)), by=Sample]) - DGenes = merge(DGenes, TotalPerSample, by="Sample") - DGenes$Frequency = DGenes$Count * 100 / DGenes$total - DPlot = ggplot(DGenes) - DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y 
= Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + - ggtitle("Distribution of D gene families") + - ylab("Percentage of sequences") - png("DFPlot.png") - print(DPlot) - dev.off(); - write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) -} - -JGenes = PRODF[,c("Sample", "Top.J.Gene")] -JGenes$Top.J.Gene = gsub("-.*", "", JGenes$Top.J.Gene) -JGenes = data.frame(data.table(JGenes)[, list(Count=.N), by=c("Sample", "Top.J.Gene")]) -TotalPerSample = data.frame(data.table(JGenes)[, list(total=sum(.SD$Count)), by=Sample]) -JGenes = merge(JGenes, TotalPerSample, by="Sample") -JGenes$Frequency = JGenes$Count * 100 / JGenes$total -JPlot = ggplot(JGenes) -JPlot = JPlot + geom_bar(aes( x = Top.J.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + - ggtitle("Distribution of J gene families") + - ylab("Percentage of sequences") -png("JFPlot.png") -JPlot -dev.off(); -write.table(x=JGenes, file="JFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) - -# ---------------------- Plotting the cdr3 length ---------------------- - -print("Report Clonality - CDR3 length plot") - -CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length.DNA")]) -TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample]) -CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample") -CDR3Length$Frequency = CDR3Length$Count * 100 / CDR3Length$total -CDR3LengthPlot = ggplot(CDR3Length) -CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length.DNA, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + - ggtitle("Length distribution of CDR3") + - xlab("CDR3 Length") + - ylab("Percentage of sequences") -png("CDR3LengthPlot.png",width = 1280, height = 720) -CDR3LengthPlot 
-dev.off() -write.table(x=CDR3Length, file="CDR3LengthPlot.csv", sep=",",quote=F,row.names=F,col.names=T) - -# ---------------------- Plot the heatmaps ---------------------- - -#get the reverse order for the V and D genes -revVchain = Vchain -revDchain = Dchain -revVchain$chr.orderV = rev(revVchain$chr.orderV) -revDchain$chr.orderD = rev(revDchain$chr.orderD) - -if(useD){ - print("Report Clonality - Heatmaps VD") - plotVD <- function(dat){ - if(length(dat[,1]) == 0){ - return() - } - - img = ggplot() + - geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + - theme(axis.text.x = element_text(angle = 90, hjust = 1)) + - scale_fill_gradient(low="gold", high="blue", na.value="white") + - ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + - xlab("D genes") + - ylab("V Genes") - - png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) - print(img) - dev.off() - write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) - } - - VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) - - VandDCount$l = log(VandDCount$Length) - maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")]) - VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T) - VandDCount$relLength = VandDCount$l / VandDCount$max - - cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name) - - completeVD = merge(VandDCount, cartegianProductVD, by.x=c("Top.V.Gene", "Top.D.Gene"), by.y=c("Top.V.Gene", "Top.D.Gene"), all=TRUE) - - completeVD = merge(completeVD, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) - - completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", 
by.y="v.name", all.x=TRUE) - - fltr = is.nan(completeVD$relLength) - if(all(fltr)){ - completeVD[fltr,"relLength"] = 0 - } - - VDList = split(completeVD, f=completeVD[,"Sample"]) - lapply(VDList, FUN=plotVD) -} - -print("Report Clonality - Heatmaps VJ") - -plotVJ <- function(dat){ - if(length(dat[,1]) == 0){ - return() - } - cat(paste(unique(dat[3])[1,1])) - img = ggplot() + - geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + - theme(axis.text.x = element_text(angle = 90, hjust = 1)) + - scale_fill_gradient(low="gold", high="blue", na.value="white") + - ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + - xlab("J genes") + - ylab("V Genes") - - png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) - print(img) - dev.off() - write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) -} - -VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) - -VandJCount$l = log(VandJCount$Length) -maxVJ = data.frame(data.table(VandJCount)[, list(max=max(l)), by=c("Sample")]) -VandJCount = merge(VandJCount, maxVJ, by.x="Sample", by.y="Sample", all.x=T) -VandJCount$relLength = VandJCount$l / VandJCount$max - -cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name) - -completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) -completeVJ = merge(completeVJ, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) -completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) - -fltr = is.nan(completeVJ$relLength) -if(any(fltr)){ - completeVJ[fltr,"relLength"] = 1 -} - -VJList = split(completeVJ, f=completeVJ[,"Sample"]) -lapply(VJList, FUN=plotVJ) - - - -if(useD){ - 
print("Report Clonality - Heatmaps DJ") - plotDJ <- function(dat){ - if(length(dat[,1]) == 0){ - return() - } - img = ggplot() + - geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=relLength)) + - theme(axis.text.x = element_text(angle = 90, hjust = 1)) + - scale_fill_gradient(low="gold", high="blue", na.value="white") + - ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + - xlab("J genes") + - ylab("D Genes") - - png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) - print(img) - dev.off() - write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) - } - - - DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) - - DandJCount$l = log(DandJCount$Length) - maxDJ = data.frame(data.table(DandJCount)[, list(max=max(l)), by=c("Sample")]) - DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T) - DandJCount$relLength = DandJCount$l / DandJCount$max - - cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name) - - completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) - completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) - completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) - - fltr = is.nan(completeDJ$relLength) - if(any(fltr)){ - completeDJ[fltr, "relLength"] = 1 - } - - DJList = split(completeDJ, f=completeDJ[,"Sample"]) - lapply(DJList, FUN=plotDJ) -} - - -# ---------------------- output tables for the circos plots ---------------------- - -print("Report Clonality - Circos data") - -for(smpl in unique(PRODF$Sample)){ - PRODF.sample = PRODF[PRODF$Sample == smpl,] - - fltr = 
PRODF.sample$Top.V.Gene == "" - if(any(fltr, na.rm=T)){ - PRODF.sample[fltr, "Top.V.Gene"] = "NA" - } - - fltr = PRODF.sample$Top.D.Gene == "" - if(any(fltr, na.rm=T)){ - PRODF.sample[fltr, "Top.D.Gene"] = "NA" - } - - fltr = PRODF.sample$Top.J.Gene == "" - if(any(fltr, na.rm=T)){ - PRODF.sample[fltr, "Top.J.Gene"] = "NA" - } - - v.d = table(PRODF.sample$Top.V.Gene, PRODF.sample$Top.D.Gene) - v.j = table(PRODF.sample$Top.V.Gene, PRODF.sample$Top.J.Gene) - d.j = table(PRODF.sample$Top.D.Gene, PRODF.sample$Top.J.Gene) - - write.table(v.d, file=paste(smpl, "_VD_circos.txt", sep=""), sep="\t", quote=F, row.names=T, col.names=NA) - write.table(v.j, file=paste(smpl, "_VJ_circos.txt", sep=""), sep="\t", quote=F, row.names=T, col.names=NA) - write.table(d.j, file=paste(smpl, "_DJ_circos.txt", sep=""), sep="\t", quote=F, row.names=T, col.names=NA) -} - -# ---------------------- calculating the clonality score ---------------------- - -if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available -{ - print("Report Clonality - Clonality") - write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) - if(clonality_method == "boyd"){ - samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) - - for (sample in samples){ - res = data.frame(paste=character(0)) - sample_id = unique(sample$Sample)[[1]] - for(replicate in unique(sample$Replicate)){ - tmp = sample[sample$Replicate == replicate,] - clone_table = data.frame(table(tmp$clonaltype)) - clone_col_name = paste("V", replicate, sep="") - colnames(clone_table) = c("paste", clone_col_name) - res = merge(res, clone_table, by="paste", all=T) - } - - res[is.na(res)] = 0 - infer.result = infer.clonality(as.matrix(res[,2:ncol(res)])) - - print(infer.result) - - write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F) - - res$type = 
rowSums(res[,2:ncol(res)]) - - coincidence.table = data.frame(table(res$type)) - colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq") - write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) - } - } else { - clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) - - #write files for every coincidence group of >1 - samples = unique(clonalFreq$Sample) - for(sample in samples){ - clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,] - if(max(clonalFreqSample$Type) > 1){ - for(i in 2:max(clonalFreqSample$Type)){ - clonalFreqSampleType = clonalFreqSample[clonalFreqSample$Type == i,] - clonalityFrame.sub = clonalityFrame[clonalityFrame$clonaltype %in% clonalFreqSampleType$clonaltype,] - clonalityFrame.sub = clonalityFrame.sub[order(clonalityFrame.sub$clonaltype),] - write.table(clonalityFrame.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) - } - } - } - - clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) - clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count - clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) - clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") - - ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15') - tcct = textConnection(ct) - CT = read.table(tcct, sep="\t", header=TRUE) - close(tcct) - clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) - clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight - - ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")]) - ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) - clonalFreqCount$Reads = 
as.numeric(clonalFreqCount$Reads) - ReplicateReads$Reads = as.numeric(ReplicateReads$Reads) - ReplicateReads$squared = as.numeric(ReplicateReads$Reads * ReplicateReads$Reads) - - ReplicatePrint <- function(dat){ - write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) - } - - ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) - lapply(ReplicateSplit, FUN=ReplicatePrint) - - ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) - clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) - - ReplicateSumPrint <- function(dat){ - write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) - } - - ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) - lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) - - clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) - clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) - clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow - clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) - clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1) - - ClonalityScorePrint <- function(dat){ - write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) - } - - clonalityScore = clonalFreqCount[c("Sample", "Result")] - clonalityScore = unique(clonalityScore) - - clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"]) - lapply(clonalityScoreSplit, FUN=ClonalityScorePrint) - - 
clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")] - - - - ClonalityOverviewPrint <- function(dat){ - dat = dat[order(dat[,2]),] - write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) - } - - clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) - lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) - } -} - -bak = PRODF - -imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") -if(all(imgtcolumns %in% colnames(inputdata))) -{ - print("found IMGT columns, running junction analysis") - - if(locus %in% c("IGK","IGL", "TRA", "TRG")){ - print("VJ recombination, no filtering on absent D") - } else { - print("VDJ recombination, using N column for junction analysis") - fltr = nchar(PRODF$Top.D.Gene) < 4 - print(paste("Removing", sum(fltr), "sequences without a identified D")) - PRODF = PRODF[!fltr,] - } - - - #ensure certain columns are in the data (files generated with older versions of IMGT Loader) - col.checks = c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb") - for(col.check in col.checks){ - if(!(col.check %in% names(PRODF))){ - print(paste(col.check, "not found adding new column")) - if(nrow(PRODF) > 0){ #because R is anoying... 
- PRODF[,col.check] = 0 - } else { - PRODF = cbind(PRODF, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0))) - } - if(nrow(UNPROD) > 0){ - UNPROD[,col.check] = 0 - } else { - UNPROD = cbind(UNPROD, data.frame(N3.REGION.nt.nb=numeric(0), N4.REGION.nt.nb=numeric(0))) - } - } - } - - num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } - - newData = data.frame(data.table(PRODF)[,list(unique=.N, - VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), - P1=mean(.SD$P3V.nt.nb, na.rm=T), - N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), - P2=mean(.SD$P5D.nt.nb, na.rm=T), - DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), - DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), - P3=mean(.SD$P3D.nt.nb, na.rm=T), - N2=mean(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), - P4=mean(.SD$P5J.nt.nb, na.rm=T), - DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), - Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), - Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), - Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=median(.SD$CDR3.Length.DNA)), - by=c("Sample")]) - newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) - write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) - - newData = data.frame(data.table(PRODF)[,list(unique=.N, - VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), - P1=num_median(.SD$P3V.nt.nb, na.rm=T), - N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), - P2=num_median(.SD$P5D.nt.nb, na.rm=T), - DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), - 
DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), - P3=num_median(.SD$P3D.nt.nb, na.rm=T), - N2=num_median(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), - P4=num_median(.SD$P5J.nt.nb, na.rm=T), - DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), - Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), - Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), - Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=median(.SD$CDR3.Length.DNA)), - by=c("Sample")]) - newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) - write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) - - newData = data.frame(data.table(UNPROD)[,list(unique=.N, - VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), - P1=mean(.SD$P3V.nt.nb, na.rm=T), - N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), - P2=mean(.SD$P5D.nt.nb, na.rm=T), - DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), - DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), - P3=mean(.SD$P3D.nt.nb, na.rm=T), - N2=mean(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), - P4=mean(.SD$P5J.nt.nb, na.rm=T), - DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), - Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), - Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), - Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), 
with=F], na.rm=T)), - Median.CDR3.l=median(.SD$CDR3.Length.DNA)), - by=c("Sample")]) - newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) - write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) - - newData = data.frame(data.table(UNPROD)[,list(unique=.N, - VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), - P1=num_median(.SD$P3V.nt.nb, na.rm=T), - N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), - P2=num_median(.SD$P5D.nt.nb, na.rm=T), - DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), - DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), - P3=num_median(.SD$P3D.nt.nb, na.rm=T), - N2=num_median(rowSums(.SD[,c("N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), - P4=num_median(.SD$P5J.nt.nb, na.rm=T), - DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), - Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), - Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), - Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=median(.SD$CDR3.Length.DNA)), - by=c("Sample")]) - - newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) - write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) -} - -PRODF = bak - - -# ---------------------- D reading frame ---------------------- - -D.REGION.reading.frame = PRODF$D.REGION.reading.frame - -D.REGION.reading.frame[is.na(D.REGION.reading.frame)] = "No D" - -D.REGION.reading.frame = data.frame(table(D.REGION.reading.frame)) - -write.table(D.REGION.reading.frame, "DReadingFrame.csv" , 
sep="\t",quote=F,row.names=F,col.names=T) - -D.REGION.reading.frame = ggplot(D.REGION.reading.frame) -D.REGION.reading.frame = D.REGION.reading.frame + geom_bar(aes( x = D.REGION.reading.frame, y = Freq), stat='identity', position='dodge' ) + ggtitle("D reading frame") + xlab("Frequency") + ylab("Frame") - -png("DReadingFrame.png") -D.REGION.reading.frame -dev.off() - - - - -# ---------------------- AA composition in CDR3 ---------------------- - -AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] - -TotalPerSample = data.frame(data.table(AACDR3)[, list(total=sum(nchar(as.character(.SD$CDR3.Seq)))), by=Sample]) - -AAfreq = list() - -for(i in 1:nrow(TotalPerSample)){ - sample = TotalPerSample$Sample[i] - AAfreq[[i]] = data.frame(table(unlist(strsplit(as.character(AACDR3[AACDR3$Sample == sample,c("CDR3.Seq")]), "")))) - AAfreq[[i]]$Sample = sample -} - -AAfreq = ldply(AAfreq, data.frame) -AAfreq = merge(AAfreq, TotalPerSample, by="Sample", all.x = T) -AAfreq$freq_perc = as.numeric(AAfreq$Freq / AAfreq$total * 100) - - -AAorder = read.table(sep="\t", header=TRUE, text="order.aa\tAA\n1\tR\n2\tK\n3\tN\n4\tD\n5\tQ\n6\tE\n7\tH\n8\tP\n9\tY\n10\tW\n11\tS\n12\tT\n13\tG\n14\tA\n15\tM\n16\tC\n17\tF\n18\tL\n19\tV\n20\tI") -AAfreq = merge(AAfreq, AAorder, by.x='Var1', by.y='AA', all.x=TRUE) - -AAfreq = AAfreq[!is.na(AAfreq$order.aa),] - -AAfreqplot = ggplot(AAfreq) -AAfreqplot = AAfreqplot + geom_bar(aes( x=factor(reorder(Var1, order.aa)), y = freq_perc, fill = Sample), stat='identity', position='dodge' ) -AAfreqplot = AAfreqplot + annotate("rect", xmin = 0.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) -AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) -AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) -AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) -AAfreqplot = 
AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") - -png("AAComposition.png",width = 1280, height = 720) -AAfreqplot -dev.off() -write.table(AAfreq, "AAComposition.csv" , sep=",",quote=F,na="-",row.names=F,col.names=T) - - diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/LTe50046.ttf Binary file report_clonality/circos/LTe50046.ttf has changed diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/LTe50048.ttf Binary file report_clonality/circos/LTe50048.ttf has changed diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/LTe50050.ttf Binary file report_clonality/circos/LTe50050.ttf has changed diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/LTe50054.ttf Binary file report_clonality/circos/LTe50054.ttf has changed diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/circos.conf --- a/report_clonality/circos/circos.conf Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,148 +0,0 @@ - -# This is the main configuration file for the Circos tableviewer. This file also -# depends on colors.conf (definition on basic colors), ideogram.conf (size and spacing of -# row/cell segments), and ticks.conf (tick spacing and label definitions - these are turned -# off by default). -# -# In addition to these configuration files, the bin/make-conf script creates -# colors.conf (colors of row/col segments) and colors_percentile.conf (colors based on -# cell percentile values). These configuration files are also included via the <> directive. -# -# Some elements of the output image are toggled off by default (e.g. row and column highlights, -# anchor links to segment labels, tick marks). 
- - -<> -<> -<> - - - -<> - - -<> -<> - -karyotype = DATA_DIR/karyotype.txt - - -dir = DATA_DIR -file = circos.png -24bit = yes -svg = no -png = yes -pdf = no -# radius of inscribed circle in image -radius = 1500p -background = white -# by default angle=0 is at 3 o'clock position -angle_offset = -180 -auto_alpha_colors = yes -auto_alpha_steps = 5 - - -chromosomes_units = 10 -chromosomes_display_default = yes -chromosomes_order_by_karyotype = yes - - - -show = yes - - -show = no -file = DATA_DIR/row.txt -r0 = 1r+200p -r1 = 1r+220p -stroke_color = black -stroke_thickness = 2 - - - -show = no -file = DATA_DIR/col.txt -r0 = 1r+230p -r1 = 1r+250p -stroke_color = black -stroke_thickness = 2 - - - -show = no -file = DATA_DIR/all.txt -r0 = 1r+10p -r1 = 1r+35p -stroke_color = black -stroke_thickness = 2 - - - - - - - -type = text -file = DATA_DIR/segmentlabel.txt -label_font = condensedbold -color = black -label_size = 30p -r0 = 1r+50p -r1 = 1r+500p -rpadding = 0p -padding = 0p - -show_links = no -link_dims = 0p,10p,32p,10p,5p -link_thickness = 3p -link_color = black - -label_snuggle = no -# shift label up to its height in pixels in the angular direction -max_snuggle_distance = 2r -snuggle_sampling = 2 -snuggle_tolerance = 0.25r - - - - - - - - -ribbon = yes -flat = yes -file = DATA_DIR/cells.txt -bezier_radius = 0.0r -radius = 0.999r-15p -thickness = 1 -color = grey -stroke_color = black -stroke_thickness = 1 - - - -importance = 95 -condition = 1 -radius1 = 0.999r+2p -flow = continue - - - - - - - - -#anglestep = 0.5 -#minslicestep = 10 -#beziersamples = 40 -#debug = no -#warnings = no -#imagemap = no - -#units_ok = bupr -#units_nounit = n - -<> - diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/circos.tar.gz Binary file report_clonality/circos/circos.tar.gz has changed diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/etc_colors.conf --- a/report_clonality/circos/etc_colors.conf Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 
+0000 @@ -1,142 +0,0 @@ -# RGB color definition. Colors are refered to within configuration files -# by their name. In order to use a color, you must define it here. -# -# e.g. if you really must use 'bisque', then add -# -# bisque = 255,228,196 -# -# Many useful colors are already defined. In general, given a HUE, these -# colors are defined -# -# vlHUE (very light HUE, e.g. vlred) -# lHUE (light HUE, e.g. red) -# HUE (e.g. red) -# dHUE (dark HUE, e.g. dred) -# -# In addition to hues, two other color groups are defined. -# -# - cytogenetic band colors (e.g. gposNNN, acen, stalk, etc.) which -# correspond to colors on ideogram bands -# - UCSC chromosome color palette (e.g. chrNN, chrUn, chrNA) - -optblue = 55,133,221 -optgreen = 55,221,125 -optyellow = 221,215,55 -optorange = 221,164,55 -optred = 221,55,55 -optviolet = 145,55,221 -optpurple = 219,55,221 - -white = 255,255,255 -vvvvlgrey = 250,250,250 -vvvlgrey = 240,240,240 -vvlgrey = 230,230,230 -vlgrey = 220,220,220 -lgrey = 210,210,210 -grey = 200,200,200 -dgrey = 170,170,170 -vdgrey = 140,140,140 -vvdgrey = 100,100,100 -vvvdgrey = 70,70,70 -vvvvdgrey = 40,40,40 -black = 0,0,0 - -vlred = 255,193,200 -lred = 255,122,137 -red = 247,42,66 -dred = 205,51,69 - -vlgreen = 204,255,218 -lgreen = 128,255,164 -green = 51,204,94 -dgreen = 38,153,71 - -vlblue = 128,176,255 -lblue = 64,137,255 -blue = 54,116,217 -dblue = 38,82,153 - -vlpurple= 242,128,255 -lpurple = 236,64,255 -purple = 189,51,204 -dpurple = 118,32,128 - -vlyellow = 255,253,202 -lyellow = 255,252,150 -yellow = 255,255,0 -dyellow = 191,186,48 - -lime = 186,255,0 - -vlorange = 255,228,193 -lorange = 255,187,110 -orange = 255,136,0 -dorange = 221,143,55 - -# karyotype colors - -gpos100 = 0,0,0 -gpos = 0,0,0 -gpos75 = 130,130,130 -gpos66 = 160,160,160 -gpos50 = 200,200,200 -gpos33 = 210,210,210 -gpos25 = 200,200,200 -gvar = 220,220,220 -gneg = 255,255,255 -acen = 217,47,39 -stalk = 100,127,164 - -# others - -select = 135,177,255 - -# new york times cmyk-safe - 
-# roygbiv - normal -nyt_blue = 104,152,178 -nyt_green = 137,129,96 -nyt_yellow = 241,221,117 -nyt_orange = 230,146,57 -nyt_red = 217,47,39 - -# chromosome color map (UCSC) - -chr1 = 153,102,0 -chr2 = 102,102,0 -chr3 = 153,153,30 -chr4 = 204,0,0 -chr5 = 255,0,0 -chr6 = 255,0,204 -chr7 = 255,204,204 -chr8 = 255,153,0 -chr9 = 255,204,0 -chr10 = 255,255,0 -chr11 = 204,255,0 -chr12 = 0,255,0 -chr13 = 53,128,0 -chr14 = 0,0,204 -chr15 = 102,153,255 -chr16 = 153,204,255 -chr17 = 0,255,255 -chr18 = 204,255,255 -chr19 = 153,0,204 -chr20 = 204,51,255 -chr21 = 204,153,255 -chr22 = 102,102,102 -chr23 = 153,153,153 -chrX = 153,153,153 -chr24 = 204,204,204 -chrY = 204,204,204 -chrM = 204,204,153 -chr0 = 204,204,153 -chrUn = 121,204,61 -chrNA = 255,255,255 - - - - - - - - diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/fonts.conf --- a/report_clonality/circos/fonts.conf Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ - -default = LTe50046.ttf -normal = LTe50046.ttf -bold = LTe50048.ttf -condensed = LTe50050.ttf -condensedbold = LTe50054.ttf -mono = pragmata.ttf -glyph = wingding.ttf diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/housekeeping.conf --- a/report_clonality/circos/housekeeping.conf Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,230 +0,0 @@ -# Housekeeping parameters, which must be included -# at the top level of the configuration. -# -# Don't adjust these, unless you know what you are doing, or -# feel like experimenting - -anglestep = 0.5 -minslicestep = 10 -beziersamples = 40 # bezier curves are drawn piece-wise - # linear, with this many samples -debug = no -warnings = no -imagemap = no -paranoid = yes - -units_ok = bupr -units_nounit = n - -# \t tab -# \s any whitespace -file_delim = \s -# collapse adjacent whitespace -# e.g. 
two spaces are treated as one, not as a missing field -file_delim_collapse = yes - -# Record delimiter for parameter values that are lists, such as -# hs1:0.25;hs2:0.10. By default, both ; and , are accepted -# -# e.g. hs1:0.25,hs2:0.10 -# hs1:0.25;hs2:0.10 -list_record_delim = \s*[;,]\s* -# Field delimiter specifies the assignment operator, e.g. -list_field_delim = \s*[:=]\s*] - -# Rule fields and other parameters accept var(VARIABLE) syntax -# to reference parameters of data points. By default, if -# VARIABLE does not exist, Circos quits with an error, unless -# the skip parameter below is set. -# -# This feature is useful when you have data that don't always -# have the same options. For example, -# -# chr1 10 20 a=10 -# chr1 50 60 b=10 -skip_missing_expression_vars = no - -# In old versions, data point parameters were referenced using _NAME_ -# syntax. This has been replaced with var(NAME). The _NAME_ syntax is -# deprecated (for example, it will break when dealing with fields like -# gene_a_1). If you must use it, set the parameter below. - -legacy_underline_expression_syntax = no - -# Magnification factor for text in SVG files. 
-svg_font_scale = 1.3 - -# default font - pick one of the keys from block -default_font = default -# default font name is used for SVG files for cases where -# the font definition does not include a name -# see etc/fonts.conf for details -default_font_name = Arial -default_font_color = black - -# default color for cases when color is not specified -default_color = black - - -thickness = 1 -size = 5 -type = outline - -all = no -ideogram = no -ideogram_label = no - - -default = lblue -text = red - - - -# Receive debug messages about actions -# -# Comma-separated list of one or more of the following -# -# summary - top level indications of what's happening -# chrfilter - ideogram filtering (parsing 'chromosomes' parameter) -# conf - configuration file -# counter - plot counters -# spacing - ideogram spacing -# scale - ideogram scaling -# ideogram - ideogram reporting -# anglepos - report angle positions for base pair coordinates -# zoom - zoom regions -# layers - specific plot z-layers -# rules - dynamic rules -# text - text tracks -# heatmap - detailed heatmap reports -# brush - brushes -# color - color allocation and definition -# ticks - tick marks and labels -# timers - some benchmark timings -# cache - caches -# _all - turn on all groups -debug_group = summary - -# run length duration after which timing report is automatically -# generated at the end of the run -debug_auto_timer_report = 30 - -debug_word_separator = " " -debug_undef_text = _undef_ -debug_empty_text = _emptylist_ - -# parameters passed to functions can be -# validated to check consistency. turn this -# off to speed things up -debug_validate = yes - -# Reformat numbers in debug output for consistency. -# If you have a lot of debug output (e.g. -debug_group _all) -# this will slow things considerably. 
-debug_output_tidy = no - -# pixel sub-sampling for text tracks -text_pixel_subsampling = 1 -# array or span - use 'span' if applying snuggle refinement -text_snuggle_method = array - -# restrict names of parameters? -# if 'yes' then only certain named parameters are allowed within -# blocks and option fields for data -# -# if 'no' then you can define parameters with any name what-so-ever, -# useful if you wish to define states or labels for your data -# -# e.g. hs1 10 20 0.5 paired=yes,special=no,myvar=0.5 -# -# ordinarily, 'paired', 'special' and 'myvar' would not be allowed -restrict_parameter_names = no - -# Unless set to 'yes', parameter names will be converted to lowercase -case_sensitive_parameter_names = no - -# The location of configuration and data files will be guessed if -# (a) configuration file is not specified -# (b) data file paths are relative -# Circos will look in the following locations, where -# SCRIPTPATH is the location of the 'circos' script (e.g. /usr/local/bin) and -# CWD is the current directory (where the 'circos' command was executed). -# All paths under CWD will be scanned first, then under SCRIPTPATH. -# -# {CWD,SCRIPTPATH}/. -# {CWD,SCRIPTPATH}/.. -# {CWD,SCRIPTPATH}/etc/ -# {CWD,SCRIPTPATH}/../etc -# {CWD,SCRIPTPATH}/../../etc -# {CWD,SCRIPTPATH}/data -# {CWD,SCRIPTPATH}/../data -# {CWD,SCRIPTPATH}/../../data -# -# If you would like to prepend this list with custom directories for -# data files, enter them as a CSV list here -# data_path = /home/martink/circos-tutorials -# If the cache is static, it will always be used and will not be updated -# unless it is deleted (use -color_cache_rebuild on the command line). 
-# Otherwise, the cache will be updated if -# - config file is newer than cache file -# - list of colors in config file is different than in cache file -color_cache_static = yes -color_cache_file = circos.colorlist -color_lists_use = yes -# if the directory is not defined, then the system will guess a temporary -# directory compatible with your operating system (using File::Temp) -# color_cache_dir = /tmp - -# Make some functions faster. This should always be 'yes' unless you -# want things to run slowly or suspect deep issues. -memoize = yes - -# This is a debugging flag and should be set to 'no' for regular use -quit_on_dump = yes - -offsets = 0,0 - -# Maximum number of image and data elements. If these are exceeded, -# Circos will quit with an error. These values are arbitrary, but in -# my experience images with significantly more data points than this -# are uninterpretable. - -max_ticks = 5000 -max_ideograms = 200 -max_links = 25000 -max_points_per_track = 25000 - -# What to do when data is found for an ideogram that does not appear in the karyotype file. - -# Set to 'skip' or 'exit' -undefined_ideogram = skip - -# Number of iterations for determining ideogram sizes when -# relative scale is used. -relative_scale_iterations = 10 - -# min, max, average, mode - based on scale statistics of ALL ideograms -# minadj, maxadj, averageadj, modeadj - based on scale statistics of adjacent ideograms -# -# You can specify a fixed scale for spacing using a floating value -# -# e.g. relative_scale_spacing = 1.5 -relative_scale_spacing = mode - -# What to do with out-of-range data. Either 'clip' or 'hide' -data_out_of_range = clip - -# Track default directory -track_defaults = etc/tracks - -# Use round brushes for elements with thickness greater than round_brush_min_thickness? -round_brush_use = yes -round_brush_min_thickness = 5 - -# Use anti aliasing, where possible? I've seen bugs in some gd libraries -# that cause artefacts to appear when lines are anti-aliased. 
If your -# image contains unexpected elements, turn aa off. -anti_aliasing = yes - -# A parameter that must be set. Checks whether this file was imported. -housekeeping = yes diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/ideogram.conf --- a/report_clonality/circos/ideogram.conf Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ - - - - - -default = 0.0025r - - - -thickness = 50p -stroke_thickness = 0 -# ideogram border color -stroke_color = black -fill = yes -# the default chromosome color is set here and any value -# defined in the karyotype file overrides it -fill_color = black - -# fractional radius position of chromosome ideogram within image -radius = 0.85r -show_label = no -label_font = condensedbold -label_radius = 0.99r -label_size = 36 - -# cytogenetic bands -band_stroke_thickness = 2 - -# show_bands determines whether the outline of cytogenetic bands -# will be seen -show_bands = no -# in order to fill the bands with the color defined in the karyotype -# file you must set fill_bands -fill_bands = yes - - - diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/parse-table.conf --- a/report_clonality/circos/parse-table.conf Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,391 +0,0 @@ - -################################################################ -# -# This is a fairly complicated configuration file. Take your time in -# experimenting and adjust one thing at a time :) -# -################################################################ - -max_col_num = 200 -max_row_num = 200 - -# skip this many rows before reading in header and data -skip_rows = 0 - -# is there a header line that identifies the columns? -header = yes - -# is there a row that specifies the order of columns in the image? 
-# - if so, this must be the first line of the header -# - if the line exists (col_order_row=yes), employ the use_col_order_row to toggle whether it is used -col_order_row = no -use_col_order_row = no - -# is there a row that specifies the size of columns in the image? -# - if so, this must be the next line of the header -# - if the line exists (col_size_row=yes), employ the use_col_size_row to toggle whether it is used -col_size_row = no -use_col_size_row = no - -# is there a row that specifies the color of each column segment in the image? -# - if so, this must be the next line of the header -# - if the line exists (col_color_row=yes), employ the use_col_color_row to toggle whether it is used -col_color_row = no -use_col_color_row = no - -# is there a column that specifies the order of rows in the image? -# - if so, this must be the first column -# - if the line exists (row_order_col=yes), employ the use_row_order_col to toggle whether it is used -row_order_col = no -use_row_order_col = no - -# is there a column that specifies the color of each row segment in the image? -# - if so, this must be the second column -# - if the line exists (row_color_col=yes), employ the use_row_color_col to toggle whether it is used -row_color_col = no -use_row_color_col = no - -# if you do not have a column/row that explicitly defines order -# of segments in the image, you can set this here. Use one (or more) of -# these values to specify how segments should be ordered. 
-# - row_major (row segments first, then column) -# - col_major (col segments first, then row) -# - ascii (asciibetic order) -# - row_size (total of rows for the segment - useful if the segment has both row and column contributions) -# - col_size (total of colums for the segment - useful if the segment has both row and column contributions) -# - row_to_col_ratio (ratio of total of rows to columns for the segment) -# - col_to_row_ratio (ratio of total of rows to columns for the segment) -# - size_asc (size, in ascending order) -# - size_desc (size, in descending order) - -#segment_order = row_to_col_ratio,size_desc # col_major,size_desc -#segment_order = size_desc -segment_order = row_major,size_desc -#segment_order = ascii -#segment_order = file:etc/order-by-table-remapped.txt -#segment_order = size_desc,row_to_col_ratio -segment_color_order = row_major,size_desc - -# values for segments can be normalized if the use_segment_normalization is set to yes -use_segment_normalization = no - -# the normalization function can be one of the following, and is applied to -# all values that correspond to the segment's label -# total - sum of cell values for the segment label (row and col) -# average - average of cell values for the segment label (row and col) -# row_total, row_average - sum or average for cell values for the segment row -# col_total, col_average - sum or average for cell values for the segment col -# row_size, col_size, total_size - based on the optional size column (see col_size_row and row_size_col above) -# VALUE - segments are scaled to a constant VALUE (e.g. 1000) -segment_normalization_function = 1000 - -# normalization can be performed by either altering the actual data values or -# by applying a visual scaling of the segments. When 'value' is used, the data -# is changed. 
When 'visual' is used, then a chromosomes_scale line is reported -# by this script which you must include in circos.conf for the scaling to be applied -segment_normalization_scheme = value - -################################################################ -# placement of cell ribbons on row/column segments -# -# for segments that share both column and row ribbons, the -# order of ribbon position can be adjusted with placement_order - -placement_order = row,col # col,row or row,col - -# within the row/column ribbon bundle for each segment, -# ribbon_bundle_order determines how the ribbons will be -# ordered -# - size - by value of the cell -# - ascii - sorted by destination label -# - native - sorted by order of destination segment - -ribbon_bundle_order = native # size, ascii, native - -# reverse the position of links in table/row segments? - -reverse_rows = no -reverse_columns = no - -# values for cells with the same row/column name can be treated -# independently. You can -# show - show these cells and not filter them at all -# hide - hide these cells from the image, but not resize the row/columns -# remove - entirely remove these cells from the data set (equivalent to setting cells to missing value) -intra_cell_handling = show - -# ribbon layering - order in which the ribbons are drawn on the image -# size_asc - ascending by ribbon size (small ribbons drawn first, therefore large ribbons will be at front) -# size_desc - descending by ribbon size (large ribbons drawn first, therefore small ribbons will be at front) - -ribbon_layer_order = size_asc - -# if both (A,B)=x and (B,A)=y cells exist, you can choose to have the ribbon -# ends sized variably so that ribbon at A has width x and at B has width y - -ribbon_variable = no -ribbon_variable_intra_collapse = yes - -################################################################ -# cell value mapping allows you to remap the cell values using -# any Perl expression that uses X as the cell value. 
For example, -# -# cell_remap_formula = log(X) -# = sqrt(X) -# = X/10 -# = X ? log(X) : 0 -# -# This remapping takes place before any filters or scaling is applied. Its effect -# is the same as remapping the cell values in the input file. - -use_cell_remap = no -cell_remap_formula = round(10*X) - -################################################################ -# scale your values with a power rule (useful if the range of values -# is very large) to -# - atten_large: attenuate large values and maintain visibility -# of ribbons corresponding to small values, or -# - atten_small: attenuate small values to increase visibility -# of ribbons corresponding to large values -# -# given a value, v, and a maximum, m -# -# atten_small: -# -# v_new = m * ( exp(scale_factor * v / m) - 1 ) / ( exp(scale_factor) - 1 ) -# -# atten_large: -# -# v_new = m * ( log(scale_factor * v ) ) / ( log(scale_factor * m ) ) -# -# essentially the values are remapped to a log-type scale -# with the range 0..m - -use_scaling = no -scaling_type = atten_large -scale_factor = 1 - -blank_means_missing = no -missing_cell_value = - - -################################################################ -# Value cutoffs for cell values and ribbon formatting. -# -# You can toggle the visibility of ribbons for cells outside -# a min/max range. You can define one or more of these cutoffs. -# The cutoffs are applied to unscaled cell values. - -#cell_min_value = 10 -#cell_min_percentile = 10 -#cell_max_value = 100 -#cell_max_percentile = 100 - -# For cell values that do not pass the min/max filters above, -# you can specify whether they are hidden or removed. If the -# parameter is not defined, "hide" will be assumed. 
-# hide - cell values won't be shown, but row/col will not be resized -# remove - entirely remove these cells from the data set (equivalent to setting cells to missing value) - -cutoff_cell_handling = hide - -# The color of ribbons is by default the color of the row segment from -# which they originate. The block below allows you to remap the color -# of the ribbons based on cell percentile values. There are two ways -# to remap colors -# -# - color_remap=yes, color_autoremap=no -# Uses blocks to define the percentile values and associated -# color/stroke_color characteristics for ribbons. Percentile value defined -# in the block (e.g. ) is the max percentile value for -# cells associated with this block. -# - color_remap=yes, color_autoremap=yes -# Uses colors associated with each percentile window of size -# percentile_sampling for each cell - - -color_source = row -percentile_source = larger -color_transparency = 1 -color_remap = yes -color_autoremap = no - - -color = dgrey -transparency = 5 - - - -color = dgrey -transparency = 5 - - - -transparency = 1 - - - -transparency = 1 - - - -transparency = 1 -stroke_color = black -stroke_thickness = 1p - - - -transparency = 1 -stroke_color = black -stroke_thickness = 3p - - - - - -color = vdgrey -#stroke_color = black -#stroke_thickness = 1p - - -# If you are using color_autoremap=yes above, then -# define the percentile sampling window and -# the start/end HSV color values. Percentile window -# colors are interpolated between this HSV pair. -# -# HSV = (hue saturation value) -# hue=(0..360) saturation=(0..1) value=(0..1) - -percentile_sampling = 5 - -# count - percentile based on counts -# value - percentile based on value - -percentile_method = count - -# use all values or only unique values when -# calculating percentiles -percentile_unique_only = yes - -# use a function, f(X), to remap cell values when calculating percentiles -# for the purpose of color mapping. 
This allows you to apply a remapping to how -# colors are calculated, without actually changing the values. The remap -# applies only if percentile_method=value - -# percentile_remap = sqrt(X) - -# Which cell value set to use for percentile color mapping -# raw - original values -# filtered - values that pass min/max filters -# scaled - filtered values that have been scaled if use_scaling is set -percentile_data_domain = raw - - -h0 = 0 -s0 = 1 -v0 = 1 -h1 = 300 -s1 = 1 -v1 = 1 - - -# You can control the color and stroke of ribbons for each -# quartile (q1, q2, q3, q4). Any values defined here will -# overwrite colors determined by remapping. -# -# For example, if you have a lot of cells and wish to attenuate -# the visibility of ribbons associated with small values, you can -# set cell_q1_color=vvlgrey,cell_q1_nostroke=yes to fade the -# ribbons into the background. - -#cell_q1_color = vvlgrey -#cell_q2_color = vlgrey -#cell_q3_color = lgrey -#cell_q4_color = red -#cell_q1_nostroke = yes -#cell_q2_nostroke = yes -#cell_q3_nostroke = yes -#cell_q4_nostroke = yes - -# cell value multiplier, required when all data is small (e.g. <1), in which -# case set the multiplier to something like 1000 because Circos -# works only with integer scales - -data_mult = 1 - -################################################################ -# Segment labels can be optionally set to a size that is -# proportional to the size of the segment. Set min/max size -# values here. 
If this line is commented out, then the label -# size is determined by the circos.conf file used to draw the image - -#segment_label_size_range = 60,60 - -# progression controls how fast the label size changes from -# min to max (larger value of progression means values close to max -# are achieved for smaller segments) - -segment_label_size_progression = 4 - -segment_label_uppercase = no - -################################################################ -# Segment colors can be specified in the data file (in this -# case use row_color_col and col_color_row), otherwise colors -# are interpolated within an HSV range. Color interpolation can be -# done in two ways: based on segment index (interpolation steps through -# colors uniformly for each segment) and total size (interpolation -# steps through colors in proportion to segment size). - - -interpolate_type = size # size or count -h0 = 0 -s0 = 0.8 -v0 = 0.9 -h1 = 300 -s1 = 0.8 -v1 = 0.9 - - -################################################################ -# Shorten the labels of segments. Specify whether to do this -# with shorten_text=yes|no parameter and provide regular -# expressions in string_replace which define the text to -# replace. - -shorten_text = yes - - -IGH = - - -# exit on any error -strict_sanity = yes - -################################################################ -# if the segment_prefix is set, then rows and columns will be -# renamed to internal fields segment_prefix + DIGIT - -#segment_prefix = id -color_prefix = color - -################################################################ -# Delimiters - -# field delimiter regular expression -# if this is not defined, any whitespace will be considered a delimiter -field_delim = \s - -# collapse adjacent delimiters? -field_delim_collapse = yes - -# remove any leading space in the input file -# by default, this is on - if you set this to "no", make sure that you don't have any leading spaces in your table! 
-strip_leading_space = yes - -# remove quotes and thousand separators - concatenate characters to remove -# -# e.g. to remove characters a b c set remove_cell_rx=abc -# e.g. to remove characters " ' , set remove_cell_rx="', -remove_cell_rx = "', - diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/pragmata.ttf Binary file report_clonality/circos/pragmata.ttf has changed diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/ticks.conf --- a/report_clonality/circos/ticks.conf Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ - -show_ticks = no -show_tick_labels = no -chrticklabels = yes -chrticklabelfont = default - -grid_start = dims(ideogram,radius_inner)-0.5r -grid_end = dims(ideogram,radius_outer)+100 - - -skip_first_label = no -skip_last_label = no -radius = dims(ideogram,radius_outer) -label_offset = 0p -color = black - - -spacing = 5u -spacing_type = relative -rspacing = 0.1 -size = 3p -thickness = 2p -color = dgrey -show_label = yes -label_size = 16p -label_offset = 3p -label_relative = yes -format = %d -grid = yes -grid_color = dgrey -grid_thickness = 1p -suffix = % -rmultiplier = 100 -offset = 40p - - - -spacing = 50u -size = 3p -thickness = 2p -color = black -show_label = yes -label_size = 12p -label_offset = 3p -format = %d -grid = yes -grid_color = dgrey -grid_thickness = 1p - - - -spacing = 10u -size = 2p -thickness = 1p -color = black -show_label = no -label_size = 12p -label_offset = 3p -format = %d -grid = yes -grid_color = dgrey -grid_thickness = 1p - - - diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/circos/wingding.ttf Binary file report_clonality/circos/wingding.ttf has changed diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/genes.txt --- a/report_clonality/genes.txt Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3306 +0,0 @@ -Species IMGT.GENE.DB locus region functional chr.order -Bos taurus functional TRAJ1 TRA J TRUE 1 -Bos taurus 
functional TRAJ10 TRA J TRUE 2 -Bos taurus functional TRAJ11 TRA J TRUE 3 -Bos taurus non-functional TRAJ12 TRA J FALSE 4 -Bos taurus functional TRAJ13 TRA J TRUE 5 -Bos taurus functional TRAJ14 TRA J TRUE 6 -Bos taurus functional TRAJ15 TRA J TRUE 7 -Bos taurus functional TRAJ16 TRA J TRUE 8 -Bos taurus functional TRAJ17 TRA J TRUE 9 -Bos taurus functional TRAJ18 TRA J TRUE 10 -Bos taurus functional TRAJ19 TRA J TRUE 11 -Bos taurus functional TRAJ2 TRA J TRUE 12 -Bos taurus functional TRAJ20 TRA J TRUE 13 -Bos taurus functional TRAJ21 TRA J TRUE 14 -Bos taurus functional TRAJ22 TRA J TRUE 15 -Bos taurus functional TRAJ23 TRA J TRUE 16 -Bos taurus functional TRAJ24 TRA J TRUE 17 -Bos taurus functional TRAJ25 TRA J TRUE 18 -Bos taurus functional TRAJ26 TRA J TRUE 19 -Bos taurus non-functional TRAJ27 TRA J FALSE 20 -Bos taurus functional TRAJ28 TRA J TRUE 21 -Bos taurus functional TRAJ29 TRA J TRUE 22 -Bos taurus functional TRAJ3 TRA J TRUE 23 -Bos taurus non-functional TRAJ30 TRA J FALSE 24 -Bos taurus functional TRAJ31 TRA J TRUE 25 -Bos taurus functional TRAJ32 TRA J TRUE 26 -Bos taurus functional TRAJ33 TRA J TRUE 27 -Bos taurus functional TRAJ34 TRA J TRUE 28 -Bos taurus functional TRAJ35 TRA J TRUE 29 -Bos taurus functional TRAJ36 TRA J TRUE 30 -Bos taurus functional TRAJ37 TRA J TRUE 31 -Bos taurus functional TRAJ38 TRA J TRUE 32 -Bos taurus functional TRAJ39 TRA J TRUE 33 -Bos taurus non-functional TRAJ4 TRA J FALSE 34 -Bos taurus functional TRAJ40 TRA J TRUE 35 -Bos taurus functional TRAJ41 TRA J TRUE 36 -Bos taurus functional TRAJ42 TRA J TRUE 37 -Bos taurus functional TRAJ43 TRA J TRUE 38 -Bos taurus functional TRAJ44 TRA J TRUE 39 -Bos taurus functional TRAJ45 TRA J TRUE 40 -Bos taurus functional TRAJ46 TRA J TRUE 41 -Bos taurus non-functional TRAJ47 TRA J FALSE 42 -Bos taurus functional TRAJ48 TRA J TRUE 43 -Bos taurus functional TRAJ49 TRA J TRUE 44 -Bos taurus functional TRAJ5 TRA J TRUE 45 -Bos taurus functional TRAJ50 TRA J TRUE 46 -Bos taurus 
functional TRAJ51 TRA J TRUE 47 -Bos taurus non-functional TRAJ52 TRA J FALSE 48 -Bos taurus functional TRAJ6 TRA J TRUE 49 -Bos taurus functional TRAJ7 TRA J TRUE 50 -Bos taurus functional TRAJ8 TRA J TRUE 51 -Bos taurus functional TRAJ9 TRA J TRUE 52 -Bos taurus functional TRDD1 TRD D TRUE 53 -Bos taurus functional TRDD2 TRD D TRUE 54 -Bos taurus functional TRDD3 TRD D TRUE 55 -Bos taurus functional TRDD4 TRD D TRUE 56 -Bos taurus functional TRDD5 TRD D TRUE 57 -Bos taurus functional TRDJ1 TRD J TRUE 58 -Bos taurus functional TRDJ2 TRD J TRUE 59 -Bos taurus functional TRDJ3 TRD J TRUE 60 -Bos taurus functional TRDV1S1 TRD V TRUE 61 -Bos taurus functional TRDV1S10 TRD V TRUE 62 -Bos taurus functional TRDV1S11 TRD V TRUE 63 -Bos taurus functional TRDV1S12 TRD V TRUE 64 -Bos taurus functional TRDV1S13-1 TRD V TRUE 65 -Bos taurus functional TRDV1S13-2 TRD V TRUE 66 -Bos taurus functional TRDV1S14 TRD V TRUE 67 -Bos taurus functional TRDV1S15-1 TRD V TRUE 68 -Bos taurus functional TRDV1S15-2 TRD V TRUE 69 -Bos taurus functional TRDV1S16 TRD V TRUE 70 -Bos taurus functional TRDV1S17 TRD V TRUE 71 -Bos taurus functional TRDV1S18-1 TRD V TRUE 72 -Bos taurus functional TRDV1S18-2 TRD V TRUE 73 -Bos taurus functional TRDV1S19 TRD V TRUE 74 -Bos taurus non-functional TRDV1S2-1 TRD V FALSE 75 -Bos taurus non-functional TRDV1S2-2 TRD V FALSE 76 -Bos taurus functional TRDV1S20 TRD V TRUE 77 -Bos taurus functional TRDV1S21-1 TRD V TRUE 78 -Bos taurus functional TRDV1S21-2 TRD V TRUE 79 -Bos taurus non-functional TRDV1S22 TRD V FALSE 80 -Bos taurus functional TRDV1S23 TRD V TRUE 81 -Bos taurus functional TRDV1S24 TRD V TRUE 82 -Bos taurus non-functional TRDV1S25-1 TRD V FALSE 83 -Bos taurus non-functional TRDV1S25-2 TRD V FALSE 84 -Bos taurus functional TRDV1S26 TRD V TRUE 85 -Bos taurus functional TRDV1S27-1 TRD V TRUE 86 -Bos taurus functional TRDV1S27-2 TRD V TRUE 87 -Bos taurus functional TRDV1S28 TRD V TRUE 88 -Bos taurus functional TRDV1S29 TRD V TRUE 89 -Bos taurus 
functional TRDV1S3 TRD V TRUE 90 -Bos taurus functional TRDV1S30 TRD V TRUE 91 -Bos taurus functional TRDV1S31 TRD V TRUE 92 -Bos taurus functional TRDV1S32 TRD V TRUE 93 -Bos taurus functional TRDV1S33 TRD V TRUE 94 -Bos taurus non-functional TRDV1S34 TRD V FALSE 95 -Bos taurus functional TRDV1S35 TRD V TRUE 96 -Bos taurus functional TRDV1S36 TRD V TRUE 97 -Bos taurus functional TRDV1S37 TRD V TRUE 98 -Bos taurus functional TRDV1S38 TRD V TRUE 99 -Bos taurus functional TRDV1S39 TRD V TRUE 100 -Bos taurus functional TRDV1S4-1 TRD V TRUE 101 -Bos taurus functional TRDV1S4-2 TRD V TRUE 102 -Bos taurus functional TRDV1S4-3 TRD V TRUE 103 -Bos taurus functional TRDV1S40 TRD V TRUE 104 -Bos taurus functional TRDV1S41 TRD V TRUE 105 -Bos taurus functional TRDV1S42 TRD V TRUE 106 -Bos taurus functional TRDV1S43 TRD V TRUE 107 -Bos taurus non-functional TRDV1S44 TRD V FALSE 108 -Bos taurus functional TRDV1S45 TRD V TRUE 109 -Bos taurus functional TRDV1S46 TRD V TRUE 110 -Bos taurus functional TRDV1S47-1 TRD V TRUE 111 -Bos taurus functional TRDV1S47-2 TRD V TRUE 112 -Bos taurus functional TRDV1S48 TRD V TRUE 113 -Bos taurus functional TRDV1S49 TRD V TRUE 114 -Bos taurus functional TRDV1S5-1 TRD V TRUE 115 -Bos taurus functional TRDV1S5-2 TRD V TRUE 116 -Bos taurus functional TRDV1S50 TRD V TRUE 117 -Bos taurus functional TRDV1S51-1 TRD V TRUE 118 -Bos taurus functional TRDV1S51-2 TRD V TRUE 119 -Bos taurus functional TRDV1S52-1 TRD V TRUE 120 -Bos taurus functional TRDV1S52-2 TRD V TRUE 121 -Bos taurus non-functional TRDV1S53 TRD V FALSE 122 -Bos taurus non-functional TRDV1S54 TRD V FALSE 123 -Bos taurus non-functional TRDV1S55 TRD V FALSE 124 -Bos taurus non-functional TRDV1S56 TRD V FALSE 125 -Bos taurus functional TRDV1S6-1 TRD V TRUE 126 -Bos taurus functional TRDV1S6-2 TRD V TRUE 127 -Bos taurus functional TRDV1S7-1 TRD V TRUE 128 -Bos taurus functional TRDV1S7-2 TRD V TRUE 129 -Bos taurus functional TRDV1S8 TRD V TRUE 130 -Bos taurus non-functional TRDV1S9 TRD V 
FALSE 131 -Bos taurus functional TRDV2S1 TRD V TRUE 132 -Bos taurus functional TRDV2S2 TRD V TRUE 133 -Bos taurus functional TRDV2S3 TRD V TRUE 134 -Bos taurus non-functional TRDV2S4 TRD V FALSE 135 -Bos taurus functional TRDV3S1 TRD V TRUE 136 -Bos taurus functional TRDV3S2 TRD V TRUE 137 -Bos taurus non-functional TRDV3S3 TRD V FALSE 138 -Bos taurus functional TRDV4 TRD V TRUE 139 -Bos taurus non-functional TRDV5 TRD V FALSE 140 -Bos taurus functional TRGJ1-1 TRG J TRUE 141 -Bos taurus functional TRGJ1-2 TRG J TRUE 142 -Bos taurus functional TRGJ2-1 TRG J TRUE 143 -Bos taurus functional TRGJ2-2 TRG J TRUE 144 -Bos taurus functional TRGJ3-1 TRG J TRUE 145 -Bos taurus functional TRGJ4-1 TRG J TRUE 146 -Bos taurus functional TRGJ4-2 TRG J TRUE 147 -Bos taurus functional TRGJ5-1 TRG J TRUE 148 -Bos taurus functional TRGJ6-1 TRG J TRUE 149 -Bos taurus functional TRGV1-1 TRG V TRUE 150 -Bos taurus functional TRGV10-1 TRG V TRUE 151 -Bos taurus functional TRGV2-1 TRG V TRUE 152 -Bos taurus functional TRGV3-1 TRG V TRUE 153 -Bos taurus functional TRGV3-2 TRG V TRUE 154 -Bos taurus non-functional TRGV4-1 TRG V FALSE 155 -Bos taurus non-functional TRGV5-1 TRG V FALSE 156 -Bos taurus non-functional TRGV5-2 TRG V FALSE 157 -Bos taurus functional TRGV6-1 TRG V TRUE 158 -Bos taurus functional TRGV6-2 TRG V TRUE 159 -Bos taurus functional TRGV7-1 TRG V TRUE 160 -Bos taurus functional TRGV8-1 TRG V TRUE 161 -Bos taurus functional TRGV8-2 TRG V TRUE 162 -Bos taurus functional TRGV8-3 TRG V TRUE 163 -Bos taurus functional TRGV8-4 TRG V TRUE 164 -Bos taurus functional TRGV9-1 TRG V TRUE 165 -Bos taurus functional TRGV9-2 TRG V TRUE 166 -Camelus dromedarius functional TRDV1S1 TRD V TRUE 167 -Camelus dromedarius functional TRDV1S2 TRD V TRUE 168 -Camelus dromedarius functional TRDV1S3 TRD V TRUE 169 -Camelus dromedarius functional TRDV1S4 TRD V TRUE 170 -Camelus dromedarius functional TRDV1S5 TRD V TRUE 171 -Camelus dromedarius functional TRDV1S6 TRD V TRUE 172 -Camelus dromedarius 
functional TRDV2S1 TRD V TRUE 173 -Camelus dromedarius functional TRDV2S2 TRD V TRUE 174 -Camelus dromedarius functional TRDV2S3 TRD V TRUE 175 -Camelus dromedarius functional TRDV4S1 TRD V TRUE 176 -Camelus dromedarius functional TRGJ1-1 TRG J TRUE 177 -Camelus dromedarius non-functional TRGJ2-1 TRG J FALSE 178 -Camelus dromedarius functional TRGJ2-2 TRG J TRUE 179 -Camelus dromedarius functional TRGV1 TRG V TRUE 180 -Camelus dromedarius functional TRGV2 TRG V TRUE 181 -Canis lupus familiaris functional TRBD1 TRB D TRUE 182 -Canis lupus familiaris functional TRBD2 TRB D TRUE 183 -Canis lupus familiaris functional TRBJ1-1 TRB J TRUE 184 -Canis lupus familiaris functional TRBJ1-2 TRB J TRUE 185 -Canis lupus familiaris non-functional TRBJ1-3 TRB J FALSE 186 -Canis lupus familiaris functional TRBJ1-4 TRB J TRUE 187 -Canis lupus familiaris functional TRBJ1-5 TRB J TRUE 188 -Canis lupus familiaris functional TRBJ1-6 TRB J TRUE 189 -Canis lupus familiaris non-functional TRBJ2-1 TRB J FALSE 190 -Canis lupus familiaris functional TRBJ2-2 TRB J TRUE 191 -Canis lupus familiaris functional TRBJ2-3 TRB J TRUE 192 -Canis lupus familiaris non-functional TRBJ2-4 TRB J FALSE 193 -Canis lupus familiaris functional TRBJ2-5 TRB J TRUE 194 -Canis lupus familiaris functional TRBJ2-6 TRB J TRUE 195 -Canis lupus familiaris functional TRBV1 TRB V TRUE 196 -Canis lupus familiaris functional TRBV10 TRB V TRUE 197 -Canis lupus familiaris non-functional TRBV11 TRB V FALSE 198 -Canis lupus familiaris non-functional TRBV12-1 TRB V FALSE 199 -Canis lupus familiaris functional TRBV12-2 TRB V TRUE 200 -Canis lupus familiaris non-functional TRBV15 TRB V FALSE 201 -Canis lupus familiaris functional TRBV16 TRB V TRUE 202 -Canis lupus familiaris functional TRBV18 TRB V TRUE 203 -Canis lupus familiaris non-functional TRBV19 TRB V FALSE 204 -Canis lupus familiaris non-functional TRBV2-1 TRB V FALSE 205 -Canis lupus familiaris non-functional TRBV2-2 TRB V FALSE 206 -Canis lupus familiaris non-functional 
TRBV2-3 TRB V FALSE 207 -Canis lupus familiaris functional TRBV20 TRB V TRUE 208 -Canis lupus familiaris non-functional TRBV21 TRB V FALSE 209 -Canis lupus familiaris functional TRBV22 TRB V TRUE 210 -Canis lupus familiaris functional TRBV24 TRB V TRUE 211 -Canis lupus familiaris functional TRBV25 TRB V TRUE 212 -Canis lupus familiaris non-functional TRBV26 TRB V FALSE 213 -Canis lupus familiaris non-functional TRBV27 TRB V FALSE 214 -Canis lupus familiaris non-functional TRBV28 TRB V FALSE 215 -Canis lupus familiaris functional TRBV29 TRB V TRUE 216 -Canis lupus familiaris functional TRBV3-1 TRB V TRUE 217 -Canis lupus familiaris functional TRBV3-2 TRB V TRUE 218 -Canis lupus familiaris non-functional TRBV3-3 TRB V FALSE 219 -Canis lupus familiaris functional TRBV30 TRB V TRUE 220 -Canis lupus familiaris functional TRBV4-1 TRB V TRUE 221 -Canis lupus familiaris functional TRBV4-2 TRB V TRUE 222 -Canis lupus familiaris functional TRBV4-3 TRB V TRUE 223 -Canis lupus familiaris non-functional TRBV5-1 TRB V FALSE 224 -Canis lupus familiaris functional TRBV5-2 TRB V TRUE 225 -Canis lupus familiaris non-functional TRBV5-3 TRB V FALSE 226 -Canis lupus familiaris functional TRBV5-4 TRB V TRUE 227 -Canis lupus familiaris non-functional TRBV6 TRB V FALSE 228 -Canis lupus familiaris functional TRBV7 TRB V TRUE 229 -Canis lupus familiaris non-functional TRBV8 TRB V FALSE 230 -Canis lupus familiaris non-functional TRGJ1-1 TRG J FALSE 231 -Canis lupus familiaris functional TRGJ1-2 TRG J TRUE 232 -Canis lupus familiaris non-functional TRGJ2-1 TRG J FALSE 233 -Canis lupus familiaris functional TRGJ2-2 TRG J TRUE 234 -Canis lupus familiaris non-functional TRGJ3-1 TRG J FALSE 235 -Canis lupus familiaris non-functional TRGJ3-2 TRG J FALSE 236 -Canis lupus familiaris non-functional TRGJ4-1 TRG J FALSE 237 -Canis lupus familiaris functional TRGJ4-2 TRG J TRUE 238 -Canis lupus familiaris non-functional TRGJ5-1 TRG J FALSE 239 -Canis lupus familiaris functional TRGJ5-2 TRG J TRUE 240 
-Canis lupus familiaris functional TRGJ6-1 TRG J TRUE 241 -Canis lupus familiaris non-functional TRGJ6-2 TRG J FALSE 242 -Canis lupus familiaris non-functional TRGJ7-1 TRG J FALSE 243 -Canis lupus familiaris functional TRGJ7-2 TRG J TRUE 244 -Canis lupus familiaris functional TRGJ8-1 TRG J TRUE 245 -Canis lupus familiaris non-functional TRGJ8-2 TRG J FALSE 246 -Canis lupus familiaris non-functional TRGV1-1 TRG V FALSE 247 -Canis lupus familiaris functional TRGV2-1 TRG V TRUE 248 -Canis lupus familiaris functional TRGV2-2 TRG V TRUE 249 -Canis lupus familiaris functional TRGV2-3 TRG V TRUE 250 -Canis lupus familiaris functional TRGV2-4 TRG V TRUE 251 -Canis lupus familiaris non-functional TRGV3-1 TRG V FALSE 252 -Canis lupus familiaris non-functional TRGV3-2 TRG V FALSE 253 -Canis lupus familiaris non-functional TRGV3-3 TRG V FALSE 254 -Canis lupus familiaris functional TRGV4-1 TRG V TRUE 255 -Canis lupus familiaris non-functional TRGV5-1 TRG V FALSE 256 -Canis lupus familiaris functional TRGV5-2 TRG V TRUE 257 -Canis lupus familiaris non-functional TRGV6-1 TRG V FALSE 258 -Canis lupus familiaris non-functional TRGV7-1 TRG V FALSE 259 -Canis lupus familiaris functional TRGV7-2 TRG V TRUE 260 -Canis lupus familiaris functional TRGV7-3 TRG V TRUE 261 -Canis lupus familiaris non-functional TRGV8-1 TRG V FALSE 262 -Danio rerio non-functional IGHD IGH D FALSE 263 -Danio rerio functional IGHD1-1 IGH D TRUE 264 -Danio rerio functional IGHD1-2 IGH D TRUE 265 -Danio rerio functional IGHD2-1 IGH D TRUE 266 -Danio rerio functional IGHD2-2 IGH D TRUE 267 -Danio rerio functional IGHD2-3 IGH D TRUE 268 -Danio rerio functional IGHD2-4 IGH D TRUE 269 -Danio rerio functional IGHD2-5 IGH D TRUE 270 -Danio rerio functional IGHJ1-1 IGH J TRUE 271 -Danio rerio functional IGHJ1-2 IGH J TRUE 272 -Danio rerio functional IGHJ2-1 IGH J TRUE 273 -Danio rerio functional IGHJ2-2 IGH J TRUE 274 -Danio rerio functional IGHJ2-3 IGH J TRUE 275 -Danio rerio functional IGHJ2-4 IGH J TRUE 276 -Danio 
rerio functional IGHJ2-5 IGH J TRUE 277 -Danio rerio functional IGHV1-1 IGH V TRUE 278 -Danio rerio functional IGHV1-2 IGH V TRUE 279 -Danio rerio functional IGHV1-3 IGH V TRUE 280 -Danio rerio functional IGHV1-4 IGH V TRUE 281 -Danio rerio functional IGHV1-5 IGH V TRUE 282 -Danio rerio functional IGHV10-1 IGH V TRUE 283 -Danio rerio functional IGHV11-1 IGH V TRUE 284 -Danio rerio functional IGHV11-2 IGH V TRUE 285 -Danio rerio non-functional IGHV12-1 IGH V FALSE 286 -Danio rerio functional IGHV13-2 IGH V TRUE 287 -Danio rerio functional IGHV14-1 IGH V TRUE 288 -Danio rerio functional IGHV2-1 IGH V TRUE 289 -Danio rerio functional IGHV2-2 IGH V TRUE 290 -Danio rerio functional IGHV2-3 IGH V TRUE 291 -Danio rerio functional IGHV3-2 IGH V TRUE 292 -Danio rerio functional IGHV4-1 IGH V TRUE 293 -Danio rerio functional IGHV4-2 IGH V TRUE 294 -Danio rerio functional IGHV4-3 IGH V TRUE 295 -Danio rerio functional IGHV4-5 IGH V TRUE 296 -Danio rerio functional IGHV4-6 IGH V TRUE 297 -Danio rerio functional IGHV4-7 IGH V TRUE 298 -Danio rerio functional IGHV4-8 IGH V TRUE 299 -Danio rerio functional IGHV4-9 IGH V TRUE 300 -Danio rerio functional IGHV5-1 IGH V TRUE 301 -Danio rerio non-functional IGHV5-2 IGH V FALSE 302 -Danio rerio functional IGHV5-3 IGH V TRUE 303 -Danio rerio non-functional IGHV5-4 IGH V FALSE 304 -Danio rerio non-functional IGHV5-5 IGH V FALSE 305 -Danio rerio functional IGHV5-7 IGH V TRUE 306 -Danio rerio non-functional IGHV5-8 IGH V FALSE 307 -Danio rerio functional IGHV6-1 IGH V TRUE 308 -Danio rerio non-functional IGHV6-2 IGH V FALSE 309 -Danio rerio functional IGHV7-1 IGH V TRUE 310 -Danio rerio functional IGHV8-1 IGH V TRUE 311 -Danio rerio functional IGHV8-2 IGH V TRUE 312 -Danio rerio functional IGHV8-3 IGH V TRUE 313 -Danio rerio functional IGHV8-4 IGH V TRUE 314 -Danio rerio functional IGHV9-1 IGH V TRUE 315 -Danio rerio functional IGHV9-2 IGH V TRUE 316 -Danio rerio functional IGHV9-3 IGH V TRUE 317 -Danio rerio functional IGHV9-4 IGH V TRUE 
318 -Danio rerio functional IGIJ1 IGI J TRUE 319 -Danio rerio non-functional IGIJ2 IGI J FALSE 320 -Danio rerio functional IGIJ3 IGI J TRUE 321 -Danio rerio non-functional IGIJ4 IGI J FALSE 322 -Danio rerio functional IGIJ5 IGI J TRUE 323 -Danio rerio non-functional IGIJ6S1 IGI J FALSE 324 -Danio rerio non-functional IGIJ7S1 IGI J FALSE 325 -Danio rerio non-functional IGIJ8S1 IGI J FALSE 326 -Danio rerio non-functional IGIV1-1 IGI V FALSE 327 -Danio rerio functional IGIV1-2 IGI V TRUE 328 -Danio rerio non-functional IGIV2 IGI V FALSE 329 -Danio rerio functional IGIV3-1 IGI V TRUE 330 -Danio rerio non-functional IGIV3-2 IGI V FALSE 331 -Danio rerio functional IGIV3-3 IGI V TRUE 332 -Danio rerio non-functional IGIV4-1 IGI V FALSE 333 -Danio rerio non-functional IGIV4-2 IGI V FALSE 334 -Danio rerio non-functional IGIV6S1 IGI V FALSE 335 -Danio rerio non-functional IGIV6S2 IGI V FALSE 336 -Danio rerio non-functional IGIV7S1 IGI V FALSE 337 -Danio rerio non-functional IGIV7S2 IGI V FALSE 338 -Danio rerio non-functional IGIV8S1 IGI V FALSE 339 -Homo sapiens functional CD1D CD1 D TRUE 340 -Homo sapiens functional IGHD IGH D TRUE 341 -Homo sapiens functional IGHD1-1 IGH D TRUE 342 -Homo sapiens non-functional IGHD1-14 IGH D FALSE 343 -Homo sapiens functional IGHD1-20 IGH D TRUE 344 -Homo sapiens functional IGHD1-26 IGH D TRUE 345 -Homo sapiens functional IGHD1-7 IGH D TRUE 346 -Homo sapiens non-functional IGHD1/OR15-1a IGH D FALSE 347 -Homo sapiens non-functional IGHD1/OR15-1b IGH D FALSE 348 -Homo sapiens functional IGHD2-15 IGH D TRUE 349 -Homo sapiens functional IGHD2-2 IGH D TRUE 350 -Homo sapiens functional IGHD2-21 IGH D TRUE 351 -Homo sapiens functional IGHD2-8 IGH D TRUE 352 -Homo sapiens non-functional IGHD2/OR15-2a IGH D FALSE 353 -Homo sapiens non-functional IGHD2/OR15-2b IGH D FALSE 354 -Homo sapiens functional IGHD3-10 IGH D TRUE 355 -Homo sapiens functional IGHD3-16 IGH D TRUE 356 -Homo sapiens functional IGHD3-22 IGH D TRUE 357 -Homo sapiens functional 
IGHD3-3 IGH D TRUE 358 -Homo sapiens functional IGHD3-9 IGH D TRUE 359 -Homo sapiens non-functional IGHD3/OR15-3a IGH D FALSE 360 -Homo sapiens non-functional IGHD3/OR15-3b IGH D FALSE 361 -Homo sapiens non-functional IGHD4-11 IGH D FALSE 362 -Homo sapiens functional IGHD4-17 IGH D TRUE 363 -Homo sapiens non-functional IGHD4-23 IGH D FALSE 364 -Homo sapiens functional IGHD4-4 IGH D TRUE 365 -Homo sapiens non-functional IGHD4/OR15-4a IGH D FALSE 366 -Homo sapiens non-functional IGHD4/OR15-4b IGH D FALSE 367 -Homo sapiens functional IGHD5-12 IGH D TRUE 368 -Homo sapiens functional IGHD5-18 IGH D TRUE 369 -Homo sapiens non-functional IGHD5-24 IGH D FALSE 370 -Homo sapiens functional IGHD5-5 IGH D TRUE 371 -Homo sapiens non-functional IGHD5/OR15-5a IGH D FALSE 372 -Homo sapiens non-functional IGHD5/OR15-5b IGH D FALSE 373 -Homo sapiens functional IGHD6-13 IGH D TRUE 374 -Homo sapiens functional IGHD6-19 IGH D TRUE 375 -Homo sapiens functional IGHD6-25 IGH D TRUE 376 -Homo sapiens functional IGHD6-6 IGH D TRUE 377 -Homo sapiens functional IGHD7-27 IGH D TRUE 378 -Homo sapiens functional IGHJ1 IGH J TRUE 379 -Homo sapiens non-functional IGHJ1P IGH J FALSE 380 -Homo sapiens functional IGHJ2 IGH J TRUE 381 -Homo sapiens non-functional IGHJ2P IGH J FALSE 382 -Homo sapiens functional IGHJ3 IGH J TRUE 383 -Homo sapiens non-functional IGHJ3P IGH J FALSE 384 -Homo sapiens functional IGHJ4 IGH J TRUE 385 -Homo sapiens functional IGHJ5 IGH J TRUE 386 -Homo sapiens functional IGHJ6 IGH J TRUE 387 -Homo sapiens non-functional IGHV(II)-1-1 IGH V FALSE 388 -Homo sapiens non-functional IGHV(II)-15-1 IGH V FALSE 389 -Homo sapiens non-functional IGHV(II)-20-1 IGH V FALSE 390 -Homo sapiens non-functional IGHV(II)-22-1 IGH V FALSE 391 -Homo sapiens non-functional IGHV(II)-23-1 IGH V FALSE 392 -Homo sapiens non-functional IGHV(II)-23-2 IGH V FALSE 393 -Homo sapiens non-functional IGHV(II)-26-2 IGH V FALSE 394 -Homo sapiens non-functional IGHV(II)-28-1 IGH V FALSE 395 -Homo sapiens 
non-functional IGHV(II)-30-1 IGH V FALSE 396 -Homo sapiens non-functional IGHV(II)-31-1 IGH V FALSE 397 -Homo sapiens non-functional IGHV(II)-33-1 IGH V FALSE 398 -Homo sapiens non-functional IGHV(II)-40-1 IGH V FALSE 399 -Homo sapiens non-functional IGHV(II)-43-1 IGH V FALSE 400 -Homo sapiens non-functional IGHV(II)-43-1D IGH V FALSE 401 -Homo sapiens non-functional IGHV(II)-44-1D IGH V FALSE 402 -Homo sapiens non-functional IGHV(II)-44-2 IGH V FALSE 403 -Homo sapiens non-functional IGHV(II)-44-2D IGH V FALSE 404 -Homo sapiens non-functional IGHV(II)-44-3 IGH V FALSE 405 -Homo sapiens non-functional IGHV(II)-46-1 IGH V FALSE 406 -Homo sapiens non-functional IGHV(II)-49-1 IGH V FALSE 407 -Homo sapiens non-functional IGHV(II)-51-2 IGH V FALSE 408 -Homo sapiens non-functional IGHV(II)-53-1 IGH V FALSE 409 -Homo sapiens non-functional IGHV(II)-60-1 IGH V FALSE 410 -Homo sapiens non-functional IGHV(II)-62-1 IGH V FALSE 411 -Homo sapiens non-functional IGHV(II)-65-1 IGH V FALSE 412 -Homo sapiens non-functional IGHV(II)-67-1 IGH V FALSE 413 -Homo sapiens non-functional IGHV(II)-74-1 IGH V FALSE 414 -Homo sapiens non-functional IGHV(II)-78-1 IGH V FALSE 415 -Homo sapiens non-functional IGHV(III)-11-1 IGH V FALSE 416 -Homo sapiens non-functional IGHV(III)-13-1 IGH V FALSE 417 -Homo sapiens non-functional IGHV(III)-16-1 IGH V FALSE 418 -Homo sapiens non-functional IGHV(III)-2-1 IGH V FALSE 419 -Homo sapiens non-functional IGHV(III)-22-2 IGH V FALSE 420 -Homo sapiens non-functional IGHV(III)-25-1 IGH V FALSE 421 -Homo sapiens non-functional IGHV(III)-26-1 IGH V FALSE 422 -Homo sapiens non-functional IGHV(III)-38-1 IGH V FALSE 423 -Homo sapiens non-functional IGHV(III)-38-1D IGH V FALSE 424 -Homo sapiens non-functional IGHV(III)-44 IGH V FALSE 425 -Homo sapiens non-functional IGHV(III)-44D IGH V FALSE 426 -Homo sapiens non-functional IGHV(III)-47-1 IGH V FALSE 427 -Homo sapiens non-functional IGHV(III)-5-1 IGH V FALSE 428 -Homo sapiens non-functional IGHV(III)-5-2 IGH V FALSE 
429 -Homo sapiens non-functional IGHV(III)-51-1 IGH V FALSE 430 -Homo sapiens non-functional IGHV(III)-67-2 IGH V FALSE 431 -Homo sapiens non-functional IGHV(III)-67-3 IGH V FALSE 432 -Homo sapiens non-functional IGHV(III)-67-4 IGH V FALSE 433 -Homo sapiens non-functional IGHV(III)-76-1 IGH V FALSE 434 -Homo sapiens non-functional IGHV(III)-82 IGH V FALSE 435 -Homo sapiens non-functional IGHV(IV)-44-1 IGH V FALSE 436 -Homo sapiens non-functional IGHV1-12 IGH V FALSE 437 -Homo sapiens non-functional IGHV1-14 IGH V FALSE 438 -Homo sapiens non-functional IGHV1-17 IGH V FALSE 439 -Homo sapiens functional IGHV1-18 IGH V TRUE 440 -Homo sapiens functional IGHV1-2 IGH V TRUE 441 -Homo sapiens functional IGHV1-24 IGH V TRUE 442 -Homo sapiens functional IGHV1-3 IGH V TRUE 443 -Homo sapiens non-functional IGHV1-38-4 IGH V FALSE 444 -Homo sapiens functional IGHV1-45 IGH V TRUE 445 -Homo sapiens functional IGHV1-46 IGH V TRUE 446 -Homo sapiens functional IGHV1-58 IGH V TRUE 447 -Homo sapiens non-functional IGHV1-67 IGH V FALSE 448 -Homo sapiens non-functional IGHV1-68 IGH V FALSE 449 -Homo sapiens functional IGHV1-69 IGH V TRUE 450 -Homo sapiens functional IGHV1-69-2 IGH V TRUE 451 -Homo sapiens functional IGHV1-69D IGH V TRUE 452 -Homo sapiens functional IGHV1-8 IGH V TRUE 453 -Homo sapiens non-functional IGHV1-NL1 IGH V FALSE 454 -Homo sapiens non-functional IGHV1/OR15-1 IGH V FALSE 455 -Homo sapiens non-functional IGHV1/OR15-2 IGH V FALSE 456 -Homo sapiens non-functional IGHV1/OR15-3 IGH V FALSE 457 -Homo sapiens non-functional IGHV1/OR15-4 IGH V FALSE 458 -Homo sapiens non-functional IGHV1/OR15-5 IGH V FALSE 459 -Homo sapiens non-functional IGHV1/OR15-6 IGH V FALSE 460 -Homo sapiens non-functional IGHV1/OR15-9 IGH V FALSE 461 -Homo sapiens non-functional IGHV1/OR16-1 IGH V FALSE 462 -Homo sapiens non-functional IGHV1/OR16-2 IGH V FALSE 463 -Homo sapiens non-functional IGHV1/OR16-3 IGH V FALSE 464 -Homo sapiens non-functional IGHV1/OR16-4 IGH V FALSE 465 -Homo sapiens 
non-functional IGHV1/OR21-1 IGH V FALSE 466 -Homo sapiens non-functional IGHV2-10 IGH V FALSE 467 -Homo sapiens functional IGHV2-26 IGH V TRUE 468 -Homo sapiens functional IGHV2-5 IGH V TRUE 469 -Homo sapiens functional IGHV2-70 IGH V TRUE 470 -Homo sapiens functional IGHV2-70D IGH V TRUE 471 -Homo sapiens non-functional IGHV2/OR16-5 IGH V FALSE 472 -Homo sapiens functional IGHV3-11 IGH V TRUE 473 -Homo sapiens functional IGHV3-13 IGH V TRUE 474 -Homo sapiens functional IGHV3-15 IGH V TRUE 475 -Homo sapiens non-functional IGHV3-16 IGH V FALSE 476 -Homo sapiens non-functional IGHV3-19 IGH V FALSE 477 -Homo sapiens functional IGHV3-20 IGH V TRUE 478 -Homo sapiens functional IGHV3-21 IGH V TRUE 479 -Homo sapiens non-functional IGHV3-22 IGH V FALSE 480 -Homo sapiens functional IGHV3-23 IGH V TRUE 481 -Homo sapiens functional IGHV3-23D IGH V TRUE 482 -Homo sapiens non-functional IGHV3-25 IGH V FALSE 483 -Homo sapiens non-functional IGHV3-29 IGH V FALSE 484 -Homo sapiens functional IGHV3-30 IGH V TRUE 485 -Homo sapiens non-functional IGHV3-30-2 IGH V FALSE 486 -Homo sapiens functional IGHV3-30-3 IGH V TRUE 487 -Homo sapiens functional IGHV3-30-5 IGH V TRUE 488 -Homo sapiens non-functional IGHV3-32 IGH V FALSE 489 -Homo sapiens functional IGHV3-33 IGH V TRUE 490 -Homo sapiens non-functional IGHV3-33-2 IGH V FALSE 491 -Homo sapiens non-functional IGHV3-35 IGH V FALSE 492 -Homo sapiens non-functional IGHV3-36 IGH V FALSE 493 -Homo sapiens non-functional IGHV3-37 IGH V FALSE 494 -Homo sapiens non-functional IGHV3-38 IGH V FALSE 495 -Homo sapiens non-functional IGHV3-38-3 IGH V FALSE 496 -Homo sapiens non-functional IGHV3-41 IGH V FALSE 497 -Homo sapiens non-functional IGHV3-42 IGH V FALSE 498 -Homo sapiens non-functional IGHV3-42D IGH V FALSE 499 -Homo sapiens functional IGHV3-43 IGH V TRUE 500 -Homo sapiens functional IGHV3-43D IGH V TRUE 501 -Homo sapiens non-functional IGHV3-47 IGH V FALSE 502 -Homo sapiens functional IGHV3-48 IGH V TRUE 503 -Homo sapiens functional 
IGHV3-49 IGH V TRUE 504 -Homo sapiens non-functional IGHV3-50 IGH V FALSE 505 -Homo sapiens non-functional IGHV3-52 IGH V FALSE 506 -Homo sapiens functional IGHV3-53 IGH V TRUE 507 -Homo sapiens non-functional IGHV3-54 IGH V FALSE 508 -Homo sapiens non-functional IGHV3-57 IGH V FALSE 509 -Homo sapiens non-functional IGHV3-6 IGH V FALSE 510 -Homo sapiens non-functional IGHV3-60 IGH V FALSE 511 -Homo sapiens non-functional IGHV3-62 IGH V FALSE 512 -Homo sapiens non-functional IGHV3-63 IGH V FALSE 513 -Homo sapiens functional IGHV3-64 IGH V TRUE 514 -Homo sapiens functional IGHV3-64D IGH V TRUE 515 -Homo sapiens non-functional IGHV3-65 IGH V FALSE 516 -Homo sapiens functional IGHV3-66 IGH V TRUE 517 -Homo sapiens non-functional IGHV3-69-1 IGH V FALSE 518 -Homo sapiens functional IGHV3-7 IGH V TRUE 519 -Homo sapiens non-functional IGHV3-71 IGH V FALSE 520 -Homo sapiens functional IGHV3-72 IGH V TRUE 521 -Homo sapiens functional IGHV3-73 IGH V TRUE 522 -Homo sapiens functional IGHV3-74 IGH V TRUE 523 -Homo sapiens non-functional IGHV3-75 IGH V FALSE 524 -Homo sapiens non-functional IGHV3-76 IGH V FALSE 525 -Homo sapiens non-functional IGHV3-79 IGH V FALSE 526 -Homo sapiens functional IGHV3-9 IGH V TRUE 527 -Homo sapiens functional IGHV3-NL1 IGH V TRUE 528 -Homo sapiens non-functional IGHV3/OR15-7 IGH V FALSE 529 -Homo sapiens non-functional IGHV3/OR16-10 IGH V FALSE 530 -Homo sapiens non-functional IGHV3/OR16-11 IGH V FALSE 531 -Homo sapiens non-functional IGHV3/OR16-12 IGH V FALSE 532 -Homo sapiens non-functional IGHV3/OR16-13 IGH V FALSE 533 -Homo sapiens non-functional IGHV3/OR16-14 IGH V FALSE 534 -Homo sapiens non-functional IGHV3/OR16-15 IGH V FALSE 535 -Homo sapiens non-functional IGHV3/OR16-16 IGH V FALSE 536 -Homo sapiens non-functional IGHV3/OR16-6 IGH V FALSE 537 -Homo sapiens non-functional IGHV3/OR16-7 IGH V FALSE 538 -Homo sapiens non-functional IGHV3/OR16-8 IGH V FALSE 539 -Homo sapiens non-functional IGHV3/OR16-9 IGH V FALSE 540 -Homo sapiens functional 
IGHV4-28 IGH V TRUE 541 -Homo sapiens functional IGHV4-30-1 IGH V TRUE 542 -Homo sapiens functional IGHV4-30-2 IGH V TRUE 543 -Homo sapiens functional IGHV4-30-4 IGH V TRUE 544 -Homo sapiens functional IGHV4-31 IGH V TRUE 545 -Homo sapiens functional IGHV4-34 IGH V TRUE 546 -Homo sapiens functional IGHV4-38-2 IGH V TRUE 547 -Homo sapiens functional IGHV4-39 IGH V TRUE 548 -Homo sapiens functional IGHV4-4 IGH V TRUE 549 -Homo sapiens non-functional IGHV4-55 IGH V FALSE 550 -Homo sapiens functional IGHV4-59 IGH V TRUE 551 -Homo sapiens functional IGHV4-61 IGH V TRUE 552 -Homo sapiens non-functional IGHV4-80 IGH V FALSE 553 -Homo sapiens non-functional IGHV4/OR15-8 IGH V FALSE 554 -Homo sapiens functional IGHV5-10-1 IGH V TRUE 555 -Homo sapiens functional IGHV5-51 IGH V TRUE 556 -Homo sapiens non-functional IGHV5-78 IGH V FALSE 557 -Homo sapiens functional IGHV6-1 IGH V TRUE 558 -Homo sapiens non-functional IGHV7-27 IGH V FALSE 559 -Homo sapiens non-functional IGHV7-34-1 IGH V FALSE 560 -Homo sapiens functional IGHV7-4-1 IGH V TRUE 561 -Homo sapiens non-functional IGHV7-40 IGH V FALSE 562 -Homo sapiens non-functional IGHV7-40D IGH V FALSE 563 -Homo sapiens non-functional IGHV7-56 IGH V FALSE 564 -Homo sapiens non-functional IGHV7-77 IGH V FALSE 565 -Homo sapiens non-functional IGHV7-81 IGH V FALSE 566 -Homo sapiens non-functional IGHV7-NL1 IGH V FALSE 567 -Homo sapiens functional IGKJ1 IGK J TRUE 568 -Homo sapiens functional IGKJ2 IGK J TRUE 569 -Homo sapiens functional IGKJ3 IGK J TRUE 570 -Homo sapiens functional IGKJ4 IGK J TRUE 571 -Homo sapiens functional IGKJ5 IGK J TRUE 572 -Homo sapiens functional IGKV1-12 IGK V TRUE 573 -Homo sapiens functional IGKV1-13 IGK V TRUE 574 -Homo sapiens functional IGKV1-16 IGK V TRUE 575 -Homo sapiens functional IGKV1-17 IGK V TRUE 576 -Homo sapiens non-functional IGKV1-22 IGK V FALSE 577 -Homo sapiens functional IGKV1-27 IGK V TRUE 578 -Homo sapiens non-functional IGKV1-32 IGK V FALSE 579 -Homo sapiens functional IGKV1-33 IGK V 
TRUE 580 -Homo sapiens non-functional IGKV1-35 IGK V FALSE 581 -Homo sapiens non-functional IGKV1-37 IGK V FALSE 582 -Homo sapiens functional IGKV1-39 IGK V TRUE 583 -Homo sapiens functional IGKV1-5 IGK V TRUE 584 -Homo sapiens functional IGKV1-6 IGK V TRUE 585 -Homo sapiens functional IGKV1-8 IGK V TRUE 586 -Homo sapiens functional IGKV1-9 IGK V TRUE 587 -Homo sapiens functional IGKV1-NL1 IGK V TRUE 588 -Homo sapiens non-functional IGKV1/OR-2 IGK V FALSE 589 -Homo sapiens non-functional IGKV1/OR-3 IGK V FALSE 590 -Homo sapiens non-functional IGKV1/OR-4 IGK V FALSE 591 -Homo sapiens non-functional IGKV1/OR1-1 IGK V FALSE 592 -Homo sapiens non-functional IGKV1/OR10-1 IGK V FALSE 593 -Homo sapiens non-functional IGKV1/OR15-118 IGK V FALSE 594 -Homo sapiens non-functional IGKV1/OR2-0 IGK V FALSE 595 -Homo sapiens non-functional IGKV1/OR2-1 IGK V FALSE 596 -Homo sapiens non-functional IGKV1/OR2-108 IGK V FALSE 597 -Homo sapiens non-functional IGKV1/OR2-11 IGK V FALSE 598 -Homo sapiens non-functional IGKV1/OR2-118 IGK V FALSE 599 -Homo sapiens non-functional IGKV1/OR2-2 IGK V FALSE 600 -Homo sapiens non-functional IGKV1/OR2-3 IGK V FALSE 601 -Homo sapiens non-functional IGKV1/OR2-6 IGK V FALSE 602 -Homo sapiens non-functional IGKV1/OR2-9 IGK V FALSE 603 -Homo sapiens non-functional IGKV1/OR22-1 IGK V FALSE 604 -Homo sapiens non-functional IGKV1/OR22-5 IGK V FALSE 605 -Homo sapiens non-functional IGKV1/OR9-1 IGK V FALSE 606 -Homo sapiens non-functional IGKV1/OR9-2 IGK V FALSE 607 -Homo sapiens non-functional IGKV1/ORY-1 IGK V FALSE 608 -Homo sapiens functional IGKV1D-12 IGK V TRUE 609 -Homo sapiens functional IGKV1D-13 IGK V TRUE 610 -Homo sapiens functional IGKV1D-16 IGK V TRUE 611 -Homo sapiens functional IGKV1D-17 IGK V TRUE 612 -Homo sapiens non-functional IGKV1D-22 IGK V FALSE 613 -Homo sapiens non-functional IGKV1D-27 IGK V FALSE 614 -Homo sapiens non-functional IGKV1D-32 IGK V FALSE 615 -Homo sapiens functional IGKV1D-33 IGK V TRUE 616 -Homo sapiens non-functional 
IGKV1D-35 IGK V FALSE 617 -Homo sapiens non-functional IGKV1D-37 IGK V FALSE 618 -Homo sapiens functional IGKV1D-39 IGK V TRUE 619 -Homo sapiens non-functional IGKV1D-42 IGK V FALSE 620 -Homo sapiens functional IGKV1D-43 IGK V TRUE 621 -Homo sapiens functional IGKV1D-8 IGK V TRUE 622 -Homo sapiens non-functional IGKV2-10 IGK V FALSE 623 -Homo sapiens non-functional IGKV2-14 IGK V FALSE 624 -Homo sapiens non-functional IGKV2-18 IGK V FALSE 625 -Homo sapiens non-functional IGKV2-19 IGK V FALSE 626 -Homo sapiens non-functional IGKV2-23 IGK V FALSE 627 -Homo sapiens functional IGKV2-24 IGK V TRUE 628 -Homo sapiens non-functional IGKV2-26 IGK V FALSE 629 -Homo sapiens functional IGKV2-28 IGK V TRUE 630 -Homo sapiens functional IGKV2-29 IGK V TRUE 631 -Homo sapiens functional IGKV2-30 IGK V TRUE 632 -Homo sapiens non-functional IGKV2-36 IGK V FALSE 633 -Homo sapiens non-functional IGKV2-38 IGK V FALSE 634 -Homo sapiens non-functional IGKV2-4 IGK V FALSE 635 -Homo sapiens functional IGKV2-40 IGK V TRUE 636 -Homo sapiens non-functional IGKV2/OR2-1 IGK V FALSE 637 -Homo sapiens non-functional IGKV2/OR2-10 IGK V FALSE 638 -Homo sapiens non-functional IGKV2/OR2-2 IGK V FALSE 639 -Homo sapiens non-functional IGKV2/OR2-4 IGK V FALSE 640 -Homo sapiens non-functional IGKV2/OR2-7 IGK V FALSE 641 -Homo sapiens non-functional IGKV2/OR2-7D IGK V FALSE 642 -Homo sapiens non-functional IGKV2/OR2-8 IGK V FALSE 643 -Homo sapiens non-functional IGKV2/OR22-3 IGK V FALSE 644 -Homo sapiens non-functional IGKV2/OR22-4 IGK V FALSE 645 -Homo sapiens non-functional IGKV2D-10 IGK V FALSE 646 -Homo sapiens non-functional IGKV2D-14 IGK V FALSE 647 -Homo sapiens non-functional IGKV2D-18 IGK V FALSE 648 -Homo sapiens non-functional IGKV2D-19 IGK V FALSE 649 -Homo sapiens non-functional IGKV2D-23 IGK V FALSE 650 -Homo sapiens non-functional IGKV2D-24 IGK V FALSE 651 -Homo sapiens functional IGKV2D-26 IGK V TRUE 652 -Homo sapiens functional IGKV2D-28 IGK V TRUE 653 -Homo sapiens functional IGKV2D-29 
IGK V TRUE 654 -Homo sapiens functional IGKV2D-30 IGK V TRUE 655 -Homo sapiens non-functional IGKV2D-36 IGK V FALSE 656 -Homo sapiens non-functional IGKV2D-38 IGK V FALSE 657 -Homo sapiens functional IGKV2D-40 IGK V TRUE 658 -Homo sapiens functional IGKV3-11 IGK V TRUE 659 -Homo sapiens functional IGKV3-15 IGK V TRUE 660 -Homo sapiens functional IGKV3-20 IGK V TRUE 661 -Homo sapiens non-functional IGKV3-25 IGK V FALSE 662 -Homo sapiens non-functional IGKV3-31 IGK V FALSE 663 -Homo sapiens non-functional IGKV3-34 IGK V FALSE 664 -Homo sapiens non-functional IGKV3-7 IGK V FALSE 665 -Homo sapiens functional IGKV3-NL1 IGK V TRUE 666 -Homo sapiens functional IGKV3-NL2 IGK V TRUE 667 -Homo sapiens functional IGKV3-NL3 IGK V TRUE 668 -Homo sapiens functional IGKV3-NL4 IGK V TRUE 669 -Homo sapiens functional IGKV3-NL5 IGK V TRUE 670 -Homo sapiens non-functional IGKV3/OR2-268 IGK V FALSE 671 -Homo sapiens non-functional IGKV3/OR2-5 IGK V FALSE 672 -Homo sapiens non-functional IGKV3/OR22-2 IGK V FALSE 673 -Homo sapiens functional IGKV3D-11 IGK V TRUE 674 -Homo sapiens functional IGKV3D-15 IGK V TRUE 675 -Homo sapiens functional IGKV3D-20 IGK V TRUE 676 -Homo sapiens non-functional IGKV3D-25 IGK V FALSE 677 -Homo sapiens non-functional IGKV3D-31 IGK V FALSE 678 -Homo sapiens non-functional IGKV3D-34 IGK V FALSE 679 -Homo sapiens functional IGKV3D-7 IGK V TRUE 680 -Homo sapiens functional IGKV4-1 IGK V TRUE 681 -Homo sapiens functional IGKV5-2 IGK V TRUE 682 -Homo sapiens non-functional IGKV6-21 IGK V FALSE 683 -Homo sapiens non-functional IGKV6D-21 IGK V FALSE 684 -Homo sapiens non-functional IGKV6D-41 IGK V FALSE 685 -Homo sapiens non-functional IGKV7-3 IGK V FALSE 686 -Homo sapiens non-functional IGLJ-C/OR18 IGL J FALSE 687 -Homo sapiens functional IGLJ1 IGL J TRUE 688 -Homo sapiens functional IGLJ2 IGL J TRUE 689 -Homo sapiens non-functional IGLJ2A IGL J FALSE 690 -Homo sapiens non-functional IGLJ2B IGL J FALSE 691 -Homo sapiens non-functional IGLJ2C IGL J FALSE 692 -Homo 
sapiens non-functional IGLJ2D IGL J FALSE 693 -Homo sapiens functional IGLJ3 IGL J TRUE 694 -Homo sapiens non-functional IGLJ4 IGL J FALSE 695 -Homo sapiens non-functional IGLJ5 IGL J FALSE 696 -Homo sapiens functional IGLJ6 IGL J TRUE 697 -Homo sapiens functional IGLJ7 IGL J TRUE 698 -Homo sapiens non-functional IGLV(I)-20 IGL V FALSE 699 -Homo sapiens non-functional IGLV(I)-38 IGL V FALSE 700 -Homo sapiens non-functional IGLV(I)-42 IGL V FALSE 701 -Homo sapiens non-functional IGLV(I)-56 IGL V FALSE 702 -Homo sapiens non-functional IGLV(I)-63 IGL V FALSE 703 -Homo sapiens non-functional IGLV(I)-68 IGL V FALSE 704 -Homo sapiens non-functional IGLV(I)-70 IGL V FALSE 705 -Homo sapiens non-functional IGLV(IV)-53 IGL V FALSE 706 -Homo sapiens non-functional IGLV(IV)-59 IGL V FALSE 707 -Homo sapiens non-functional IGLV(IV)-64 IGL V FALSE 708 -Homo sapiens non-functional IGLV(IV)-65 IGL V FALSE 709 -Homo sapiens non-functional IGLV(IV)-66-1 IGL V FALSE 710 -Homo sapiens non-functional IGLV(IV)/OR22-1 IGL V FALSE 711 -Homo sapiens non-functional IGLV(IV)/OR22-2 IGL V FALSE 712 -Homo sapiens non-functional IGLV(V)-58 IGL V FALSE 713 -Homo sapiens non-functional IGLV(V)-66 IGL V FALSE 714 -Homo sapiens non-functional IGLV(VI)-22-1 IGL V FALSE 715 -Homo sapiens non-functional IGLV(VI)-25-1 IGL V FALSE 716 -Homo sapiens non-functional IGLV(VII)-41-1 IGL V FALSE 717 -Homo sapiens non-functional IGLV/OR8-2 IGL V FALSE 718 -Homo sapiens functional IGLV1-36 IGL V TRUE 719 -Homo sapiens functional IGLV1-40 IGL V TRUE 720 -Homo sapiens non-functional IGLV1-41 IGL V FALSE 721 -Homo sapiens functional IGLV1-44 IGL V TRUE 722 -Homo sapiens functional IGLV1-47 IGL V TRUE 723 -Homo sapiens non-functional IGLV1-50 IGL V FALSE 724 -Homo sapiens functional IGLV1-51 IGL V TRUE 725 -Homo sapiens non-functional IGLV1-62 IGL V FALSE 726 -Homo sapiens functional IGLV10-54 IGL V TRUE 727 -Homo sapiens non-functional IGLV10-67 IGL V FALSE 728 -Homo sapiens non-functional IGLV11-55 IGL V FALSE 729 
-Homo sapiens functional IGLV2-11 IGL V TRUE 730 -Homo sapiens functional IGLV2-14 IGL V TRUE 731 -Homo sapiens functional IGLV2-18 IGL V TRUE 732 -Homo sapiens functional IGLV2-23 IGL V TRUE 733 -Homo sapiens non-functional IGLV2-28 IGL V FALSE 734 -Homo sapiens non-functional IGLV2-33 IGL V FALSE 735 -Homo sapiens non-functional IGLV2-34 IGL V FALSE 736 -Homo sapiens non-functional IGLV2-5 IGL V FALSE 737 -Homo sapiens functional IGLV2-8 IGL V TRUE 738 -Homo sapiens non-functional IGLV2-NL1 IGL V FALSE 739 -Homo sapiens functional IGLV3-1 IGL V TRUE 740 -Homo sapiens functional IGLV3-10 IGL V TRUE 741 -Homo sapiens functional IGLV3-12 IGL V TRUE 742 -Homo sapiens non-functional IGLV3-13 IGL V FALSE 743 -Homo sapiens non-functional IGLV3-15 IGL V FALSE 744 -Homo sapiens functional IGLV3-16 IGL V TRUE 745 -Homo sapiens non-functional IGLV3-17 IGL V FALSE 746 -Homo sapiens functional IGLV3-19 IGL V TRUE 747 -Homo sapiens non-functional IGLV3-2 IGL V FALSE 748 -Homo sapiens functional IGLV3-21 IGL V TRUE 749 -Homo sapiens functional IGLV3-22 IGL V TRUE 750 -Homo sapiens non-functional IGLV3-24 IGL V FALSE 751 -Homo sapiens functional IGLV3-25 IGL V TRUE 752 -Homo sapiens non-functional IGLV3-26 IGL V FALSE 753 -Homo sapiens functional IGLV3-27 IGL V TRUE 754 -Homo sapiens non-functional IGLV3-29 IGL V FALSE 755 -Homo sapiens non-functional IGLV3-30 IGL V FALSE 756 -Homo sapiens non-functional IGLV3-31 IGL V FALSE 757 -Homo sapiens non-functional IGLV3-32 IGL V FALSE 758 -Homo sapiens non-functional IGLV3-4 IGL V FALSE 759 -Homo sapiens non-functional IGLV3-6 IGL V FALSE 760 -Homo sapiens non-functional IGLV3-7 IGL V FALSE 761 -Homo sapiens functional IGLV3-9 IGL V TRUE 762 -Homo sapiens functional IGLV4-3 IGL V TRUE 763 -Homo sapiens functional IGLV4-60 IGL V TRUE 764 -Homo sapiens functional IGLV4-69 IGL V TRUE 765 -Homo sapiens functional IGLV5-37 IGL V TRUE 766 -Homo sapiens functional IGLV5-39 IGL V TRUE 767 -Homo sapiens functional IGLV5-45 IGL V TRUE 768 -Homo 
sapiens non-functional IGLV5-48 IGL V FALSE 769 -Homo sapiens functional IGLV5-52 IGL V TRUE 770 -Homo sapiens functional IGLV6-57 IGL V TRUE 771 -Homo sapiens non-functional IGLV7-35 IGL V FALSE 772 -Homo sapiens functional IGLV7-43 IGL V TRUE 773 -Homo sapiens functional IGLV7-46 IGL V TRUE 774 -Homo sapiens functional IGLV8-61 IGL V TRUE 775 -Homo sapiens non-functional IGLV8/OR8-1 IGL V FALSE 776 -Homo sapiens functional IGLV9-49 IGL V TRUE 777 -Homo sapiens non-functional TRAJ1 TRA J FALSE 778 -Homo sapiens functional TRAJ10 TRA J TRUE 779 -Homo sapiens functional TRAJ11 TRA J TRUE 780 -Homo sapiens functional TRAJ12 TRA J TRUE 781 -Homo sapiens functional TRAJ13 TRA J TRUE 782 -Homo sapiens functional TRAJ14 TRA J TRUE 783 -Homo sapiens functional TRAJ15 TRA J TRUE 784 -Homo sapiens functional TRAJ16 TRA J TRUE 785 -Homo sapiens functional TRAJ17 TRA J TRUE 786 -Homo sapiens functional TRAJ18 TRA J TRUE 787 -Homo sapiens non-functional TRAJ19 TRA J FALSE 788 -Homo sapiens non-functional TRAJ2 TRA J FALSE 789 -Homo sapiens functional TRAJ20 TRA J TRUE 790 -Homo sapiens functional TRAJ21 TRA J TRUE 791 -Homo sapiens functional TRAJ22 TRA J TRUE 792 -Homo sapiens functional TRAJ23 TRA J TRUE 793 -Homo sapiens functional TRAJ24 TRA J TRUE 794 -Homo sapiens non-functional TRAJ25 TRA J FALSE 795 -Homo sapiens functional TRAJ26 TRA J TRUE 796 -Homo sapiens functional TRAJ27 TRA J TRUE 797 -Homo sapiens functional TRAJ28 TRA J TRUE 798 -Homo sapiens functional TRAJ29 TRA J TRUE 799 -Homo sapiens functional TRAJ3 TRA J TRUE 800 -Homo sapiens functional TRAJ30 TRA J TRUE 801 -Homo sapiens functional TRAJ31 TRA J TRUE 802 -Homo sapiens functional TRAJ32 TRA J TRUE 803 -Homo sapiens functional TRAJ33 TRA J TRUE 804 -Homo sapiens functional TRAJ34 TRA J TRUE 805 -Homo sapiens non-functional TRAJ35 TRA J FALSE 806 -Homo sapiens functional TRAJ36 TRA J TRUE 807 -Homo sapiens functional TRAJ37 TRA J TRUE 808 -Homo sapiens functional TRAJ38 TRA J TRUE 809 -Homo sapiens 
functional TRAJ39 TRA J TRUE 810 -Homo sapiens functional TRAJ4 TRA J TRUE 811 -Homo sapiens functional TRAJ40 TRA J TRUE 812 -Homo sapiens functional TRAJ41 TRA J TRUE 813 -Homo sapiens functional TRAJ42 TRA J TRUE 814 -Homo sapiens functional TRAJ43 TRA J TRUE 815 -Homo sapiens functional TRAJ44 TRA J TRUE 816 -Homo sapiens functional TRAJ45 TRA J TRUE 817 -Homo sapiens functional TRAJ46 TRA J TRUE 818 -Homo sapiens functional TRAJ47 TRA J TRUE 819 -Homo sapiens functional TRAJ48 TRA J TRUE 820 -Homo sapiens functional TRAJ49 TRA J TRUE 821 -Homo sapiens functional TRAJ5 TRA J TRUE 822 -Homo sapiens functional TRAJ50 TRA J TRUE 823 -Homo sapiens non-functional TRAJ51 TRA J FALSE 824 -Homo sapiens functional TRAJ52 TRA J TRUE 825 -Homo sapiens functional TRAJ53 TRA J TRUE 826 -Homo sapiens functional TRAJ54 TRA J TRUE 827 -Homo sapiens non-functional TRAJ55 TRA J FALSE 828 -Homo sapiens functional TRAJ56 TRA J TRUE 829 -Homo sapiens functional TRAJ57 TRA J TRUE 830 -Homo sapiens non-functional TRAJ58 TRA J FALSE 831 -Homo sapiens non-functional TRAJ59 TRA J FALSE 832 -Homo sapiens functional TRAJ6 TRA J TRUE 833 -Homo sapiens non-functional TRAJ60 TRA J FALSE 834 -Homo sapiens non-functional TRAJ61 TRA J FALSE 835 -Homo sapiens functional TRAJ7 TRA J TRUE 836 -Homo sapiens functional TRAJ8 TRA J TRUE 837 -Homo sapiens functional TRAJ9 TRA J TRUE 838 -Homo sapiens functional TRAV1-1 TRA V TRUE 839 -Homo sapiens functional TRAV1-2 TRA V TRUE 840 -Homo sapiens functional TRAV10 TRA V TRUE 841 -Homo sapiens non-functional TRAV11 TRA V FALSE 842 -Homo sapiens functional TRAV12-1 TRA V TRUE 843 -Homo sapiens functional TRAV12-2 TRA V TRUE 844 -Homo sapiens functional TRAV12-3 TRA V TRUE 845 -Homo sapiens functional TRAV13-1 TRA V TRUE 846 -Homo sapiens functional TRAV13-2 TRA V TRUE 847 -Homo sapiens functional TRAV14/DV4 TRA V TRUE 848 -Homo sapiens non-functional TRAV15 TRA V FALSE 849 -Homo sapiens functional TRAV16 TRA V TRUE 850 -Homo sapiens functional TRAV17 TRA 
V TRUE 851 -Homo sapiens functional TRAV18 TRA V TRUE 852 -Homo sapiens functional TRAV19 TRA V TRUE 853 -Homo sapiens functional TRAV2 TRA V TRUE 854 -Homo sapiens functional TRAV20 TRA V TRUE 855 -Homo sapiens functional TRAV21 TRA V TRUE 856 -Homo sapiens functional TRAV22 TRA V TRUE 857 -Homo sapiens functional TRAV23/DV6 TRA V TRUE 858 -Homo sapiens functional TRAV24 TRA V TRUE 859 -Homo sapiens functional TRAV25 TRA V TRUE 860 -Homo sapiens functional TRAV26-1 TRA V TRUE 861 -Homo sapiens functional TRAV26-2 TRA V TRUE 862 -Homo sapiens functional TRAV27 TRA V TRUE 863 -Homo sapiens non-functional TRAV28 TRA V FALSE 864 -Homo sapiens functional TRAV29/DV5 TRA V TRUE 865 -Homo sapiens functional TRAV3 TRA V TRUE 866 -Homo sapiens functional TRAV30 TRA V TRUE 867 -Homo sapiens non-functional TRAV31 TRA V FALSE 868 -Homo sapiens non-functional TRAV32 TRA V FALSE 869 -Homo sapiens non-functional TRAV33 TRA V FALSE 870 -Homo sapiens functional TRAV34 TRA V TRUE 871 -Homo sapiens functional TRAV35 TRA V TRUE 872 -Homo sapiens functional TRAV36/DV7 TRA V TRUE 873 -Homo sapiens non-functional TRAV37 TRA V FALSE 874 -Homo sapiens functional TRAV38-1 TRA V TRUE 875 -Homo sapiens functional TRAV38-2/DV8 TRA V TRUE 876 -Homo sapiens functional TRAV39 TRA V TRUE 877 -Homo sapiens functional TRAV4 TRA V TRUE 878 -Homo sapiens functional TRAV40 TRA V TRUE 879 -Homo sapiens functional TRAV41 TRA V TRUE 880 -Homo sapiens functional TRAV5 TRA V TRUE 881 -Homo sapiens functional TRAV6 TRA V TRUE 882 -Homo sapiens functional TRAV7 TRA V TRUE 883 -Homo sapiens functional TRAV8-1 TRA V TRUE 884 -Homo sapiens functional TRAV8-2 TRA V TRUE 885 -Homo sapiens functional TRAV8-3 TRA V TRUE 886 -Homo sapiens functional TRAV8-4 TRA V TRUE 887 -Homo sapiens non-functional TRAV8-5 TRA V FALSE 888 -Homo sapiens functional TRAV8-6 TRA V TRUE 889 -Homo sapiens non-functional TRAV8-7 TRA V FALSE 890 -Homo sapiens functional TRAV9-1 TRA V TRUE 891 -Homo sapiens functional TRAV9-2 TRA V TRUE 892 
-Homo sapiens functional TRBD1 TRB D TRUE 893 -Homo sapiens functional TRBD2 TRB D TRUE 894 -Homo sapiens functional TRBJ1-1 TRB J TRUE 895 -Homo sapiens functional TRBJ1-2 TRB J TRUE 896 -Homo sapiens functional TRBJ1-3 TRB J TRUE 897 -Homo sapiens functional TRBJ1-4 TRB J TRUE 898 -Homo sapiens functional TRBJ1-5 TRB J TRUE 899 -Homo sapiens functional TRBJ1-6 TRB J TRUE 900 -Homo sapiens functional TRBJ2-1 TRB J TRUE 901 -Homo sapiens functional TRBJ2-2 TRB J TRUE 902 -Homo sapiens non-functional TRBJ2-2P TRB J FALSE 903 -Homo sapiens functional TRBJ2-3 TRB J TRUE 904 -Homo sapiens functional TRBJ2-4 TRB J TRUE 905 -Homo sapiens functional TRBJ2-5 TRB J TRUE 906 -Homo sapiens functional TRBJ2-6 TRB J TRUE 907 -Homo sapiens functional TRBJ2-7 TRB J TRUE 908 -Homo sapiens non-functional TRBV1 TRB V FALSE 909 -Homo sapiens functional TRBV10-1 TRB V TRUE 910 -Homo sapiens functional TRBV10-2 TRB V TRUE 911 -Homo sapiens functional TRBV10-3 TRB V TRUE 912 -Homo sapiens functional TRBV11-1 TRB V TRUE 913 -Homo sapiens functional TRBV11-2 TRB V TRUE 914 -Homo sapiens functional TRBV11-3 TRB V TRUE 915 -Homo sapiens non-functional TRBV12-1 TRB V FALSE 916 -Homo sapiens non-functional TRBV12-2 TRB V FALSE 917 -Homo sapiens functional TRBV12-3 TRB V TRUE 918 -Homo sapiens functional TRBV12-4 TRB V TRUE 919 -Homo sapiens functional TRBV12-5 TRB V TRUE 920 -Homo sapiens functional TRBV13 TRB V TRUE 921 -Homo sapiens functional TRBV14 TRB V TRUE 922 -Homo sapiens functional TRBV15 TRB V TRUE 923 -Homo sapiens functional TRBV16 TRB V TRUE 924 -Homo sapiens non-functional TRBV17 TRB V FALSE 925 -Homo sapiens functional TRBV18 TRB V TRUE 926 -Homo sapiens functional TRBV19 TRB V TRUE 927 -Homo sapiens functional TRBV2 TRB V TRUE 928 -Homo sapiens functional TRBV20-1 TRB V TRUE 929 -Homo sapiens non-functional TRBV20/OR9-2 TRB V FALSE 930 -Homo sapiens non-functional TRBV21-1 TRB V FALSE 931 -Homo sapiens non-functional TRBV21/OR9-2 TRB V FALSE 932 -Homo sapiens non-functional 
TRBV22-1 TRB V FALSE 933 -Homo sapiens non-functional TRBV22/OR9-2 TRB V FALSE 934 -Homo sapiens non-functional TRBV23-1 TRB V FALSE 935 -Homo sapiens non-functional TRBV23/OR9-2 TRB V FALSE 936 -Homo sapiens functional TRBV24-1 TRB V TRUE 937 -Homo sapiens non-functional TRBV24/OR9-2 TRB V FALSE 938 -Homo sapiens functional TRBV25-1 TRB V TRUE 939 -Homo sapiens non-functional TRBV25/OR9-2 TRB V FALSE 940 -Homo sapiens non-functional TRBV26 TRB V FALSE 941 -Homo sapiens non-functional TRBV26/OR9-2 TRB V FALSE 942 -Homo sapiens functional TRBV27 TRB V TRUE 943 -Homo sapiens functional TRBV28 TRB V TRUE 944 -Homo sapiens functional TRBV29-1 TRB V TRUE 945 -Homo sapiens non-functional TRBV29/OR9-2 TRB V FALSE 946 -Homo sapiens functional TRBV3-1 TRB V TRUE 947 -Homo sapiens non-functional TRBV3-2 TRB V FALSE 948 -Homo sapiens functional TRBV30 TRB V TRUE 949 -Homo sapiens functional TRBV4-1 TRB V TRUE 950 -Homo sapiens functional TRBV4-2 TRB V TRUE 951 -Homo sapiens functional TRBV4-3 TRB V TRUE 952 -Homo sapiens functional TRBV5-1 TRB V TRUE 953 -Homo sapiens non-functional TRBV5-2 TRB V FALSE 954 -Homo sapiens non-functional TRBV5-3 TRB V FALSE 955 -Homo sapiens functional TRBV5-4 TRB V TRUE 956 -Homo sapiens functional TRBV5-5 TRB V TRUE 957 -Homo sapiens functional TRBV5-6 TRB V TRUE 958 -Homo sapiens non-functional TRBV5-7 TRB V FALSE 959 -Homo sapiens functional TRBV5-8 TRB V TRUE 960 -Homo sapiens functional TRBV6-1 TRB V TRUE 961 -Homo sapiens functional TRBV6-2 TRB V TRUE 962 -Homo sapiens functional TRBV6-3 TRB V TRUE 963 -Homo sapiens functional TRBV6-4 TRB V TRUE 964 -Homo sapiens functional TRBV6-5 TRB V TRUE 965 -Homo sapiens functional TRBV6-6 TRB V TRUE 966 -Homo sapiens non-functional TRBV6-7 TRB V FALSE 967 -Homo sapiens functional TRBV6-8 TRB V TRUE 968 -Homo sapiens functional TRBV6-9 TRB V TRUE 969 -Homo sapiens non-functional TRBV7-1 TRB V FALSE 970 -Homo sapiens functional TRBV7-2 TRB V TRUE 971 -Homo sapiens functional TRBV7-3 TRB V TRUE 972 
-Homo sapiens functional TRBV7-4 TRB V TRUE 973 -Homo sapiens non-functional TRBV7-5 TRB V FALSE 974 -Homo sapiens functional TRBV7-6 TRB V TRUE 975 -Homo sapiens functional TRBV7-7 TRB V TRUE 976 -Homo sapiens functional TRBV7-8 TRB V TRUE 977 -Homo sapiens functional TRBV7-9 TRB V TRUE 978 -Homo sapiens non-functional TRBV8-1 TRB V FALSE 979 -Homo sapiens non-functional TRBV8-2 TRB V FALSE 980 -Homo sapiens functional TRBV9 TRB V TRUE 981 -Homo sapiens non-functional TRBVA TRB V FALSE 982 -Homo sapiens non-functional TRBVA/OR9-2 TRB V FALSE 983 -Homo sapiens non-functional TRBVB TRB V FALSE 984 -Homo sapiens non-functional TRBVC TRB V FALSE 985 -Homo sapiens functional TRDD1 TRD D TRUE 986 -Homo sapiens functional TRDD2 TRD D TRUE 987 -Homo sapiens functional TRDD3 TRD D TRUE 988 -Homo sapiens functional TRDJ1 TRD J TRUE 989 -Homo sapiens functional TRDJ2 TRD J TRUE 990 -Homo sapiens functional TRDJ3 TRD J TRUE 991 -Homo sapiens functional TRDJ4 TRD J TRUE 992 -Homo sapiens functional TRDV1 TRD V TRUE 993 -Homo sapiens functional TRDV2 TRD V TRUE 994 -Homo sapiens functional TRDV3 TRD V TRUE 995 -Homo sapiens functional TRGJ1 TRG J TRUE 996 -Homo sapiens functional TRGJ2 TRG J TRUE 997 -Homo sapiens functional TRGJP TRG J TRUE 998 -Homo sapiens functional TRGJP1 TRG J TRUE 999 -Homo sapiens functional TRGJP2 TRG J TRUE 1000 -Homo sapiens non-functional TRGV1 TRG V FALSE 1001 -Homo sapiens non-functional TRGV10 TRG V FALSE 1002 -Homo sapiens non-functional TRGV11 TRG V FALSE 1003 -Homo sapiens functional TRGV2 TRG V TRUE 1004 -Homo sapiens functional TRGV3 TRG V TRUE 1005 -Homo sapiens non-functional TRGV3P TRG V FALSE 1006 -Homo sapiens functional TRGV4 TRG V TRUE 1007 -Homo sapiens functional TRGV5 TRG V TRUE 1008 -Homo sapiens non-functional TRGV5P TRG V FALSE 1009 -Homo sapiens non-functional TRGV6 TRG V FALSE 1010 -Homo sapiens non-functional TRGV7 TRG V FALSE 1011 -Homo sapiens functional TRGV8 TRG V TRUE 1012 -Homo sapiens functional TRGV9 TRG V TRUE 1013 
-Homo sapiens non-functional TRGVA TRG V FALSE 1014 -Homo sapiens non-functional TRGVB TRG V FALSE 1015 -Macaca mulatta non-functional IGHD IGH D FALSE 1016 -Macaca mulatta functional IGHD1-1 IGH D TRUE 1017 -Macaca mulatta functional IGHD1-2 IGH D TRUE 1018 -Macaca mulatta functional IGHD1-3 IGH D TRUE 1019 -Macaca mulatta functional IGHD1-4 IGH D TRUE 1020 -Macaca mulatta non-functional IGHD1-5 IGH D FALSE 1021 -Macaca mulatta non-functional IGHD1-6 IGH D FALSE 1022 -Macaca mulatta non-functional IGHD1-7 IGH D FALSE 1023 -Macaca mulatta functional IGHD1-8 IGH D TRUE 1024 -Macaca mulatta functional IGHD2-1 IGH D TRUE 1025 -Macaca mulatta functional IGHD2-2 IGH D TRUE 1026 -Macaca mulatta functional IGHD2-3 IGH D TRUE 1027 -Macaca mulatta functional IGHD2-4 IGH D TRUE 1028 -Macaca mulatta functional IGHD2-5 IGH D TRUE 1029 -Macaca mulatta functional IGHD2-6 IGH D TRUE 1030 -Macaca mulatta non-functional IGHD3-1 IGH D FALSE 1031 -Macaca mulatta non-functional IGHD3-2 IGH D FALSE 1032 -Macaca mulatta functional IGHD3-3 IGH D TRUE 1033 -Macaca mulatta functional IGHD3-4 IGH D TRUE 1034 -Macaca mulatta non-functional IGHD4-1 IGH D FALSE 1035 -Macaca mulatta functional IGHD4-2 IGH D TRUE 1036 -Macaca mulatta functional IGHD4-3 IGH D TRUE 1037 -Macaca mulatta functional IGHD4-4 IGH D TRUE 1038 -Macaca mulatta non-functional IGHD5-1 IGH D FALSE 1039 -Macaca mulatta functional IGHD5-2 IGH D TRUE 1040 -Macaca mulatta non-functional IGHD5-3 IGH D FALSE 1041 -Macaca mulatta functional IGHD6-1 IGH D TRUE 1042 -Macaca mulatta functional IGHD6-2 IGH D TRUE 1043 -Macaca mulatta functional IGHD6-3 IGH D TRUE 1044 -Macaca mulatta functional IGHD6-4 IGH D TRUE 1045 -Macaca mulatta functional IGHD6-5 IGH D TRUE 1046 -Macaca mulatta functional IGHD6-6 IGH D TRUE 1047 -Macaca mulatta functional IGHD7-1 IGH D TRUE 1048 -Macaca mulatta functional IGHJ1 IGH J TRUE 1049 -Macaca mulatta non-functional IGHJ2 IGH J FALSE 1050 -Macaca mulatta functional IGHJ3 IGH J TRUE 1051 -Macaca mulatta 
functional IGHJ4 IGH J TRUE 1052 -Macaca mulatta functional IGHJ5-1 IGH J TRUE 1053 -Macaca mulatta functional IGHJ5-2 IGH J TRUE 1054 -Macaca mulatta functional IGHJ6 IGH J TRUE 1055 -Macaca mulatta non-functional IGHV(II)-1 IGH V FALSE 1056 -Macaca mulatta non-functional IGHV(II)-10 IGH V FALSE 1057 -Macaca mulatta non-functional IGHV(II)-11 IGH V FALSE 1058 -Macaca mulatta non-functional IGHV(II)-12 IGH V FALSE 1059 -Macaca mulatta non-functional IGHV(II)-2 IGH V FALSE 1060 -Macaca mulatta non-functional IGHV(II)-3 IGH V FALSE 1061 -Macaca mulatta non-functional IGHV(II)-4 IGH V FALSE 1062 -Macaca mulatta non-functional IGHV(II)-5 IGH V FALSE 1063 -Macaca mulatta non-functional IGHV(II)-6 IGH V FALSE 1064 -Macaca mulatta non-functional IGHV(II)-7 IGH V FALSE 1065 -Macaca mulatta non-functional IGHV(II)-8 IGH V FALSE 1066 -Macaca mulatta non-functional IGHV(II)-9 IGH V FALSE 1067 -Macaca mulatta non-functional IGHV(III)-1 IGH V FALSE 1068 -Macaca mulatta non-functional IGHV(III)-2 IGH V FALSE 1069 -Macaca mulatta non-functional IGHV(III)-3 IGH V FALSE 1070 -Macaca mulatta non-functional IGHV(III)-4 IGH V FALSE 1071 -Macaca mulatta functional IGHV1-1 IGH V TRUE 1072 -Macaca mulatta non-functional IGHV1-2 IGH V FALSE 1073 -Macaca mulatta functional IGHV2-1 IGH V TRUE 1074 -Macaca mulatta non-functional IGHV2-2 IGH V FALSE 1075 -Macaca mulatta non-functional IGHV3-1 IGH V FALSE 1076 -Macaca mulatta functional IGHV3-10 IGH V TRUE 1077 -Macaca mulatta functional IGHV3-11 IGH V TRUE 1078 -Macaca mulatta functional IGHV3-12 IGH V TRUE 1079 -Macaca mulatta functional IGHV3-13 IGH V TRUE 1080 -Macaca mulatta functional IGHV3-14 IGH V TRUE 1081 -Macaca mulatta non-functional IGHV3-15 IGH V FALSE 1082 -Macaca mulatta non-functional IGHV3-16 IGH V FALSE 1083 -Macaca mulatta non-functional IGHV3-17 IGH V FALSE 1084 -Macaca mulatta functional IGHV3-18 IGH V TRUE 1085 -Macaca mulatta non-functional IGHV3-19 IGH V FALSE 1086 -Macaca mulatta non-functional IGHV3-2 IGH V FALSE 
1087 -Macaca mulatta functional IGHV3-20 IGH V TRUE 1088 -Macaca mulatta functional IGHV3-21 IGH V TRUE 1089 -Macaca mulatta functional IGHV3-22 IGH V TRUE 1090 -Macaca mulatta non-functional IGHV3-23 IGH V FALSE 1091 -Macaca mulatta non-functional IGHV3-24 IGH V FALSE 1092 -Macaca mulatta non-functional IGHV3-3 IGH V FALSE 1093 -Macaca mulatta non-functional IGHV3-4 IGH V FALSE 1094 -Macaca mulatta functional IGHV3-5 IGH V TRUE 1095 -Macaca mulatta functional IGHV3-6 IGH V TRUE 1096 -Macaca mulatta functional IGHV3-7 IGH V TRUE 1097 -Macaca mulatta non-functional IGHV3-8 IGH V FALSE 1098 -Macaca mulatta functional IGHV3-9 IGH V TRUE 1099 -Macaca mulatta non-functional IGHV4-1 IGH V FALSE 1100 -Macaca mulatta functional IGHV4-2 IGH V TRUE 1101 -Macaca mulatta non-functional IGHV4-3 IGH V FALSE 1102 -Macaca mulatta non-functional IGHV4-4 IGH V FALSE 1103 -Macaca mulatta non-functional IGHV5-1 IGH V FALSE 1104 -Macaca mulatta functional IGHV5-2 IGH V TRUE 1105 -Macaca mulatta functional IGHV6-1 IGH V TRUE 1106 -Macaca mulatta functional IGHV7-1 IGH V TRUE 1107 -Macaca mulatta functional IGKJ1 IGK J TRUE 1108 -Macaca mulatta functional IGKJ2 IGK J TRUE 1109 -Macaca mulatta functional IGKJ3 IGK J TRUE 1110 -Macaca mulatta functional IGKJ4 IGK J TRUE 1111 -Macaca mulatta non-functional IGKJ5 IGK J FALSE 1112 -Macaca mulatta non-functional IGKV1-1 IGK V FALSE 1113 -Macaca mulatta functional IGKV1-10 IGK V TRUE 1114 -Macaca mulatta functional IGKV1-11 IGK V TRUE 1115 -Macaca mulatta non-functional IGKV1-12 IGK V FALSE 1116 -Macaca mulatta functional IGKV1-13 IGK V TRUE 1117 -Macaca mulatta functional IGKV1-14 IGK V TRUE 1118 -Macaca mulatta functional IGKV1-15 IGK V TRUE 1119 -Macaca mulatta non-functional IGKV1-16 IGK V FALSE 1120 -Macaca mulatta functional IGKV1-17 IGK V TRUE 1121 -Macaca mulatta functional IGKV1-18 IGK V TRUE 1122 -Macaca mulatta non-functional IGKV1-19 IGK V FALSE 1123 -Macaca mulatta functional IGKV1-2 IGK V TRUE 1124 -Macaca mulatta functional 
IGKV1-20 IGK V TRUE 1125 -Macaca mulatta functional IGKV1-21 IGK V TRUE 1126 -Macaca mulatta functional IGKV1-22 IGK V TRUE 1127 -Macaca mulatta functional IGKV1-23 IGK V TRUE 1128 -Macaca mulatta non-functional IGKV1-3 IGK V FALSE 1129 -Macaca mulatta non-functional IGKV1-4 IGK V FALSE 1130 -Macaca mulatta functional IGKV1-5 IGK V TRUE 1131 -Macaca mulatta non-functional IGKV1-6 IGK V FALSE 1132 -Macaca mulatta non-functional IGKV1-7 IGK V FALSE 1133 -Macaca mulatta functional IGKV1-8 IGK V TRUE 1134 -Macaca mulatta functional IGKV1-9 IGK V TRUE 1135 -Macaca mulatta functional IGKV1S1 IGK V TRUE 1136 -Macaca mulatta functional IGKV1S10 IGK V TRUE 1137 -Macaca mulatta functional IGKV1S11 IGK V TRUE 1138 -Macaca mulatta functional IGKV1S12 IGK V TRUE 1139 -Macaca mulatta functional IGKV1S13 IGK V TRUE 1140 -Macaca mulatta functional IGKV1S14 IGK V TRUE 1141 -Macaca mulatta functional IGKV1S15 IGK V TRUE 1142 -Macaca mulatta functional IGKV1S16 IGK V TRUE 1143 -Macaca mulatta functional IGKV1S17 IGK V TRUE 1144 -Macaca mulatta non-functional IGKV1S18 IGK V FALSE 1145 -Macaca mulatta functional IGKV1S19 IGK V TRUE 1146 -Macaca mulatta functional IGKV1S2 IGK V TRUE 1147 -Macaca mulatta functional IGKV1S20 IGK V TRUE 1148 -Macaca mulatta functional IGKV1S21 IGK V TRUE 1149 -Macaca mulatta functional IGKV1S22 IGK V TRUE 1150 -Macaca mulatta functional IGKV1S23 IGK V TRUE 1151 -Macaca mulatta functional IGKV1S24 IGK V TRUE 1152 -Macaca mulatta functional IGKV1S25 IGK V TRUE 1153 -Macaca mulatta functional IGKV1S26 IGK V TRUE 1154 -Macaca mulatta functional IGKV1S27 IGK V TRUE 1155 -Macaca mulatta functional IGKV1S28 IGK V TRUE 1156 -Macaca mulatta functional IGKV1S3 IGK V TRUE 1157 -Macaca mulatta functional IGKV1S4 IGK V TRUE 1158 -Macaca mulatta functional IGKV1S5 IGK V TRUE 1159 -Macaca mulatta functional IGKV1S6 IGK V TRUE 1160 -Macaca mulatta functional IGKV1S7 IGK V TRUE 1161 -Macaca mulatta functional IGKV1S8 IGK V TRUE 1162 -Macaca mulatta functional IGKV1S9 IGK V 
TRUE 1163 -Macaca mulatta functional IGKV2-1 IGK V TRUE 1164 -Macaca mulatta non-functional IGKV2-10 IGK V FALSE 1165 -Macaca mulatta non-functional IGKV2-11 IGK V FALSE 1166 -Macaca mulatta non-functional IGKV2-12 IGK V FALSE 1167 -Macaca mulatta non-functional IGKV2-13 IGK V FALSE 1168 -Macaca mulatta functional IGKV2-14 IGK V TRUE 1169 -Macaca mulatta non-functional IGKV2-15 IGK V FALSE 1170 -Macaca mulatta non-functional IGKV2-16 IGK V FALSE 1171 -Macaca mulatta non-functional IGKV2-2 IGK V FALSE 1172 -Macaca mulatta functional IGKV2-3 IGK V TRUE 1173 -Macaca mulatta non-functional IGKV2-4 IGK V FALSE 1174 -Macaca mulatta non-functional IGKV2-5 IGK V FALSE 1175 -Macaca mulatta non-functional IGKV2-6 IGK V FALSE 1176 -Macaca mulatta non-functional IGKV2-7 IGK V FALSE 1177 -Macaca mulatta non-functional IGKV2-8 IGK V FALSE 1178 -Macaca mulatta non-functional IGKV2-9 IGK V FALSE 1179 -Macaca mulatta functional IGKV2S1 IGK V TRUE 1180 -Macaca mulatta functional IGKV2S10 IGK V TRUE 1181 -Macaca mulatta functional IGKV2S11 IGK V TRUE 1182 -Macaca mulatta functional IGKV2S12 IGK V TRUE 1183 -Macaca mulatta functional IGKV2S13 IGK V TRUE 1184 -Macaca mulatta non-functional IGKV2S14 IGK V FALSE 1185 -Macaca mulatta functional IGKV2S15 IGK V TRUE 1186 -Macaca mulatta non-functional IGKV2S16 IGK V FALSE 1187 -Macaca mulatta functional IGKV2S17 IGK V TRUE 1188 -Macaca mulatta functional IGKV2S18 IGK V TRUE 1189 -Macaca mulatta functional IGKV2S19 IGK V TRUE 1190 -Macaca mulatta functional IGKV2S2 IGK V TRUE 1191 -Macaca mulatta functional IGKV2S20 IGK V TRUE 1192 -Macaca mulatta functional IGKV2S3 IGK V TRUE 1193 -Macaca mulatta functional IGKV2S4 IGK V TRUE 1194 -Macaca mulatta functional IGKV2S5 IGK V TRUE 1195 -Macaca mulatta non-functional IGKV2S6 IGK V FALSE 1196 -Macaca mulatta non-functional IGKV2S7 IGK V FALSE 1197 -Macaca mulatta functional IGKV2S8 IGK V TRUE 1198 -Macaca mulatta functional IGKV2S9 IGK V TRUE 1199 -Macaca mulatta functional IGKV3-1 IGK V TRUE 1200 
-Macaca mulatta non-functional IGKV3-10 IGK V FALSE 1201 -Macaca mulatta non-functional IGKV3-11 IGK V FALSE 1202 -Macaca mulatta functional IGKV3-2 IGK V TRUE 1203 -Macaca mulatta functional IGKV3-3 IGK V TRUE 1204 -Macaca mulatta non-functional IGKV3-4 IGK V FALSE 1205 -Macaca mulatta functional IGKV3-5 IGK V TRUE 1206 -Macaca mulatta non-functional IGKV3-6 IGK V FALSE 1207 -Macaca mulatta non-functional IGKV3-7 IGK V FALSE 1208 -Macaca mulatta non-functional IGKV3-8 IGK V FALSE 1209 -Macaca mulatta functional IGKV3-9 IGK V TRUE 1210 -Macaca mulatta functional IGKV3S1 IGK V TRUE 1211 -Macaca mulatta functional IGKV3S10 IGK V TRUE 1212 -Macaca mulatta functional IGKV3S11 IGK V TRUE 1213 -Macaca mulatta functional IGKV3S2 IGK V TRUE 1214 -Macaca mulatta functional IGKV3S3 IGK V TRUE 1215 -Macaca mulatta functional IGKV3S4 IGK V TRUE 1216 -Macaca mulatta functional IGKV3S5 IGK V TRUE 1217 -Macaca mulatta functional IGKV3S6 IGK V TRUE 1218 -Macaca mulatta functional IGKV3S7 IGK V TRUE 1219 -Macaca mulatta functional IGKV3S8 IGK V TRUE 1220 -Macaca mulatta functional IGKV3S9 IGK V TRUE 1221 -Macaca mulatta non-functional IGKV4-1 IGK V FALSE 1222 -Macaca mulatta non-functional IGKV4-2 IGK V FALSE 1223 -Macaca mulatta functional IGKV4-3 IGK V TRUE 1224 -Macaca mulatta functional IGKV5-1 IGK V TRUE 1225 -Macaca mulatta functional IGKV5-2 IGK V TRUE 1226 -Macaca mulatta functional IGKV6-1 IGK V TRUE 1227 -Macaca mulatta functional IGKV6-2 IGK V TRUE 1228 -Macaca mulatta non-functional IGKV6-3 IGK V FALSE 1229 -Macaca mulatta functional IGKV7-1 IGK V TRUE 1230 -Macaca mulatta functional IGLJ1 IGL J TRUE 1231 -Macaca mulatta functional IGLJ2 IGL J TRUE 1232 -Macaca mulatta functional IGLJ3 IGL J TRUE 1233 -Macaca mulatta non-functional IGLJ4 IGL J FALSE 1234 -Macaca mulatta functional IGLJ5 IGL J TRUE 1235 -Macaca mulatta functional IGLJ6 IGL J TRUE 1236 -Macaca mulatta non-functional IGLV(I)-1 IGL V FALSE 1237 -Macaca mulatta non-functional IGLV(I)-10 IGL V FALSE 1238 
-Macaca mulatta non-functional IGLV(I)-11 IGL V FALSE 1239 -Macaca mulatta non-functional IGLV(I)-12 IGL V FALSE 1240 -Macaca mulatta non-functional IGLV(I)-2 IGL V FALSE 1241 -Macaca mulatta non-functional IGLV(I)-3 IGL V FALSE 1242 -Macaca mulatta non-functional IGLV(I)-4 IGL V FALSE 1243 -Macaca mulatta non-functional IGLV(I)-5 IGL V FALSE 1244 -Macaca mulatta non-functional IGLV(I)-6 IGL V FALSE 1245 -Macaca mulatta non-functional IGLV(I)-7 IGL V FALSE 1246 -Macaca mulatta non-functional IGLV(I)-8 IGL V FALSE 1247 -Macaca mulatta non-functional IGLV(I)-9 IGL V FALSE 1248 -Macaca mulatta non-functional IGLV(II)-1 IGL V FALSE 1249 -Macaca mulatta non-functional IGLV(II)-2 IGL V FALSE 1250 -Macaca mulatta non-functional IGLV(III)-1 IGL V FALSE 1251 -Macaca mulatta non-functional IGLV(III)-2 IGL V FALSE 1252 -Macaca mulatta non-functional IGLV(III)-3 IGL V FALSE 1253 -Macaca mulatta non-functional IGLV(III)-4 IGL V FALSE 1254 -Macaca mulatta non-functional IGLV(IV)-1 IGL V FALSE 1255 -Macaca mulatta non-functional IGLV(IV)-2 IGL V FALSE 1256 -Macaca mulatta non-functional IGLV(IV)-3 IGL V FALSE 1257 -Macaca mulatta non-functional IGLV(V)-1 IGL V FALSE 1258 -Macaca mulatta non-functional IGLV(V)-2 IGL V FALSE 1259 -Macaca mulatta non-functional IGLV1-1 IGL V FALSE 1260 -Macaca mulatta functional IGLV1-10 IGL V TRUE 1261 -Macaca mulatta functional IGLV1-11 IGL V TRUE 1262 -Macaca mulatta functional IGLV1-12 IGL V TRUE 1263 -Macaca mulatta functional IGLV1-13 IGL V TRUE 1264 -Macaca mulatta functional IGLV1-14 IGL V TRUE 1265 -Macaca mulatta functional IGLV1-15 IGL V TRUE 1266 -Macaca mulatta non-functional IGLV1-2 IGL V FALSE 1267 -Macaca mulatta non-functional IGLV1-3 IGL V FALSE 1268 -Macaca mulatta non-functional IGLV1-4 IGL V FALSE 1269 -Macaca mulatta non-functional IGLV1-5 IGL V FALSE 1270 -Macaca mulatta functional IGLV1-6 IGL V TRUE 1271 -Macaca mulatta functional IGLV1-7 IGL V TRUE 1272 -Macaca mulatta functional IGLV1-8 IGL V TRUE 1273 -Macaca mulatta 
functional IGLV1-9 IGL V TRUE 1274 -Macaca mulatta functional IGLV10-1 IGL V TRUE 1275 -Macaca mulatta non-functional IGLV10-2 IGL V FALSE 1276 -Macaca mulatta functional IGLV11-1 IGL V TRUE 1277 -Macaca mulatta functional IGLV1S1 IGL V TRUE 1278 -Macaca mulatta functional IGLV1S2 IGL V TRUE 1279 -Macaca mulatta functional IGLV1S3 IGL V TRUE 1280 -Macaca mulatta functional IGLV1S4 IGL V TRUE 1281 -Macaca mulatta non-functional IGLV1S5 IGL V FALSE 1282 -Macaca mulatta functional IGLV1S6 IGL V TRUE 1283 -Macaca mulatta functional IGLV1S7 IGL V TRUE 1284 -Macaca mulatta non-functional IGLV2-1 IGL V FALSE 1285 -Macaca mulatta functional IGLV2-10 IGL V TRUE 1286 -Macaca mulatta non-functional IGLV2-11 IGL V FALSE 1287 -Macaca mulatta non-functional IGLV2-2 IGL V FALSE 1288 -Macaca mulatta functional IGLV2-3 IGL V TRUE 1289 -Macaca mulatta non-functional IGLV2-4 IGL V FALSE 1290 -Macaca mulatta non-functional IGLV2-5 IGL V FALSE 1291 -Macaca mulatta non-functional IGLV2-6 IGL V FALSE 1292 -Macaca mulatta functional IGLV2-7 IGL V TRUE 1293 -Macaca mulatta functional IGLV2-8 IGL V TRUE 1294 -Macaca mulatta functional IGLV2-9 IGL V TRUE 1295 -Macaca mulatta functional IGLV2S1 IGL V TRUE 1296 -Macaca mulatta functional IGLV2S10 IGL V TRUE 1297 -Macaca mulatta functional IGLV2S11 IGL V TRUE 1298 -Macaca mulatta functional IGLV2S2 IGL V TRUE 1299 -Macaca mulatta functional IGLV2S3 IGL V TRUE 1300 -Macaca mulatta functional IGLV2S4 IGL V TRUE 1301 -Macaca mulatta functional IGLV2S5 IGL V TRUE 1302 -Macaca mulatta functional IGLV2S6 IGL V TRUE 1303 -Macaca mulatta functional IGLV2S7 IGL V TRUE 1304 -Macaca mulatta non-functional IGLV2S8 IGL V FALSE 1305 -Macaca mulatta functional IGLV2S9 IGL V TRUE 1306 -Macaca mulatta functional IGLV3-1 IGL V TRUE 1307 -Macaca mulatta functional IGLV3-10 IGL V TRUE 1308 -Macaca mulatta non-functional IGLV3-11 IGL V FALSE 1309 -Macaca mulatta functional IGLV3-12 IGL V TRUE 1310 -Macaca mulatta non-functional IGLV3-13 IGL V FALSE 1311 -Macaca 
mulatta non-functional IGLV3-14 IGL V FALSE 1312 -Macaca mulatta non-functional IGLV3-15 IGL V FALSE 1313 -Macaca mulatta non-functional IGLV3-16 IGL V FALSE 1314 -Macaca mulatta non-functional IGLV3-17 IGL V FALSE 1315 -Macaca mulatta non-functional IGLV3-18 IGL V FALSE 1316 -Macaca mulatta functional IGLV3-19 IGL V TRUE 1317 -Macaca mulatta functional IGLV3-2 IGL V TRUE 1318 -Macaca mulatta non-functional IGLV3-20 IGL V FALSE 1319 -Macaca mulatta functional IGLV3-3 IGL V TRUE 1320 -Macaca mulatta functional IGLV3-4 IGL V TRUE 1321 -Macaca mulatta functional IGLV3-5 IGL V TRUE 1322 -Macaca mulatta functional IGLV3-6 IGL V TRUE 1323 -Macaca mulatta non-functional IGLV3-7 IGL V FALSE 1324 -Macaca mulatta non-functional IGLV3-8 IGL V FALSE 1325 -Macaca mulatta non-functional IGLV3-9 IGL V FALSE 1326 -Macaca mulatta functional IGLV3S1 IGL V TRUE 1327 -Macaca mulatta functional IGLV3S10 IGL V TRUE 1328 -Macaca mulatta functional IGLV3S11 IGL V TRUE 1329 -Macaca mulatta functional IGLV3S12 IGL V TRUE 1330 -Macaca mulatta functional IGLV3S13 IGL V TRUE 1331 -Macaca mulatta functional IGLV3S14 IGL V TRUE 1332 -Macaca mulatta functional IGLV3S15 IGL V TRUE 1333 -Macaca mulatta functional IGLV3S16 IGL V TRUE 1334 -Macaca mulatta non-functional IGLV3S18 IGL V FALSE 1335 -Macaca mulatta non-functional IGLV3S19 IGL V FALSE 1336 -Macaca mulatta functional IGLV3S2 IGL V TRUE 1337 -Macaca mulatta non-functional IGLV3S20 IGL V FALSE 1338 -Macaca mulatta functional IGLV3S3 IGL V TRUE 1339 -Macaca mulatta functional IGLV3S4 IGL V TRUE 1340 -Macaca mulatta functional IGLV3S5 IGL V TRUE 1341 -Macaca mulatta functional IGLV3S6 IGL V TRUE 1342 -Macaca mulatta functional IGLV3S7 IGL V TRUE 1343 -Macaca mulatta functional IGLV3S8 IGL V TRUE 1344 -Macaca mulatta functional IGLV3S9 IGL V TRUE 1345 -Macaca mulatta non-functional IGLV4-1 IGL V FALSE 1346 -Macaca mulatta functional IGLV4-2 IGL V TRUE 1347 -Macaca mulatta non-functional IGLV4-3 IGL V FALSE 1348 -Macaca mulatta functional 
IGLV4-4 IGL V TRUE 1349 -Macaca mulatta functional IGLV4S1 IGL V TRUE 1350 -Macaca mulatta functional IGLV4S2 IGL V TRUE 1351 -Macaca mulatta functional IGLV4S3 IGL V TRUE 1352 -Macaca mulatta functional IGLV4S4 IGL V TRUE 1353 -Macaca mulatta non-functional IGLV5-1 IGL V FALSE 1354 -Macaca mulatta functional IGLV5-10 IGL V TRUE 1355 -Macaca mulatta functional IGLV5-11 IGL V TRUE 1356 -Macaca mulatta non-functional IGLV5-12 IGL V FALSE 1357 -Macaca mulatta functional IGLV5-13 IGL V TRUE 1358 -Macaca mulatta functional IGLV5-14 IGL V TRUE 1359 -Macaca mulatta non-functional IGLV5-15 IGL V FALSE 1360 -Macaca mulatta non-functional IGLV5-2 IGL V FALSE 1361 -Macaca mulatta non-functional IGLV5-3 IGL V FALSE 1362 -Macaca mulatta non-functional IGLV5-4 IGL V FALSE 1363 -Macaca mulatta functional IGLV5-5 IGL V TRUE 1364 -Macaca mulatta functional IGLV5-6 IGL V TRUE 1365 -Macaca mulatta functional IGLV5-7 IGL V TRUE 1366 -Macaca mulatta non-functional IGLV5-8 IGL V FALSE 1367 -Macaca mulatta non-functional IGLV5-9 IGL V FALSE 1368 -Macaca mulatta non-functional IGLV5S1 IGL V FALSE 1369 -Macaca mulatta functional IGLV5S2 IGL V TRUE 1370 -Macaca mulatta functional IGLV5S3 IGL V TRUE 1371 -Macaca mulatta functional IGLV5S4 IGL V TRUE 1372 -Macaca mulatta functional IGLV6-1 IGL V TRUE 1373 -Macaca mulatta functional IGLV6-2 IGL V TRUE 1374 -Macaca mulatta non-functional IGLV6-3 IGL V FALSE 1375 -Macaca mulatta non-functional IGLV6-4 IGL V FALSE 1376 -Macaca mulatta functional IGLV6-5 IGL V TRUE 1377 -Macaca mulatta functional IGLV7-1 IGL V TRUE 1378 -Macaca mulatta functional IGLV7-2 IGL V TRUE 1379 -Macaca mulatta functional IGLV7-3 IGL V TRUE 1380 -Macaca mulatta functional IGLV7-4 IGL V TRUE 1381 -Macaca mulatta non-functional IGLV7-5 IGL V FALSE 1382 -Macaca mulatta non-functional IGLV7-6 IGL V FALSE 1383 -Macaca mulatta non-functional IGLV7-7 IGL V FALSE 1384 -Macaca mulatta functional IGLV8-1 IGL V TRUE 1385 -Macaca mulatta non-functional IGLV8S1 IGL V FALSE 1386 -Macaca 
mulatta functional IGLV9-1 IGL V TRUE 1387 -Macaca mulatta functional TRBD1 TRB D TRUE 1388 -Macaca mulatta functional TRBD2 TRB D TRUE 1389 -Macaca mulatta functional TRBJ1-1 TRB J TRUE 1390 -Macaca mulatta functional TRBJ1-2 TRB J TRUE 1391 -Macaca mulatta functional TRBJ1-3 TRB J TRUE 1392 -Macaca mulatta functional TRBJ1-4 TRB J TRUE 1393 -Macaca mulatta functional TRBJ1-5 TRB J TRUE 1394 -Macaca mulatta functional TRBJ1-6 TRB J TRUE 1395 -Macaca mulatta functional TRBJ2-1 TRB J TRUE 1396 -Macaca mulatta functional TRBJ2-2 TRB J TRUE 1397 -Macaca mulatta non-functional TRBJ2-2P TRB J FALSE 1398 -Macaca mulatta functional TRBJ2-3 TRB J TRUE 1399 -Macaca mulatta functional TRBJ2-4 TRB J TRUE 1400 -Macaca mulatta functional TRBJ2-5 TRB J TRUE 1401 -Macaca mulatta functional TRBJ2-6 TRB J TRUE 1402 -Macaca mulatta functional TRBJ2-7 TRB J TRUE 1403 -Macaca mulatta non-functional TRBV1-1 TRB V FALSE 1404 -Macaca mulatta non-functional TRBV1-2 TRB V FALSE 1405 -Macaca mulatta non-functional TRBV1-3 TRB V FALSE 1406 -Macaca mulatta functional TRBV10-1 TRB V TRUE 1407 -Macaca mulatta functional TRBV10-2 TRB V TRUE 1408 -Macaca mulatta functional TRBV10-3 TRB V TRUE 1409 -Macaca mulatta functional TRBV11-1 TRB V TRUE 1410 -Macaca mulatta functional TRBV11-2 TRB V TRUE 1411 -Macaca mulatta functional TRBV11-3 TRB V TRUE 1412 -Macaca mulatta functional TRBV12-1 TRB V TRUE 1413 -Macaca mulatta functional TRBV12-2 TRB V TRUE 1414 -Macaca mulatta functional TRBV12-3 TRB V TRUE 1415 -Macaca mulatta functional TRBV12-4 TRB V TRUE 1416 -Macaca mulatta functional TRBV13 TRB V TRUE 1417 -Macaca mulatta functional TRBV14 TRB V TRUE 1418 -Macaca mulatta functional TRBV15 TRB V TRUE 1419 -Macaca mulatta functional TRBV16 TRB V TRUE 1420 -Macaca mulatta functional TRBV18 TRB V TRUE 1421 -Macaca mulatta functional TRBV19 TRB V TRUE 1422 -Macaca mulatta functional TRBV2-1 TRB V TRUE 1423 -Macaca mulatta functional TRBV2-2 TRB V TRUE 1424 -Macaca mulatta functional TRBV2-3 TRB V TRUE 
1425 -Macaca mulatta functional TRBV20-1 TRB V TRUE 1426 -Macaca mulatta functional TRBV21-1 TRB V TRUE 1427 -Macaca mulatta non-functional TRBV22-1 TRB V FALSE 1428 -Macaca mulatta functional TRBV23-1 TRB V TRUE 1429 -Macaca mulatta functional TRBV24-1 TRB V TRUE 1430 -Macaca mulatta functional TRBV25-1 TRB V TRUE 1431 -Macaca mulatta non-functional TRBV26 TRB V FALSE 1432 -Macaca mulatta functional TRBV27 TRB V TRUE 1433 -Macaca mulatta functional TRBV28 TRB V TRUE 1434 -Macaca mulatta functional TRBV29-1 TRB V TRUE 1435 -Macaca mulatta functional TRBV3-1 TRB V TRUE 1436 -Macaca mulatta functional TRBV3-2 TRB V TRUE 1437 -Macaca mulatta functional TRBV3-3 TRB V TRUE 1438 -Macaca mulatta functional TRBV3-4 TRB V TRUE 1439 -Macaca mulatta functional TRBV30 TRB V TRUE 1440 -Macaca mulatta functional TRBV4-1 TRB V TRUE 1441 -Macaca mulatta functional TRBV4-2 TRB V TRUE 1442 -Macaca mulatta functional TRBV4-3 TRB V TRUE 1443 -Macaca mulatta non-functional TRBV5-1 TRB V FALSE 1444 -Macaca mulatta functional TRBV5-10 TRB V TRUE 1445 -Macaca mulatta non-functional TRBV5-2 TRB V FALSE 1446 -Macaca mulatta non-functional TRBV5-3 TRB V FALSE 1447 -Macaca mulatta functional TRBV5-4 TRB V TRUE 1448 -Macaca mulatta functional TRBV5-5 TRB V TRUE 1449 -Macaca mulatta functional TRBV5-6 TRB V TRUE 1450 -Macaca mulatta functional TRBV5-7 TRB V TRUE 1451 -Macaca mulatta functional TRBV5-8 TRB V TRUE 1452 -Macaca mulatta functional TRBV5-9 TRB V TRUE 1453 -Macaca mulatta functional TRBV6-1 TRB V TRUE 1454 -Macaca mulatta functional TRBV6-2 TRB V TRUE 1455 -Macaca mulatta functional TRBV6-3 TRB V TRUE 1456 -Macaca mulatta functional TRBV6-4 TRB V TRUE 1457 -Macaca mulatta functional TRBV6-5 TRB V TRUE 1458 -Macaca mulatta functional TRBV6-6 TRB V TRUE 1459 -Macaca mulatta functional TRBV6-7 TRB V TRUE 1460 -Macaca mulatta functional TRBV6-8 TRB V TRUE 1461 -Macaca mulatta non-functional TRBV7-1 TRB V FALSE 1462 -Macaca mulatta functional TRBV7-10 TRB V TRUE 1463 -Macaca mulatta 
functional TRBV7-2 TRB V TRUE 1464 -Macaca mulatta functional TRBV7-3 TRB V TRUE 1465 -Macaca mulatta functional TRBV7-4 TRB V TRUE 1466 -Macaca mulatta functional TRBV7-5 TRB V TRUE 1467 -Macaca mulatta functional TRBV7-6 TRB V TRUE 1468 -Macaca mulatta functional TRBV7-7 TRB V TRUE 1469 -Macaca mulatta non-functional TRBV7-8 TRB V FALSE 1470 -Macaca mulatta functional TRBV7-9 TRB V TRUE 1471 -Macaca mulatta non-functional TRBV8-1 TRB V FALSE 1472 -Macaca mulatta functional TRBV9 TRB V TRUE 1473 -Macaca mulatta non-functional TRBVA TRB V FALSE 1474 -Macaca mulatta non-functional TRBVB TRB V FALSE 1475 -Mus musculus functional IGHD IGH D TRUE 1476 -Mus musculus functional IGHD1-1 IGH D TRUE 1477 -Mus musculus functional IGHD1-2 IGH D TRUE 1478 -Mus musculus functional IGHD1-3 IGH D TRUE 1479 -Mus musculus functional IGHD2-1 IGH D TRUE 1480 -Mus musculus functional IGHD2-10 IGH D TRUE 1481 -Mus musculus functional IGHD2-11 IGH D TRUE 1482 -Mus musculus functional IGHD2-12 IGH D TRUE 1483 -Mus musculus functional IGHD2-13 IGH D TRUE 1484 -Mus musculus functional IGHD2-14 IGH D TRUE 1485 -Mus musculus functional IGHD2-2 IGH D TRUE 1486 -Mus musculus functional IGHD2-3 IGH D TRUE 1487 -Mus musculus functional IGHD2-4 IGH D TRUE 1488 -Mus musculus functional IGHD2-5 IGH D TRUE 1489 -Mus musculus functional IGHD2-6 IGH D TRUE 1490 -Mus musculus functional IGHD2-7 IGH D TRUE 1491 -Mus musculus functional IGHD2-8 IGH D TRUE 1492 -Mus musculus functional IGHD2-9 IGH D TRUE 1493 -Mus musculus functional IGHD3-1 IGH D TRUE 1494 -Mus musculus functional IGHD3-2 IGH D TRUE 1495 -Mus musculus functional IGHD3-3 IGH D TRUE 1496 -Mus musculus functional IGHD4-1 IGH D TRUE 1497 -Mus musculus non-functional IGHD5-1 IGH D FALSE 1498 -Mus musculus non-functional IGHD5-2 IGH D FALSE 1499 -Mus musculus non-functional IGHD5-3 IGH D FALSE 1500 -Mus musculus non-functional IGHD5-4 IGH D FALSE 1501 -Mus musculus non-functional IGHD5-5 IGH D FALSE 1502 -Mus musculus non-functional IGHD5-6 
IGH D FALSE 1503 -Mus musculus non-functional IGHD5-7 IGH D FALSE 1504 -Mus musculus non-functional IGHD5-8 IGH D FALSE 1505 -Mus musculus non-functional IGHD6-1 IGH D FALSE 1506 -Mus musculus non-functional IGHD6-2 IGH D FALSE 1507 -Mus musculus non-functional IGHD6-3 IGH D FALSE 1508 -Mus musculus functional IGHJ1 IGH J TRUE 1509 -Mus musculus functional IGHJ2 IGH J TRUE 1510 -Mus musculus functional IGHJ3 IGH J TRUE 1511 -Mus musculus functional IGHJ4 IGH J TRUE 1512 -Mus musculus non-functional IGHV(I)-1 IGH V FALSE 1513 -Mus musculus non-functional IGHV(II)-1 IGH V FALSE 1514 -Mus musculus non-functional IGHV(II)-2 IGH V FALSE 1515 -Mus musculus non-functional IGHV(II)-3 IGH V FALSE 1516 -Mus musculus non-functional IGHV(II)-4 IGH V FALSE 1517 -Mus musculus non-functional IGHV(II)-5 IGH V FALSE 1518 -Mus musculus non-functional IGHV(III)-1 IGH V FALSE 1519 -Mus musculus non-functional IGHV(III)-10 IGH V FALSE 1520 -Mus musculus non-functional IGHV(III)-11 IGH V FALSE 1521 -Mus musculus non-functional IGHV(III)-12 IGH V FALSE 1522 -Mus musculus non-functional IGHV(III)-13 IGH V FALSE 1523 -Mus musculus non-functional IGHV(III)-2 IGH V FALSE 1524 -Mus musculus non-functional IGHV(III)-3 IGH V FALSE 1525 -Mus musculus non-functional IGHV(III)-4 IGH V FALSE 1526 -Mus musculus non-functional IGHV(III)-5 IGH V FALSE 1527 -Mus musculus non-functional IGHV(III)-6 IGH V FALSE 1528 -Mus musculus non-functional IGHV(III)-7 IGH V FALSE 1529 -Mus musculus non-functional IGHV(III)-8 IGH V FALSE 1530 -Mus musculus non-functional IGHV(III)-9 IGH V FALSE 1531 -Mus musculus non-functional IGHV1-1 IGH V FALSE 1532 -Mus musculus non-functional IGHV1-10 IGH V FALSE 1533 -Mus musculus functional IGHV1-11 IGH V TRUE 1534 -Mus musculus functional IGHV1-12 IGH V TRUE 1535 -Mus musculus non-functional IGHV1-13 IGH V FALSE 1536 -Mus musculus functional IGHV1-14 IGH V TRUE 1537 -Mus musculus functional IGHV1-15 IGH V TRUE 1538 -Mus musculus non-functional IGHV1-16 IGH V FALSE 1539 -Mus 
musculus non-functional IGHV1-17 IGH V FALSE 1540 -Mus musculus functional IGHV1-17-1 IGH V TRUE 1541 -Mus musculus functional IGHV1-18 IGH V TRUE 1542 -Mus musculus functional IGHV1-19 IGH V TRUE 1543 -Mus musculus non-functional IGHV1-19-1 IGH V FALSE 1544 -Mus musculus non-functional IGHV1-2 IGH V FALSE 1545 -Mus musculus functional IGHV1-20 IGH V TRUE 1546 -Mus musculus non-functional IGHV1-21 IGH V FALSE 1547 -Mus musculus non-functional IGHV1-21-1 IGH V FALSE 1548 -Mus musculus functional IGHV1-22 IGH V TRUE 1549 -Mus musculus non-functional IGHV1-23 IGH V FALSE 1550 -Mus musculus non-functional IGHV1-24 IGH V FALSE 1551 -Mus musculus non-functional IGHV1-25 IGH V FALSE 1552 -Mus musculus functional IGHV1-26 IGH V TRUE 1553 -Mus musculus non-functional IGHV1-27 IGH V FALSE 1554 -Mus musculus non-functional IGHV1-28 IGH V FALSE 1555 -Mus musculus non-functional IGHV1-29 IGH V FALSE 1556 -Mus musculus non-functional IGHV1-3 IGH V FALSE 1557 -Mus musculus non-functional IGHV1-30 IGH V FALSE 1558 -Mus musculus functional IGHV1-31 IGH V TRUE 1559 -Mus musculus non-functional IGHV1-32 IGH V FALSE 1560 -Mus musculus non-functional IGHV1-33 IGH V FALSE 1561 -Mus musculus functional IGHV1-34 IGH V TRUE 1562 -Mus musculus non-functional IGHV1-35 IGH V FALSE 1563 -Mus musculus functional IGHV1-36 IGH V TRUE 1564 -Mus musculus functional IGHV1-37 IGH V TRUE 1565 -Mus musculus non-functional IGHV1-38 IGH V FALSE 1566 -Mus musculus functional IGHV1-39 IGH V TRUE 1567 -Mus musculus functional IGHV1-4 IGH V TRUE 1568 -Mus musculus non-functional IGHV1-40 IGH V FALSE 1569 -Mus musculus non-functional IGHV1-41 IGH V FALSE 1570 -Mus musculus functional IGHV1-42 IGH V TRUE 1571 -Mus musculus functional IGHV1-43 IGH V TRUE 1572 -Mus musculus non-functional IGHV1-44 IGH V FALSE 1573 -Mus musculus non-functional IGHV1-45 IGH V FALSE 1574 -Mus musculus non-functional IGHV1-46 IGH V FALSE 1575 -Mus musculus functional IGHV1-47 IGH V TRUE 1576 -Mus musculus non-functional IGHV1-48 IGH 
V FALSE 1577 -Mus musculus functional IGHV1-49 IGH V TRUE 1578 -Mus musculus functional IGHV1-5 IGH V TRUE 1579 -Mus musculus functional IGHV1-50 IGH V TRUE 1580 -Mus musculus non-functional IGHV1-51 IGH V FALSE 1581 -Mus musculus functional IGHV1-52 IGH V TRUE 1582 -Mus musculus functional IGHV1-53 IGH V TRUE 1583 -Mus musculus functional IGHV1-54 IGH V TRUE 1584 -Mus musculus functional IGHV1-55 IGH V TRUE 1585 -Mus musculus functional IGHV1-56 IGH V TRUE 1586 -Mus musculus non-functional IGHV1-57 IGH V FALSE 1587 -Mus musculus functional IGHV1-58 IGH V TRUE 1588 -Mus musculus functional IGHV1-59 IGH V TRUE 1589 -Mus musculus non-functional IGHV1-6 IGH V FALSE 1590 -Mus musculus non-functional IGHV1-60 IGH V FALSE 1591 -Mus musculus functional IGHV1-61 IGH V TRUE 1592 -Mus musculus non-functional IGHV1-62 IGH V FALSE 1593 -Mus musculus functional IGHV1-62-1 IGH V TRUE 1594 -Mus musculus functional IGHV1-62-2 IGH V TRUE 1595 -Mus musculus non-functional IGHV1-62-3 IGH V FALSE 1596 -Mus musculus functional IGHV1-63 IGH V TRUE 1597 -Mus musculus functional IGHV1-64 IGH V TRUE 1598 -Mus musculus non-functional IGHV1-65 IGH V FALSE 1599 -Mus musculus functional IGHV1-66 IGH V TRUE 1600 -Mus musculus functional IGHV1-67 IGH V TRUE 1601 -Mus musculus non-functional IGHV1-68 IGH V FALSE 1602 -Mus musculus functional IGHV1-69 IGH V TRUE 1603 -Mus musculus functional IGHV1-7 IGH V TRUE 1604 -Mus musculus non-functional IGHV1-70 IGH V FALSE 1605 -Mus musculus functional IGHV1-71 IGH V TRUE 1606 -Mus musculus functional IGHV1-72 IGH V TRUE 1607 -Mus musculus non-functional IGHV1-73 IGH V FALSE 1608 -Mus musculus functional IGHV1-74 IGH V TRUE 1609 -Mus musculus functional IGHV1-75 IGH V TRUE 1610 -Mus musculus functional IGHV1-76 IGH V TRUE 1611 -Mus musculus functional IGHV1-77 IGH V TRUE 1612 -Mus musculus functional IGHV1-78 IGH V TRUE 1613 -Mus musculus non-functional IGHV1-79 IGH V FALSE 1614 -Mus musculus non-functional IGHV1-8 IGH V FALSE 1615 -Mus musculus functional 
IGHV1-80 IGH V TRUE 1616 -Mus musculus functional IGHV1-81 IGH V TRUE 1617 -Mus musculus functional IGHV1-82 IGH V TRUE 1618 -Mus musculus non-functional IGHV1-83 IGH V FALSE 1619 -Mus musculus functional IGHV1-84 IGH V TRUE 1620 -Mus musculus functional IGHV1-85 IGH V TRUE 1621 -Mus musculus non-functional IGHV1-86 IGH V FALSE 1622 -Mus musculus functional IGHV1-9 IGH V TRUE 1623 -Mus musculus functional IGHV10-1 IGH V TRUE 1624 -Mus musculus non-functional IGHV10-2 IGH V FALSE 1625 -Mus musculus functional IGHV10-3 IGH V TRUE 1626 -Mus musculus non-functional IGHV10-4 IGH V FALSE 1627 -Mus musculus functional IGHV10S3 IGH V TRUE 1628 -Mus musculus non-functional IGHV10S4 IGH V FALSE 1629 -Mus musculus non-functional IGHV10S5 IGH V FALSE 1630 -Mus musculus functional IGHV11-1 IGH V TRUE 1631 -Mus musculus functional IGHV11-2 IGH V TRUE 1632 -Mus musculus non-functional IGHV12-1 IGH V FALSE 1633 -Mus musculus functional IGHV12-1-1 IGH V TRUE 1634 -Mus musculus non-functional IGHV12-1-2 IGH V FALSE 1635 -Mus musculus non-functional IGHV12-2 IGH V FALSE 1636 -Mus musculus non-functional IGHV12-2-1 IGH V FALSE 1637 -Mus musculus functional IGHV12-3 IGH V TRUE 1638 -Mus musculus non-functional IGHV12S2 IGH V FALSE 1639 -Mus musculus non-functional IGHV13-1 IGH V FALSE 1640 -Mus musculus functional IGHV13-2 IGH V TRUE 1641 -Mus musculus functional IGHV14-1 IGH V TRUE 1642 -Mus musculus functional IGHV14-2 IGH V TRUE 1643 -Mus musculus functional IGHV14-3 IGH V TRUE 1644 -Mus musculus functional IGHV14-4 IGH V TRUE 1645 -Mus musculus non-functional IGHV14S4 IGH V FALSE 1646 -Mus musculus non-functional IGHV15-1 IGH V FALSE 1647 -Mus musculus functional IGHV15-2 IGH V TRUE 1648 -Mus musculus functional IGHV16-1 IGH V TRUE 1649 -Mus musculus non-functional IGHV1S10 IGH V FALSE 1650 -Mus musculus non-functional IGHV1S100 IGH V FALSE 1651 -Mus musculus non-functional IGHV1S101 IGH V FALSE 1652 -Mus musculus non-functional IGHV1S103 IGH V FALSE 1653 -Mus musculus 
non-functional IGHV1S107 IGH V FALSE 1654 -Mus musculus non-functional IGHV1S108 IGH V FALSE 1655 -Mus musculus non-functional IGHV1S11 IGH V FALSE 1656 -Mus musculus non-functional IGHV1S110 IGH V FALSE 1657 -Mus musculus non-functional IGHV1S111 IGH V FALSE 1658 -Mus musculus non-functional IGHV1S112 IGH V FALSE 1659 -Mus musculus non-functional IGHV1S113 IGH V FALSE 1660 -Mus musculus non-functional IGHV1S118 IGH V FALSE 1661 -Mus musculus functional IGHV1S12 IGH V TRUE 1662 -Mus musculus non-functional IGHV1S120 IGH V FALSE 1663 -Mus musculus non-functional IGHV1S121 IGH V FALSE 1664 -Mus musculus non-functional IGHV1S122 IGH V FALSE 1665 -Mus musculus non-functional IGHV1S124 IGH V FALSE 1666 -Mus musculus non-functional IGHV1S126 IGH V FALSE 1667 -Mus musculus non-functional IGHV1S127 IGH V FALSE 1668 -Mus musculus non-functional IGHV1S130 IGH V FALSE 1669 -Mus musculus non-functional IGHV1S132 IGH V FALSE 1670 -Mus musculus non-functional IGHV1S134 IGH V FALSE 1671 -Mus musculus non-functional IGHV1S135 IGH V FALSE 1672 -Mus musculus non-functional IGHV1S136 IGH V FALSE 1673 -Mus musculus non-functional IGHV1S137 IGH V FALSE 1674 -Mus musculus functional IGHV1S14 IGH V TRUE 1675 -Mus musculus non-functional IGHV1S15 IGH V FALSE 1676 -Mus musculus non-functional IGHV1S16 IGH V FALSE 1677 -Mus musculus non-functional IGHV1S17 IGH V FALSE 1678 -Mus musculus non-functional IGHV1S18 IGH V FALSE 1679 -Mus musculus non-functional IGHV1S19 IGH V FALSE 1680 -Mus musculus functional IGHV1S20 IGH V TRUE 1681 -Mus musculus functional IGHV1S21 IGH V TRUE 1682 -Mus musculus functional IGHV1S22 IGH V TRUE 1683 -Mus musculus functional IGHV1S26 IGH V TRUE 1684 -Mus musculus non-functional IGHV1S28 IGH V FALSE 1685 -Mus musculus functional IGHV1S29 IGH V TRUE 1686 -Mus musculus functional IGHV1S30 IGH V TRUE 1687 -Mus musculus functional IGHV1S31 IGH V TRUE 1688 -Mus musculus functional IGHV1S32 IGH V TRUE 1689 -Mus musculus functional IGHV1S33 IGH V TRUE 1690 -Mus musculus 
functional IGHV1S34 IGH V TRUE 1691 -Mus musculus functional IGHV1S35 IGH V TRUE 1692 -Mus musculus functional IGHV1S36 IGH V TRUE 1693 -Mus musculus functional IGHV1S37 IGH V TRUE 1694 -Mus musculus functional IGHV1S40 IGH V TRUE 1695 -Mus musculus functional IGHV1S41 IGH V TRUE 1696 -Mus musculus non-functional IGHV1S44 IGH V FALSE 1697 -Mus musculus functional IGHV1S45 IGH V TRUE 1698 -Mus musculus non-functional IGHV1S46 IGH V FALSE 1699 -Mus musculus non-functional IGHV1S47 IGH V FALSE 1700 -Mus musculus functional IGHV1S49 IGH V TRUE 1701 -Mus musculus functional IGHV1S5 IGH V TRUE 1702 -Mus musculus functional IGHV1S50 IGH V TRUE 1703 -Mus musculus non-functional IGHV1S51 IGH V FALSE 1704 -Mus musculus functional IGHV1S52 IGH V TRUE 1705 -Mus musculus functional IGHV1S53 IGH V TRUE 1706 -Mus musculus functional IGHV1S55 IGH V TRUE 1707 -Mus musculus functional IGHV1S56 IGH V TRUE 1708 -Mus musculus non-functional IGHV1S57 IGH V FALSE 1709 -Mus musculus functional IGHV1S61 IGH V TRUE 1710 -Mus musculus non-functional IGHV1S65 IGH V FALSE 1711 -Mus musculus non-functional IGHV1S67 IGH V FALSE 1712 -Mus musculus non-functional IGHV1S68 IGH V FALSE 1713 -Mus musculus non-functional IGHV1S70 IGH V FALSE 1714 -Mus musculus non-functional IGHV1S72 IGH V FALSE 1715 -Mus musculus non-functional IGHV1S73 IGH V FALSE 1716 -Mus musculus non-functional IGHV1S74 IGH V FALSE 1717 -Mus musculus non-functional IGHV1S75 IGH V FALSE 1718 -Mus musculus non-functional IGHV1S78 IGH V FALSE 1719 -Mus musculus non-functional IGHV1S81 IGH V FALSE 1720 -Mus musculus non-functional IGHV1S82 IGH V FALSE 1721 -Mus musculus non-functional IGHV1S83 IGH V FALSE 1722 -Mus musculus non-functional IGHV1S84 IGH V FALSE 1723 -Mus musculus non-functional IGHV1S87 IGH V FALSE 1724 -Mus musculus non-functional IGHV1S88 IGH V FALSE 1725 -Mus musculus non-functional IGHV1S9 IGH V FALSE 1726 -Mus musculus non-functional IGHV1S92 IGH V FALSE 1727 -Mus musculus non-functional IGHV1S95 IGH V FALSE 1728 
-Mus musculus non-functional IGHV1S96 IGH V FALSE 1729 -Mus musculus non-functional IGHV2-1 IGH V FALSE 1730 -Mus musculus functional IGHV2-2 IGH V TRUE 1731 -Mus musculus non-functional IGHV2-2-1 IGH V FALSE 1732 -Mus musculus functional IGHV2-2-2 IGH V TRUE 1733 -Mus musculus functional IGHV2-3 IGH V TRUE 1734 -Mus musculus functional IGHV2-3-1 IGH V TRUE 1735 -Mus musculus functional IGHV2-4 IGH V TRUE 1736 -Mus musculus functional IGHV2-4-1 IGH V TRUE 1737 -Mus musculus functional IGHV2-5 IGH V TRUE 1738 -Mus musculus functional IGHV2-5-1 IGH V TRUE 1739 -Mus musculus functional IGHV2-6 IGH V TRUE 1740 -Mus musculus functional IGHV2-6-1 IGH V TRUE 1741 -Mus musculus functional IGHV2-6-2 IGH V TRUE 1742 -Mus musculus functional IGHV2-6-3 IGH V TRUE 1743 -Mus musculus functional IGHV2-6-4 IGH V TRUE 1744 -Mus musculus functional IGHV2-6-5 IGH V TRUE 1745 -Mus musculus functional IGHV2-6-6 IGH V TRUE 1746 -Mus musculus functional IGHV2-6-7 IGH V TRUE 1747 -Mus musculus functional IGHV2-6-8 IGH V TRUE 1748 -Mus musculus functional IGHV2-7 IGH V TRUE 1749 -Mus musculus non-functional IGHV2-8 IGH V FALSE 1750 -Mus musculus functional IGHV2-9 IGH V TRUE 1751 -Mus musculus functional IGHV2-9-1 IGH V TRUE 1752 -Mus musculus functional IGHV2S3 IGH V TRUE 1753 -Mus musculus functional IGHV3-1 IGH V TRUE 1754 -Mus musculus functional IGHV3-2 IGH V TRUE 1755 -Mus musculus functional IGHV3-3 IGH V TRUE 1756 -Mus musculus functional IGHV3-4 IGH V TRUE 1757 -Mus musculus functional IGHV3-5 IGH V TRUE 1758 -Mus musculus functional IGHV3-6 IGH V TRUE 1759 -Mus musculus non-functional IGHV3-7 IGH V FALSE 1760 -Mus musculus functional IGHV3-8 IGH V TRUE 1761 -Mus musculus functional IGHV3S1 IGH V TRUE 1762 -Mus musculus non-functional IGHV3S7 IGH V FALSE 1763 -Mus musculus functional IGHV4-1 IGH V TRUE 1764 -Mus musculus functional IGHV4-2 IGH V TRUE 1765 -Mus musculus non-functional IGHV5-1 IGH V FALSE 1766 -Mus musculus non-functional IGHV5-10 IGH V FALSE 1767 -Mus musculus 
non-functional IGHV5-10-1 IGH V FALSE 1768 -Mus musculus non-functional IGHV5-10-2 IGH V FALSE 1769 -Mus musculus non-functional IGHV5-11 IGH V FALSE 1770 -Mus musculus non-functional IGHV5-11-1 IGH V FALSE 1771 -Mus musculus non-functional IGHV5-11-2 IGH V FALSE 1772 -Mus musculus functional IGHV5-12 IGH V TRUE 1773 -Mus musculus functional IGHV5-12-1 IGH V TRUE 1774 -Mus musculus functional IGHV5-12-2 IGH V TRUE 1775 -Mus musculus non-functional IGHV5-12-3 IGH V FALSE 1776 -Mus musculus functional IGHV5-12-4 IGH V TRUE 1777 -Mus musculus non-functional IGHV5-13 IGH V FALSE 1778 -Mus musculus non-functional IGHV5-13-1 IGH V FALSE 1779 -Mus musculus functional IGHV5-15 IGH V TRUE 1780 -Mus musculus functional IGHV5-16 IGH V TRUE 1781 -Mus musculus functional IGHV5-17 IGH V TRUE 1782 -Mus musculus non-functional IGHV5-18 IGH V FALSE 1783 -Mus musculus non-functional IGHV5-19 IGH V FALSE 1784 -Mus musculus functional IGHV5-2 IGH V TRUE 1785 -Mus musculus non-functional IGHV5-21 IGH V FALSE 1786 -Mus musculus non-functional IGHV5-3 IGH V FALSE 1787 -Mus musculus functional IGHV5-4 IGH V TRUE 1788 -Mus musculus non-functional IGHV5-5 IGH V FALSE 1789 -Mus musculus non-functional IGHV5-5-1 IGH V FALSE 1790 -Mus musculus functional IGHV5-6 IGH V TRUE 1791 -Mus musculus functional IGHV5-6-1 IGH V TRUE 1792 -Mus musculus functional IGHV5-6-2 IGH V TRUE 1793 -Mus musculus functional IGHV5-6-3 IGH V TRUE 1794 -Mus musculus functional IGHV5-6-4 IGH V TRUE 1795 -Mus musculus functional IGHV5-6-5 IGH V TRUE 1796 -Mus musculus functional IGHV5-6-6 IGH V TRUE 1797 -Mus musculus non-functional IGHV5-7 IGH V FALSE 1798 -Mus musculus non-functional IGHV5-7-1 IGH V FALSE 1799 -Mus musculus non-functional IGHV5-7-2 IGH V FALSE 1800 -Mus musculus non-functional IGHV5-7-3 IGH V FALSE 1801 -Mus musculus non-functional IGHV5-7-4 IGH V FALSE 1802 -Mus musculus non-functional IGHV5-7-5 IGH V FALSE 1803 -Mus musculus non-functional IGHV5-7-6 IGH V FALSE 1804 -Mus musculus non-functional 
IGHV5-8 IGH V FALSE 1805 -Mus musculus non-functional IGHV5-8-1 IGH V FALSE 1806 -Mus musculus non-functional IGHV5-8-2 IGH V FALSE 1807 -Mus musculus non-functional IGHV5-8-3 IGH V FALSE 1808 -Mus musculus functional IGHV5-9 IGH V TRUE 1809 -Mus musculus functional IGHV5-9-1 IGH V TRUE 1810 -Mus musculus functional IGHV5-9-2 IGH V TRUE 1811 -Mus musculus functional IGHV5-9-3 IGH V TRUE 1812 -Mus musculus functional IGHV5-9-4 IGH V TRUE 1813 -Mus musculus functional IGHV5-9-5 IGH V TRUE 1814 -Mus musculus non-functional IGHV5S12 IGH V FALSE 1815 -Mus musculus non-functional IGHV5S21 IGH V FALSE 1816 -Mus musculus non-functional IGHV5S24 IGH V FALSE 1817 -Mus musculus functional IGHV5S4 IGH V TRUE 1818 -Mus musculus functional IGHV5S9 IGH V TRUE 1819 -Mus musculus non-functional IGHV6-1 IGH V FALSE 1820 -Mus musculus non-functional IGHV6-1-1 IGH V FALSE 1821 -Mus musculus non-functional IGHV6-2 IGH V FALSE 1822 -Mus musculus functional IGHV6-3 IGH V TRUE 1823 -Mus musculus functional IGHV6-4 IGH V TRUE 1824 -Mus musculus functional IGHV6-5 IGH V TRUE 1825 -Mus musculus functional IGHV6-6 IGH V TRUE 1826 -Mus musculus functional IGHV6-7 IGH V TRUE 1827 -Mus musculus non-functional IGHV6S2 IGH V FALSE 1828 -Mus musculus non-functional IGHV6S3 IGH V FALSE 1829 -Mus musculus non-functional IGHV6S4 IGH V FALSE 1830 -Mus musculus functional IGHV7-1 IGH V TRUE 1831 -Mus musculus functional IGHV7-2 IGH V TRUE 1832 -Mus musculus functional IGHV7-3 IGH V TRUE 1833 -Mus musculus functional IGHV7-4 IGH V TRUE 1834 -Mus musculus non-functional IGHV8-1 IGH V FALSE 1835 -Mus musculus non-functional IGHV8-10 IGH V FALSE 1836 -Mus musculus functional IGHV8-11 IGH V TRUE 1837 -Mus musculus functional IGHV8-12 IGH V TRUE 1838 -Mus musculus non-functional IGHV8-13 IGH V FALSE 1839 -Mus musculus non-functional IGHV8-14 IGH V FALSE 1840 -Mus musculus non-functional IGHV8-15 IGH V FALSE 1841 -Mus musculus non-functional IGHV8-16 IGH V FALSE 1842 -Mus musculus non-functional IGHV8-2 IGH V 
FALSE 1843 -Mus musculus non-functional IGHV8-3 IGH V FALSE 1844 -Mus musculus functional IGHV8-4 IGH V TRUE 1845 -Mus musculus functional IGHV8-5 IGH V TRUE 1846 -Mus musculus functional IGHV8-6 IGH V TRUE 1847 -Mus musculus non-functional IGHV8-7 IGH V FALSE 1848 -Mus musculus functional IGHV8-8 IGH V TRUE 1849 -Mus musculus non-functional IGHV8-8-1 IGH V FALSE 1850 -Mus musculus non-functional IGHV8-9 IGH V FALSE 1851 -Mus musculus non-functional IGHV8S1 IGH V FALSE 1852 -Mus musculus non-functional IGHV8S2 IGH V FALSE 1853 -Mus musculus non-functional IGHV8S6 IGH V FALSE 1854 -Mus musculus non-functional IGHV8S9 IGH V FALSE 1855 -Mus musculus functional IGHV9-1 IGH V TRUE 1856 -Mus musculus functional IGHV9-2 IGH V TRUE 1857 -Mus musculus functional IGHV9-2-1 IGH V TRUE 1858 -Mus musculus functional IGHV9-3 IGH V TRUE 1859 -Mus musculus functional IGHV9-3-1 IGH V TRUE 1860 -Mus musculus functional IGHV9-4 IGH V TRUE 1861 -Mus musculus functional IGHV9S7 IGH V TRUE 1862 -Mus musculus functional IGHV9S8 IGH V TRUE 1863 -Mus musculus functional IGKJ1 IGK J TRUE 1864 -Mus musculus functional IGKJ2 IGK J TRUE 1865 -Mus musculus non-functional IGKJ3 IGK J FALSE 1866 -Mus musculus functional IGKJ4 IGK J TRUE 1867 -Mus musculus functional IGKJ5 IGK J TRUE 1868 -Mus musculus non-functional IGKV1-108 IGK V FALSE 1869 -Mus musculus functional IGKV1-110 IGK V TRUE 1870 -Mus musculus non-functional IGKV1-115 IGK V FALSE 1871 -Mus musculus functional IGKV1-117 IGK V TRUE 1872 -Mus musculus functional IGKV1-122 IGK V TRUE 1873 -Mus musculus non-functional IGKV1-131 IGK V FALSE 1874 -Mus musculus functional IGKV1-132 IGK V TRUE 1875 -Mus musculus functional IGKV1-133 IGK V TRUE 1876 -Mus musculus functional IGKV1-135 IGK V TRUE 1877 -Mus musculus non-functional IGKV1-136 IGK V FALSE 1878 -Mus musculus non-functional IGKV1-35 IGK V FALSE 1879 -Mus musculus functional IGKV1-88 IGK V TRUE 1880 -Mus musculus functional IGKV1-99 IGK V TRUE 1881 -Mus musculus non-functional 
IGKV1/OR16-1 IGK V FALSE 1882 -Mus musculus non-functional IGKV1/OR19-1 IGK V FALSE 1883 -Mus musculus non-functional IGKV1/OR6-1 IGK V FALSE 1884 -Mus musculus functional IGKV10-94 IGK V TRUE 1885 -Mus musculus functional IGKV10-95 IGK V TRUE 1886 -Mus musculus functional IGKV10-96 IGK V TRUE 1887 -Mus musculus non-functional IGKV11-106 IGK V FALSE 1888 -Mus musculus non-functional IGKV11-114 IGK V FALSE 1889 -Mus musculus non-functional IGKV11-118 IGK V FALSE 1890 -Mus musculus functional IGKV11-125 IGK V TRUE 1891 -Mus musculus functional IGKV12-38 IGK V TRUE 1892 -Mus musculus non-functional IGKV12-40 IGK V FALSE 1893 -Mus musculus functional IGKV12-41 IGK V TRUE 1894 -Mus musculus non-functional IGKV12-42 IGK V FALSE 1895 -Mus musculus functional IGKV12-44 IGK V TRUE 1896 -Mus musculus functional IGKV12-46 IGK V TRUE 1897 -Mus musculus non-functional IGKV12-47 IGK V FALSE 1898 -Mus musculus non-functional IGKV12-49 IGK V FALSE 1899 -Mus musculus non-functional IGKV12-66 IGK V FALSE 1900 -Mus musculus non-functional IGKV12-67 IGK V FALSE 1901 -Mus musculus functional IGKV12-89 IGK V TRUE 1902 -Mus musculus functional IGKV12-98 IGK V TRUE 1903 -Mus musculus functional IGKV12-e IGK V TRUE 1904 -Mus musculus non-functional IGKV13-54-1 IGK V FALSE 1905 -Mus musculus non-functional IGKV13-55-1 IGK V FALSE 1906 -Mus musculus non-functional IGKV13-56-1 IGK V FALSE 1907 -Mus musculus non-functional IGKV13-57-1 IGK V FALSE 1908 -Mus musculus non-functional IGKV13-57-2 IGK V FALSE 1909 -Mus musculus non-functional IGKV13-61-1 IGK V FALSE 1910 -Mus musculus non-functional IGKV13-62-1 IGK V FALSE 1911 -Mus musculus non-functional IGKV13-64 IGK V FALSE 1912 -Mus musculus non-functional IGKV13-71-1 IGK V FALSE 1913 -Mus musculus non-functional IGKV13-73-1 IGK V FALSE 1914 -Mus musculus non-functional IGKV13-74-1 IGK V FALSE 1915 -Mus musculus non-functional IGKV13-76 IGK V FALSE 1916 -Mus musculus non-functional IGKV13-78-1 IGK V FALSE 1917 -Mus musculus non-functional 
IGKV13-80-1 IGK V FALSE 1918 -Mus musculus non-functional IGKV13-82 IGK V FALSE 1919 -Mus musculus functional IGKV13-84 IGK V TRUE 1920 -Mus musculus functional IGKV13-85 IGK V TRUE 1921 -Mus musculus non-functional IGKV13-87 IGK V FALSE 1922 -Mus musculus non-functional IGKV13-89-1 IGK V FALSE 1923 -Mus musculus functional IGKV14-100 IGK V TRUE 1924 -Mus musculus functional IGKV14-111 IGK V TRUE 1925 -Mus musculus non-functional IGKV14-118-1 IGK V FALSE 1926 -Mus musculus non-functional IGKV14-118-2 IGK V FALSE 1927 -Mus musculus functional IGKV14-126 IGK V TRUE 1928 -Mus musculus non-functional IGKV14-126-1 IGK V FALSE 1929 -Mus musculus functional IGKV14-130 IGK V TRUE 1930 -Mus musculus non-functional IGKV14-134-1 IGK V FALSE 1931 -Mus musculus non-functional IGKV14/OR16-2 IGK V FALSE 1932 -Mus musculus non-functional IGKV14/OR6-2 IGK V FALSE 1933 -Mus musculus non-functional IGKV14/OR6-3 IGK V FALSE 1934 -Mus musculus non-functional IGKV15-101 IGK V FALSE 1935 -Mus musculus non-functional IGKV15-101-1 IGK V FALSE 1936 -Mus musculus non-functional IGKV15-102 IGK V FALSE 1937 -Mus musculus non-functional IGKV15-103 IGK V FALSE 1938 -Mus musculus non-functional IGKV15-97 IGK V FALSE 1939 -Mus musculus functional IGKV16-104 IGK V TRUE 1940 -Mus musculus functional IGKV17-121 IGK V TRUE 1941 -Mus musculus functional IGKV17-127 IGK V TRUE 1942 -Mus musculus non-functional IGKV17-134 IGK V FALSE 1943 -Mus musculus non-functional IGKV17/OR16-3 IGK V FALSE 1944 -Mus musculus non-functional IGKV17/OR19-2 IGK V FALSE 1945 -Mus musculus functional IGKV18-36 IGK V TRUE 1946 -Mus musculus functional IGKV19-93 IGK V TRUE 1947 -Mus musculus non-functional IGKV2-105 IGK V FALSE 1948 -Mus musculus non-functional IGKV2-107 IGK V FALSE 1949 -Mus musculus functional IGKV2-109 IGK V TRUE 1950 -Mus musculus functional IGKV2-112 IGK V TRUE 1951 -Mus musculus non-functional IGKV2-113 IGK V FALSE 1952 -Mus musculus non-functional IGKV2-116 IGK V FALSE 1953 -Mus musculus functional 
IGKV2-137 IGK V TRUE 1954 -Mus musculus non-functional IGKV2-93-1 IGK V FALSE 1955 -Mus musculus non-functional IGKV2-95-1 IGK V FALSE 1956 -Mus musculus non-functional IGKV2-95-2 IGK V FALSE 1957 -Mus musculus functional IGKV2-a IGK V TRUE 1958 -Mus musculus non-functional IGKV2-f IGK V FALSE 1959 -Mus musculus functional IGKV20-101-2 IGK V TRUE 1960 -Mus musculus functional IGKV3-1 IGK V TRUE 1961 -Mus musculus functional IGKV3-10 IGK V TRUE 1962 -Mus musculus non-functional IGKV3-11 IGK V FALSE 1963 -Mus musculus functional IGKV3-12 IGK V TRUE 1964 -Mus musculus non-functional IGKV3-12-1 IGK V FALSE 1965 -Mus musculus functional IGKV3-2 IGK V TRUE 1966 -Mus musculus functional IGKV3-3 IGK V TRUE 1967 -Mus musculus functional IGKV3-4 IGK V TRUE 1968 -Mus musculus functional IGKV3-5 IGK V TRUE 1969 -Mus musculus non-functional IGKV3-6 IGK V FALSE 1970 -Mus musculus functional IGKV3-7 IGK V TRUE 1971 -Mus musculus non-functional IGKV3-8 IGK V FALSE 1972 -Mus musculus functional IGKV3-9 IGK V TRUE 1973 -Mus musculus functional IGKV4-50 IGK V TRUE 1974 -Mus musculus functional IGKV4-51 IGK V TRUE 1975 -Mus musculus non-functional IGKV4-52 IGK V FALSE 1976 -Mus musculus functional IGKV4-53 IGK V TRUE 1977 -Mus musculus non-functional IGKV4-54 IGK V FALSE 1978 -Mus musculus functional IGKV4-55 IGK V TRUE 1979 -Mus musculus non-functional IGKV4-56 IGK V FALSE 1980 -Mus musculus functional IGKV4-57 IGK V TRUE 1981 -Mus musculus functional IGKV4-57-1 IGK V TRUE 1982 -Mus musculus functional IGKV4-58 IGK V TRUE 1983 -Mus musculus functional IGKV4-59 IGK V TRUE 1984 -Mus musculus non-functional IGKV4-60 IGK V FALSE 1985 -Mus musculus functional IGKV4-61 IGK V TRUE 1986 -Mus musculus non-functional IGKV4-62 IGK V FALSE 1987 -Mus musculus functional IGKV4-63 IGK V TRUE 1988 -Mus musculus non-functional IGKV4-65 IGK V FALSE 1989 -Mus musculus functional IGKV4-68 IGK V TRUE 1990 -Mus musculus functional IGKV4-69 IGK V TRUE 1991 -Mus musculus functional IGKV4-70 IGK V TRUE 1992 
-Mus musculus functional IGKV4-71 IGK V TRUE 1993 -Mus musculus functional IGKV4-72 IGK V TRUE 1994 -Mus musculus functional IGKV4-73 IGK V TRUE 1995 -Mus musculus functional IGKV4-74 IGK V TRUE 1996 -Mus musculus non-functional IGKV4-75 IGK V FALSE 1997 -Mus musculus non-functional IGKV4-77 IGK V FALSE 1998 -Mus musculus functional IGKV4-78 IGK V TRUE 1999 -Mus musculus functional IGKV4-79 IGK V TRUE 2000 -Mus musculus functional IGKV4-80 IGK V TRUE 2001 -Mus musculus functional IGKV4-81 IGK V TRUE 2002 -Mus musculus non-functional IGKV4-83 IGK V FALSE 2003 -Mus musculus functional IGKV4-86 IGK V TRUE 2004 -Mus musculus functional IGKV4-90 IGK V TRUE 2005 -Mus musculus functional IGKV4-91 IGK V TRUE 2006 -Mus musculus functional IGKV4-92 IGK V TRUE 2007 -Mus musculus functional IGKV5-37 IGK V TRUE 2008 -Mus musculus functional IGKV5-39 IGK V TRUE 2009 -Mus musculus non-functional IGKV5-40-1 IGK V FALSE 2010 -Mus musculus functional IGKV5-43 IGK V TRUE 2011 -Mus musculus functional IGKV5-45 IGK V TRUE 2012 -Mus musculus functional IGKV5-48 IGK V TRUE 2013 -Mus musculus non-functional IGKV6-13 IGK V FALSE 2014 -Mus musculus functional IGKV6-14 IGK V TRUE 2015 -Mus musculus functional IGKV6-15 IGK V TRUE 2016 -Mus musculus functional IGKV6-17 IGK V TRUE 2017 -Mus musculus functional IGKV6-20 IGK V TRUE 2018 -Mus musculus functional IGKV6-23 IGK V TRUE 2019 -Mus musculus functional IGKV6-25 IGK V TRUE 2020 -Mus musculus functional IGKV6-29 IGK V TRUE 2021 -Mus musculus functional IGKV6-32 IGK V TRUE 2022 -Mus musculus functional IGKV6-b IGK V TRUE 2023 -Mus musculus functional IGKV6-c IGK V TRUE 2024 -Mus musculus functional IGKV6-d IGK V TRUE 2025 -Mus musculus functional IGKV7-33 IGK V TRUE 2026 -Mus musculus functional IGKV8-16 IGK V TRUE 2027 -Mus musculus non-functional IGKV8-18 IGK V FALSE 2028 -Mus musculus functional IGKV8-19 IGK V TRUE 2029 -Mus musculus functional IGKV8-21 IGK V TRUE 2030 -Mus musculus non-functional IGKV8-22 IGK V FALSE 2031 -Mus musculus 
non-functional IGKV8-23-1 IGK V FALSE 2032 -Mus musculus functional IGKV8-24 IGK V TRUE 2033 -Mus musculus non-functional IGKV8-26 IGK V FALSE 2034 -Mus musculus functional IGKV8-27 IGK V TRUE 2035 -Mus musculus functional IGKV8-28 IGK V TRUE 2036 -Mus musculus functional IGKV8-30 IGK V TRUE 2037 -Mus musculus non-functional IGKV8-31 IGK V FALSE 2038 -Mus musculus functional IGKV8-34 IGK V TRUE 2039 -Mus musculus non-functional IGKV9-119 IGK V FALSE 2040 -Mus musculus functional IGKV9-120 IGK V TRUE 2041 -Mus musculus functional IGKV9-123 IGK V TRUE 2042 -Mus musculus functional IGKV9-124 IGK V TRUE 2043 -Mus musculus non-functional IGKV9-128 IGK V FALSE 2044 -Mus musculus non-functional IGKV9-129 IGK V FALSE 2045 -Mus musculus functional IGLJ1 IGL J TRUE 2046 -Mus musculus functional IGLJ2 IGL J TRUE 2047 -Mus musculus functional IGLJ3 IGL J TRUE 2048 -Mus musculus non-functional IGLJ3P IGL J FALSE 2049 -Mus musculus non-functional IGLJ4 IGL J FALSE 2050 -Mus musculus functional IGLV1 IGL V TRUE 2051 -Mus musculus functional IGLV2 IGL V TRUE 2052 -Mus musculus functional IGLV3 IGL V TRUE 2053 -Mus musculus non-functional IGLV4 IGL V FALSE 2054 -Mus musculus non-functional IGLV5 IGL V FALSE 2055 -Mus musculus non-functional IGLV6 IGL V FALSE 2056 -Mus musculus non-functional IGLV7 IGL V FALSE 2057 -Mus musculus non-functional IGLV8 IGL V FALSE 2058 -Mus musculus non-functional TRAJ1 TRA J FALSE 2059 -Mus musculus functional TRAJ11 TRA J TRUE 2060 -Mus musculus functional TRAJ12 TRA J TRUE 2061 -Mus musculus functional TRAJ13 TRA J TRUE 2062 -Mus musculus non-functional TRAJ14 TRA J FALSE 2063 -Mus musculus functional TRAJ15 TRA J TRUE 2064 -Mus musculus functional TRAJ16 TRA J TRUE 2065 -Mus musculus functional TRAJ17 TRA J TRUE 2066 -Mus musculus functional TRAJ18 TRA J TRUE 2067 -Mus musculus non-functional TRAJ19 TRA J FALSE 2068 -Mus musculus functional TRAJ2 TRA J TRUE 2069 -Mus musculus non-functional TRAJ20 TRA J FALSE 2070 -Mus musculus functional TRAJ21 
TRA J TRUE 2071 -Mus musculus functional TRAJ22 TRA J TRUE 2072 -Mus musculus functional TRAJ23 TRA J TRUE 2073 -Mus musculus functional TRAJ24 TRA J TRUE 2074 -Mus musculus non-functional TRAJ25 TRA J FALSE 2075 -Mus musculus functional TRAJ26 TRA J TRUE 2076 -Mus musculus functional TRAJ27 TRA J TRUE 2077 -Mus musculus functional TRAJ28 TRA J TRUE 2078 -Mus musculus non-functional TRAJ29 TRA J FALSE 2079 -Mus musculus non-functional TRAJ3 TRA J FALSE 2080 -Mus musculus functional TRAJ30 TRA J TRUE 2081 -Mus musculus functional TRAJ31 TRA J TRUE 2082 -Mus musculus functional TRAJ32 TRA J TRUE 2083 -Mus musculus functional TRAJ33 TRA J TRUE 2084 -Mus musculus functional TRAJ34 TRA J TRUE 2085 -Mus musculus functional TRAJ35 TRA J TRUE 2086 -Mus musculus non-functional TRAJ36 TRA J FALSE 2087 -Mus musculus functional TRAJ37 TRA J TRUE 2088 -Mus musculus functional TRAJ38 TRA J TRUE 2089 -Mus musculus functional TRAJ39 TRA J TRUE 2090 -Mus musculus non-functional TRAJ4 TRA J FALSE 2091 -Mus musculus functional TRAJ40 TRA J TRUE 2092 -Mus musculus non-functional TRAJ41 TRA J FALSE 2093 -Mus musculus functional TRAJ42 TRA J TRUE 2094 -Mus musculus functional TRAJ43 TRA J TRUE 2095 -Mus musculus non-functional TRAJ44 TRA J FALSE 2096 -Mus musculus functional TRAJ45 TRA J TRUE 2097 -Mus musculus non-functional TRAJ46 TRA J FALSE 2098 -Mus musculus non-functional TRAJ47 TRA J FALSE 2099 -Mus musculus functional TRAJ48 TRA J TRUE 2100 -Mus musculus functional TRAJ49 TRA J TRUE 2101 -Mus musculus functional TRAJ5 TRA J TRUE 2102 -Mus musculus functional TRAJ50 TRA J TRUE 2103 -Mus musculus non-functional TRAJ51 TRA J FALSE 2104 -Mus musculus functional TRAJ52 TRA J TRUE 2105 -Mus musculus functional TRAJ53 TRA J TRUE 2106 -Mus musculus non-functional TRAJ54 TRA J FALSE 2107 -Mus musculus non-functional TRAJ55 TRA J FALSE 2108 -Mus musculus functional TRAJ56 TRA J TRUE 2109 -Mus musculus functional TRAJ57 TRA J TRUE 2110 -Mus musculus functional TRAJ58 TRA J TRUE 2111 -Mus 
musculus non-functional TRAJ59 TRA J FALSE 2112 -Mus musculus functional TRAJ6 TRA J TRUE 2113 -Mus musculus non-functional TRAJ60 TRA J FALSE 2114 -Mus musculus non-functional TRAJ61 TRA J FALSE 2115 -Mus musculus non-functional TRAJ7 TRA J FALSE 2116 -Mus musculus non-functional TRAJ8 TRA J FALSE 2117 -Mus musculus functional TRAJ9 TRA J TRUE 2118 -Mus musculus functional TRAV1 TRA V TRUE 2119 -Mus musculus functional TRAV10 TRA V TRUE 2120 -Mus musculus functional TRAV10D TRA V TRUE 2121 -Mus musculus functional TRAV10N TRA V TRUE 2122 -Mus musculus functional TRAV11 TRA V TRUE 2123 -Mus musculus functional TRAV11D TRA V TRUE 2124 -Mus musculus non-functional TRAV11N TRA V FALSE 2125 -Mus musculus functional TRAV12-1 TRA V TRUE 2126 -Mus musculus functional TRAV12-2 TRA V TRUE 2127 -Mus musculus functional TRAV12-3 TRA V TRUE 2128 -Mus musculus non-functional TRAV12-4 TRA V FALSE 2129 -Mus musculus functional TRAV12D-1 TRA V TRUE 2130 -Mus musculus functional TRAV12D-2 TRA V TRUE 2131 -Mus musculus functional TRAV12D-3 TRA V TRUE 2132 -Mus musculus functional TRAV12N-1 TRA V TRUE 2133 -Mus musculus functional TRAV12N-2 TRA V TRUE 2134 -Mus musculus functional TRAV12N-3 TRA V TRUE 2135 -Mus musculus functional TRAV13-1 TRA V TRUE 2136 -Mus musculus functional TRAV13-2 TRA V TRUE 2137 -Mus musculus functional TRAV13-3 TRA V TRUE 2138 -Mus musculus functional TRAV13-4/DV7 TRA V TRUE 2139 -Mus musculus functional TRAV13-5 TRA V TRUE 2140 -Mus musculus functional TRAV13D-1 TRA V TRUE 2141 -Mus musculus functional TRAV13D-2 TRA V TRUE 2142 -Mus musculus functional TRAV13D-3 TRA V TRUE 2143 -Mus musculus functional TRAV13D-4 TRA V TRUE 2144 -Mus musculus functional TRAV13N-1 TRA V TRUE 2145 -Mus musculus functional TRAV13N-2 TRA V TRUE 2146 -Mus musculus functional TRAV13N-3 TRA V TRUE 2147 -Mus musculus functional TRAV13N-4 TRA V TRUE 2148 -Mus musculus functional TRAV14-1 TRA V TRUE 2149 -Mus musculus functional TRAV14-2 TRA V TRUE 2150 -Mus musculus functional 
TRAV14-3 TRA V TRUE 2151 -Mus musculus functional TRAV14D-1 TRA V TRUE 2152 -Mus musculus functional TRAV14D-2 TRA V TRUE 2153 -Mus musculus functional TRAV14D-3/DV8 TRA V TRUE 2154 -Mus musculus functional TRAV14N-1 TRA V TRUE 2155 -Mus musculus functional TRAV14N-2 TRA V TRUE 2156 -Mus musculus functional TRAV14N-3 TRA V TRUE 2157 -Mus musculus functional TRAV15-1/DV6-1 TRA V TRUE 2158 -Mus musculus functional TRAV15-2/DV6-2 TRA V TRUE 2159 -Mus musculus non-functional TRAV15-3 TRA V FALSE 2160 -Mus musculus functional TRAV15D-1/DV6D-1 TRA V TRUE 2161 -Mus musculus functional TRAV15D-2/DV6D-2 TRA V TRUE 2162 -Mus musculus non-functional TRAV15D-3 TRA V FALSE 2163 -Mus musculus functional TRAV15N-1 TRA V TRUE 2164 -Mus musculus functional TRAV15N-2 TRA V TRUE 2165 -Mus musculus non-functional TRAV15N-3 TRA V FALSE 2166 -Mus musculus functional TRAV16 TRA V TRUE 2167 -Mus musculus functional TRAV16D/DV11 TRA V TRUE 2168 -Mus musculus functional TRAV16N TRA V TRUE 2169 -Mus musculus functional TRAV17 TRA V TRUE 2170 -Mus musculus non-functional TRAV18 TRA V FALSE 2171 -Mus musculus functional TRAV19 TRA V TRUE 2172 -Mus musculus functional TRAV2 TRA V TRUE 2173 -Mus musculus non-functional TRAV20 TRA V FALSE 2174 -Mus musculus functional TRAV21/DV12 TRA V TRUE 2175 -Mus musculus non-functional TRAV22 TRA V FALSE 2176 -Mus musculus non-functional TRAV23 TRA V FALSE 2177 -Mus musculus functional TRAV3-1 TRA V TRUE 2178 -Mus musculus non-functional TRAV3-2 TRA V FALSE 2179 -Mus musculus functional TRAV3-3 TRA V TRUE 2180 -Mus musculus functional TRAV3-4 TRA V TRUE 2181 -Mus musculus non-functional TRAV3D-2 TRA V FALSE 2182 -Mus musculus functional TRAV3D-3 TRA V TRUE 2183 -Mus musculus non-functional TRAV3N-2 TRA V FALSE 2184 -Mus musculus functional TRAV3N-3 TRA V TRUE 2185 -Mus musculus non-functional TRAV4-1 TRA V FALSE 2186 -Mus musculus functional TRAV4-2 TRA V TRUE 2187 -Mus musculus functional TRAV4-3 TRA V TRUE 2188 -Mus musculus functional TRAV4-4/DV10 TRA V 
TRUE 2189 -Mus musculus non-functional TRAV4D-2 TRA V FALSE 2190 -Mus musculus functional TRAV4D-3 TRA V TRUE 2191 -Mus musculus functional TRAV4D-4 TRA V TRUE 2192 -Mus musculus functional TRAV4N-3 TRA V TRUE 2193 -Mus musculus functional TRAV4N-4 TRA V TRUE 2194 -Mus musculus functional TRAV5-1 TRA V TRUE 2195 -Mus musculus non-functional TRAV5-2 TRA V FALSE 2196 -Mus musculus non-functional TRAV5-3 TRA V FALSE 2197 -Mus musculus non-functional TRAV5-4 TRA V FALSE 2198 -Mus musculus non-functional TRAV5D-2 TRA V FALSE 2199 -Mus musculus non-functional TRAV5D-3 TRA V FALSE 2200 -Mus musculus functional TRAV5D-4 TRA V TRUE 2201 -Mus musculus non-functional TRAV5N-2 TRA V FALSE 2202 -Mus musculus non-functional TRAV5N-3 TRA V FALSE 2203 -Mus musculus functional TRAV5N-4 TRA V TRUE 2204 -Mus musculus functional TRAV6-1 TRA V TRUE 2205 -Mus musculus functional TRAV6-2 TRA V TRUE 2206 -Mus musculus functional TRAV6-3 TRA V TRUE 2207 -Mus musculus functional TRAV6-4 TRA V TRUE 2208 -Mus musculus functional TRAV6-5 TRA V TRUE 2209 -Mus musculus functional TRAV6-6 TRA V TRUE 2210 -Mus musculus functional TRAV6-7/DV9 TRA V TRUE 2211 -Mus musculus functional TRAV6D-3 TRA V TRUE 2212 -Mus musculus functional TRAV6D-4 TRA V TRUE 2213 -Mus musculus functional TRAV6D-5 TRA V TRUE 2214 -Mus musculus functional TRAV6D-6 TRA V TRUE 2215 -Mus musculus functional TRAV6D-7 TRA V TRUE 2216 -Mus musculus functional TRAV6N-5 TRA V TRUE 2217 -Mus musculus functional TRAV6N-6 TRA V TRUE 2218 -Mus musculus functional TRAV6N-7 TRA V TRUE 2219 -Mus musculus functional TRAV7-1 TRA V TRUE 2220 -Mus musculus functional TRAV7-2 TRA V TRUE 2221 -Mus musculus functional TRAV7-3 TRA V TRUE 2222 -Mus musculus functional TRAV7-4 TRA V TRUE 2223 -Mus musculus functional TRAV7-5 TRA V TRUE 2224 -Mus musculus functional TRAV7-6 TRA V TRUE 2225 -Mus musculus functional TRAV7D-2 TRA V TRUE 2226 -Mus musculus functional TRAV7D-3 TRA V TRUE 2227 -Mus musculus functional TRAV7D-4 TRA V TRUE 2228 -Mus 
musculus functional TRAV7D-5 TRA V TRUE 2229 -Mus musculus functional TRAV7D-6 TRA V TRUE 2230 -Mus musculus functional TRAV7N-4 TRA V TRUE 2231 -Mus musculus functional TRAV7N-5 TRA V TRUE 2232 -Mus musculus functional TRAV7N-6 TRA V TRUE 2233 -Mus musculus functional TRAV8-1 TRA V TRUE 2234 -Mus musculus functional TRAV8-2 TRA V TRUE 2235 -Mus musculus functional TRAV8D-1 TRA V TRUE 2236 -Mus musculus functional TRAV8D-2 TRA V TRUE 2237 -Mus musculus functional TRAV8N-2 TRA V TRUE 2238 -Mus musculus functional TRAV9-1 TRA V TRUE 2239 -Mus musculus functional TRAV9-2 TRA V TRUE 2240 -Mus musculus functional TRAV9-3 TRA V TRUE 2241 -Mus musculus functional TRAV9-4 TRA V TRUE 2242 -Mus musculus functional TRAV9D-1 TRA V TRUE 2243 -Mus musculus functional TRAV9D-2 TRA V TRUE 2244 -Mus musculus functional TRAV9D-3 TRA V TRUE 2245 -Mus musculus functional TRAV9D-4 TRA V TRUE 2246 -Mus musculus non-functional TRAV9N-1 TRA V FALSE 2247 -Mus musculus functional TRAV9N-2 TRA V TRUE 2248 -Mus musculus functional TRAV9N-3 TRA V TRUE 2249 -Mus musculus functional TRAV9N-4 TRA V TRUE 2250 -Mus musculus functional TRBD1 TRB D TRUE 2251 -Mus musculus functional TRBD2 TRB D TRUE 2252 -Mus musculus functional TRBJ1-1 TRB J TRUE 2253 -Mus musculus functional TRBJ1-2 TRB J TRUE 2254 -Mus musculus functional TRBJ1-3 TRB J TRUE 2255 -Mus musculus functional TRBJ1-4 TRB J TRUE 2256 -Mus musculus functional TRBJ1-5 TRB J TRUE 2257 -Mus musculus non-functional TRBJ1-6 TRB J FALSE 2258 -Mus musculus non-functional TRBJ1-7 TRB J FALSE 2259 -Mus musculus functional TRBJ2-1 TRB J TRUE 2260 -Mus musculus functional TRBJ2-2 TRB J TRUE 2261 -Mus musculus functional TRBJ2-3 TRB J TRUE 2262 -Mus musculus functional TRBJ2-4 TRB J TRUE 2263 -Mus musculus functional TRBJ2-5 TRB J TRUE 2264 -Mus musculus non-functional TRBJ2-6 TRB J FALSE 2265 -Mus musculus functional TRBJ2-7 TRB J TRUE 2266 -Mus musculus functional TRBV1 TRB V TRUE 2267 -Mus musculus non-functional TRBV10 TRB V FALSE 2268 -Mus 
musculus non-functional TRBV11 TRB V FALSE 2269 -Mus musculus functional TRBV12-1 TRB V TRUE 2270 -Mus musculus functional TRBV12-2 TRB V TRUE 2271 -Mus musculus non-functional TRBV12-3 TRB V FALSE 2272 -Mus musculus functional TRBV13-1 TRB V TRUE 2273 -Mus musculus functional TRBV13-2 TRB V TRUE 2274 -Mus musculus functional TRBV13-3 TRB V TRUE 2275 -Mus musculus functional TRBV14 TRB V TRUE 2276 -Mus musculus functional TRBV15 TRB V TRUE 2277 -Mus musculus functional TRBV16 TRB V TRUE 2278 -Mus musculus functional TRBV17 TRB V TRUE 2279 -Mus musculus non-functional TRBV18 TRB V FALSE 2280 -Mus musculus functional TRBV19 TRB V TRUE 2281 -Mus musculus functional TRBV2 TRB V TRUE 2282 -Mus musculus functional TRBV20 TRB V TRUE 2283 -Mus musculus non-functional TRBV21 TRB V FALSE 2284 -Mus musculus non-functional TRBV22 TRB V FALSE 2285 -Mus musculus functional TRBV23 TRB V TRUE 2286 -Mus musculus functional TRBV24 TRB V TRUE 2287 -Mus musculus non-functional TRBV25 TRB V FALSE 2288 -Mus musculus functional TRBV26 TRB V TRUE 2289 -Mus musculus non-functional TRBV27 TRB V FALSE 2290 -Mus musculus non-functional TRBV28 TRB V FALSE 2291 -Mus musculus functional TRBV29 TRB V TRUE 2292 -Mus musculus functional TRBV3 TRB V TRUE 2293 -Mus musculus functional TRBV30 TRB V TRUE 2294 -Mus musculus functional TRBV31 TRB V TRUE 2295 -Mus musculus functional TRBV4 TRB V TRUE 2296 -Mus musculus functional TRBV5 TRB V TRUE 2297 -Mus musculus non-functional TRBV6 TRB V FALSE 2298 -Mus musculus non-functional TRBV7 TRB V FALSE 2299 -Mus musculus non-functional TRBV8 TRB V FALSE 2300 -Mus musculus non-functional TRBV9 TRB V FALSE 2301 -Mus musculus functional TRDD1 TRD D TRUE 2302 -Mus musculus functional TRDD2 TRD D TRUE 2303 -Mus musculus functional TRDJ1 TRD J TRUE 2304 -Mus musculus functional TRDJ2 TRD J TRUE 2305 -Mus musculus functional TRDV1 TRD V TRUE 2306 -Mus musculus functional TRDV2-1 TRD V TRUE 2307 -Mus musculus functional TRDV2-2 TRD V TRUE 2308 -Mus musculus 
non-functional TRDV3 TRD V FALSE 2309 -Mus musculus functional TRDV4 TRD V TRUE 2310 -Mus musculus functional TRDV5 TRD V TRUE 2311 -Mus musculus functional TRGJ1 TRG J TRUE 2312 -Mus musculus functional TRGJ2 TRG J TRUE 2313 -Mus musculus functional TRGJ3 TRG J TRUE 2314 -Mus musculus functional TRGJ4 TRG J TRUE 2315 -Mus musculus functional TRGV1 TRG V TRUE 2316 -Mus musculus functional TRGV2 TRG V TRUE 2317 -Mus musculus functional TRGV3 TRG V TRUE 2318 -Mus musculus functional TRGV4 TRG V TRUE 2319 -Mus musculus functional TRGV5 TRG V TRUE 2320 -Mus musculus functional TRGV6 TRG V TRUE 2321 -Mus musculus functional TRGV7 TRG V TRUE 2322 -Mus spretus functional IGKV10-94 IGK V TRUE 2323 -Mus spretus functional IGKV10-96 IGK V TRUE 2324 -Mus spretus non-functional IGLJ4 IGL J FALSE 2325 -Mus spretus non-functional IGLJ5 IGL J FALSE 2326 -Mus spretus functional IGLV2 IGL V TRUE 2327 -Mus spretus functional IGLV3 IGL V TRUE 2328 -Mus spretus non-functional IGLV4 IGL V FALSE 2329 -Mus spretus non-functional IGLV8 IGL V FALSE 2330 -Oncorhynchus mykiss functional IGHD IGH D TRUE 2331 -Oncorhynchus mykiss functional IGHD1 IGH D TRUE 2332 -Oncorhynchus mykiss functional IGHD2 IGH D TRUE 2333 -Oncorhynchus mykiss functional IGHD3 IGH D TRUE 2334 -Oncorhynchus mykiss functional IGHD4 IGH D TRUE 2335 -Oncorhynchus mykiss functional IGHD5 IGH D TRUE 2336 -Oncorhynchus mykiss functional IGHD6 IGH D TRUE 2337 -Oncorhynchus mykiss functional IGHD7 IGH D TRUE 2338 -Oncorhynchus mykiss functional IGHD8 IGH D TRUE 2339 -Oncorhynchus mykiss functional IGHD9 IGH D TRUE 2340 -Oncorhynchus mykiss functional IGHJ1 IGH J TRUE 2341 -Oncorhynchus mykiss functional IGHJ2 IGH J TRUE 2342 -Oncorhynchus mykiss functional IGHJ3 IGH J TRUE 2343 -Oncorhynchus mykiss functional IGHJ4 IGH J TRUE 2344 -Oncorhynchus mykiss functional IGHJ5 IGH J TRUE 2345 -Oncorhynchus mykiss functional IGHJ6 IGH J TRUE 2346 -Oncorhynchus mykiss functional IGHJ7 IGH J TRUE 2347 -Oncorhynchus mykiss non-functional 
IGHV10S1 IGH V FALSE 2348 -Oncorhynchus mykiss non-functional IGHV11S1 IGH V FALSE 2349 -Oncorhynchus mykiss non-functional IGHV12S1 IGH V FALSE 2350 -Oncorhynchus mykiss non-functional IGHV13S1 IGH V FALSE 2351 -Oncorhynchus mykiss functional IGHV1S1 IGH V TRUE 2352 -Oncorhynchus mykiss functional IGHV1S2 IGH V TRUE 2353 -Oncorhynchus mykiss non-functional IGHV1S3 IGH V FALSE 2354 -Oncorhynchus mykiss non-functional IGHV1S4 IGH V FALSE 2355 -Oncorhynchus mykiss non-functional IGHV1S5 IGH V FALSE 2356 -Oncorhynchus mykiss non-functional IGHV1S6 IGH V FALSE 2357 -Oncorhynchus mykiss non-functional IGHV1S7 IGH V FALSE 2358 -Oncorhynchus mykiss non-functional IGHV2S1 IGH V FALSE 2359 -Oncorhynchus mykiss functional IGHV2S2 IGH V TRUE 2360 -Oncorhynchus mykiss non-functional IGHV2S3 IGH V FALSE 2361 -Oncorhynchus mykiss non-functional IGHV3S1 IGH V FALSE 2362 -Oncorhynchus mykiss non-functional IGHV3S2 IGH V FALSE 2363 -Oncorhynchus mykiss non-functional IGHV3S3 IGH V FALSE 2364 -Oncorhynchus mykiss non-functional IGHV3S4 IGH V FALSE 2365 -Oncorhynchus mykiss non-functional IGHV4S1 IGH V FALSE 2366 -Oncorhynchus mykiss non-functional IGHV5S1 IGH V FALSE 2367 -Oncorhynchus mykiss functional IGHV5S10 IGH V TRUE 2368 -Oncorhynchus mykiss non-functional IGHV5S2 IGH V FALSE 2369 -Oncorhynchus mykiss non-functional IGHV5S3 IGH V FALSE 2370 -Oncorhynchus mykiss non-functional IGHV5S4 IGH V FALSE 2371 -Oncorhynchus mykiss non-functional IGHV5S5 IGH V FALSE 2372 -Oncorhynchus mykiss non-functional IGHV5S6 IGH V FALSE 2373 -Oncorhynchus mykiss non-functional IGHV5S7 IGH V FALSE 2374 -Oncorhynchus mykiss non-functional IGHV5S8 IGH V FALSE 2375 -Oncorhynchus mykiss non-functional IGHV5S9 IGH V FALSE 2376 -Oncorhynchus mykiss functional IGHV6S1 IGH V TRUE 2377 -Oncorhynchus mykiss non-functional IGHV6S2 IGH V FALSE 2378 -Oncorhynchus mykiss non-functional IGHV6S3 IGH V FALSE 2379 -Oncorhynchus mykiss non-functional IGHV6S4 IGH V FALSE 2380 -Oncorhynchus mykiss non-functional 
IGHV6S5 IGH V FALSE 2381 -Oncorhynchus mykiss non-functional IGHV6S6 IGH V FALSE 2382 -Oncorhynchus mykiss non-functional IGHV6S7 IGH V FALSE 2383 -Oncorhynchus mykiss non-functional IGHV6S8 IGH V FALSE 2384 -Oncorhynchus mykiss non-functional IGHV6S9 IGH V FALSE 2385 -Oncorhynchus mykiss non-functional IGHV7S1 IGH V FALSE 2386 -Oncorhynchus mykiss non-functional IGHV8S1 IGH V FALSE 2387 -Oncorhynchus mykiss non-functional IGHV8S10 IGH V FALSE 2388 -Oncorhynchus mykiss non-functional IGHV8S11 IGH V FALSE 2389 -Oncorhynchus mykiss non-functional IGHV8S2 IGH V FALSE 2390 -Oncorhynchus mykiss non-functional IGHV8S3 IGH V FALSE 2391 -Oncorhynchus mykiss non-functional IGHV8S4 IGH V FALSE 2392 -Oncorhynchus mykiss non-functional IGHV8S5 IGH V FALSE 2393 -Oncorhynchus mykiss non-functional IGHV8S6 IGH V FALSE 2394 -Oncorhynchus mykiss functional IGHV8S7 IGH V TRUE 2395 -Oncorhynchus mykiss non-functional IGHV8S8 IGH V FALSE 2396 -Oncorhynchus mykiss non-functional IGHV8S9 IGH V FALSE 2397 -Oncorhynchus mykiss non-functional IGHV9S1 IGH V FALSE 2398 -Oncorhynchus mykiss non-functional IGHV9S2 IGH V FALSE 2399 -Oncorhynchus mykiss non-functional IGHV9S3 IGH V FALSE 2400 -Oncorhynchus mykiss non-functional IGHV9S4 IGH V FALSE 2401 -Oncorhynchus mykiss non-functional IGHV9S5 IGH V FALSE 2402 -Oncorhynchus mykiss non-functional IGHV9S6 IGH V FALSE 2403 -Oncorhynchus mykiss non-functional IGHV9S7 IGH V FALSE 2404 -Oncorhynchus mykiss functional TRBD1 TRB D TRUE 2405 -Oncorhynchus mykiss functional TRBJ1 TRB J TRUE 2406 -Oncorhynchus mykiss functional TRBJ10 TRB J TRUE 2407 -Oncorhynchus mykiss functional TRBJ2 TRB J TRUE 2408 -Oncorhynchus mykiss functional TRBJ3 TRB J TRUE 2409 -Oncorhynchus mykiss functional TRBJ4 TRB J TRUE 2410 -Oncorhynchus mykiss functional TRBJ5 TRB J TRUE 2411 -Oncorhynchus mykiss functional TRBJ6 TRB J TRUE 2412 -Oncorhynchus mykiss functional TRBJ7 TRB J TRUE 2413 -Oncorhynchus mykiss functional TRBJ8 TRB J TRUE 2414 -Oncorhynchus mykiss functional 
TRBJ9 TRB J TRUE 2415 -Oncorhynchus mykiss non-functional TRBV10S1 TRB V FALSE 2416 -Oncorhynchus mykiss non-functional TRBV1S1 TRB V FALSE 2417 -Oncorhynchus mykiss non-functional TRBV1S2 TRB V FALSE 2418 -Oncorhynchus mykiss non-functional TRBV1S3 TRB V FALSE 2419 -Oncorhynchus mykiss non-functional TRBV1S4 TRB V FALSE 2420 -Oncorhynchus mykiss non-functional TRBV1S5 TRB V FALSE 2421 -Oncorhynchus mykiss non-functional TRBV2S1 TRB V FALSE 2422 -Oncorhynchus mykiss non-functional TRBV2S10 TRB V FALSE 2423 -Oncorhynchus mykiss non-functional TRBV2S11 TRB V FALSE 2424 -Oncorhynchus mykiss non-functional TRBV2S12 TRB V FALSE 2425 -Oncorhynchus mykiss non-functional TRBV2S13 TRB V FALSE 2426 -Oncorhynchus mykiss non-functional TRBV2S14 TRB V FALSE 2427 -Oncorhynchus mykiss non-functional TRBV2S15 TRB V FALSE 2428 -Oncorhynchus mykiss non-functional TRBV2S16 TRB V FALSE 2429 -Oncorhynchus mykiss non-functional TRBV2S17 TRB V FALSE 2430 -Oncorhynchus mykiss non-functional TRBV2S18 TRB V FALSE 2431 -Oncorhynchus mykiss non-functional TRBV2S19 TRB V FALSE 2432 -Oncorhynchus mykiss non-functional TRBV2S2 TRB V FALSE 2433 -Oncorhynchus mykiss non-functional TRBV2S20 TRB V FALSE 2434 -Oncorhynchus mykiss non-functional TRBV2S21 TRB V FALSE 2435 -Oncorhynchus mykiss non-functional TRBV2S23 TRB V FALSE 2436 -Oncorhynchus mykiss non-functional TRBV2S24 TRB V FALSE 2437 -Oncorhynchus mykiss non-functional TRBV2S25 TRB V FALSE 2438 -Oncorhynchus mykiss non-functional TRBV2S26 TRB V FALSE 2439 -Oncorhynchus mykiss non-functional TRBV2S27 TRB V FALSE 2440 -Oncorhynchus mykiss non-functional TRBV2S3 TRB V FALSE 2441 -Oncorhynchus mykiss non-functional TRBV2S4 TRB V FALSE 2442 -Oncorhynchus mykiss non-functional TRBV2S5 TRB V FALSE 2443 -Oncorhynchus mykiss non-functional TRBV2S6 TRB V FALSE 2444 -Oncorhynchus mykiss non-functional TRBV2S7 TRB V FALSE 2445 -Oncorhynchus mykiss non-functional TRBV2S8 TRB V FALSE 2446 -Oncorhynchus mykiss non-functional TRBV2S9 TRB V FALSE 2447 
-Oncorhynchus mykiss non-functional TRBV3S1 TRB V FALSE 2448 -Oncorhynchus mykiss non-functional TRBV3S2 TRB V FALSE 2449 -Oncorhynchus mykiss non-functional TRBV4S1 TRB V FALSE 2450 -Oncorhynchus mykiss non-functional TRBV5S1 TRB V FALSE 2451 -Oncorhynchus mykiss non-functional TRBV6S1 TRB V FALSE 2452 -Oncorhynchus mykiss non-functional TRBV7S1 TRB V FALSE 2453 -Oncorhynchus mykiss non-functional TRBV7S2 TRB V FALSE 2454 -Oncorhynchus mykiss non-functional TRBV7S3 TRB V FALSE 2455 -Oncorhynchus mykiss non-functional TRBV7S4 TRB V FALSE 2456 -Oncorhynchus mykiss non-functional TRBV8S1 TRB V FALSE 2457 -Oncorhynchus mykiss non-functional TRBV8S2 TRB V FALSE 2458 -Oncorhynchus mykiss non-functional TRBV8S3 TRB V FALSE 2459 -Oncorhynchus mykiss non-functional TRBV9S1 TRB V FALSE 2460 -Ornithorhynchus anatinus functional IGHD IGH D TRUE 2461 -Ornithorhynchus anatinus non-functional IGHD1 IGH D FALSE 2462 -Ornithorhynchus anatinus non-functional IGHD2 IGH D FALSE 2463 -Ornithorhynchus anatinus non-functional IGHD3 IGH D FALSE 2464 -Ornithorhynchus anatinus non-functional IGHJ1 IGH J FALSE 2465 -Ornithorhynchus anatinus functional IGHJ10 IGH J TRUE 2466 -Ornithorhynchus anatinus non-functional IGHJ11 IGH J FALSE 2467 -Ornithorhynchus anatinus non-functional IGHJ2 IGH J FALSE 2468 -Ornithorhynchus anatinus non-functional IGHJ3 IGH J FALSE 2469 -Ornithorhynchus anatinus non-functional IGHJ4 IGH J FALSE 2470 -Ornithorhynchus anatinus non-functional IGHJ5 IGH J FALSE 2471 -Ornithorhynchus anatinus non-functional IGHJ6 IGH J FALSE 2472 -Ornithorhynchus anatinus non-functional IGHJ7 IGH J FALSE 2473 -Ornithorhynchus anatinus non-functional IGHJ8 IGH J FALSE 2474 -Ornithorhynchus anatinus non-functional IGHJ9 IGH J FALSE 2475 -Oryctolagus cuniculus functional IGHD1-1 IGH D TRUE 2476 -Oryctolagus cuniculus functional IGHD2-1 IGH D TRUE 2477 -Oryctolagus cuniculus non-functional IGHD3-1 IGH D FALSE 2478 -Oryctolagus cuniculus functional IGHD3-2 IGH D TRUE 2479 -Oryctolagus 
cuniculus functional IGHD3-3 IGH D TRUE 2480 -Oryctolagus cuniculus functional IGHD4-1 IGH D TRUE 2481 -Oryctolagus cuniculus functional IGHD4-2 IGH D TRUE 2482 -Oryctolagus cuniculus functional IGHD5-1 IGH D TRUE 2483 -Oryctolagus cuniculus functional IGHD6-1 IGH D TRUE 2484 -Oryctolagus cuniculus functional IGHD7-1 IGH D TRUE 2485 -Oryctolagus cuniculus functional IGHD8-1 IGH D TRUE 2486 -Oryctolagus cuniculus functional IGHJ1 IGH J TRUE 2487 -Oryctolagus cuniculus functional IGHJ2 IGH J TRUE 2488 -Oryctolagus cuniculus functional IGHJ3 IGH J TRUE 2489 -Oryctolagus cuniculus functional IGHJ4 IGH J TRUE 2490 -Oryctolagus cuniculus functional IGHJ5 IGH J TRUE 2491 -Oryctolagus cuniculus functional IGHJ6 IGH J TRUE 2492 -Oryctolagus cuniculus functional IGHV1S1 IGH V TRUE 2493 -Oryctolagus cuniculus non-functional IGHV1S10 IGH V FALSE 2494 -Oryctolagus cuniculus non-functional IGHV1S11 IGH V FALSE 2495 -Oryctolagus cuniculus non-functional IGHV1S12 IGH V FALSE 2496 -Oryctolagus cuniculus functional IGHV1S13 IGH V TRUE 2497 -Oryctolagus cuniculus non-functional IGHV1S14 IGH V FALSE 2498 -Oryctolagus cuniculus non-functional IGHV1S15 IGH V FALSE 2499 -Oryctolagus cuniculus non-functional IGHV1S16 IGH V FALSE 2500 -Oryctolagus cuniculus functional IGHV1S17 IGH V TRUE 2501 -Oryctolagus cuniculus non-functional IGHV1S18 IGH V FALSE 2502 -Oryctolagus cuniculus non-functional IGHV1S19 IGH V FALSE 2503 -Oryctolagus cuniculus non-functional IGHV1S2 IGH V FALSE 2504 -Oryctolagus cuniculus non-functional IGHV1S20 IGH V FALSE 2505 -Oryctolagus cuniculus non-functional IGHV1S21 IGH V FALSE 2506 -Oryctolagus cuniculus non-functional IGHV1S22 IGH V FALSE 2507 -Oryctolagus cuniculus non-functional IGHV1S23 IGH V FALSE 2508 -Oryctolagus cuniculus functional IGHV1S24 IGH V TRUE 2509 -Oryctolagus cuniculus functional IGHV1S25 IGH V TRUE 2510 -Oryctolagus cuniculus functional IGHV1S26 IGH V TRUE 2511 -Oryctolagus cuniculus non-functional IGHV1S27 IGH V FALSE 2512 -Oryctolagus cuniculus 
functional IGHV1S28 IGH V TRUE 2513 -Oryctolagus cuniculus non-functional IGHV1S29 IGH V FALSE 2514 -Oryctolagus cuniculus non-functional IGHV1S3 IGH V FALSE 2515 -Oryctolagus cuniculus non-functional IGHV1S30 IGH V FALSE 2516 -Oryctolagus cuniculus functional IGHV1S31 IGH V TRUE 2517 -Oryctolagus cuniculus non-functional IGHV1S32 IGH V FALSE 2518 -Oryctolagus cuniculus functional IGHV1S33 IGH V TRUE 2519 -Oryctolagus cuniculus functional IGHV1S34 IGH V TRUE 2520 -Oryctolagus cuniculus non-functional IGHV1S35 IGH V FALSE 2521 -Oryctolagus cuniculus functional IGHV1S36 IGH V TRUE 2522 -Oryctolagus cuniculus non-functional IGHV1S37 IGH V FALSE 2523 -Oryctolagus cuniculus non-functional IGHV1S39 IGH V FALSE 2524 -Oryctolagus cuniculus non-functional IGHV1S4 IGH V FALSE 2525 -Oryctolagus cuniculus functional IGHV1S40 IGH V TRUE 2526 -Oryctolagus cuniculus non-functional IGHV1S42 IGH V FALSE 2527 -Oryctolagus cuniculus functional IGHV1S43 IGH V TRUE 2528 -Oryctolagus cuniculus functional IGHV1S44 IGH V TRUE 2529 -Oryctolagus cuniculus functional IGHV1S45 IGH V TRUE 2530 -Oryctolagus cuniculus non-functional IGHV1S46 IGH V FALSE 2531 -Oryctolagus cuniculus functional IGHV1S47 IGH V TRUE 2532 -Oryctolagus cuniculus non-functional IGHV1S48 IGH V FALSE 2533 -Oryctolagus cuniculus functional IGHV1S49 IGH V TRUE 2534 -Oryctolagus cuniculus non-functional IGHV1S5 IGH V FALSE 2535 -Oryctolagus cuniculus functional IGHV1S50 IGH V TRUE 2536 -Oryctolagus cuniculus functional IGHV1S51 IGH V TRUE 2537 -Oryctolagus cuniculus functional IGHV1S52 IGH V TRUE 2538 -Oryctolagus cuniculus functional IGHV1S53 IGH V TRUE 2539 -Oryctolagus cuniculus functional IGHV1S54 IGH V TRUE 2540 -Oryctolagus cuniculus functional IGHV1S55 IGH V TRUE 2541 -Oryctolagus cuniculus functional IGHV1S56 IGH V TRUE 2542 -Oryctolagus cuniculus functional IGHV1S57 IGH V TRUE 2543 -Oryctolagus cuniculus functional IGHV1S58 IGH V TRUE 2544 -Oryctolagus cuniculus functional IGHV1S59 IGH V TRUE 2545 -Oryctolagus 
cuniculus non-functional IGHV1S6 IGH V FALSE 2546 -Oryctolagus cuniculus functional IGHV1S60 IGH V TRUE 2547 -Oryctolagus cuniculus functional IGHV1S61 IGH V TRUE 2548 -Oryctolagus cuniculus functional IGHV1S62 IGH V TRUE 2549 -Oryctolagus cuniculus functional IGHV1S63 IGH V TRUE 2550 -Oryctolagus cuniculus functional IGHV1S64 IGH V TRUE 2551 -Oryctolagus cuniculus functional IGHV1S65 IGH V TRUE 2552 -Oryctolagus cuniculus functional IGHV1S66 IGH V TRUE 2553 -Oryctolagus cuniculus functional IGHV1S67 IGH V TRUE 2554 -Oryctolagus cuniculus functional IGHV1S68 IGH V TRUE 2555 -Oryctolagus cuniculus functional IGHV1S69 IGH V TRUE 2556 -Oryctolagus cuniculus functional IGHV1S7 IGH V TRUE 2557 -Oryctolagus cuniculus functional IGHV1S8 IGH V TRUE 2558 -Oryctolagus cuniculus non-functional IGHV1S9 IGH V FALSE 2559 -Oryctolagus cuniculus functional IGKJ1-1 IGK J TRUE 2560 -Oryctolagus cuniculus functional IGKJ1-2 IGK J TRUE 2561 -Oryctolagus cuniculus non-functional IGKJ1-3 IGK J FALSE 2562 -Oryctolagus cuniculus non-functional IGKJ1-4 IGK J FALSE 2563 -Oryctolagus cuniculus non-functional IGKJ1-5 IGK J FALSE 2564 -Oryctolagus cuniculus functional IGKJ2-1 IGK J TRUE 2565 -Oryctolagus cuniculus functional IGKJ2-2 IGK J TRUE 2566 -Oryctolagus cuniculus functional IGKJ2-3 IGK J TRUE 2567 -Oryctolagus cuniculus functional IGKV1S1 IGK V TRUE 2568 -Oryctolagus cuniculus functional IGKV1S10 IGK V TRUE 2569 -Oryctolagus cuniculus functional IGKV1S11 IGK V TRUE 2570 -Oryctolagus cuniculus functional IGKV1S12 IGK V TRUE 2571 -Oryctolagus cuniculus non-functional IGKV1S13 IGK V FALSE 2572 -Oryctolagus cuniculus functional IGKV1S14 IGK V TRUE 2573 -Oryctolagus cuniculus functional IGKV1S15 IGK V TRUE 2574 -Oryctolagus cuniculus functional IGKV1S16 IGK V TRUE 2575 -Oryctolagus cuniculus functional IGKV1S17 IGK V TRUE 2576 -Oryctolagus cuniculus functional IGKV1S18 IGK V TRUE 2577 -Oryctolagus cuniculus functional IGKV1S19 IGK V TRUE 2578 -Oryctolagus cuniculus functional IGKV1S2 IGK V 
TRUE 2579 -Oryctolagus cuniculus functional IGKV1S20 IGK V TRUE 2580 -Oryctolagus cuniculus functional IGKV1S21 IGK V TRUE 2581 -Oryctolagus cuniculus functional IGKV1S22 IGK V TRUE 2582 -Oryctolagus cuniculus functional IGKV1S23 IGK V TRUE 2583 -Oryctolagus cuniculus functional IGKV1S24 IGK V TRUE 2584 -Oryctolagus cuniculus functional IGKV1S25 IGK V TRUE 2585 -Oryctolagus cuniculus functional IGKV1S26 IGK V TRUE 2586 -Oryctolagus cuniculus non-functional IGKV1S27 IGK V FALSE 2587 -Oryctolagus cuniculus non-functional IGKV1S28 IGK V FALSE 2588 -Oryctolagus cuniculus non-functional IGKV1S29 IGK V FALSE 2589 -Oryctolagus cuniculus functional IGKV1S3 IGK V TRUE 2590 -Oryctolagus cuniculus non-functional IGKV1S30 IGK V FALSE 2591 -Oryctolagus cuniculus non-functional IGKV1S31 IGK V FALSE 2592 -Oryctolagus cuniculus non-functional IGKV1S32 IGK V FALSE 2593 -Oryctolagus cuniculus non-functional IGKV1S33 IGK V FALSE 2594 -Oryctolagus cuniculus non-functional IGKV1S34 IGK V FALSE 2595 -Oryctolagus cuniculus non-functional IGKV1S35 IGK V FALSE 2596 -Oryctolagus cuniculus non-functional IGKV1S36 IGK V FALSE 2597 -Oryctolagus cuniculus non-functional IGKV1S37 IGK V FALSE 2598 -Oryctolagus cuniculus non-functional IGKV1S38 IGK V FALSE 2599 -Oryctolagus cuniculus non-functional IGKV1S39 IGK V FALSE 2600 -Oryctolagus cuniculus non-functional IGKV1S4 IGK V FALSE 2601 -Oryctolagus cuniculus non-functional IGKV1S40 IGK V FALSE 2602 -Oryctolagus cuniculus non-functional IGKV1S41 IGK V FALSE 2603 -Oryctolagus cuniculus non-functional IGKV1S42 IGK V FALSE 2604 -Oryctolagus cuniculus non-functional IGKV1S43 IGK V FALSE 2605 -Oryctolagus cuniculus non-functional IGKV1S44 IGK V FALSE 2606 -Oryctolagus cuniculus non-functional IGKV1S45 IGK V FALSE 2607 -Oryctolagus cuniculus non-functional IGKV1S46 IGK V FALSE 2608 -Oryctolagus cuniculus non-functional IGKV1S47 IGK V FALSE 2609 -Oryctolagus cuniculus non-functional IGKV1S48 IGK V FALSE 2610 -Oryctolagus cuniculus non-functional IGKV1S49 
IGK V FALSE 2611 -Oryctolagus cuniculus functional IGKV1S5 IGK V TRUE 2612 -Oryctolagus cuniculus non-functional IGKV1S50 IGK V FALSE 2613 -Oryctolagus cuniculus non-functional IGKV1S51 IGK V FALSE 2614 -Oryctolagus cuniculus non-functional IGKV1S52 IGK V FALSE 2615 -Oryctolagus cuniculus non-functional IGKV1S53 IGK V FALSE 2616 -Oryctolagus cuniculus non-functional IGKV1S54 IGK V FALSE 2617 -Oryctolagus cuniculus non-functional IGKV1S55 IGK V FALSE 2618 -Oryctolagus cuniculus non-functional IGKV1S56 IGK V FALSE 2619 -Oryctolagus cuniculus non-functional IGKV1S57 IGK V FALSE 2620 -Oryctolagus cuniculus non-functional IGKV1S58 IGK V FALSE 2621 -Oryctolagus cuniculus non-functional IGKV1S59 IGK V FALSE 2622 -Oryctolagus cuniculus functional IGKV1S6 IGK V TRUE 2623 -Oryctolagus cuniculus non-functional IGKV1S60 IGK V FALSE 2624 -Oryctolagus cuniculus non-functional IGKV1S61 IGK V FALSE 2625 -Oryctolagus cuniculus non-functional IGKV1S62 IGK V FALSE 2626 -Oryctolagus cuniculus non-functional IGKV1S63 IGK V FALSE 2627 -Oryctolagus cuniculus non-functional IGKV1S64 IGK V FALSE 2628 -Oryctolagus cuniculus non-functional IGKV1S65 IGK V FALSE 2629 -Oryctolagus cuniculus non-functional IGKV1S66 IGK V FALSE 2630 -Oryctolagus cuniculus non-functional IGKV1S67 IGK V FALSE 2631 -Oryctolagus cuniculus non-functional IGKV1S68 IGK V FALSE 2632 -Oryctolagus cuniculus functional IGKV1S7 IGK V TRUE 2633 -Oryctolagus cuniculus functional IGKV1S8 IGK V TRUE 2634 -Oryctolagus cuniculus functional IGKV1S9 IGK V TRUE 2635 -Oryctolagus cuniculus non-functional IGLJ1 IGL J FALSE 2636 -Oryctolagus cuniculus non-functional IGLJ3 IGL J FALSE 2637 -Oryctolagus cuniculus functional IGLJ5 IGL J TRUE 2638 -Oryctolagus cuniculus functional IGLJ6 IGL J TRUE 2639 -Oryctolagus cuniculus non-functional IGLV1S1 IGL V FALSE 2640 -Oryctolagus cuniculus non-functional IGLV1S2 IGL V FALSE 2641 -Oryctolagus cuniculus non-functional IGLV1S3 IGL V FALSE 2642 -Oryctolagus cuniculus functional IGLV2S1 IGL V TRUE 
2643 -Oryctolagus cuniculus functional IGLV2S2 IGL V TRUE 2644 -Oryctolagus cuniculus non-functional IGLV2S3 IGL V FALSE 2645 -Oryctolagus cuniculus non-functional IGLV2S4 IGL V FALSE 2646 -Oryctolagus cuniculus non-functional IGLV2S5 IGL V FALSE 2647 -Oryctolagus cuniculus non-functional IGLV3S1 IGL V FALSE 2648 -Oryctolagus cuniculus non-functional IGLV3S10 IGL V FALSE 2649 -Oryctolagus cuniculus functional IGLV3S2 IGL V TRUE 2650 -Oryctolagus cuniculus non-functional IGLV3S3 IGL V FALSE 2651 -Oryctolagus cuniculus non-functional IGLV3S4 IGL V FALSE 2652 -Oryctolagus cuniculus non-functional IGLV3S5 IGL V FALSE 2653 -Oryctolagus cuniculus functional IGLV3S6 IGL V TRUE 2654 -Oryctolagus cuniculus functional IGLV3S7 IGL V TRUE 2655 -Oryctolagus cuniculus non-functional IGLV3S8 IGL V FALSE 2656 -Oryctolagus cuniculus functional IGLV3S9 IGL V TRUE 2657 -Oryctolagus cuniculus non-functional IGLV4S1 IGL V FALSE 2658 -Oryctolagus cuniculus non-functional IGLV4S2 IGL V FALSE 2659 -Oryctolagus cuniculus functional IGLV4S3 IGL V TRUE 2660 -Oryctolagus cuniculus functional IGLV4S4 IGL V TRUE 2661 -Oryctolagus cuniculus non-functional IGLV4S5 IGL V FALSE 2662 -Oryctolagus cuniculus non-functional IGLV4S6 IGL V FALSE 2663 -Oryctolagus cuniculus non-functional IGLV4S7 IGL V FALSE 2664 -Oryctolagus cuniculus functional IGLV5S1 IGL V TRUE 2665 -Oryctolagus cuniculus functional IGLV5S10 IGL V TRUE 2666 -Oryctolagus cuniculus functional IGLV5S2 IGL V TRUE 2667 -Oryctolagus cuniculus functional IGLV5S3 IGL V TRUE 2668 -Oryctolagus cuniculus non-functional IGLV5S4 IGL V FALSE 2669 -Oryctolagus cuniculus functional IGLV5S5 IGL V TRUE 2670 -Oryctolagus cuniculus functional IGLV5S6 IGL V TRUE 2671 -Oryctolagus cuniculus non-functional IGLV5S7 IGL V FALSE 2672 -Oryctolagus cuniculus non-functional IGLV5S8 IGL V FALSE 2673 -Oryctolagus cuniculus functional IGLV5S9 IGL V TRUE 2674 -Oryctolagus cuniculus functional IGLV6S1 IGL V TRUE 2675 -Oryctolagus cuniculus non-functional IGLV6S2 IGL V 
FALSE 2676 -Oryctolagus cuniculus functional IGLV6S3 IGL V TRUE 2677 -Oryctolagus cuniculus non-functional IGLV6S4 IGL V FALSE 2678 -Oryctolagus cuniculus functional IGLV6S5 IGL V TRUE 2679 -Oryctolagus cuniculus functional IGLV6S6 IGL V TRUE 2680 -Oryctolagus cuniculus functional IGLV6S7 IGL V TRUE 2681 -Oryctolagus cuniculus non-functional IGLV7S1 IGL V FALSE 2682 -Rattus norvegicus functional IGHD IGH D TRUE 2683 -Rattus norvegicus functional IGHD1-1 IGH D TRUE 2684 -Rattus norvegicus functional IGHD1-2 IGH D TRUE 2685 -Rattus norvegicus functional IGHD1-3 IGH D TRUE 2686 -Rattus norvegicus functional IGHD1-4 IGH D TRUE 2687 -Rattus norvegicus functional IGHD1-5 IGH D TRUE 2688 -Rattus norvegicus functional IGHD1-6 IGH D TRUE 2689 -Rattus norvegicus functional IGHD1-7 IGH D TRUE 2690 -Rattus norvegicus functional IGHD1-8 IGH D TRUE 2691 -Rattus norvegicus non-functional IGHD2-1 IGH D FALSE 2692 -Rattus norvegicus non-functional IGHD2-2 IGH D FALSE 2693 -Rattus norvegicus non-functional IGHD2-3 IGH D FALSE 2694 -Rattus norvegicus non-functional IGHD3-1 IGH D FALSE 2695 -Rattus norvegicus non-functional IGHD3-2 IGH D FALSE 2696 -Rattus norvegicus non-functional IGHD3-3 IGH D FALSE 2697 -Rattus norvegicus non-functional IGHD3-4 IGH D FALSE 2698 -Rattus norvegicus functional IGHD4-1 IGH D TRUE 2699 -Rattus norvegicus functional IGHD4-2 IGH D TRUE 2700 -Rattus norvegicus functional IGHD4-3 IGH D TRUE 2701 -Rattus norvegicus functional IGHD4-4 IGH D TRUE 2702 -Rattus norvegicus functional IGHD5-1 IGH D TRUE 2703 -Rattus norvegicus functional IGHJ1 IGH J TRUE 2704 -Rattus norvegicus functional IGHJ2 IGH J TRUE 2705 -Rattus norvegicus functional IGHJ3 IGH J TRUE 2706 -Rattus norvegicus functional IGHJ4 IGH J TRUE 2707 -Rattus norvegicus non-functional IGHV10S1 IGH V FALSE 2708 -Rattus norvegicus non-functional IGHV10S10 IGH V FALSE 2709 -Rattus norvegicus non-functional IGHV10S11 IGH V FALSE 2710 -Rattus norvegicus non-functional IGHV10S12 IGH V FALSE 2711 -Rattus 
norvegicus functional IGHV10S13 IGH V TRUE 2712 -Rattus norvegicus non-functional IGHV10S14 IGH V FALSE 2713 -Rattus norvegicus non-functional IGHV10S15 IGH V FALSE 2714 -Rattus norvegicus non-functional IGHV10S16 IGH V FALSE 2715 -Rattus norvegicus non-functional IGHV10S17 IGH V FALSE 2716 -Rattus norvegicus non-functional IGHV10S18 IGH V FALSE 2717 -Rattus norvegicus non-functional IGHV10S19 IGH V FALSE 2718 -Rattus norvegicus non-functional IGHV10S2 IGH V FALSE 2719 -Rattus norvegicus non-functional IGHV10S3 IGH V FALSE 2720 -Rattus norvegicus non-functional IGHV10S4 IGH V FALSE 2721 -Rattus norvegicus non-functional IGHV10S5 IGH V FALSE 2722 -Rattus norvegicus non-functional IGHV10S6 IGH V FALSE 2723 -Rattus norvegicus non-functional IGHV10S7 IGH V FALSE 2724 -Rattus norvegicus functional IGHV10S8 IGH V TRUE 2725 -Rattus norvegicus non-functional IGHV10S9 IGH V FALSE 2726 -Rattus norvegicus functional IGHV11S1 IGH V TRUE 2727 -Rattus norvegicus functional IGHV11S10 IGH V TRUE 2728 -Rattus norvegicus non-functional IGHV11S11 IGH V FALSE 2729 -Rattus norvegicus non-functional IGHV11S12 IGH V FALSE 2730 -Rattus norvegicus functional IGHV11S2 IGH V TRUE 2731 -Rattus norvegicus functional IGHV11S3 IGH V TRUE 2732 -Rattus norvegicus non-functional IGHV11S4 IGH V FALSE 2733 -Rattus norvegicus non-functional IGHV11S5 IGH V FALSE 2734 -Rattus norvegicus non-functional IGHV11S6 IGH V FALSE 2735 -Rattus norvegicus functional IGHV11S7 IGH V TRUE 2736 -Rattus norvegicus non-functional IGHV11S8 IGH V FALSE 2737 -Rattus norvegicus non-functional IGHV11S9 IGH V FALSE 2738 -Rattus norvegicus non-functional IGHV12S1 IGH V FALSE 2739 -Rattus norvegicus functional IGHV12S2 IGH V TRUE 2740 -Rattus norvegicus non-functional IGHV12S3 IGH V FALSE 2741 -Rattus norvegicus non-functional IGHV15S1 IGH V FALSE 2742 -Rattus norvegicus non-functional IGHV1S1 IGH V FALSE 2743 -Rattus norvegicus non-functional IGHV1S10 IGH V FALSE 2744 -Rattus norvegicus non-functional IGHV1S11 IGH V FALSE 
2745 -Rattus norvegicus non-functional IGHV1S12 IGH V FALSE 2746 -Rattus norvegicus functional IGHV1S13 IGH V TRUE 2747 -Rattus norvegicus non-functional IGHV1S14 IGH V FALSE 2748 -Rattus norvegicus functional IGHV1S15 IGH V TRUE 2749 -Rattus norvegicus functional IGHV1S16 IGH V TRUE 2750 -Rattus norvegicus functional IGHV1S17 IGH V TRUE 2751 -Rattus norvegicus non-functional IGHV1S18 IGH V FALSE 2752 -Rattus norvegicus non-functional IGHV1S19 IGH V FALSE 2753 -Rattus norvegicus non-functional IGHV1S2 IGH V FALSE 2754 -Rattus norvegicus functional IGHV1S20 IGH V TRUE 2755 -Rattus norvegicus non-functional IGHV1S21 IGH V FALSE 2756 -Rattus norvegicus functional IGHV1S22 IGH V TRUE 2757 -Rattus norvegicus functional IGHV1S23 IGH V TRUE 2758 -Rattus norvegicus non-functional IGHV1S24 IGH V FALSE 2759 -Rattus norvegicus non-functional IGHV1S25 IGH V FALSE 2760 -Rattus norvegicus non-functional IGHV1S26 IGH V FALSE 2761 -Rattus norvegicus functional IGHV1S27 IGH V TRUE 2762 -Rattus norvegicus functional IGHV1S28 IGH V TRUE 2763 -Rattus norvegicus functional IGHV1S29 IGH V TRUE 2764 -Rattus norvegicus non-functional IGHV1S3 IGH V FALSE 2765 -Rattus norvegicus non-functional IGHV1S30 IGH V FALSE 2766 -Rattus norvegicus functional IGHV1S31 IGH V TRUE 2767 -Rattus norvegicus non-functional IGHV1S32 IGH V FALSE 2768 -Rattus norvegicus non-functional IGHV1S33 IGH V FALSE 2769 -Rattus norvegicus non-functional IGHV1S34 IGH V FALSE 2770 -Rattus norvegicus non-functional IGHV1S35 IGH V FALSE 2771 -Rattus norvegicus non-functional IGHV1S36 IGH V FALSE 2772 -Rattus norvegicus non-functional IGHV1S37 IGH V FALSE 2773 -Rattus norvegicus functional IGHV1S38 IGH V TRUE 2774 -Rattus norvegicus functional IGHV1S39 IGH V TRUE 2775 -Rattus norvegicus non-functional IGHV1S4 IGH V FALSE 2776 -Rattus norvegicus non-functional IGHV1S40 IGH V FALSE 2777 -Rattus norvegicus functional IGHV1S41 IGH V TRUE 2778 -Rattus norvegicus non-functional IGHV1S42 IGH V FALSE 2779 -Rattus norvegicus 
non-functional IGHV1S43 IGH V FALSE 2780 -Rattus norvegicus non-functional IGHV1S44 IGH V FALSE 2781 -Rattus norvegicus functional IGHV1S45 IGH V TRUE 2782 -Rattus norvegicus non-functional IGHV1S46 IGH V FALSE 2783 -Rattus norvegicus functional IGHV1S47 IGH V TRUE 2784 -Rattus norvegicus functional IGHV1S48 IGH V TRUE 2785 -Rattus norvegicus non-functional IGHV1S49 IGH V FALSE 2786 -Rattus norvegicus non-functional IGHV1S5 IGH V FALSE 2787 -Rattus norvegicus non-functional IGHV1S50 IGH V FALSE 2788 -Rattus norvegicus functional IGHV1S51 IGH V TRUE 2789 -Rattus norvegicus non-functional IGHV1S52 IGH V FALSE 2790 -Rattus norvegicus non-functional IGHV1S53 IGH V FALSE 2791 -Rattus norvegicus non-functional IGHV1S54 IGH V FALSE 2792 -Rattus norvegicus non-functional IGHV1S55 IGH V FALSE 2793 -Rattus norvegicus non-functional IGHV1S56 IGH V FALSE 2794 -Rattus norvegicus functional IGHV1S57 IGH V TRUE 2795 -Rattus norvegicus non-functional IGHV1S58 IGH V FALSE 2796 -Rattus norvegicus non-functional IGHV1S59 IGH V FALSE 2797 -Rattus norvegicus functional IGHV1S6 IGH V TRUE 2798 -Rattus norvegicus non-functional IGHV1S60 IGH V FALSE 2799 -Rattus norvegicus non-functional IGHV1S61 IGH V FALSE 2800 -Rattus norvegicus functional IGHV1S62 IGH V TRUE 2801 -Rattus norvegicus non-functional IGHV1S63 IGH V FALSE 2802 -Rattus norvegicus non-functional IGHV1S64 IGH V FALSE 2803 -Rattus norvegicus non-functional IGHV1S65 IGH V FALSE 2804 -Rattus norvegicus functional IGHV1S66 IGH V TRUE 2805 -Rattus norvegicus functional IGHV1S67 IGH V TRUE 2806 -Rattus norvegicus non-functional IGHV1S7 IGH V FALSE 2807 -Rattus norvegicus non-functional IGHV1S8 IGH V FALSE 2808 -Rattus norvegicus functional IGHV1S9 IGH V TRUE 2809 -Rattus norvegicus functional IGHV2S1 IGH V TRUE 2810 -Rattus norvegicus functional IGHV2S10 IGH V TRUE 2811 -Rattus norvegicus non-functional IGHV2S11 IGH V FALSE 2812 -Rattus norvegicus functional IGHV2S12 IGH V TRUE 2813 -Rattus norvegicus functional IGHV2S13 IGH V TRUE 
2814 -Rattus norvegicus functional IGHV2S14 IGH V TRUE 2815 -Rattus norvegicus non-functional IGHV2S15 IGH V FALSE 2816 -Rattus norvegicus non-functional IGHV2S16 IGH V FALSE 2817 -Rattus norvegicus non-functional IGHV2S17 IGH V FALSE 2818 -Rattus norvegicus functional IGHV2S18 IGH V TRUE 2819 -Rattus norvegicus non-functional IGHV2S19 IGH V FALSE 2820 -Rattus norvegicus non-functional IGHV2S2 IGH V FALSE 2821 -Rattus norvegicus functional IGHV2S20 IGH V TRUE 2822 -Rattus norvegicus non-functional IGHV2S21 IGH V FALSE 2823 -Rattus norvegicus non-functional IGHV2S22 IGH V FALSE 2824 -Rattus norvegicus non-functional IGHV2S23 IGH V FALSE 2825 -Rattus norvegicus non-functional IGHV2S24 IGH V FALSE 2826 -Rattus norvegicus non-functional IGHV2S25 IGH V FALSE 2827 -Rattus norvegicus non-functional IGHV2S26 IGH V FALSE 2828 -Rattus norvegicus non-functional IGHV2S27 IGH V FALSE 2829 -Rattus norvegicus non-functional IGHV2S28 IGH V FALSE 2830 -Rattus norvegicus non-functional IGHV2S29 IGH V FALSE 2831 -Rattus norvegicus functional IGHV2S30 IGH V TRUE 2832 -Rattus norvegicus functional IGHV2S31 IGH V TRUE 2833 -Rattus norvegicus non-functional IGHV2S32 IGH V FALSE 2834 -Rattus norvegicus functional IGHV2S33 IGH V TRUE 2835 -Rattus norvegicus non-functional IGHV2S34 IGH V FALSE 2836 -Rattus norvegicus non-functional IGHV2S35 IGH V FALSE 2837 -Rattus norvegicus non-functional IGHV2S36 IGH V FALSE 2838 -Rattus norvegicus non-functional IGHV2S37 IGH V FALSE 2839 -Rattus norvegicus non-functional IGHV2S38 IGH V FALSE 2840 -Rattus norvegicus non-functional IGHV2S39 IGH V FALSE 2841 -Rattus norvegicus functional IGHV2S40 IGH V TRUE 2842 -Rattus norvegicus functional IGHV2S41 IGH V TRUE 2843 -Rattus norvegicus non-functional IGHV2S42 IGH V FALSE 2844 -Rattus norvegicus non-functional IGHV2S43 IGH V FALSE 2845 -Rattus norvegicus non-functional IGHV2S44 IGH V FALSE 2846 -Rattus norvegicus non-functional IGHV2S45 IGH V FALSE 2847 -Rattus norvegicus non-functional IGHV2S46 IGH V FALSE 
2848 -Rattus norvegicus non-functional IGHV2S47 IGH V FALSE 2849 -Rattus norvegicus functional IGHV2S48 IGH V TRUE 2850 -Rattus norvegicus non-functional IGHV2S49 IGH V FALSE 2851 -Rattus norvegicus functional IGHV2S5 IGH V TRUE 2852 -Rattus norvegicus non-functional IGHV2S50 IGH V FALSE 2853 -Rattus norvegicus non-functional IGHV2S51 IGH V FALSE 2854 -Rattus norvegicus non-functional IGHV2S52 IGH V FALSE 2855 -Rattus norvegicus non-functional IGHV2S53 IGH V FALSE 2856 -Rattus norvegicus functional IGHV2S54 IGH V TRUE 2857 -Rattus norvegicus non-functional IGHV2S55 IGH V FALSE 2858 -Rattus norvegicus functional IGHV2S56 IGH V TRUE 2859 -Rattus norvegicus non-functional IGHV2S57 IGH V FALSE 2860 -Rattus norvegicus non-functional IGHV2S58 IGH V FALSE 2861 -Rattus norvegicus non-functional IGHV2S59 IGH V FALSE 2862 -Rattus norvegicus non-functional IGHV2S6 IGH V FALSE 2863 -Rattus norvegicus non-functional IGHV2S60 IGH V FALSE 2864 -Rattus norvegicus functional IGHV2S61 IGH V TRUE 2865 -Rattus norvegicus non-functional IGHV2S62 IGH V FALSE 2866 -Rattus norvegicus functional IGHV2S63 IGH V TRUE 2867 -Rattus norvegicus functional IGHV2S64 IGH V TRUE 2868 -Rattus norvegicus non-functional IGHV2S69 IGH V FALSE 2869 -Rattus norvegicus non-functional IGHV2S7 IGH V FALSE 2870 -Rattus norvegicus functional IGHV2S70 IGH V TRUE 2871 -Rattus norvegicus non-functional IGHV2S71 IGH V FALSE 2872 -Rattus norvegicus functional IGHV2S72 IGH V TRUE 2873 -Rattus norvegicus non-functional IGHV2S73 IGH V FALSE 2874 -Rattus norvegicus non-functional IGHV2S74 IGH V FALSE 2875 -Rattus norvegicus functional IGHV2S75 IGH V TRUE 2876 -Rattus norvegicus non-functional IGHV2S76 IGH V FALSE 2877 -Rattus norvegicus non-functional IGHV2S77 IGH V FALSE 2878 -Rattus norvegicus functional IGHV2S78 IGH V TRUE 2879 -Rattus norvegicus non-functional IGHV2S79 IGH V FALSE 2880 -Rattus norvegicus functional IGHV2S8 IGH V TRUE 2881 -Rattus norvegicus non-functional IGHV2S80 IGH V FALSE 2882 -Rattus norvegicus 
non-functional IGHV2S81 IGH V FALSE 2883 -Rattus norvegicus functional IGHV2S82 IGH V TRUE 2884 -Rattus norvegicus non-functional IGHV2S83 IGH V FALSE 2885 -Rattus norvegicus functional IGHV2S84 IGH V TRUE 2886 -Rattus norvegicus functional IGHV2S85 IGH V TRUE 2887 -Rattus norvegicus non-functional IGHV2S86 IGH V FALSE 2888 -Rattus norvegicus non-functional IGHV2S87 IGH V FALSE 2889 -Rattus norvegicus functional IGHV2S88 IGH V TRUE 2890 -Rattus norvegicus functional IGHV2S89 IGH V TRUE 2891 -Rattus norvegicus non-functional IGHV2S9 IGH V FALSE 2892 -Rattus norvegicus non-functional IGHV2S90 IGH V FALSE 2893 -Rattus norvegicus non-functional IGHV2S91 IGH V FALSE 2894 -Rattus norvegicus functional IGHV2S92 IGH V TRUE 2895 -Rattus norvegicus non-functional IGHV2S93 IGH V FALSE 2896 -Rattus norvegicus functional IGHV2S94 IGH V TRUE 2897 -Rattus norvegicus non-functional IGHV2S95 IGH V FALSE 2898 -Rattus norvegicus non-functional IGHV2S96 IGH V FALSE 2899 -Rattus norvegicus functional IGHV2S97 IGH V TRUE 2900 -Rattus norvegicus functional IGHV2S98 IGH V TRUE 2901 -Rattus norvegicus non-functional IGHV2S99 IGH V FALSE 2902 -Rattus norvegicus functional IGHV3S1 IGH V TRUE 2903 -Rattus norvegicus non-functional IGHV3S2 IGH V FALSE 2904 -Rattus norvegicus functional IGHV3S3 IGH V TRUE 2905 -Rattus norvegicus non-functional IGHV3S4 IGH V FALSE 2906 -Rattus norvegicus functional IGHV3S5 IGH V TRUE 2907 -Rattus norvegicus non-functional IGHV3S6 IGH V FALSE 2908 -Rattus norvegicus functional IGHV3S7 IGH V TRUE 2909 -Rattus norvegicus non-functional IGHV3S8 IGH V FALSE 2910 -Rattus norvegicus functional IGHV4S1 IGH V TRUE 2911 -Rattus norvegicus functional IGHV4S2 IGH V TRUE 2912 -Rattus norvegicus non-functional IGHV4S3 IGH V FALSE 2913 -Rattus norvegicus non-functional IGHV4S4 IGH V FALSE 2914 -Rattus norvegicus non-functional IGHV5S1 IGH V FALSE 2915 -Rattus norvegicus functional IGHV5S10 IGH V TRUE 2916 -Rattus norvegicus functional IGHV5S11 IGH V TRUE 2917 -Rattus 
norvegicus non-functional IGHV5S12 IGH V FALSE 2918 -Rattus norvegicus functional IGHV5S13 IGH V TRUE 2919 -Rattus norvegicus functional IGHV5S14 IGH V TRUE 2920 -Rattus norvegicus non-functional IGHV5S15 IGH V FALSE 2921 -Rattus norvegicus functional IGHV5S16 IGH V TRUE 2922 -Rattus norvegicus non-functional IGHV5S17 IGH V FALSE 2923 -Rattus norvegicus non-functional IGHV5S18 IGH V FALSE 2924 -Rattus norvegicus non-functional IGHV5S19 IGH V FALSE 2925 -Rattus norvegicus non-functional IGHV5S2 IGH V FALSE 2926 -Rattus norvegicus non-functional IGHV5S20 IGH V FALSE 2927 -Rattus norvegicus non-functional IGHV5S21 IGH V FALSE 2928 -Rattus norvegicus non-functional IGHV5S22 IGH V FALSE 2929 -Rattus norvegicus functional IGHV5S23 IGH V TRUE 2930 -Rattus norvegicus non-functional IGHV5S24 IGH V FALSE 2931 -Rattus norvegicus non-functional IGHV5S25 IGH V FALSE 2932 -Rattus norvegicus non-functional IGHV5S26 IGH V FALSE 2933 -Rattus norvegicus functional IGHV5S27 IGH V TRUE 2934 -Rattus norvegicus non-functional IGHV5S28 IGH V FALSE 2935 -Rattus norvegicus functional IGHV5S29 IGH V TRUE 2936 -Rattus norvegicus non-functional IGHV5S3 IGH V FALSE 2937 -Rattus norvegicus functional IGHV5S30 IGH V TRUE 2938 -Rattus norvegicus non-functional IGHV5S31 IGH V FALSE 2939 -Rattus norvegicus functional IGHV5S32 IGH V TRUE 2940 -Rattus norvegicus non-functional IGHV5S33 IGH V FALSE 2941 -Rattus norvegicus non-functional IGHV5S34 IGH V FALSE 2942 -Rattus norvegicus non-functional IGHV5S35 IGH V FALSE 2943 -Rattus norvegicus functional IGHV5S36 IGH V TRUE 2944 -Rattus norvegicus non-functional IGHV5S37 IGH V FALSE 2945 -Rattus norvegicus non-functional IGHV5S39 IGH V FALSE 2946 -Rattus norvegicus non-functional IGHV5S4 IGH V FALSE 2947 -Rattus norvegicus non-functional IGHV5S40 IGH V FALSE 2948 -Rattus norvegicus non-functional IGHV5S41 IGH V FALSE 2949 -Rattus norvegicus non-functional IGHV5S42 IGH V FALSE 2950 -Rattus norvegicus functional IGHV5S43 IGH V TRUE 2951 -Rattus norvegicus 
non-functional IGHV5S44 IGH V FALSE 2952 -Rattus norvegicus functional IGHV5S45 IGH V TRUE 2953 -Rattus norvegicus non-functional IGHV5S46 IGH V FALSE 2954 -Rattus norvegicus functional IGHV5S47 IGH V TRUE 2955 -Rattus norvegicus non-functional IGHV5S48 IGH V FALSE 2956 -Rattus norvegicus non-functional IGHV5S49 IGH V FALSE 2957 -Rattus norvegicus non-functional IGHV5S5 IGH V FALSE 2958 -Rattus norvegicus non-functional IGHV5S50 IGH V FALSE 2959 -Rattus norvegicus non-functional IGHV5S51 IGH V FALSE 2960 -Rattus norvegicus non-functional IGHV5S52 IGH V FALSE 2961 -Rattus norvegicus non-functional IGHV5S53 IGH V FALSE 2962 -Rattus norvegicus functional IGHV5S54 IGH V TRUE 2963 -Rattus norvegicus non-functional IGHV5S55 IGH V FALSE 2964 -Rattus norvegicus non-functional IGHV5S56 IGH V FALSE 2965 -Rattus norvegicus functional IGHV5S57 IGH V TRUE 2966 -Rattus norvegicus functional IGHV5S58 IGH V TRUE 2967 -Rattus norvegicus non-functional IGHV5S59 IGH V FALSE 2968 -Rattus norvegicus non-functional IGHV5S6 IGH V FALSE 2969 -Rattus norvegicus non-functional IGHV5S60 IGH V FALSE 2970 -Rattus norvegicus non-functional IGHV5S61 IGH V FALSE 2971 -Rattus norvegicus non-functional IGHV5S62 IGH V FALSE 2972 -Rattus norvegicus non-functional IGHV5S63 IGH V FALSE 2973 -Rattus norvegicus functional IGHV5S64 IGH V TRUE 2974 -Rattus norvegicus functional IGHV5S65 IGH V TRUE 2975 -Rattus norvegicus non-functional IGHV5S66 IGH V FALSE 2976 -Rattus norvegicus non-functional IGHV5S67 IGH V FALSE 2977 -Rattus norvegicus non-functional IGHV5S68 IGH V FALSE 2978 -Rattus norvegicus non-functional IGHV5S69 IGH V FALSE 2979 -Rattus norvegicus non-functional IGHV5S7 IGH V FALSE 2980 -Rattus norvegicus non-functional IGHV5S70 IGH V FALSE 2981 -Rattus norvegicus non-functional IGHV5S71 IGH V FALSE 2982 -Rattus norvegicus non-functional IGHV5S72 IGH V FALSE 2983 -Rattus norvegicus non-functional IGHV5S73 IGH V FALSE 2984 -Rattus norvegicus functional IGHV5S74 IGH V TRUE 2985 -Rattus norvegicus 
non-functional IGHV5S75 IGH V FALSE 2986 -Rattus norvegicus functional IGHV5S8 IGH V TRUE 2987 -Rattus norvegicus non-functional IGHV5S9 IGH V FALSE 2988 -Rattus norvegicus non-functional IGHV6S1 IGH V FALSE 2989 -Rattus norvegicus functional IGHV6S10 IGH V TRUE 2990 -Rattus norvegicus functional IGHV6S11 IGH V TRUE 2991 -Rattus norvegicus non-functional IGHV6S12 IGH V FALSE 2992 -Rattus norvegicus non-functional IGHV6S13 IGH V FALSE 2993 -Rattus norvegicus functional IGHV6S14 IGH V TRUE 2994 -Rattus norvegicus non-functional IGHV6S15 IGH V FALSE 2995 -Rattus norvegicus non-functional IGHV6S16 IGH V FALSE 2996 -Rattus norvegicus functional IGHV6S17 IGH V TRUE 2997 -Rattus norvegicus functional IGHV6S18 IGH V TRUE 2998 -Rattus norvegicus functional IGHV6S19 IGH V TRUE 2999 -Rattus norvegicus non-functional IGHV6S20 IGH V FALSE 3000 -Rattus norvegicus functional IGHV6S3 IGH V TRUE 3001 -Rattus norvegicus non-functional IGHV6S4 IGH V FALSE 3002 -Rattus norvegicus non-functional IGHV6S5 IGH V FALSE 3003 -Rattus norvegicus functional IGHV6S6 IGH V TRUE 3004 -Rattus norvegicus non-functional IGHV6S7 IGH V FALSE 3005 -Rattus norvegicus non-functional IGHV6S8 IGH V FALSE 3006 -Rattus norvegicus non-functional IGHV6S9 IGH V FALSE 3007 -Rattus norvegicus functional IGHV7S1 IGH V TRUE 3008 -Rattus norvegicus non-functional IGHV7S10 IGH V FALSE 3009 -Rattus norvegicus non-functional IGHV7S11 IGH V FALSE 3010 -Rattus norvegicus non-functional IGHV7S12 IGH V FALSE 3011 -Rattus norvegicus functional IGHV7S13 IGH V TRUE 3012 -Rattus norvegicus non-functional IGHV7S14 IGH V FALSE 3013 -Rattus norvegicus non-functional IGHV7S15 IGH V FALSE 3014 -Rattus norvegicus functional IGHV7S16 IGH V TRUE 3015 -Rattus norvegicus non-functional IGHV7S2 IGH V FALSE 3016 -Rattus norvegicus non-functional IGHV7S3 IGH V FALSE 3017 -Rattus norvegicus functional IGHV7S4 IGH V TRUE 3018 -Rattus norvegicus functional IGHV7S5 IGH V TRUE 3019 -Rattus norvegicus functional IGHV7S6 IGH V TRUE 3020 -Rattus 
norvegicus non-functional IGHV7S8 IGH V FALSE 3021 -Rattus norvegicus non-functional IGHV7S9 IGH V FALSE 3022 -Rattus norvegicus non-functional IGHV8S1 IGH V FALSE 3023 -Rattus norvegicus functional IGHV8S10 IGH V TRUE 3024 -Rattus norvegicus functional IGHV8S11 IGH V TRUE 3025 -Rattus norvegicus non-functional IGHV8S12 IGH V FALSE 3026 -Rattus norvegicus functional IGHV8S13 IGH V TRUE 3027 -Rattus norvegicus non-functional IGHV8S14 IGH V FALSE 3028 -Rattus norvegicus functional IGHV8S15 IGH V TRUE 3029 -Rattus norvegicus functional IGHV8S16 IGH V TRUE 3030 -Rattus norvegicus non-functional IGHV8S17 IGH V FALSE 3031 -Rattus norvegicus functional IGHV8S18 IGH V TRUE 3032 -Rattus norvegicus non-functional IGHV8S19 IGH V FALSE 3033 -Rattus norvegicus functional IGHV8S2 IGH V TRUE 3034 -Rattus norvegicus non-functional IGHV8S3 IGH V FALSE 3035 -Rattus norvegicus non-functional IGHV8S4 IGH V FALSE 3036 -Rattus norvegicus non-functional IGHV8S5 IGH V FALSE 3037 -Rattus norvegicus non-functional IGHV8S6 IGH V FALSE 3038 -Rattus norvegicus functional IGHV8S7 IGH V TRUE 3039 -Rattus norvegicus non-functional IGHV8S8 IGH V FALSE 3040 -Rattus norvegicus non-functional IGHV8S9 IGH V FALSE 3041 -Rattus norvegicus non-functional IGHV9S1 IGH V FALSE 3042 -Rattus norvegicus non-functional IGHV9S2 IGH V FALSE 3043 -Rattus norvegicus functional IGHV9S3 IGH V TRUE 3044 -Rattus norvegicus functional IGHV9S4 IGH V TRUE 3045 -Rattus norvegicus non-functional IGHV9S5 IGH V FALSE 3046 -Rattus norvegicus functional IGHV9S6 IGH V TRUE 3047 -Rattus norvegicus non-functional IGHV9S7 IGH V FALSE 3048 -Rattus norvegicus functional IGHV9S8 IGH V TRUE 3049 -Rattus norvegicus functional IGKJ1 IGK J TRUE 3050 -Rattus norvegicus functional IGKJ2-1 IGK J TRUE 3051 -Rattus norvegicus functional IGKJ2-2 IGK J TRUE 3052 -Rattus norvegicus functional IGKJ2-3 IGK J TRUE 3053 -Rattus norvegicus non-functional IGKJ3 IGK J FALSE 3054 -Rattus norvegicus functional IGKJ4 IGK J TRUE 3055 -Rattus norvegicus 
functional IGKJ5 IGK J TRUE 3056 -Rattus norvegicus functional IGKV10S11 IGK V TRUE 3057 -Rattus norvegicus functional IGKV10S12 IGK V TRUE 3058 -Rattus norvegicus functional IGKV10S5 IGK V TRUE 3059 -Rattus norvegicus functional IGKV10S6 IGK V TRUE 3060 -Rattus norvegicus functional IGKV10S9 IGK V TRUE 3061 -Rattus norvegicus functional IGKV12S1 IGK V TRUE 3062 -Rattus norvegicus functional IGKV12S11 IGK V TRUE 3063 -Rattus norvegicus non-functional IGKV12S12 IGK V FALSE 3064 -Rattus norvegicus functional IGKV12S14 IGK V TRUE 3065 -Rattus norvegicus functional IGKV12S16 IGK V TRUE 3066 -Rattus norvegicus functional IGKV12S17 IGK V TRUE 3067 -Rattus norvegicus functional IGKV12S20 IGK V TRUE 3068 -Rattus norvegicus functional IGKV12S22 IGK V TRUE 3069 -Rattus norvegicus functional IGKV12S24 IGK V TRUE 3070 -Rattus norvegicus functional IGKV12S25 IGK V TRUE 3071 -Rattus norvegicus functional IGKV12S26 IGK V TRUE 3072 -Rattus norvegicus non-functional IGKV12S27 IGK V FALSE 3073 -Rattus norvegicus non-functional IGKV12S28 IGK V FALSE 3074 -Rattus norvegicus functional IGKV12S29 IGK V TRUE 3075 -Rattus norvegicus functional IGKV12S30 IGK V TRUE 3076 -Rattus norvegicus functional IGKV12S31 IGK V TRUE 3077 -Rattus norvegicus functional IGKV12S32 IGK V TRUE 3078 -Rattus norvegicus functional IGKV12S34 IGK V TRUE 3079 -Rattus norvegicus functional IGKV12S36 IGK V TRUE 3080 -Rattus norvegicus functional IGKV12S38 IGK V TRUE 3081 -Rattus norvegicus functional IGKV12S39 IGK V TRUE 3082 -Rattus norvegicus non-functional IGKV12S5 IGK V FALSE 3083 -Rattus norvegicus functional IGKV12S7 IGK V TRUE 3084 -Rattus norvegicus functional IGKV12S8 IGK V TRUE 3085 -Rattus norvegicus functional IGKV12S9 IGK V TRUE 3086 -Rattus norvegicus non-functional IGKV13S7 IGK V FALSE 3087 -Rattus norvegicus functional IGKV14S1 IGK V TRUE 3088 -Rattus norvegicus functional IGKV14S13 IGK V TRUE 3089 -Rattus norvegicus functional IGKV14S14 IGK V TRUE 3090 -Rattus norvegicus functional IGKV14S15 IGK V 
TRUE 3091 -Rattus norvegicus functional IGKV14S16 IGK V TRUE 3092 -Rattus norvegicus functional IGKV14S18 IGK V TRUE 3093 -Rattus norvegicus functional IGKV14S19 IGK V TRUE 3094 -Rattus norvegicus functional IGKV14S2 IGK V TRUE 3095 -Rattus norvegicus functional IGKV14S22 IGK V TRUE 3096 -Rattus norvegicus non-functional IGKV14S4 IGK V FALSE 3097 -Rattus norvegicus functional IGKV14S8 IGK V TRUE 3098 -Rattus norvegicus functional IGKV14S9 IGK V TRUE 3099 -Rattus norvegicus non-functional IGKV15S2 IGK V FALSE 3100 -Rattus norvegicus non-functional IGKV15S3 IGK V FALSE 3101 -Rattus norvegicus functional IGKV15S4 IGK V TRUE 3102 -Rattus norvegicus functional IGKV16S1 IGK V TRUE 3103 -Rattus norvegicus non-functional IGKV16S2 IGK V FALSE 3104 -Rattus norvegicus functional IGKV17S1 IGK V TRUE 3105 -Rattus norvegicus functional IGKV18S1 IGK V TRUE 3106 -Rattus norvegicus functional IGKV19S1 IGK V TRUE 3107 -Rattus norvegicus functional IGKV19S2 IGK V TRUE 3108 -Rattus norvegicus functional IGKV1S1 IGK V TRUE 3109 -Rattus norvegicus functional IGKV1S12 IGK V TRUE 3110 -Rattus norvegicus functional IGKV1S14 IGK V TRUE 3111 -Rattus norvegicus non-functional IGKV1S15 IGK V FALSE 3112 -Rattus norvegicus functional IGKV1S18 IGK V TRUE 3113 -Rattus norvegicus functional IGKV1S19 IGK V TRUE 3114 -Rattus norvegicus functional IGKV1S21 IGK V TRUE 3115 -Rattus norvegicus functional IGKV1S22 IGK V TRUE 3116 -Rattus norvegicus functional IGKV1S23 IGK V TRUE 3117 -Rattus norvegicus functional IGKV1S24 IGK V TRUE 3118 -Rattus norvegicus functional IGKV1S25 IGK V TRUE 3119 -Rattus norvegicus functional IGKV1S26 IGK V TRUE 3120 -Rattus norvegicus functional IGKV1S27 IGK V TRUE 3121 -Rattus norvegicus functional IGKV1S28 IGK V TRUE 3122 -Rattus norvegicus functional IGKV1S29 IGK V TRUE 3123 -Rattus norvegicus functional IGKV1S30 IGK V TRUE 3124 -Rattus norvegicus functional IGKV1S31 IGK V TRUE 3125 -Rattus norvegicus non-functional IGKV1S32 IGK V FALSE 3126 -Rattus norvegicus functional 
IGKV1S34 IGK V TRUE 3127 -Rattus norvegicus functional IGKV1S42 IGK V TRUE 3128 -Rattus norvegicus functional IGKV1S5 IGK V TRUE 3129 -Rattus norvegicus functional IGKV1S7 IGK V TRUE 3130 -Rattus norvegicus functional IGKV1S8 IGK V TRUE 3131 -Rattus norvegicus functional IGKV20S1 IGK V TRUE 3132 -Rattus norvegicus functional IGKV21S2 IGK V TRUE 3133 -Rattus norvegicus functional IGKV21S3 IGK V TRUE 3134 -Rattus norvegicus functional IGKV22S1 IGK V TRUE 3135 -Rattus norvegicus functional IGKV22S2 IGK V TRUE 3136 -Rattus norvegicus functional IGKV22S4 IGK V TRUE 3137 -Rattus norvegicus functional IGKV22S5 IGK V TRUE 3138 -Rattus norvegicus non-functional IGKV22S6 IGK V FALSE 3139 -Rattus norvegicus functional IGKV22S7 IGK V TRUE 3140 -Rattus norvegicus non-functional IGKV22S8 IGK V FALSE 3141 -Rattus norvegicus functional IGKV22S9 IGK V TRUE 3142 -Rattus norvegicus functional IGKV2S11 IGK V TRUE 3143 -Rattus norvegicus functional IGKV2S16 IGK V TRUE 3144 -Rattus norvegicus functional IGKV2S17 IGK V TRUE 3145 -Rattus norvegicus non-functional IGKV2S20 IGK V FALSE 3146 -Rattus norvegicus non-functional IGKV2S21 IGK V FALSE 3147 -Rattus norvegicus non-functional IGKV2S22 IGK V FALSE 3148 -Rattus norvegicus non-functional IGKV2S23 IGK V FALSE 3149 -Rattus norvegicus non-functional IGKV2S24 IGK V FALSE 3150 -Rattus norvegicus functional IGKV2S25 IGK V TRUE 3151 -Rattus norvegicus functional IGKV2S26 IGK V TRUE 3152 -Rattus norvegicus functional IGKV2S27 IGK V TRUE 3153 -Rattus norvegicus functional IGKV2S3 IGK V TRUE 3154 -Rattus norvegicus functional IGKV2S6 IGK V TRUE 3155 -Rattus norvegicus functional IGKV2S9 IGK V TRUE 3156 -Rattus norvegicus functional IGKV3S1 IGK V TRUE 3157 -Rattus norvegicus functional IGKV3S10 IGK V TRUE 3158 -Rattus norvegicus functional IGKV3S11 IGK V TRUE 3159 -Rattus norvegicus non-functional IGKV3S12 IGK V FALSE 3160 -Rattus norvegicus functional IGKV3S13 IGK V TRUE 3161 -Rattus norvegicus non-functional IGKV3S14 IGK V FALSE 3162 -Rattus 
norvegicus non-functional IGKV3S15 IGK V FALSE 3163 -Rattus norvegicus functional IGKV3S17 IGK V TRUE 3164 -Rattus norvegicus functional IGKV3S18 IGK V TRUE 3165 -Rattus norvegicus functional IGKV3S19 IGK V TRUE 3166 -Rattus norvegicus functional IGKV3S5 IGK V TRUE 3167 -Rattus norvegicus functional IGKV3S6 IGK V TRUE 3168 -Rattus norvegicus non-functional IGKV3S7 IGK V FALSE 3169 -Rattus norvegicus functional IGKV3S8 IGK V TRUE 3170 -Rattus norvegicus functional IGKV3S9 IGK V TRUE 3171 -Rattus norvegicus functional IGKV4S10 IGK V TRUE 3172 -Rattus norvegicus functional IGKV4S11 IGK V TRUE 3173 -Rattus norvegicus functional IGKV4S12 IGK V TRUE 3174 -Rattus norvegicus functional IGKV4S13 IGK V TRUE 3175 -Rattus norvegicus functional IGKV4S14 IGK V TRUE 3176 -Rattus norvegicus functional IGKV4S15 IGK V TRUE 3177 -Rattus norvegicus functional IGKV4S16 IGK V TRUE 3178 -Rattus norvegicus functional IGKV4S18 IGK V TRUE 3179 -Rattus norvegicus functional IGKV4S19 IGK V TRUE 3180 -Rattus norvegicus functional IGKV4S2 IGK V TRUE 3181 -Rattus norvegicus functional IGKV4S20 IGK V TRUE 3182 -Rattus norvegicus functional IGKV4S21 IGK V TRUE 3183 -Rattus norvegicus functional IGKV4S3 IGK V TRUE 3184 -Rattus norvegicus functional IGKV4S4 IGK V TRUE 3185 -Rattus norvegicus functional IGKV4S5 IGK V TRUE 3186 -Rattus norvegicus functional IGKV4S6 IGK V TRUE 3187 -Rattus norvegicus functional IGKV4S7 IGK V TRUE 3188 -Rattus norvegicus functional IGKV4S8 IGK V TRUE 3189 -Rattus norvegicus functional IGKV4S9 IGK V TRUE 3190 -Rattus norvegicus non-functional IGKV5S1 IGK V FALSE 3191 -Rattus norvegicus functional IGKV5S10 IGK V TRUE 3192 -Rattus norvegicus functional IGKV5S12 IGK V TRUE 3193 -Rattus norvegicus functional IGKV5S2 IGK V TRUE 3194 -Rattus norvegicus non-functional IGKV5S4 IGK V FALSE 3195 -Rattus norvegicus functional IGKV5S5 IGK V TRUE 3196 -Rattus norvegicus functional IGKV5S6 IGK V TRUE 3197 -Rattus norvegicus functional IGKV6S10 IGK V TRUE 3198 -Rattus norvegicus 
functional IGKV6S11 IGK V TRUE 3199 -Rattus norvegicus non-functional IGKV6S2 IGK V FALSE 3200 -Rattus norvegicus non-functional IGKV6S3 IGK V FALSE 3201 -Rattus norvegicus functional IGKV6S4 IGK V TRUE 3202 -Rattus norvegicus functional IGKV6S5 IGK V TRUE 3203 -Rattus norvegicus non-functional IGKV6S6 IGK V FALSE 3204 -Rattus norvegicus functional IGKV6S7 IGK V TRUE 3205 -Rattus norvegicus functional IGKV6S8 IGK V TRUE 3206 -Rattus norvegicus functional IGKV6S9 IGK V TRUE 3207 -Rattus norvegicus functional IGKV7S1 IGK V TRUE 3208 -Rattus norvegicus functional IGKV8S10 IGK V TRUE 3209 -Rattus norvegicus non-functional IGKV8S11 IGK V FALSE 3210 -Rattus norvegicus non-functional IGKV8S2 IGK V FALSE 3211 -Rattus norvegicus functional IGKV8S4 IGK V TRUE 3212 -Rattus norvegicus functional IGKV8S5 IGK V TRUE 3213 -Rattus norvegicus functional IGKV8S6 IGK V TRUE 3214 -Rattus norvegicus functional IGKV8S7 IGK V TRUE 3215 -Rattus norvegicus functional IGKV8S8 IGK V TRUE 3216 -Rattus norvegicus functional IGKV8S9 IGK V TRUE 3217 -Rattus norvegicus functional IGKV9S1 IGK V TRUE 3218 -Rattus norvegicus functional IGKV9S2 IGK V TRUE 3219 -Rattus norvegicus functional IGLJ1 IGL J TRUE 3220 -Rattus norvegicus non-functional IGLJ2 IGL J FALSE 3221 -Rattus norvegicus functional IGLJ3 IGL J TRUE 3222 -Rattus norvegicus non-functional IGLJ4 IGL J FALSE 3223 -Rattus norvegicus functional IGLV1S1 IGL V TRUE 3224 -Rattus norvegicus functional IGLV2S1 IGL V TRUE 3225 -Rattus norvegicus functional IGLV3S1 IGL V TRUE 3226 -Rattus norvegicus functional IGLV3S2 IGL V TRUE 3227 -Rattus norvegicus functional IGLV3S3 IGL V TRUE 3228 -Rattus norvegicus functional IGLV3S4 IGL V TRUE 3229 -Rattus norvegicus functional IGLV3S5 IGL V TRUE 3230 -Sus scrofa functional IGHD IGH D TRUE 3231 -Sus scrofa functional IGHD1 IGH D TRUE 3232 -Sus scrofa functional IGHD2 IGH D TRUE 3233 -Sus scrofa functional IGHD3 IGH D TRUE 3234 -Sus scrofa functional IGHD4 IGH D TRUE 3235 -Sus scrofa functional IGHJ1 IGH J 
TRUE 3236 -Sus scrofa functional IGHJ2 IGH J TRUE 3237 -Sus scrofa functional IGHJ3 IGH J TRUE 3238 -Sus scrofa functional IGHJ4 IGH J TRUE 3239 -Sus scrofa functional IGHJ5 IGH J TRUE 3240 -Sus scrofa non-functional IGHV1-1 IGH V FALSE 3241 -Sus scrofa functional IGHV1-10 IGH V TRUE 3242 -Sus scrofa functional IGHV1-11 IGH V TRUE 3243 -Sus scrofa functional IGHV1-12 IGH V TRUE 3244 -Sus scrofa non-functional IGHV1-13 IGH V FALSE 3245 -Sus scrofa functional IGHV1-14 IGH V TRUE 3246 -Sus scrofa functional IGHV1-15 IGH V TRUE 3247 -Sus scrofa functional IGHV1-2 IGH V TRUE 3248 -Sus scrofa non-functional IGHV1-3 IGH V FALSE 3249 -Sus scrofa functional IGHV1-4 IGH V TRUE 3250 -Sus scrofa functional IGHV1-5 IGH V TRUE 3251 -Sus scrofa functional IGHV1-6 IGH V TRUE 3252 -Sus scrofa non-functional IGHV1-7 IGH V FALSE 3253 -Sus scrofa functional IGHV1-8 IGH V TRUE 3254 -Sus scrofa non-functional IGHV1-9 IGH V FALSE 3255 -Sus scrofa functional IGHV1S2 IGH V TRUE 3256 -Sus scrofa non-functional IGHV1S3 IGH V FALSE 3257 -Sus scrofa functional IGHV1S5 IGH V TRUE 3258 -Sus scrofa functional IGHV1S6 IGH V TRUE 3259 -Sus scrofa non-functional IGHV1S7 IGH V FALSE 3260 -Sus scrofa non-functional IGHV1S8 IGH V FALSE 3261 -Sus scrofa functional IGKJ1 IGK J TRUE 3262 -Sus scrofa functional IGKJ2 IGK J TRUE 3263 -Sus scrofa functional IGKJ3 IGK J TRUE 3264 -Sus scrofa functional IGKJ4 IGK J TRUE 3265 -Sus scrofa functional IGKJ5 IGK J TRUE 3266 -Sus scrofa functional IGKV1-11 IGK V TRUE 3267 -Sus scrofa functional IGKV1-14 IGK V TRUE 3268 -Sus scrofa functional IGKV1-7 IGK V TRUE 3269 -Sus scrofa functional IGKV1-9 IGK V TRUE 3270 -Sus scrofa functional IGKV1D-11 IGK V TRUE 3271 -Sus scrofa functional IGKV2-10 IGK V TRUE 3272 -Sus scrofa functional IGKV2-12 IGK V TRUE 3273 -Sus scrofa functional IGKV2-13 IGK V TRUE 3274 -Sus scrofa non-functional IGKV2-5 IGK V FALSE 3275 -Sus scrofa functional IGKV2-6 IGK V TRUE 3276 -Sus scrofa functional IGKV2-8 IGK V TRUE 3277 -Sus scrofa 
non-functional IGKV2/OR3-1 IGK V FALSE 3278 -Sus scrofa functional IGKV2D-12 IGK V TRUE 3279 -Sus scrofa non-functional IGKV3-3 IGK V FALSE 3280 -Sus scrofa non-functional IGKV5-4 IGK V FALSE 3281 -Sus scrofa functional IGLJ2 IGL J TRUE 3282 -Sus scrofa functional IGLJ3 IGL J TRUE 3283 -Sus scrofa functional IGLJ4 IGL J TRUE 3284 -Sus scrofa non-functional IGLV(III)-8 IGL V FALSE 3285 -Sus scrofa non-functional IGLV1-15 IGL V FALSE 3286 -Sus scrofa non-functional IGLV1-20 IGL V FALSE 3287 -Sus scrofa functional IGLV2-6 IGL V TRUE 3288 -Sus scrofa non-functional IGLV3-1 IGL V FALSE 3289 -Sus scrofa functional IGLV3-2 IGL V TRUE 3290 -Sus scrofa functional IGLV3-3 IGL V TRUE 3291 -Sus scrofa functional IGLV3-4 IGL V TRUE 3292 -Sus scrofa functional IGLV3-5 IGL V TRUE 3293 -Sus scrofa non-functional IGLV5-11 IGL V FALSE 3294 -Sus scrofa functional IGLV5-14 IGL V TRUE 3295 -Sus scrofa non-functional IGLV5-17 IGL V FALSE 3296 -Sus scrofa non-functional IGLV5-22 IGL V FALSE 3297 -Sus scrofa non-functional IGLV7-7 IGL V FALSE 3298 -Sus scrofa non-functional IGLV7-9 IGL V FALSE 3299 -Sus scrofa functional IGLV8-10 IGL V TRUE 3300 -Sus scrofa functional IGLV8-13 IGL V TRUE 3301 -Sus scrofa non-functional IGLV8-16 IGL V FALSE 3302 -Sus scrofa functional IGLV8-18 IGL V TRUE 3303 -Sus scrofa functional IGLV8-19 IGL V TRUE 3304 -Sus scrofa non-functional IGLV8-21 IGL V FALSE 3305 diff -r beaa487ecf43 -r 5ffd52fc35c4 report_clonality/jquery-1.11.0.min.js --- a/report_clonality/jquery-1.11.0.min.js Wed Dec 07 08:36:23 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -/*! jQuery v1.11.0 | (c) 2005, 2014 jQuery Foundation, Inc. 
| jquery.org/license */ -!function(a,b){"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){var c=[],d=c.slice,e=c.concat,f=c.push,g=c.indexOf,h={},i=h.toString,j=h.hasOwnProperty,k="".trim,l={},m="1.11.0",n=function(a,b){return new n.fn.init(a,b)},o=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,p=/^-ms-/,q=/-([\da-z])/gi,r=function(a,b){return b.toUpperCase()};n.fn=n.prototype={jquery:m,constructor:n,selector:"",length:0,toArray:function(){return d.call(this)},get:function(a){return null!=a?0>a?this[a+this.length]:this[a]:d.call(this)},pushStack:function(a){var b=n.merge(this.constructor(),a);return b.prevObject=this,b.context=this.context,b},each:function(a,b){return n.each(this,a,b)},map:function(a){return this.pushStack(n.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(d.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(0>a?b:0);return this.pushStack(c>=0&&b>c?[this[c]]:[])},end:function(){return this.prevObject||this.constructor(null)},push:f,sort:c.sort,splice:c.splice},n.extend=n.fn.extend=function(){var a,b,c,d,e,f,g=arguments[0]||{},h=1,i=arguments.length,j=!1;for("boolean"==typeof g&&(j=g,g=arguments[h]||{},h++),"object"==typeof g||n.isFunction(g)||(g={}),h===i&&(g=this,h--);i>h;h++)if(null!=(e=arguments[h]))for(d in e)a=g[d],c=e[d],g!==c&&(j&&c&&(n.isPlainObject(c)||(b=n.isArray(c)))?(b?(b=!1,f=a&&n.isArray(a)?a:[]):f=a&&n.isPlainObject(a)?a:{},g[d]=n.extend(j,f,c)):void 0!==c&&(g[d]=c));return g},n.extend({expando:"jQuery"+(m+Math.random()).replace(/\D/g,""),isReady:!0,error:function(a){throw new 
Error(a)},noop:function(){},isFunction:function(a){return"function"===n.type(a)},isArray:Array.isArray||function(a){return"array"===n.type(a)},isWindow:function(a){return null!=a&&a==a.window},isNumeric:function(a){return a-parseFloat(a)>=0},isEmptyObject:function(a){var b;for(b in a)return!1;return!0},isPlainObject:function(a){var b;if(!a||"object"!==n.type(a)||a.nodeType||n.isWindow(a))return!1;try{if(a.constructor&&!j.call(a,"constructor")&&!j.call(a.constructor.prototype,"isPrototypeOf"))return!1}catch(c){return!1}if(l.ownLast)for(b in a)return j.call(a,b);for(b in a);return void 0===b||j.call(a,b)},type:function(a){return null==a?a+"":"object"==typeof a||"function"==typeof a?h[i.call(a)]||"object":typeof a},globalEval:function(b){b&&n.trim(b)&&(a.execScript||function(b){a.eval.call(a,b)})(b)},camelCase:function(a){return a.replace(p,"ms-").replace(q,r)},nodeName:function(a,b){return a.nodeName&&a.nodeName.toLowerCase()===b.toLowerCase()},each:function(a,b,c){var d,e=0,f=a.length,g=s(a);if(c){if(g){for(;f>e;e++)if(d=b.apply(a[e],c),d===!1)break}else for(e in a)if(d=b.apply(a[e],c),d===!1)break}else if(g){for(;f>e;e++)if(d=b.call(a[e],e,a[e]),d===!1)break}else for(e in a)if(d=b.call(a[e],e,a[e]),d===!1)break;return a},trim:k&&!k.call("\ufeff\xa0")?function(a){return null==a?"":k.call(a)}:function(a){return null==a?"":(a+"").replace(o,"")},makeArray:function(a,b){var c=b||[];return null!=a&&(s(Object(a))?n.merge(c,"string"==typeof a?[a]:a):f.call(c,a)),c},inArray:function(a,b,c){var d;if(b){if(g)return g.call(b,a,c);for(d=b.length,c=c?0>c?Math.max(0,d+c):c:0;d>c;c++)if(c in b&&b[c]===a)return c}return-1},merge:function(a,b){var c=+b.length,d=0,e=a.length;while(c>d)a[e++]=b[d++];if(c!==c)while(void 0!==b[d])a[e++]=b[d++];return a.length=e,a},grep:function(a,b,c){for(var d,e=[],f=0,g=a.length,h=!c;g>f;f++)d=!b(a[f],f),d!==h&&e.push(a[f]);return e},map:function(a,b,c){var d,f=0,g=a.length,h=s(a),i=[];if(h)for(;g>f;f++)d=b(a[f],f,c),null!=d&&i.push(d);else for(f in 
a)d=b(a[f],f,c),null!=d&&i.push(d);return e.apply([],i)},guid:1,proxy:function(a,b){var c,e,f;return"string"==typeof b&&(f=a[b],b=a,a=f),n.isFunction(a)?(c=d.call(arguments,2),e=function(){return a.apply(b||this,c.concat(d.call(arguments)))},e.guid=a.guid=a.guid||n.guid++,e):void 0},now:function(){return+new Date},support:l}),n.each("Boolean Number String Function Array Date RegExp Object Error".split(" "),function(a,b){h["[object "+b+"]"]=b.toLowerCase()});function s(a){var b=a.length,c=n.type(a);return"function"===c||n.isWindow(a)?!1:1===a.nodeType&&b?!0:"array"===c||0===b||"number"==typeof b&&b>0&&b-1 in a}var t=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s="sizzle"+-new Date,t=a.document,u=0,v=0,w=eb(),x=eb(),y=eb(),z=function(a,b){return a===b&&(j=!0),0},A="undefined",B=1<<31,C={}.hasOwnProperty,D=[],E=D.pop,F=D.push,G=D.push,H=D.slice,I=D.indexOf||function(a){for(var b=0,c=this.length;c>b;b++)if(this[b]===a)return b;return-1},J="checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped",K="[\\x20\\t\\r\\n\\f]",L="(?:\\\\.|[\\w-]|[^\\x00-\\xa0])+",M=L.replace("w","w#"),N="\\["+K+"*("+L+")"+K+"*(?:([*^$|!~]?=)"+K+"*(?:(['\"])((?:\\\\.|[^\\\\])*?)\\3|("+M+")|)|)"+K+"*\\]",O=":("+L+")(?:\\(((['\"])((?:\\\\.|[^\\\\])*?)\\3|((?:\\\\.|[^\\\\()[\\]]|"+N.replace(3,8)+")*)|.*)\\)|)",P=new RegExp("^"+K+"+|((?:^|[^\\\\])(?:\\\\.)*)"+K+"+$","g"),Q=new RegExp("^"+K+"*,"+K+"*"),R=new RegExp("^"+K+"*([>+~]|"+K+")"+K+"*"),S=new RegExp("="+K+"*([^\\]'\"]*?)"+K+"*\\]","g"),T=new RegExp(O),U=new RegExp("^"+M+"$"),V={ID:new RegExp("^#("+L+")"),CLASS:new RegExp("^\\.("+L+")"),TAG:new RegExp("^("+L.replace("w","w*")+")"),ATTR:new RegExp("^"+N),PSEUDO:new RegExp("^"+O),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+K+"*(even|odd|(([+-]|)(\\d*)n|)"+K+"*(?:([+-]|)"+K+"*(\\d+)|))"+K+"*\\)|)","i"),bool:new RegExp("^(?:"+J+")$","i"),needsContext:new 
RegExp("^"+K+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+K+"*((?:-\\d)?\\d*)"+K+"*\\)|)(?=[^-]|$)","i")},W=/^(?:input|select|textarea|button)$/i,X=/^h\d$/i,Y=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,$=/[+~]/,_=/'|\\/g,ab=new RegExp("\\\\([\\da-f]{1,6}"+K+"?|("+K+")|.)","ig"),bb=function(a,b,c){var d="0x"+b-65536;return d!==d||c?b:0>d?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)};try{G.apply(D=H.call(t.childNodes),t.childNodes),D[t.childNodes.length].nodeType}catch(cb){G={apply:D.length?function(a,b){F.apply(a,H.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function db(a,b,d,e){var f,g,h,i,j,m,p,q,u,v;if((b?b.ownerDocument||b:t)!==l&&k(b),b=b||l,d=d||[],!a||"string"!=typeof a)return d;if(1!==(i=b.nodeType)&&9!==i)return[];if(n&&!e){if(f=Z.exec(a))if(h=f[1]){if(9===i){if(g=b.getElementById(h),!g||!g.parentNode)return d;if(g.id===h)return d.push(g),d}else if(b.ownerDocument&&(g=b.ownerDocument.getElementById(h))&&r(b,g)&&g.id===h)return d.push(g),d}else{if(f[2])return G.apply(d,b.getElementsByTagName(a)),d;if((h=f[3])&&c.getElementsByClassName&&b.getElementsByClassName)return G.apply(d,b.getElementsByClassName(h)),d}if(c.qsa&&(!o||!o.test(a))){if(q=p=s,u=b,v=9===i&&a,1===i&&"object"!==b.nodeName.toLowerCase()){m=ob(a),(p=b.getAttribute("id"))?q=p.replace(_,"\\$&"):b.setAttribute("id",q),q="[id='"+q+"'] ",j=m.length;while(j--)m[j]=q+pb(m[j]);u=$.test(a)&&mb(b.parentNode)||b,v=m.join(",")}if(v)try{return G.apply(d,u.querySelectorAll(v)),d}catch(w){}finally{p||b.removeAttribute("id")}}}return xb(a.replace(P,"$1"),b,d,e)}function eb(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function fb(a){return a[s]=!0,a}function gb(a){var b=l.createElement("div");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function hb(a,b){var 
c=a.split("|"),e=a.length;while(e--)d.attrHandle[c[e]]=b}function ib(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&(~b.sourceIndex||B)-(~a.sourceIndex||B);if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function jb(a){return function(b){var c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function kb(a){return function(b){var c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function lb(a){return fb(function(b){return b=+b,fb(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function mb(a){return a&&typeof a.getElementsByTagName!==A&&a}c=db.support={},f=db.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return b?"HTML"!==b.nodeName:!1},k=db.setDocument=function(a){var b,e=a?a.ownerDocument||a:t,g=e.defaultView;return e!==l&&9===e.nodeType&&e.documentElement?(l=e,m=e.documentElement,n=!f(e),g&&g!==g.top&&(g.addEventListener?g.addEventListener("unload",function(){k()},!1):g.attachEvent&&g.attachEvent("onunload",function(){k()})),c.attributes=gb(function(a){return a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=gb(function(a){return a.appendChild(e.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=Y.test(e.getElementsByClassName)&&gb(function(a){return a.innerHTML="
",a.firstChild.className="i",2===a.getElementsByClassName("i").length}),c.getById=gb(function(a){return m.appendChild(a).id=s,!e.getElementsByName||!e.getElementsByName(s).length}),c.getById?(d.find.ID=function(a,b){if(typeof b.getElementById!==A&&n){var c=b.getElementById(a);return c&&c.parentNode?[c]:[]}},d.filter.ID=function(a){var b=a.replace(ab,bb);return function(a){return a.getAttribute("id")===b}}):(delete d.find.ID,d.filter.ID=function(a){var b=a.replace(ab,bb);return function(a){var c=typeof a.getAttributeNode!==A&&a.getAttributeNode("id");return c&&c.value===b}}),d.find.TAG=c.getElementsByTagName?function(a,b){return typeof b.getElementsByTagName!==A?b.getElementsByTagName(a):void 0}:function(a,b){var c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){return typeof b.getElementsByClassName!==A&&n?b.getElementsByClassName(a):void 0},p=[],o=[],(c.qsa=Y.test(e.querySelectorAll))&&(gb(function(a){a.innerHTML="",a.querySelectorAll("[t^='']").length&&o.push("[*^$]="+K+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||o.push("\\["+K+"*(?:value|"+J+")"),a.querySelectorAll(":checked").length||o.push(":checked")}),gb(function(a){var b=e.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&o.push("name"+K+"*[*^$|!~]?="),a.querySelectorAll(":enabled").length||o.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),o.push(",.*:")})),(c.matchesSelector=Y.test(q=m.webkitMatchesSelector||m.mozMatchesSelector||m.oMatchesSelector||m.msMatchesSelector))&&gb(function(a){c.disconnectedMatch=q.call(a,"div"),q.call(a,"[s!='']:x"),p.push("!=",O)}),o=o.length&&new RegExp(o.join("|")),p=p.length&&new RegExp(p.join("|")),b=Y.test(m.compareDocumentPosition),r=b||Y.test(m.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return 
a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},z=b?function(a,b){if(a===b)return j=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===e||a.ownerDocument===t&&r(t,a)?-1:b===e||b.ownerDocument===t&&r(t,b)?1:i?I.call(i,a)-I.call(i,b):0:4&d?-1:1)}:function(a,b){if(a===b)return j=!0,0;var c,d=0,f=a.parentNode,g=b.parentNode,h=[a],k=[b];if(!f||!g)return a===e?-1:b===e?1:f?-1:g?1:i?I.call(i,a)-I.call(i,b):0;if(f===g)return ib(a,b);c=a;while(c=c.parentNode)h.unshift(c);c=b;while(c=c.parentNode)k.unshift(c);while(h[d]===k[d])d++;return d?ib(h[d],k[d]):h[d]===t?-1:k[d]===t?1:0},e):l},db.matches=function(a,b){return db(a,null,null,b)},db.matchesSelector=function(a,b){if((a.ownerDocument||a)!==l&&k(a),b=b.replace(S,"='$1']"),!(!c.matchesSelector||!n||p&&p.test(b)||o&&o.test(b)))try{var d=q.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return d}catch(e){}return db(b,l,null,[a]).length>0},db.contains=function(a,b){return(a.ownerDocument||a)!==l&&k(a),r(a,b)},db.attr=function(a,b){(a.ownerDocument||a)!==l&&k(a);var e=d.attrHandle[b.toLowerCase()],f=e&&C.call(d.attrHandle,b.toLowerCase())?e(a,b,!n):void 0;return void 0!==f?f:c.attributes||!n?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},db.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},db.uniqueSort=function(a){var b,d=[],e=0,f=0;if(j=!c.detectDuplicates,i=!c.sortStable&&a.slice(0),a.sort(z),j){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return i=null,a},e=db.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return 
a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return a.nodeValue}else while(b=a[d++])c+=e(b);return c},d=db.selectors={cacheLength:50,createPseudo:fb,match:V,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return a[1]=a[1].replace(ab,bb),a[3]=(a[4]||a[5]||"").replace(ab,bb),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||db.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&db.error(a[0]),a},PSEUDO:function(a){var b,c=!a[5]&&a[2];return V.CHILD.test(a[0])?null:(a[3]&&void 0!==a[4]?a[2]=a[4]:c&&T.test(c)&&(b=ob(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(ab,bb).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=w[a+" "];return b||(b=new RegExp("(^|"+K+")"+a+"("+K+"|$)"))&&w(a,function(a){return b.test("string"==typeof a.className&&a.className||typeof a.getAttribute!==A&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=db.attr(d,a);return null==e?"!="===b:b?(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e+" ").indexOf(c)>-1:"|="===b?e===c||e.slice(0,c.length+1)===c+"-":!1):!0}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var 
j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),t=!i&&!h;if(q){if(f){while(p){l=b;while(l=l[p])if(h?l.nodeName.toLowerCase()===r:1===l.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&t){k=q[s]||(q[s]={}),j=k[a]||[],n=j[0]===u&&j[1],m=j[0]===u&&j[2],l=n&&q.childNodes[n];while(l=++n&&l&&l[p]||(m=n=0)||o.pop())if(1===l.nodeType&&++m&&l===b){k[a]=[u,n,m];break}}else if(t&&(j=(b[s]||(b[s]={}))[a])&&j[0]===u)m=j[1];else while(l=++n&&l&&l[p]||(m=n=0)||o.pop())if((h?l.nodeName.toLowerCase()===r:1===l.nodeType)&&++m&&(t&&((l[s]||(l[s]={}))[a]=[u,m]),l===b))break;return m-=e,m===d||m%d===0&&m/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||db.error("unsupported pseudo: "+a);return e[s]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?fb(function(a,c){var d,f=e(a,b),g=f.length;while(g--)d=I.call(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:fb(function(a){var b=[],c=[],d=g(a.replace(P,"$1"));return d[s]?fb(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return b[0]=a,d(b,null,f,c),!c.pop()}}),has:fb(function(a){return function(b){return db(a,b).length>0}}),contains:fb(function(a){return function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:fb(function(a){return U.test(a||"")||db.error("unsupported lang: "+a),a=a.replace(ab,bb).toLowerCase(),function(b){var c;do if(c=n?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return c=c.toLowerCase(),c===a||0===c.indexOf(a+"-");while((b=b.parentNode)&&1===b.nodeType);return!1}}),target:function(b){var c=a.location&&a.location.hash;return c&&c.slice(1)===b.id},root:function(a){return a===m},focus:function(a){return a===l.activeElement&&(!l.hasFocus||l.hasFocus())&&!!(a.type||a.href||~a.tabIndex)},enabled:function(a){return 
a.disabled===!1},disabled:function(a){return a.disabled===!0},checked:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&!!a.checked||"option"===b&&!!a.selected},selected:function(a){return a.parentNode&&a.parentNode.selectedIndex,a.selected===!0},empty:function(a){for(a=a.firstChild;a;a=a.nextSibling)if(a.nodeType<6)return!1;return!0},parent:function(a){return!d.pseudos.empty(a)},header:function(a){return X.test(a.nodeName)},input:function(a){return W.test(a.nodeName)},button:function(a){var b=a.nodeName.toLowerCase();return"input"===b&&"button"===a.type||"button"===b},text:function(a){var b;return"input"===a.nodeName.toLowerCase()&&"text"===a.type&&(null==(b=a.getAttribute("type"))||"text"===b.toLowerCase())},first:lb(function(){return[0]}),last:lb(function(a,b){return[b-1]}),eq:lb(function(a,b,c){return[0>c?c+b:c]}),even:lb(function(a,b){for(var c=0;b>c;c+=2)a.push(c);return a}),odd:lb(function(a,b){for(var c=1;b>c;c+=2)a.push(c);return a}),lt:lb(function(a,b,c){for(var d=0>c?c+b:c;--d>=0;)a.push(d);return a}),gt:lb(function(a,b,c){for(var d=0>c?c+b:c;++db;b++)d+=a[b].value;return d}function qb(a,b,c){var d=b.dir,e=c&&"parentNode"===d,f=v++;return b.first?function(b,c,f){while(b=b[d])if(1===b.nodeType||e)return a(b,c,f)}:function(b,c,g){var h,i,j=[u,f];if(g){while(b=b[d])if((1===b.nodeType||e)&&a(b,c,g))return!0}else while(b=b[d])if(1===b.nodeType||e){if(i=b[s]||(b[s]={}),(h=i[d])&&h[0]===u&&h[1]===f)return j[2]=h[2];if(i[d]=j,j[2]=a(b,c,g))return!0}}}function rb(a){return a.length>1?function(b,c,d){var e=a.length;while(e--)if(!a[e](b,c,d))return!1;return!0}:a[0]}function sb(a,b,c,d,e){for(var f,g=[],h=0,i=a.length,j=null!=b;i>h;h++)(f=a[h])&&(!c||c(f,d,e))&&(g.push(f),j&&b.push(h));return g}function tb(a,b,c,d,e,f){return d&&!d[s]&&(d=tb(d)),e&&!e[s]&&(e=tb(e,f)),fb(function(f,g,h,i){var 
j,k,l,m=[],n=[],o=g.length,p=f||wb(b||"*",h.nodeType?[h]:h,[]),q=!a||!f&&b?p:sb(p,m,a,h,i),r=c?e||(f?a:o||d)?[]:g:q;if(c&&c(q,r,h,i),d){j=sb(r,n),d(j,[],h,i),k=j.length;while(k--)(l=j[k])&&(r[n[k]]=!(q[n[k]]=l))}if(f){if(e||a){if(e){j=[],k=r.length;while(k--)(l=r[k])&&j.push(q[k]=l);e(null,r=[],j,i)}k=r.length;while(k--)(l=r[k])&&(j=e?I.call(f,l):m[k])>-1&&(f[j]=!(g[j]=l))}}else r=sb(r===g?r.splice(o,r.length):r),e?e(null,g,r,i):G.apply(g,r)})}function ub(a){for(var b,c,e,f=a.length,g=d.relative[a[0].type],i=g||d.relative[" "],j=g?1:0,k=qb(function(a){return a===b},i,!0),l=qb(function(a){return I.call(b,a)>-1},i,!0),m=[function(a,c,d){return!g&&(d||c!==h)||((b=c).nodeType?k(a,c,d):l(a,c,d))}];f>j;j++)if(c=d.relative[a[j].type])m=[qb(rb(m),c)];else{if(c=d.filter[a[j].type].apply(null,a[j].matches),c[s]){for(e=++j;f>e;e++)if(d.relative[a[e].type])break;return tb(j>1&&rb(m),j>1&&pb(a.slice(0,j-1).concat({value:" "===a[j-2].type?"*":""})).replace(P,"$1"),c,e>j&&ub(a.slice(j,e)),f>e&&ub(a=a.slice(e)),f>e&&pb(a))}m.push(c)}return rb(m)}function vb(a,b){var c=b.length>0,e=a.length>0,f=function(f,g,i,j,k){var m,n,o,p=0,q="0",r=f&&[],s=[],t=h,v=f||e&&d.find.TAG("*",k),w=u+=null==t?1:Math.random()||.1,x=v.length;for(k&&(h=g!==l&&g);q!==x&&null!=(m=v[q]);q++){if(e&&m){n=0;while(o=a[n++])if(o(m,g,i)){j.push(m);break}k&&(u=w)}c&&((m=!o&&m)&&p--,f&&r.push(m))}if(p+=q,c&&q!==p){n=0;while(o=b[n++])o(r,s,g,i);if(f){if(p>0)while(q--)r[q]||s[q]||(s[q]=E.call(j));s=sb(s)}G.apply(j,s),k&&!f&&s.length>0&&p+b.length>1&&db.uniqueSort(j)}return k&&(u=w,h=t),r};return c?fb(f):f}g=db.compile=function(a,b){var c,d=[],e=[],f=y[a+" "];if(!f){b||(b=ob(a)),c=b.length;while(c--)f=ub(b[c]),f[s]?d.push(f):e.push(f);f=y(a,vb(e,d))}return f};function wb(a,b,c){for(var d=0,e=b.length;e>d;d++)db(a,b[d],c);return c}function xb(a,b,e,f){var 
h,i,j,k,l,m=ob(a);if(!f&&1===m.length){if(i=m[0]=m[0].slice(0),i.length>2&&"ID"===(j=i[0]).type&&c.getById&&9===b.nodeType&&n&&d.relative[i[1].type]){if(b=(d.find.ID(j.matches[0].replace(ab,bb),b)||[])[0],!b)return e;a=a.slice(i.shift().value.length)}h=V.needsContext.test(a)?0:i.length;while(h--){if(j=i[h],d.relative[k=j.type])break;if((l=d.find[k])&&(f=l(j.matches[0].replace(ab,bb),$.test(i[0].type)&&mb(b.parentNode)||b))){if(i.splice(h,1),a=f.length&&pb(i),!a)return G.apply(e,f),e;break}}}return g(a,m)(f,b,!n,e,$.test(a)&&mb(b.parentNode)||b),e}return c.sortStable=s.split("").sort(z).join("")===s,c.detectDuplicates=!!j,k(),c.sortDetached=gb(function(a){return 1&a.compareDocumentPosition(l.createElement("div"))}),gb(function(a){return a.innerHTML="","#"===a.firstChild.getAttribute("href")})||hb("type|href|height|width",function(a,b,c){return c?void 0:a.getAttribute(b,"type"===b.toLowerCase()?1:2)}),c.attributes&&gb(function(a){return a.innerHTML="",a.firstChild.setAttribute("value",""),""===a.firstChild.getAttribute("value")})||hb("value",function(a,b,c){return c||"input"!==a.nodeName.toLowerCase()?void 0:a.defaultValue}),gb(function(a){return null==a.getAttribute("disabled")})||hb(J,function(a,b,c){var d;return c?void 0:a[b]===!0?b.toLowerCase():(d=a.getAttributeNode(b))&&d.specified?d.value:null}),db}(a);n.find=t,n.expr=t.selectors,n.expr[":"]=n.expr.pseudos,n.unique=t.uniqueSort,n.text=t.getText,n.isXMLDoc=t.isXML,n.contains=t.contains;var u=n.expr.match.needsContext,v=/^<(\w+)\s*\/?>(?:<\/\1>|)$/,w=/^.[^:#\[\.,]*$/;function x(a,b,c){if(n.isFunction(b))return n.grep(a,function(a,d){return!!b.call(a,d,a)!==c});if(b.nodeType)return n.grep(a,function(a){return a===b!==c});if("string"==typeof b){if(w.test(b))return n.filter(b,a,c);b=n.filter(b,a)}return n.grep(a,function(a){return n.inArray(a,b)>=0!==c})}n.filter=function(a,b,c){var d=b[0];return 
c&&(a=":not("+a+")"),1===b.length&&1===d.nodeType?n.find.matchesSelector(d,a)?[d]:[]:n.find.matches(a,n.grep(b,function(a){return 1===a.nodeType}))},n.fn.extend({find:function(a){var b,c=[],d=this,e=d.length;if("string"!=typeof a)return this.pushStack(n(a).filter(function(){for(b=0;e>b;b++)if(n.contains(d[b],this))return!0}));for(b=0;e>b;b++)n.find(a,d[b],c);return c=this.pushStack(e>1?n.unique(c):c),c.selector=this.selector?this.selector+" "+a:a,c},filter:function(a){return this.pushStack(x(this,a||[],!1))},not:function(a){return this.pushStack(x(this,a||[],!0))},is:function(a){return!!x(this,"string"==typeof a&&u.test(a)?n(a):a||[],!1).length}});var y,z=a.document,A=/^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]*))$/,B=n.fn.init=function(a,b){var c,d;if(!a)return this;if("string"==typeof a){if(c="<"===a.charAt(0)&&">"===a.charAt(a.length-1)&&a.length>=3?[null,a,null]:A.exec(a),!c||!c[1]&&b)return!b||b.jquery?(b||y).find(a):this.constructor(b).find(a);if(c[1]){if(b=b instanceof n?b[0]:b,n.merge(this,n.parseHTML(c[1],b&&b.nodeType?b.ownerDocument||b:z,!0)),v.test(c[1])&&n.isPlainObject(b))for(c in b)n.isFunction(this[c])?this[c](b[c]):this.attr(c,b[c]);return this}if(d=z.getElementById(c[2]),d&&d.parentNode){if(d.id!==c[2])return y.find(a);this.length=1,this[0]=d}return this.context=z,this.selector=a,this}return a.nodeType?(this.context=this[0]=a,this.length=1,this):n.isFunction(a)?"undefined"!=typeof y.ready?y.ready(a):a(n):(void 0!==a.selector&&(this.selector=a.selector,this.context=a.context),n.makeArray(a,this))};B.prototype=n.fn,y=n(z);var C=/^(?:parents|prev(?:Until|All))/,D={children:!0,contents:!0,next:!0,prev:!0};n.extend({dir:function(a,b,c){var d=[],e=a[b];while(e&&9!==e.nodeType&&(void 0===c||1!==e.nodeType||!n(e).is(c)))1===e.nodeType&&d.push(e),e=e[b];return d},sibling:function(a,b){for(var c=[];a;a=a.nextSibling)1===a.nodeType&&a!==b&&c.push(a);return c}}),n.fn.extend({has:function(a){var b,c=n(a,this),d=c.length;return 
this.filter(function(){for(b=0;d>b;b++)if(n.contains(this,c[b]))return!0})},closest:function(a,b){for(var c,d=0,e=this.length,f=[],g=u.test(a)||"string"!=typeof a?n(a,b||this.context):0;e>d;d++)for(c=this[d];c&&c!==b;c=c.parentNode)if(c.nodeType<11&&(g?g.index(c)>-1:1===c.nodeType&&n.find.matchesSelector(c,a))){f.push(c);break}return this.pushStack(f.length>1?n.unique(f):f)},index:function(a){return a?"string"==typeof a?n.inArray(this[0],n(a)):n.inArray(a.jquery?a[0]:a,this):this[0]&&this[0].parentNode?this.first().prevAll().length:-1},add:function(a,b){return this.pushStack(n.unique(n.merge(this.get(),n(a,b))))},addBack:function(a){return this.add(null==a?this.prevObject:this.prevObject.filter(a))}});function E(a,b){do a=a[b];while(a&&1!==a.nodeType);return a}n.each({parent:function(a){var b=a.parentNode;return b&&11!==b.nodeType?b:null},parents:function(a){return n.dir(a,"parentNode")},parentsUntil:function(a,b,c){return n.dir(a,"parentNode",c)},next:function(a){return E(a,"nextSibling")},prev:function(a){return E(a,"previousSibling")},nextAll:function(a){return n.dir(a,"nextSibling")},prevAll:function(a){return n.dir(a,"previousSibling")},nextUntil:function(a,b,c){return n.dir(a,"nextSibling",c)},prevUntil:function(a,b,c){return n.dir(a,"previousSibling",c)},siblings:function(a){return n.sibling((a.parentNode||{}).firstChild,a)},children:function(a){return n.sibling(a.firstChild)},contents:function(a){return n.nodeName(a,"iframe")?a.contentDocument||a.contentWindow.document:n.merge([],a.childNodes)}},function(a,b){n.fn[a]=function(c,d){var e=n.map(this,b,c);return"Until"!==a.slice(-5)&&(d=c),d&&"string"==typeof d&&(e=n.filter(d,e)),this.length>1&&(D[a]||(e=n.unique(e)),C.test(a)&&(e=e.reverse())),this.pushStack(e)}});var F=/\S+/g,G={};function H(a){var b=G[a]={};return n.each(a.match(F)||[],function(a,c){b[c]=!0}),b}n.Callbacks=function(a){a="string"==typeof a?G[a]||H(a):n.extend({},a);var 
b,c,d,e,f,g,h=[],i=!a.once&&[],j=function(l){for(c=a.memory&&l,d=!0,f=g||0,g=0,e=h.length,b=!0;h&&e>f;f++)if(h[f].apply(l[0],l[1])===!1&&a.stopOnFalse){c=!1;break}b=!1,h&&(i?i.length&&j(i.shift()):c?h=[]:k.disable())},k={add:function(){if(h){var d=h.length;!function f(b){n.each(b,function(b,c){var d=n.type(c);"function"===d?a.unique&&k.has(c)||h.push(c):c&&c.length&&"string"!==d&&f(c)})}(arguments),b?e=h.length:c&&(g=d,j(c))}return this},remove:function(){return h&&n.each(arguments,function(a,c){var d;while((d=n.inArray(c,h,d))>-1)h.splice(d,1),b&&(e>=d&&e--,f>=d&&f--)}),this},has:function(a){return a?n.inArray(a,h)>-1:!(!h||!h.length)},empty:function(){return h=[],e=0,this},disable:function(){return h=i=c=void 0,this},disabled:function(){return!h},lock:function(){return i=void 0,c||k.disable(),this},locked:function(){return!i},fireWith:function(a,c){return!h||d&&!i||(c=c||[],c=[a,c.slice?c.slice():c],b?i.push(c):j(c)),this},fire:function(){return k.fireWith(this,arguments),this},fired:function(){return!!d}};return k},n.extend({Deferred:function(a){var b=[["resolve","done",n.Callbacks("once memory"),"resolved"],["reject","fail",n.Callbacks("once memory"),"rejected"],["notify","progress",n.Callbacks("memory")]],c="pending",d={state:function(){return c},always:function(){return e.done(arguments).fail(arguments),this},then:function(){var a=arguments;return n.Deferred(function(c){n.each(b,function(b,f){var g=n.isFunction(a[b])&&a[b];e[f[1]](function(){var a=g&&g.apply(this,arguments);a&&n.isFunction(a.promise)?a.promise().done(c.resolve).fail(c.reject).progress(c.notify):c[f[0]+"With"](this===d?c.promise():this,g?[a]:arguments)})}),a=null}).promise()},promise:function(a){return null!=a?n.extend(a,d):d}},e={};return d.pipe=d.then,n.each(b,function(a,f){var g=f[2],h=f[3];d[f[1]]=g.add,h&&g.add(function(){c=h},b[1^a][2].disable,b[2][2].lock),e[f[0]]=function(){return 
e[f[0]+"With"](this===e?d:this,arguments),this},e[f[0]+"With"]=g.fireWith}),d.promise(e),a&&a.call(e,e),e},when:function(a){var b=0,c=d.call(arguments),e=c.length,f=1!==e||a&&n.isFunction(a.promise)?e:0,g=1===f?a:n.Deferred(),h=function(a,b,c){return function(e){b[a]=this,c[a]=arguments.length>1?d.call(arguments):e,c===i?g.notifyWith(b,c):--f||g.resolveWith(b,c)}},i,j,k;if(e>1)for(i=new Array(e),j=new Array(e),k=new Array(e);e>b;b++)c[b]&&n.isFunction(c[b].promise)?c[b].promise().done(h(b,k,c)).fail(g.reject).progress(h(b,j,i)):--f;return f||g.resolveWith(k,c),g.promise()}});var I;n.fn.ready=function(a){return n.ready.promise().done(a),this},n.extend({isReady:!1,readyWait:1,holdReady:function(a){a?n.readyWait++:n.ready(!0)},ready:function(a){if(a===!0?!--n.readyWait:!n.isReady){if(!z.body)return setTimeout(n.ready);n.isReady=!0,a!==!0&&--n.readyWait>0||(I.resolveWith(z,[n]),n.fn.trigger&&n(z).trigger("ready").off("ready"))}}});function J(){z.addEventListener?(z.removeEventListener("DOMContentLoaded",K,!1),a.removeEventListener("load",K,!1)):(z.detachEvent("onreadystatechange",K),a.detachEvent("onload",K))}function K(){(z.addEventListener||"load"===event.type||"complete"===z.readyState)&&(J(),n.ready())}n.ready.promise=function(b){if(!I)if(I=n.Deferred(),"complete"===z.readyState)setTimeout(n.ready);else if(z.addEventListener)z.addEventListener("DOMContentLoaded",K,!1),a.addEventListener("load",K,!1);else{z.attachEvent("onreadystatechange",K),a.attachEvent("onload",K);var c=!1;try{c=null==a.frameElement&&z.documentElement}catch(d){}c&&c.doScroll&&!function e(){if(!n.isReady){try{c.doScroll("left")}catch(a){return setTimeout(e,50)}J(),n.ready()}}()}return I.promise(b)};var L="undefined",M;for(M in n(l))break;l.ownLast="0"!==M,l.inlineBlockNeedsLayout=!1,n(function(){var 
a,b,c=z.getElementsByTagName("body")[0];c&&(a=z.createElement("div"),a.style.cssText="border:0;width:0;height:0;position:absolute;top:0;left:-9999px;margin-top:1px",b=z.createElement("div"),c.appendChild(a).appendChild(b),typeof b.style.zoom!==L&&(b.style.cssText="border:0;margin:0;width:1px;padding:1px;display:inline;zoom:1",(l.inlineBlockNeedsLayout=3===b.offsetWidth)&&(c.style.zoom=1)),c.removeChild(a),a=b=null)}),function(){var a=z.createElement("div");if(null==l.deleteExpando){l.deleteExpando=!0;try{delete a.test}catch(b){l.deleteExpando=!1}}a=null}(),n.acceptData=function(a){var b=n.noData[(a.nodeName+" ").toLowerCase()],c=+a.nodeType||1;return 1!==c&&9!==c?!1:!b||b!==!0&&a.getAttribute("classid")===b};var N=/^(?:\{[\w\W]*\}|\[[\w\W]*\])$/,O=/([A-Z])/g;function P(a,b,c){if(void 0===c&&1===a.nodeType){var d="data-"+b.replace(O,"-$1").toLowerCase();if(c=a.getAttribute(d),"string"==typeof c){try{c="true"===c?!0:"false"===c?!1:"null"===c?null:+c+""===c?+c:N.test(c)?n.parseJSON(c):c}catch(e){}n.data(a,b,c)}else c=void 0}return c}function Q(a){var b;for(b in a)if(("data"!==b||!n.isEmptyObject(a[b]))&&"toJSON"!==b)return!1;return!0}function R(a,b,d,e){if(n.acceptData(a)){var f,g,h=n.expando,i=a.nodeType,j=i?n.cache:a,k=i?a[h]:a[h]&&h;if(k&&j[k]&&(e||j[k].data)||void 0!==d||"string"!=typeof b)return k||(k=i?a[h]=c.pop()||n.guid++:h),j[k]||(j[k]=i?{}:{toJSON:n.noop}),("object"==typeof b||"function"==typeof b)&&(e?j[k]=n.extend(j[k],b):j[k].data=n.extend(j[k].data,b)),g=j[k],e||(g.data||(g.data={}),g=g.data),void 0!==d&&(g[n.camelCase(b)]=d),"string"==typeof b?(f=g[b],null==f&&(f=g[n.camelCase(b)])):f=g,f -}}function S(a,b,c){if(n.acceptData(a)){var d,e,f=a.nodeType,g=f?n.cache:a,h=f?a[n.expando]:n.expando;if(g[h]){if(b&&(d=c?g[h]:g[h].data)){n.isArray(b)?b=b.concat(n.map(b,n.camelCase)):b in d?b=[b]:(b=n.camelCase(b),b=b in d?[b]:b.split(" ")),e=b.length;while(e--)delete d[b[e]];if(c?!Q(d):!n.isEmptyObject(d))return}(c||(delete 
g[h].data,Q(g[h])))&&(f?n.cleanData([a],!0):l.deleteExpando||g!=g.window?delete g[h]:g[h]=null)}}}n.extend({cache:{},noData:{"applet ":!0,"embed ":!0,"object ":"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000"},hasData:function(a){return a=a.nodeType?n.cache[a[n.expando]]:a[n.expando],!!a&&!Q(a)},data:function(a,b,c){return R(a,b,c)},removeData:function(a,b){return S(a,b)},_data:function(a,b,c){return R(a,b,c,!0)},_removeData:function(a,b){return S(a,b,!0)}}),n.fn.extend({data:function(a,b){var c,d,e,f=this[0],g=f&&f.attributes;if(void 0===a){if(this.length&&(e=n.data(f),1===f.nodeType&&!n._data(f,"parsedAttrs"))){c=g.length;while(c--)d=g[c].name,0===d.indexOf("data-")&&(d=n.camelCase(d.slice(5)),P(f,d,e[d]));n._data(f,"parsedAttrs",!0)}return e}return"object"==typeof a?this.each(function(){n.data(this,a)}):arguments.length>1?this.each(function(){n.data(this,a,b)}):f?P(f,a,n.data(f,a)):void 0},removeData:function(a){return this.each(function(){n.removeData(this,a)})}}),n.extend({queue:function(a,b,c){var d;return a?(b=(b||"fx")+"queue",d=n._data(a,b),c&&(!d||n.isArray(c)?d=n._data(a,b,n.makeArray(c)):d.push(c)),d||[]):void 0},dequeue:function(a,b){b=b||"fx";var c=n.queue(a,b),d=c.length,e=c.shift(),f=n._queueHooks(a,b),g=function(){n.dequeue(a,b)};"inprogress"===e&&(e=c.shift(),d--),e&&("fx"===b&&c.unshift("inprogress"),delete f.stop,e.call(a,g,f)),!d&&f&&f.empty.fire()},_queueHooks:function(a,b){var c=b+"queueHooks";return n._data(a,c)||n._data(a,c,{empty:n.Callbacks("once memory").add(function(){n._removeData(a,b+"queue"),n._removeData(a,c)})})}}),n.fn.extend({queue:function(a,b){var c=2;return"string"!=typeof a&&(b=a,a="fx",c--),arguments.lengthh;h++)b(a[h],c,g?d:d.call(a[h],h,b(a[h],c)));return e?a:j?b.call(a):i?b(a[0],c):f},X=/^(?:checkbox|radio)$/i;!function(){var a=z.createDocumentFragment(),b=z.createElement("div"),c=z.createElement("input");if(b.setAttribute("className","t"),b.innerHTML="
info
-----------------------------------
Sample $count of patient $id is an archive file, using IMGT Loader
Sample $count of patient $id is not a zip file so assuming fasta/fastq, using igBLASTn
-----------------------------------
merging
done
-----------------------------------
plotting
a",l.leadingWhitespace=3===b.firstChild.nodeType,l.tbody=!b.getElementsByTagName("tbody").length,l.htmlSerialize=!!b.getElementsByTagName("link").length,l.html5Clone="<:nav>"!==z.createElement("nav").cloneNode(!0).outerHTML,c.type="checkbox",c.checked=!0,a.appendChild(c),l.appendChecked=c.checked,b.innerHTML="",l.noCloneChecked=!!b.cloneNode(!0).lastChild.defaultValue,a.appendChild(b),b.innerHTML="",l.checkClone=b.cloneNode(!0).cloneNode(!0).lastChild.checked,l.noCloneEvent=!0,b.attachEvent&&(b.attachEvent("onclick",function(){l.noCloneEvent=!1}),b.cloneNode(!0).click()),null==l.deleteExpando){l.deleteExpando=!0;try{delete b.test}catch(d){l.deleteExpando=!1}}a=b=c=null}(),function(){var b,c,d=z.createElement("div");for(b in{submit:!0,change:!0,focusin:!0})c="on"+b,(l[b+"Bubbles"]=c in a)||(d.setAttribute(c,"t"),l[b+"Bubbles"]=d.attributes[c].expando===!1);d=null}();var Y=/^(?:input|select|textarea)$/i,Z=/^key/,$=/^(?:mouse|contextmenu)|click/,_=/^(?:focusinfocus|focusoutblur)$/,ab=/^([^.]*)(?:\.(.+)|)$/;function bb(){return!0}function cb(){return!1}function db(){try{return z.activeElement}catch(a){}}n.event={global:{},add:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,o,p,q,r=n._data(a);if(r){c.handler&&(i=c,c=i.handler,e=i.selector),c.guid||(c.guid=n.guid++),(g=r.events)||(g=r.events={}),(k=r.handle)||(k=r.handle=function(a){return typeof n===L||a&&n.event.triggered===a.type?void 
0:n.event.dispatch.apply(k.elem,arguments)},k.elem=a),b=(b||"").match(F)||[""],h=b.length;while(h--)f=ab.exec(b[h])||[],o=q=f[1],p=(f[2]||"").split(".").sort(),o&&(j=n.event.special[o]||{},o=(e?j.delegateType:j.bindType)||o,j=n.event.special[o]||{},l=n.extend({type:o,origType:q,data:d,handler:c,guid:c.guid,selector:e,needsContext:e&&n.expr.match.needsContext.test(e),namespace:p.join(".")},i),(m=g[o])||(m=g[o]=[],m.delegateCount=0,j.setup&&j.setup.call(a,d,p,k)!==!1||(a.addEventListener?a.addEventListener(o,k,!1):a.attachEvent&&a.attachEvent("on"+o,k))),j.add&&(j.add.call(a,l),l.handler.guid||(l.handler.guid=c.guid)),e?m.splice(m.delegateCount++,0,l):m.push(l),n.event.global[o]=!0);a=null}},remove:function(a,b,c,d,e){var f,g,h,i,j,k,l,m,o,p,q,r=n.hasData(a)&&n._data(a);if(r&&(k=r.events)){b=(b||"").match(F)||[""],j=b.length;while(j--)if(h=ab.exec(b[j])||[],o=q=h[1],p=(h[2]||"").split(".").sort(),o){l=n.event.special[o]||{},o=(d?l.delegateType:l.bindType)||o,m=k[o]||[],h=h[2]&&new RegExp("(^|\\.)"+p.join("\\.(?:.*\\.|)")+"(\\.|$)"),i=f=m.length;while(f--)g=m[f],!e&&q!==g.origType||c&&c.guid!==g.guid||h&&!h.test(g.namespace)||d&&d!==g.selector&&("**"!==d||!g.selector)||(m.splice(f,1),g.selector&&m.delegateCount--,l.remove&&l.remove.call(a,g));i&&!m.length&&(l.teardown&&l.teardown.call(a,p,r.handle)!==!1||n.removeEvent(a,o,r.handle),delete k[o])}else for(o in k)n.event.remove(a,o+b[j],c,d,!0);n.isEmptyObject(k)&&(delete r.handle,n._removeData(a,"events"))}},trigger:function(b,c,d,e){var f,g,h,i,k,l,m,o=[d||z],p=j.call(b,"type")?b.type:b,q=j.call(b,"namespace")?b.namespace.split("."):[];if(h=l=d=d||z,3!==d.nodeType&&8!==d.nodeType&&!_.test(p+n.event.triggered)&&(p.indexOf(".")>=0&&(q=p.split("."),p=q.shift(),q.sort()),g=p.indexOf(":")<0&&"on"+p,b=b[n.expando]?b:new n.Event(p,"object"==typeof b&&b),b.isTrigger=e?2:3,b.namespace=q.join("."),b.namespace_re=b.namespace?new RegExp("(^|\\.)"+q.join("\\.(?:.*\\.|)")+"(\\.|$)"):null,b.result=void 
0,b.target||(b.target=d),c=null==c?[b]:n.makeArray(c,[b]),k=n.event.special[p]||{},e||!k.trigger||k.trigger.apply(d,c)!==!1)){if(!e&&!k.noBubble&&!n.isWindow(d)){for(i=k.delegateType||p,_.test(i+p)||(h=h.parentNode);h;h=h.parentNode)o.push(h),l=h;l===(d.ownerDocument||z)&&o.push(l.defaultView||l.parentWindow||a)}m=0;while((h=o[m++])&&!b.isPropagationStopped())b.type=m>1?i:k.bindType||p,f=(n._data(h,"events")||{})[b.type]&&n._data(h,"handle"),f&&f.apply(h,c),f=g&&h[g],f&&f.apply&&n.acceptData(h)&&(b.result=f.apply(h,c),b.result===!1&&b.preventDefault());if(b.type=p,!e&&!b.isDefaultPrevented()&&(!k._default||k._default.apply(o.pop(),c)===!1)&&n.acceptData(d)&&g&&d[p]&&!n.isWindow(d)){l=d[g],l&&(d[g]=null),n.event.triggered=p;try{d[p]()}catch(r){}n.event.triggered=void 0,l&&(d[g]=l)}return b.result}},dispatch:function(a){a=n.event.fix(a);var b,c,e,f,g,h=[],i=d.call(arguments),j=(n._data(this,"events")||{})[a.type]||[],k=n.event.special[a.type]||{};if(i[0]=a,a.delegateTarget=this,!k.preDispatch||k.preDispatch.call(this,a)!==!1){h=n.event.handlers.call(this,a,j),b=0;while((f=h[b++])&&!a.isPropagationStopped()){a.currentTarget=f.elem,g=0;while((e=f.handlers[g++])&&!a.isImmediatePropagationStopped())(!a.namespace_re||a.namespace_re.test(e.namespace))&&(a.handleObj=e,a.data=e.data,c=((n.event.special[e.origType]||{}).handle||e.handler).apply(f.elem,i),void 0!==c&&(a.result=c)===!1&&(a.preventDefault(),a.stopPropagation()))}return k.postDispatch&&k.postDispatch.call(this,a),a.result}},handlers:function(a,b){var c,d,e,f,g=[],h=b.delegateCount,i=a.target;if(h&&i.nodeType&&(!a.button||"click"!==a.type))for(;i!=this;i=i.parentNode||this)if(1===i.nodeType&&(i.disabled!==!0||"click"!==a.type)){for(e=[],f=0;h>f;f++)d=b[f],c=d.selector+" ",void 0===e[c]&&(e[c]=d.needsContext?n(c,this).index(i)>=0:n.find(c,this,null,[i]).length),e[c]&&e.push(d);e.length&&g.push({elem:i,handlers:e})}return 
h]","i"),ib=/^\s+/,jb=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/gi,kb=/<([\w:]+)/,lb=/\s*$/g,sb={option:[1,""],legend:[1,"
","
"],area:[1,"",""],param:[1,"",""],thead:[1,"","
"],tr:[2,"","
"],col:[2,"","
"],td:[3,"","
"],_default:l.htmlSerialize?[0,"",""]:[1,"X
","
"]},tb=eb(z),ub=tb.appendChild(z.createElement("div"));sb.optgroup=sb.option,sb.tbody=sb.tfoot=sb.colgroup=sb.caption=sb.thead,sb.th=sb.td;function vb(a,b){var c,d,e=0,f=typeof a.getElementsByTagName!==L?a.getElementsByTagName(b||"*"):typeof a.querySelectorAll!==L?a.querySelectorAll(b||"*"):void 0;if(!f)for(f=[],c=a.childNodes||a;null!=(d=c[e]);e++)!b||n.nodeName(d,b)?f.push(d):n.merge(f,vb(d,b));return void 0===b||b&&n.nodeName(a,b)?n.merge([a],f):f}function wb(a){X.test(a.type)&&(a.defaultChecked=a.checked)}function xb(a,b){return n.nodeName(a,"table")&&n.nodeName(11!==b.nodeType?b:b.firstChild,"tr")?a.getElementsByTagName("tbody")[0]||a.appendChild(a.ownerDocument.createElement("tbody")):a}function yb(a){return a.type=(null!==n.find.attr(a,"type"))+"/"+a.type,a}function zb(a){var b=qb.exec(a.type);return b?a.type=b[1]:a.removeAttribute("type"),a}function Ab(a,b){for(var c,d=0;null!=(c=a[d]);d++)n._data(c,"globalEval",!b||n._data(b[d],"globalEval"))}function Bb(a,b){if(1===b.nodeType&&n.hasData(a)){var c,d,e,f=n._data(a),g=n._data(b,f),h=f.events;if(h){delete g.handle,g.events={};for(c in h)for(d=0,e=h[c].length;e>d;d++)n.event.add(b,c,h[c][d])}g.data&&(g.data=n.extend({},g.data))}}function Cb(a,b){var c,d,e;if(1===b.nodeType){if(c=b.nodeName.toLowerCase(),!l.noCloneEvent&&b[n.expando]){e=n._data(b);for(d in e.events)n.removeEvent(b,d,e.handle);b.removeAttribute(n.expando)}"script"===c&&b.text!==a.text?(yb(b).text=a.text,zb(b)):"object"===c?(b.parentNode&&(b.outerHTML=a.outerHTML),l.html5Clone&&a.innerHTML&&!n.trim(b.innerHTML)&&(b.innerHTML=a.innerHTML)):"input"===c&&X.test(a.type)?(b.defaultChecked=b.checked=a.checked,b.value!==a.value&&(b.value=a.value)):"option"===c?b.defaultSelected=b.selected=a.defaultSelected:("input"===c||"textarea"===c)&&(b.defaultValue=a.defaultValue)}}n.extend({clone:function(a,b,c){var 
d,e,f,g,h,i=n.contains(a.ownerDocument,a);if(l.html5Clone||n.isXMLDoc(a)||!hb.test("<"+a.nodeName+">")?f=a.cloneNode(!0):(ub.innerHTML=a.outerHTML,ub.removeChild(f=ub.firstChild)),!(l.noCloneEvent&&l.noCloneChecked||1!==a.nodeType&&11!==a.nodeType||n.isXMLDoc(a)))for(d=vb(f),h=vb(a),g=0;null!=(e=h[g]);++g)d[g]&&Cb(e,d[g]);if(b)if(c)for(h=h||vb(a),d=d||vb(f),g=0;null!=(e=h[g]);g++)Bb(e,d[g]);else Bb(a,f);return d=vb(f,"script"),d.length>0&&Ab(d,!i&&vb(a,"script")),d=h=e=null,f},buildFragment:function(a,b,c,d){for(var e,f,g,h,i,j,k,m=a.length,o=eb(b),p=[],q=0;m>q;q++)if(f=a[q],f||0===f)if("object"===n.type(f))n.merge(p,f.nodeType?[f]:f);else if(mb.test(f)){h=h||o.appendChild(b.createElement("div")),i=(kb.exec(f)||["",""])[1].toLowerCase(),k=sb[i]||sb._default,h.innerHTML=k[1]+f.replace(jb,"<$1>")+k[2],e=k[0];while(e--)h=h.lastChild;if(!l.leadingWhitespace&&ib.test(f)&&p.push(b.createTextNode(ib.exec(f)[0])),!l.tbody){f="table"!==i||lb.test(f)?""!==k[1]||lb.test(f)?0:h:h.firstChild,e=f&&f.childNodes.length;while(e--)n.nodeName(j=f.childNodes[e],"tbody")&&!j.childNodes.length&&f.removeChild(j)}n.merge(p,h.childNodes),h.textContent="";while(h.firstChild)h.removeChild(h.firstChild);h=o.lastChild}else p.push(b.createTextNode(f));h&&o.removeChild(h),l.appendChecked||n.grep(vb(p,"input"),wb),q=0;while(f=p[q++])if((!d||-1===n.inArray(f,d))&&(g=n.contains(f.ownerDocument,f),h=vb(o.appendChild(f),"script"),g&&Ab(h),c)){e=0;while(f=h[e++])pb.test(f.type||"")&&c.push(f)}return h=null,o},cleanData:function(a,b){for(var d,e,f,g,h=0,i=n.expando,j=n.cache,k=l.deleteExpando,m=n.event.special;null!=(d=a[h]);h++)if((b||n.acceptData(d))&&(f=d[i],g=f&&j[f])){if(g.events)for(e in g.events)m[e]?n.event.remove(d,e):n.removeEvent(d,e,g.handle);j[f]&&(delete j[f],k?delete d[i]:typeof d.removeAttribute!==L?d.removeAttribute(i):d[i]=null,c.push(f))}}}),n.fn.extend({text:function(a){return W(this,function(a){return void 
0===a?n.text(this):this.empty().append((this[0]&&this[0].ownerDocument||z).createTextNode(a))},null,a,arguments.length)},append:function(){return this.domManip(arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=xb(this,a);b.appendChild(a)}})},prepend:function(){return this.domManip(arguments,function(a){if(1===this.nodeType||11===this.nodeType||9===this.nodeType){var b=xb(this,a);b.insertBefore(a,b.firstChild)}})},before:function(){return this.domManip(arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this)})},after:function(){return this.domManip(arguments,function(a){this.parentNode&&this.parentNode.insertBefore(a,this.nextSibling)})},remove:function(a,b){for(var c,d=a?n.filter(a,this):this,e=0;null!=(c=d[e]);e++)b||1!==c.nodeType||n.cleanData(vb(c)),c.parentNode&&(b&&n.contains(c.ownerDocument,c)&&Ab(vb(c,"script")),c.parentNode.removeChild(c));return this},empty:function(){for(var a,b=0;null!=(a=this[b]);b++){1===a.nodeType&&n.cleanData(vb(a,!1));while(a.firstChild)a.removeChild(a.firstChild);a.options&&n.nodeName(a,"select")&&(a.options.length=0)}return this},clone:function(a,b){return a=null==a?!1:a,b=null==b?a:b,this.map(function(){return n.clone(this,a,b)})},html:function(a){return W(this,function(a){var b=this[0]||{},c=0,d=this.length;if(void 0===a)return 1===b.nodeType?b.innerHTML.replace(gb,""):void 0;if(!("string"!=typeof a||nb.test(a)||!l.htmlSerialize&&hb.test(a)||!l.leadingWhitespace&&ib.test(a)||sb[(kb.exec(a)||["",""])[1].toLowerCase()])){a=a.replace(jb,"<$1>");try{for(;d>c;c++)b=this[c]||{},1===b.nodeType&&(n.cleanData(vb(b,!1)),b.innerHTML=a);b=0}catch(e){}}b&&this.empty().append(a)},null,a,arguments.length)},replaceWith:function(){var a=arguments[0];return this.domManip(arguments,function(b){a=this.parentNode,n.cleanData(vb(this)),a&&a.replaceChild(b,this)}),a&&(a.length||a.nodeType)?this:this.remove()},detach:function(a){return 
this.remove(a,!0)},domManip:function(a,b){a=e.apply([],a);var c,d,f,g,h,i,j=0,k=this.length,m=this,o=k-1,p=a[0],q=n.isFunction(p);if(q||k>1&&"string"==typeof p&&!l.checkClone&&ob.test(p))return this.each(function(c){var d=m.eq(c);q&&(a[0]=p.call(this,c,d.html())),d.domManip(a,b)});if(k&&(i=n.buildFragment(a,this[0].ownerDocument,!1,this),c=i.firstChild,1===i.childNodes.length&&(i=c),c)){for(g=n.map(vb(i,"script"),yb),f=g.length;k>j;j++)d=i,j!==o&&(d=n.clone(d,!0,!0),f&&n.merge(g,vb(d,"script"))),b.call(this[j],d,j);if(f)for(h=g[g.length-1].ownerDocument,n.map(g,zb),j=0;f>j;j++)d=g[j],pb.test(d.type||"")&&!n._data(d,"globalEval")&&n.contains(h,d)&&(d.src?n._evalUrl&&n._evalUrl(d.src):n.globalEval((d.text||d.textContent||d.innerHTML||"").replace(rb,"")));i=c=null}return this}}),n.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(a,b){n.fn[a]=function(a){for(var c,d=0,e=[],g=n(a),h=g.length-1;h>=d;d++)c=d===h?this:this.clone(!0),n(g[d])[b](c),f.apply(e,c.get());return this.pushStack(e)}});var Db,Eb={};function Fb(b,c){var d=n(c.createElement(b)).appendTo(c.body),e=a.getDefaultComputedStyle?a.getDefaultComputedStyle(d[0]).display:n.css(d[0],"display");return d.detach(),e}function Gb(a){var b=z,c=Eb[a];return c||(c=Fb(a,b),"none"!==c&&c||(Db=(Db||n("