comparison: report_clonality/RScript.r @ changeset 26:28fbbdfd7a87 (draft)

commit message: Uploaded
author:         davidvanzessen
date:           Mon, 13 Feb 2017 09:08:46 -0500
parents:        94765af0db1f
children:       1f83e14f173b
comparing 25:94765af0db1f (old) with 26:28fbbdfd7a87 (new)
@@ -612,90 +612,90 @@
 
 coincidence.table = data.frame(table(res$type))
 colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq")
 write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
 }
-} else if(clonality_method == "old") {
+}
 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])
 
 #write files for every coincidence group of >1
 samples = unique(clonalFreq$Sample)
 for(sample in samples){
 clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,]
 if(max(clonalFreqSample$Type) > 1){
 for(i in 2:max(clonalFreqSample$Type)){
 clonalFreqSampleType = clonalFreqSample[clonalFreqSample$Type == i,]
 clonalityFrame.sub = clonalityFrame[clonalityFrame$clonaltype %in% clonalFreqSampleType$clonaltype,]
 clonalityFrame.sub = clonalityFrame.sub[order(clonalityFrame.sub$clonaltype),]
 write.table(clonalityFrame.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
 }
 }
 }
 
 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")
 
 ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15')
 tcct = textConnection(ct)
 CT = read.table(tcct, sep="\t", header=TRUE)
 close(tcct)
 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T)
 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight
 
 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")])
 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")])
 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads)
 ReplicateReads$Reads = as.numeric(ReplicateReads$Reads)
 ReplicateReads$squared = as.numeric(ReplicateReads$Reads * ReplicateReads$Reads)
 
 ReplicatePrint <- function(dat){
-write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F)
 }
 
 ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
 lapply(ReplicateSplit, FUN=ReplicatePrint)
 
 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")])
 clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T)
 
 ReplicateSumPrint <- function(dat){
-write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F)
 }
 
 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint)
 
 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])
 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)
 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow
 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)
 clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)
 
 ClonalityScorePrint <- function(dat){
-write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F)
 }
 
 clonalityScore = clonalFreqCount[c("Sample", "Result")]
 clonalityScore = unique(clonalityScore)
 
 clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])
 lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)
 
 clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]
 
 
 
 ClonalityOverviewPrint <- function(dat){
 dat = dat[order(dat[,2]),]
-write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".txt", sep=""), sep="\t",quote=F,na="-",row.names=F,col.names=F)
 }
 
 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)
 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)
-}
+
 }
 
 bak = PRODF
 bakun = UNPROD
 
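The hunk above assembles the legacy ("old") clonality score step by step across several merged data frames. As a reading aid only, and not part of the changeset, the sketch below computes the same quantity for one hypothetical sample; the toy input is made up, and the use of choose() is an observation that the hard-coded weights 1, 3, 6, 10, 15 for coincidence types 2-6 equal choose(Type, 2) (the script itself assigns no weight above type 6, so such types drop out of its numerator).

    # Reading aid, not part of RScript.r: the score built up above, for one toy sample.
    # Assumes clonalityFrame holds one row per (Replicate, clonaltype) occurrence.
    library(data.table)
    toy = data.table(Replicate  = c(1, 1, 1, 2, 2, 3),
                     clonaltype = c("A", "B", "C", "A", "B", "A"))

    # Type = number of replicates a clonaltype occurs in; weights 1,3,6,10,15 match choose(Type, 2)
    freq = toy[, list(Type = .N), by = "clonaltype"]
    numerator = sum(choose(freq$Type, 2))            # weighted coincidences (Type 1 contributes 0)

    # reads per replicate; the denominator counts unordered cross-replicate read pairs
    reads = toy[, list(Reads = .N), by = "Replicate"]
    reads_sum = sum(reads$Reads)
    denominator = (reads_sum^2 - sum(reads$Reads^2)) / 2

    (numerator + 1) / (denominator + 1)              # 5 / 12 for this toy input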
@@ -722,19 +722,17 @@
 }
 }
 
 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,]
 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,]
+write.table(PRODF.no.D, "productive_no_D.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T)
 
 UNPROD.with.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) > 2,]
 UNPROD.no.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) < 4,]
+write.table(UNPROD.no.D, "unproductive_no_D.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T)
 
 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) }
-
-print("---- table prod.with.d cdr3.length ----")
-print(table(PRODF.with.D$CDR3.Length, useNA="ifany"))
-print(median(PRODF.with.D$CDR3.Length, na.rm=T))
 
 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N,
 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
 P1=mean(.SD$P3V.nt.nb, na.rm=T),
 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
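The with-D / no-D split in the hunk above keys on the string length of Top.D.Gene with keepNA=F, and the new write.table() calls export the no-D subsets. As an illustration only, with made-up gene labels: NA counts as the 2-character string "NA", so it falls into the ".no.D" subsets, while a 3-character label would satisfy both filters.

    # Illustration only: behaviour of the nchar(..., keepNA=F) filters above
    d = c("IGHD3-10*01", "XXX", NA, "")
    nchar(d, keepNA=F)        # 11  3  2  0
    nchar(d, keepNA=F) > 2    #  TRUE  TRUE FALSE FALSE   -> kept in the ".with.D" subsets
    nchar(d, keepNA=F) < 4    # FALSE  TRUE  TRUE  TRUE   -> kept in the ".no.D" subsets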
@@ -770,14 +768,10 @@
 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))),
 by=c("Sample")])
 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
 
-print("---- table unprod.with.d cdr3.length ----")
-print(table(UNPROD.with.D$CDR3.Length, useNA="ifany"))
-print(median(UNPROD.with.D$CDR3.Length, na.rm=T))
-
 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N,
 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
 P1=mean(.SD$P3V.nt.nb, na.rm=T),
 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
 P2=mean(.SD$P5D.nt.nb, na.rm=T),
@@ -817,57 +811,61 @@
 #---------------- again for no-D
 
 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N,
 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
 P1=mean(.SD$P3V.nt.nb, na.rm=T),
-N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
+N1=mean(.SD$N.REGION.nt.nb, na.rm=T),
 P2=mean(.SD$P5J.nt.nb, na.rm=T),
 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
-Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
+Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T),
 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
 by=c("Sample")])
 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
 
 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N,
 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
-N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
+N1=median(.SD$N.REGION.nt.nb, na.rm=T),
 P2=num_median(.SD$P5J.nt.nb, na.rm=T),
 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
-Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
+Total.N=median(.SD$N.REGION.nt.nb, na.rm=T),
 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
 by=c("Sample")])
 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
 
+print(paste("mean N:", mean(UNPROD.no.D$N.REGION.nt.nb, na.rm=T)))
+
 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N,
 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
 P1=mean(.SD$P3V.nt.nb, na.rm=T),
-N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
+N1=mean(.SD$N.REGION.nt.nb, na.rm=T),
 P2=mean(.SD$P5J.nt.nb, na.rm=T),
 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
-Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
+Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T),
 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
 by=c("Sample")])
 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
 
+print(paste("median N:", num_median(UNPROD.no.D$N.REGION.nt.nb, na.rm=T)))
+
 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N,
 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
-N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
+N1=median(.SD$N.REGION.nt.nb, na.rm=T),
 P2=num_median(.SD$P5J.nt.nb, na.rm=T),
 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
-Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
+Total.N=median(.SD$N.REGION.nt.nb, na.rm=T),
 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
 by=c("Sample")])
 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
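The changed lines in the hunk above replace mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)) with mean(.SD$N.REGION.nt.nb, na.rm=T), and likewise num_median(rowSums(...)) with median(...); the added print() lines report exactly that N statistic for UNPROD.no.D. The practical difference, shown below with a toy base-R data frame (illustrative only, not part of the changeset): over a single column, rowSums(..., na.rm=TRUE) turns an NA row into 0, which then enters the mean or median, whereas the direct column call drops the NA entirely.

    # Illustration only: single-column rowSums vs. the direct column, with an NA present
    x = data.frame(N.REGION.nt.nb = c(4, NA, 2))
    mean(rowSums(x[, "N.REGION.nt.nb", drop=F], na.rm=T))   # (4 + 0 + 2) / 3 = 2
    mean(x$N.REGION.nt.nb, na.rm=T)                         # (4 + 2) / 2     = 3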
@@ -958,24 +956,17 @@
 clonaltype = clonaltype[-which(clonaltype == "Sample")]
 
 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[clonaltype], sep = ":"))
 clonaltype.in.replicates = clonaltype.in.replicates[,c("clonaltype","Replicate", "ID", "Sequence", "Sample")]
 
-print(head(clonaltype.in.replicates))
-
 clonaltype.counts = data.frame(table(clonaltype.in.replicates$clonaltype))
 names(clonaltype.counts) = c("clonaltype", "coincidence")
 
-print(head(clonaltype.counts))
-
 clonaltype.counts = clonaltype.counts[clonaltype.counts$coincidence > 1,]
-
-head(clonaltype.counts)
 
 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$clonaltype %in% clonaltype.counts$clonaltype,]
 clonaltype.in.replicates = merge(clonaltype.in.replicates, clonaltype.counts, by="clonaltype")
-print(head(clonaltype.in.replicates))
 clonaltype.in.replicates = clonaltype.in.replicates[order(clonaltype.in.replicates$clonaltype),c("coincidence","clonaltype", "Sample", "Replicate", "ID", "Sequence")]
 
 write.table(clonaltype.in.replicates, "clonaltypes_replicates.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T)
 
 
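For reference only (toy values; the real key columns in clonaltype are defined earlier in the script and are not shown in this hunk), the coincidence filter above works roughly as follows: a composite clonaltype key is pasted together per row, counted across the whole table, and only keys occurring more than once are kept and written out.

    # Reading aid, not part of RScript.r: toy version of the coincidence filter above
    toy = data.frame(Top.V.Gene = c("V1", "V1", "V2"),
                     CDR3.Seq   = c("CARW", "CARW", "CGGW"),
                     Replicate  = c(1, 2, 1),
                     stringsAsFactors = FALSE)
    clonaltype = c("Top.V.Gene", "CDR3.Seq")                      # stand-in key columns
    toy$clonaltype = do.call(paste, c(toy[clonaltype], sep=":"))  # "V1:CARW", "V1:CARW", "V2:CGGW"

    counts = data.frame(table(toy$clonaltype))
    names(counts) = c("clonaltype", "coincidence")
    counts = counts[counts$coincidence > 1,]                      # keep keys seen more than once
    toy[toy$clonaltype %in% counts$clonaltype,]                   # rows sharing a clonaltype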