comparison report_clonality/RScript.r @ 25:94765af0db1f draft

Uploaded
author davidvanzessen
date Thu, 09 Feb 2017 07:20:09 -0500
parents d5d203d38c8a
children 28fbbdfd7a87
comparison
equal deleted inserted replaced
24:d5d203d38c8a 25:94765af0db1f
156 156
157 157
158 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive 158 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive
159 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) 159 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T)
160 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) 160 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T)
161 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) 161 write.table(UNPROD, "allUnproductive.txt", sep="\t",quote=F,row.names=F,col.names=T)
162 162
163 print("SAMPLE TABLE:") 163 print("SAMPLE TABLE:")
164 print(table(PRODF$Sample)) 164 print(table(PRODF$Sample))
165 165
166 #write the samples to a file 166 #write the samples to a file
695 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) 695 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)
696 } 696 }
697 } 697 }
698 698
699 bak = PRODF 699 bak = PRODF
700 bakun = UNPROD
700 701
701 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") 702 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")
702 if(all(imgtcolumns %in% colnames(inputdata))) 703 if(all(imgtcolumns %in% colnames(inputdata)))
703 { 704 {
704 print("found IMGT columns, running junction analysis") 705 print("found IMGT columns, running junction analysis")
722 } 723 }
723 724
724 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,] 725 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,]
725 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,] 726 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,]
726 727
728 UNPROD.with.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) > 2,]
729 UNPROD.no.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) < 4,]
730
727 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } 731 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) }
728 732
733 print("---- table prod.with.d cdr3.length ----")
734 print(table(PRODF.with.D$CDR3.Length, useNA="ifany"))
735 print(median(PRODF.with.D$CDR3.Length, na.rm=T))
736
729 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, 737 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N,
730 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 738 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
731 P1=mean(.SD$P3V.nt.nb, na.rm=T), 739 P1=mean(.SD$P3V.nt.nb, na.rm=T),
732 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), 740 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
733 P2=mean(.SD$P5D.nt.nb, na.rm=T), 741 P2=mean(.SD$P5D.nt.nb, na.rm=T),
738 P4=mean(.SD$P5J.nt.nb, na.rm=T), 746 P4=mean(.SD$P5J.nt.nb, na.rm=T),
739 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 747 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
740 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 748 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
741 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 749 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
742 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 750 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
743 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), 751 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))),
744 by=c("Sample")]) 752 by=c("Sample")])
745 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 753 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
746 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 754 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
747 755
748 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, 756 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N,
757 P4=num_median(.SD$P5J.nt.nb, na.rm=T), 765 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
758 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 766 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
759 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 767 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
760 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 768 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
761 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 769 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
762 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), 770 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))),
763 by=c("Sample")]) 771 by=c("Sample")])
764 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 772 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
765 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 773 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
766 774
767 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, 775 print("---- table unprod.with.d cdr3.length ----")
776 print(table(UNPROD.with.D$CDR3.Length, useNA="ifany"))
777 print(median(UNPROD.with.D$CDR3.Length, na.rm=T))
778
779 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N,
768 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 780 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
769 P1=mean(.SD$P3V.nt.nb, na.rm=T), 781 P1=mean(.SD$P3V.nt.nb, na.rm=T),
770 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), 782 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
771 P2=mean(.SD$P5D.nt.nb, na.rm=T), 783 P2=mean(.SD$P5D.nt.nb, na.rm=T),
772 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), 784 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
776 P4=mean(.SD$P5J.nt.nb, na.rm=T), 788 P4=mean(.SD$P5J.nt.nb, na.rm=T),
777 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 789 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
778 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 790 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
779 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 791 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
780 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 792 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
781 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), 793 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
782 by=c("Sample")]) 794 by=c("Sample")])
783 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 795 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
784 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 796 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
785 797
786 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, 798 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N,
787 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 799 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
788 P1=num_median(.SD$P3V.nt.nb, na.rm=T), 800 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
789 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)), 801 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb"), with=F], na.rm=T)),
790 P2=num_median(.SD$P5D.nt.nb, na.rm=T), 802 P2=num_median(.SD$P5D.nt.nb, na.rm=T),
791 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), 803 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T),
795 P4=num_median(.SD$P5J.nt.nb, na.rm=T), 807 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
796 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 808 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
797 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 809 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
798 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 810 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
799 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 811 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
800 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), 812 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
801 by=c("Sample")]) 813 by=c("Sample")])
802 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 814 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
803 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 815 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
804 816
805 #---------------- again for no-D 817 #---------------- again for no-D
811 P2=mean(.SD$P5J.nt.nb, na.rm=T), 823 P2=mean(.SD$P5J.nt.nb, na.rm=T),
812 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 824 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
813 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 825 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
814 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), 826 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
815 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 827 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
816 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), 828 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
817 by=c("Sample")]) 829 by=c("Sample")])
818 print(newData)
819 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
820 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 831 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
821 832
822 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, 833 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N,
823 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 834 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
826 P2=num_median(.SD$P5J.nt.nb, na.rm=T), 837 P2=num_median(.SD$P5J.nt.nb, na.rm=T),
827 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 838 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
828 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 839 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
829 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), 840 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
830 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 841 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
831 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), 842 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
832 by=c("Sample")]) 843 by=c("Sample")])
833 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 844 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
834 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 845 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
835 846
836 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, 847 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N,
837 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 848 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
838 P1=mean(.SD$P3V.nt.nb, na.rm=T), 849 P1=mean(.SD$P3V.nt.nb, na.rm=T),
839 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), 850 N1=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
840 P2=mean(.SD$P5J.nt.nb, na.rm=T), 851 P2=mean(.SD$P5J.nt.nb, na.rm=T),
841 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 852 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
842 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 853 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
843 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), 854 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
844 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 855 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
845 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), 856 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
846 by=c("Sample")]) 857 by=c("Sample")])
847 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 858 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
848 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 859 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
849 860
850 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, 861 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N,
851 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 862 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
852 P1=num_median(.SD$P3V.nt.nb, na.rm=T), 863 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
853 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), 864 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
854 P2=num_median(.SD$P5J.nt.nb, na.rm=T), 865 P2=num_median(.SD$P5J.nt.nb, na.rm=T),
855 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 866 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
856 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 867 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
857 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)), 868 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
858 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 869 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
859 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), 870 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
860 by=c("Sample")]) 871 by=c("Sample")])
861 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 872 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
862 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 873 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
863 } 874 }
864 875
865 PRODF = bak 876 PRODF = bak
877 UNPROD = bakun
866 878
867 879
868 # ---------------------- D reading frame ---------------------- 880 # ---------------------- D reading frame ----------------------
869 881
870 D.REGION.reading.frame = PRODF[,c("Sample", "D.REGION.reading.frame")] 882 D.REGION.reading.frame = PRODF[,c("Sample", "D.REGION.reading.frame")]
937 # ---------------------- AA median CDR3 length ---------------------- 949 # ---------------------- AA median CDR3 length ----------------------
938 950
939 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")]) 951 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")])
940 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 952 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
941 953
954
955 #generate the "Sequences that are present in more than one replicate" dataset
956 clonaltype.in.replicates = inputdata
957 clonaltype = unlist(strsplit(clonaltype, ","))
958 clonaltype = clonaltype[-which(clonaltype == "Sample")]
959
960 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[clonaltype], sep = ":"))
961 clonaltype.in.replicates = clonaltype.in.replicates[,c("clonaltype","Replicate", "ID", "Sequence", "Sample")]
962
963 print(head(clonaltype.in.replicates))
964
965 clonaltype.counts = data.frame(table(clonaltype.in.replicates$clonaltype))
966 names(clonaltype.counts) = c("clonaltype", "coincidence")
967
968 print(head(clonaltype.counts))
969
970 clonaltype.counts = clonaltype.counts[clonaltype.counts$coincidence > 1,]
971
972 head(clonaltype.counts)
973
974 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$clonaltype %in% clonaltype.counts$clonaltype,]
975 clonaltype.in.replicates = merge(clonaltype.in.replicates, clonaltype.counts, by="clonaltype")
976 print(head(clonaltype.in.replicates))
977 clonaltype.in.replicates = clonaltype.in.replicates[order(clonaltype.in.replicates$clonaltype),c("coincidence","clonaltype", "Sample", "Replicate", "ID", "Sequence")]
978
979 write.table(clonaltype.in.replicates, "clonaltypes_replicates.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T)
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004