comparison report_clonality/RScript.r @ 38:b6936fb52ab9 draft

Uploaded
author davidvanzessen
date Wed, 19 Apr 2017 10:21:01 -0400
parents f37e072affc0
children 106275b54470
comparison
equal deleted inserted replaced
37:f37e072affc0 38:b6936fb52ab9
766 P4=mean(.SD$P5J.nt.nb, na.rm=T), 766 P4=mean(.SD$P5J.nt.nb, na.rm=T),
767 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 767 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
768 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 768 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
769 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 769 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
770 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 770 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
771 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), 771 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
772 by=c("Sample")]) 772 by=c("Sample")])
773 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 773 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
774 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 774 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
775 775
776 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, 776 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N,
785 P4=num_median(.SD$P5J.nt.nb, na.rm=T), 785 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
786 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 786 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
787 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 787 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
788 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 788 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
789 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 789 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
790 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), 790 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
791 by=c("Sample")]) 791 by=c("Sample")])
792 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 792 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
793 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 793 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
794 794
795 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, 795 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N,
804 P4=mean(.SD$P5J.nt.nb, na.rm=T), 804 P4=mean(.SD$P5J.nt.nb, na.rm=T),
805 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 805 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
806 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 806 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
807 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 807 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
808 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 808 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
809 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), 809 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
810 by=c("Sample")]) 810 by=c("Sample")])
811 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 811 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
812 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 812 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
813 813
814 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, 814 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N,
823 P4=num_median(.SD$P5J.nt.nb, na.rm=T), 823 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
824 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 824 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
825 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 825 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
826 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 826 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
827 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 827 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
828 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), 828 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
829 by=c("Sample")]) 829 by=c("Sample")])
830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
831 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 831 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
832 832
833 #---------------- again for no-D 833 #---------------- again for no-D
839 P2=mean(.SD$P5J.nt.nb, na.rm=T), 839 P2=mean(.SD$P5J.nt.nb, na.rm=T),
840 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 840 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
841 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 841 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
842 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), 842 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T),
843 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 843 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
844 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), 844 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
845 by=c("Sample")]) 845 by=c("Sample")])
846 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 846 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
847 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 847 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
848 848
849 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, 849 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N,
853 P2=num_median(.SD$P5J.nt.nb, na.rm=T), 853 P2=num_median(.SD$P5J.nt.nb, na.rm=T),
854 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 854 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
855 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 855 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
856 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), 856 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T),
857 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 857 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
858 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), 858 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
859 by=c("Sample")]) 859 by=c("Sample")])
860 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 860 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
861 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 861 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
862
863 print(paste("mean N:", mean(UNPROD.no.D$N.REGION.nt.nb, na.rm=T)))
864 862
865 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, 863 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N,
866 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 864 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
867 P1=mean(.SD$P3V.nt.nb, na.rm=T), 865 P1=mean(.SD$P3V.nt.nb, na.rm=T),
868 N1=mean(.SD$N.REGION.nt.nb, na.rm=T), 866 N1=mean(.SD$N.REGION.nt.nb, na.rm=T),
869 P2=mean(.SD$P5J.nt.nb, na.rm=T), 867 P2=mean(.SD$P5J.nt.nb, na.rm=T),
870 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 868 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
871 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 869 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
872 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), 870 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T),
873 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 871 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
874 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), 872 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
875 by=c("Sample")]) 873 by=c("Sample")])
876 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 874 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
877 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 875 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
878 876
879 print(paste("median N:", num_median(UNPROD.no.D$N.REGION.nt.nb, na.rm=T)))
880 877
881 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, 878 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N,
882 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), 879 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
883 P1=num_median(.SD$P3V.nt.nb, na.rm=T), 880 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
884 N1=num_median(.SD$N.REGION.nt.nb, na.rm=T), 881 N1=num_median(.SD$N.REGION.nt.nb, na.rm=T),
885 P2=num_median(.SD$P5J.nt.nb, na.rm=T), 882 P2=num_median(.SD$P5J.nt.nb, na.rm=T),
886 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 883 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
887 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 884 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
888 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), 885 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T),
889 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 886 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
890 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), 887 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
891 by=c("Sample")]) 888 by=c("Sample")])
892 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 889 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
893 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 890 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
894 } 891 }
895 892
924 png("DReadingFrame.png") 921 png("DReadingFrame.png")
925 D.REGION.reading.frame 922 D.REGION.reading.frame
926 dev.off() 923 dev.off()
927 924
928 ggsave("DReadingFrame.pdf", D.REGION.reading.frame) 925 ggsave("DReadingFrame.pdf", D.REGION.reading.frame)
929
930 926
931 # ---------------------- AA composition in CDR3 ---------------------- 927 # ---------------------- AA composition in CDR3 ----------------------
932 928
933 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] 929 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")]
934 930
975 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) 971 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
976 972
977 973
978 #generate the "Sequences that are present in more than one replicate" dataset 974 #generate the "Sequences that are present in more than one replicate" dataset
979 clonaltype.in.replicates = inputdata 975 clonaltype.in.replicates = inputdata
976 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$Functionality %in% c("productive (see comment)","productive"),]
980 clonaltype.in.replicates = na.omit(clonaltype.in.replicates) 977 clonaltype.in.replicates = na.omit(clonaltype.in.replicates)
981 clonaltype = unlist(strsplit(clonaltype, ",")) 978 clonaltype = unlist(strsplit(clonaltype, ","))
982 979
983 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[c(clonaltype, "Replicate")], sep = ":")) 980 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[c(clonaltype, "Replicate")], sep = ":"))
984 981
1021 1018
1022 1019
1023 1020
1024 1021
1025 1022
1026
1027