Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 38:b6936fb52ab9 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 19 Apr 2017 10:21:01 -0400 |
parents | f37e072affc0 |
children | 106275b54470 |
comparison legend: equal | deleted | inserted | replaced
37:f37e072affc0 | 38:b6936fb52ab9 |
---|---|
766 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 766 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
767 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 767 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
768 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 768 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
769 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 769 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
770 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 770 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
771 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), | 771 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
772 by=c("Sample")]) | 772 by=c("Sample")]) |
773 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 773 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
774 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 774 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
775 | 775 |
776 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, | 776 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, |
785 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 785 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
786 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 786 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
787 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 787 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
788 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 788 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
789 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 789 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
790 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))), | 790 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
791 by=c("Sample")]) | 791 by=c("Sample")]) |
792 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 792 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
793 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 793 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
794 | 794 |
795 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, | 795 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, |
804 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 804 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
805 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 805 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
806 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 806 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
807 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 807 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
808 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 808 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
809 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 809 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
810 by=c("Sample")]) | 810 by=c("Sample")]) |
811 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 811 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
812 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 812 write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
813 | 813 |
814 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, | 814 newData = data.frame(data.table(UNPROD.with.D)[,list(unique=.N, |
823 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 823 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
824 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 824 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
825 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 825 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
826 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 826 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
827 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 827 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
828 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 828 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
829 by=c("Sample")]) | 829 by=c("Sample")]) |
830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 830 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
831 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 831 write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
832 | 832 |
833 #---------------- again for no-D | 833 #---------------- again for no-D |
839 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 839 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
840 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 840 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
841 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 841 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
842 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), | 842 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), |
843 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 843 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
844 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 844 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
845 by=c("Sample")]) | 845 by=c("Sample")]) |
846 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 846 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
847 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 847 write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
848 | 848 |
849 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, | 849 newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, |
853 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 853 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
854 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 854 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
855 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 855 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
856 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), | 856 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), |
857 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 857 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
858 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 858 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
859 by=c("Sample")]) | 859 by=c("Sample")]) |
860 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 860 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
861 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 861 write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
862 | |
863 print(paste("mean N:", mean(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) | |
864 | 862 |
865 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, | 863 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
866 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 864 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
867 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 865 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
868 N1=mean(.SD$N.REGION.nt.nb, na.rm=T), | 866 N1=mean(.SD$N.REGION.nt.nb, na.rm=T), |
869 P2=mean(.SD$P5J.nt.nb, na.rm=T), | 867 P2=mean(.SD$P5J.nt.nb, na.rm=T), |
870 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 868 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
871 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 869 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
872 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), | 870 Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T), |
873 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 871 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
874 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 872 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
875 by=c("Sample")]) | 873 by=c("Sample")]) |
876 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 874 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
877 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 875 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
878 | 876 |
879 print(paste("median N:", num_median(UNPROD.no.D$N.REGION.nt.nb, na.rm=T))) | |
880 | 877 |
881 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, | 878 newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, |
882 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 879 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
883 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 880 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
884 N1=num_median(.SD$N.REGION.nt.nb, na.rm=T), | 881 N1=num_median(.SD$N.REGION.nt.nb, na.rm=T), |
885 P2=num_median(.SD$P5J.nt.nb, na.rm=T), | 882 P2=num_median(.SD$P5J.nt.nb, na.rm=T), |
886 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 883 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
887 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 884 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
888 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), | 885 Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T), |
889 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 886 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
890 Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))), | 887 Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))), |
891 by=c("Sample")]) | 888 by=c("Sample")]) |
892 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 889 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
893 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 890 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
894 } | 891 } |
895 | 892 |
924 png("DReadingFrame.png") | 921 png("DReadingFrame.png") |
925 D.REGION.reading.frame | 922 D.REGION.reading.frame |
926 dev.off() | 923 dev.off() |
927 | 924 |
928 ggsave("DReadingFrame.pdf", D.REGION.reading.frame) | 925 ggsave("DReadingFrame.pdf", D.REGION.reading.frame) |
929 | |
930 | 926 |
931 # ---------------------- AA composition in CDR3 ---------------------- | 927 # ---------------------- AA composition in CDR3 ---------------------- |
932 | 928 |
933 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] | 929 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] |
934 | 930 |
975 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) | 971 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F) |
976 | 972 |
977 | 973 |
978 #generate the "Sequences that are present in more than one replicate" dataset | 974 #generate the "Sequences that are present in more than one replicate" dataset |
979 clonaltype.in.replicates = inputdata | 975 clonaltype.in.replicates = inputdata |
976 clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$Functionality %in% c("productive (see comment)","productive"),] | |
980 clonaltype.in.replicates = na.omit(clonaltype.in.replicates) | 977 clonaltype.in.replicates = na.omit(clonaltype.in.replicates) |
981 clonaltype = unlist(strsplit(clonaltype, ",")) | 978 clonaltype = unlist(strsplit(clonaltype, ",")) |
982 | 979 |
983 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[c(clonaltype, "Replicate")], sep = ":")) | 980 clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[c(clonaltype, "Replicate")], sep = ":")) |
984 | 981 |
1021 | 1018 |
1022 | 1019 |
1023 | 1020 |
1024 | 1021 |
1025 | 1022 |
1026 | |
1027 |