diff report_clonality/RScript.r @ 3:beaa487ecf43 draft

Uploaded
author davidvanzessen
date Wed, 07 Dec 2016 08:36:23 -0500
parents 90a05ff900db
children
line wrap: on
line diff
--- a/report_clonality/RScript.r	Mon Sep 05 11:15:22 2016 -0400
+++ b/report_clonality/RScript.r	Wed Dec 07 08:36:23 2016 -0500
@@ -10,7 +10,7 @@
 library(ggplot2)
 if (!("plyr" %in% rownames(installed.packages()))) {
   install.packages("plyr", repos="http://cran.xl-mirror.nl/") 
-}			
+}
 library(plyr)
 
 if (!("data.table" %in% rownames(installed.packages()))) {
@@ -717,7 +717,8 @@
                                                DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
                                                Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
                                                Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
-                                               Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))),
+                                               Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
+                                               Median.CDR3.l=median(.SD$CDR3.Length.DNA)),
                                          by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
   write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
@@ -735,7 +736,8 @@
                                                DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
 											   Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
 											   Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
-											   Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))),
+											   Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
+											   Median.CDR3.l=median(.SD$CDR3.Length.DNA)),
                                          by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
   write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
@@ -753,7 +755,8 @@
                                                 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
                                                 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
                                                 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
-                                                Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))),
+                                                Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
+                                                Median.CDR3.l=median(.SD$CDR3.Length.DNA)),
                                           by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
   write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
@@ -771,7 +774,8 @@
                                                 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
                                                 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
                                                 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
-                                                Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T))),
+                                                Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
+                                                Median.CDR3.l=median(.SD$CDR3.Length.DNA)),
 															by=c("Sample")])
 															
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)