diff report_clonality/RScript.r @ 13:d3ebaa2d2fe0 draft

Uploaded
author davidvanzessen
date Tue, 20 Dec 2016 06:02:44 -0500
parents efa1f5a17b6e
children 15961ca8d9ce
line wrap: on
line diff
--- a/report_clonality/RScript.r	Mon Dec 19 10:03:41 2016 -0500
+++ b/report_clonality/RScript.r	Tue Dec 20 06:02:44 2016 -0500
@@ -47,7 +47,7 @@
 
 print("Report Clonality - Data preperation")
 
-inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="")
+inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="", stringsAsFactors=F)
 
 print(paste("nrows: ", nrow(inputdata)))
 
@@ -92,6 +92,12 @@
   }
 }
 
+# is.numeric() reflects the whole column's type: coerce column-wise, unparsable -> 0
+if(!is.numeric(UNPROD$CDR3.Length)){
+    cdr3.num = suppressWarnings(as.numeric(as.character(UNPROD$CDR3.Length)))
+    UNPROD$CDR3.Length = replace(cdr3.num, is.na(cdr3.num), 0)
+}
+
 prod.sample.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample")])
 prod.rep.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample", "Replicate")])
 
@@ -591,7 +597,7 @@
       res[is.na(res)] = 0      
       infer.result = infer.clonality(as.matrix(res[,2:ncol(res)]))
       
-      print(infer.result)
+      #print(infer.result)
       
       write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F)
       
@@ -734,7 +740,7 @@
                                                Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
                                                Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
                                                Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
-                                               Median.CDR3.l=median(.SD$CDR3.Length)),
+                                               Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
                                          by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
   write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
@@ -753,7 +759,7 @@
 											   Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
 											   Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
 											   Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
-											   Median.CDR3.l=median(.SD$CDR3.Length)),
+											   Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
                                          by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
   write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
@@ -772,7 +778,7 @@
                                                 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
                                                 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
                                                 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
-                                                Median.CDR3.l=median(.SD$CDR3.Length)),
+                                                Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
                                           by=c("Sample")])
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
   write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
@@ -791,7 +797,7 @@
                                                 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
                                                 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
                                                 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
-                                                Median.CDR3.l=median(.SD$CDR3.Length)),
+                                                Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
 															by=c("Sample")])
 															
   newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)