comparison report_clonality/RScript.r @ 13:d3ebaa2d2fe0 draft

Uploaded
author davidvanzessen
date Tue, 20 Dec 2016 06:02:44 -0500
parents efa1f5a17b6e
children 15961ca8d9ce
comparison
equal deleted inserted replaced
12:5f5d29c5e711 13:d3ebaa2d2fe0
45 45
46 # ---------------------- Data preperation ---------------------- 46 # ---------------------- Data preperation ----------------------
47 47
48 print("Report Clonality - Data preperation") 48 print("Report Clonality - Data preperation")
49 49
50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="", stringsAsFactors=F)
51 51
52 print(paste("nrows: ", nrow(inputdata))) 52 print(paste("nrows: ", nrow(inputdata)))
53 53
54 setwd(outdir) 54 setwd(outdir)
55 55
88 UNPROD = inputdata[inputdata$Functionality %in% c("unproductive (see comment)","unproductive"), ] 88 UNPROD = inputdata[inputdata$Functionality %in% c("unproductive (see comment)","unproductive"), ]
89 } else { 89 } else {
90 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] 90 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ]
91 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ] 91 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ]
92 } 92 }
93 }
94
95 for(i in 1:nrow(UNPROD)){
96 if(!is.numeric(UNPROD[i,"CDR3.Length"])){
97 UNPROD[i,"CDR3.Length"] = 0
98 }
93 } 99 }
94 100
95 prod.sample.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample")]) 101 prod.sample.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample")])
96 prod.rep.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample", "Replicate")]) 102 prod.rep.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample", "Replicate")])
97 103
589 } 595 }
590 596
591 res[is.na(res)] = 0 597 res[is.na(res)] = 0
592 infer.result = infer.clonality(as.matrix(res[,2:ncol(res)])) 598 infer.result = infer.clonality(as.matrix(res[,2:ncol(res)]))
593 599
594 print(infer.result) 600 #print(infer.result)
595 601
596 write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F) 602 write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F)
597 603
598 res$type = rowSums(res[,2:ncol(res)]) 604 res$type = rowSums(res[,2:ncol(res)])
599 605
732 P4=mean(.SD$P5J.nt.nb, na.rm=T), 738 P4=mean(.SD$P5J.nt.nb, na.rm=T),
733 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 739 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
734 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 740 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
735 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 741 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
736 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 742 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
737 Median.CDR3.l=median(.SD$CDR3.Length)), 743 Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
738 by=c("Sample")]) 744 by=c("Sample")])
739 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 745 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
740 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 746 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
741 747
742 newData = data.frame(data.table(PRODF)[,list(unique=.N, 748 newData = data.frame(data.table(PRODF)[,list(unique=.N,
751 P4=num_median(.SD$P5J.nt.nb, na.rm=T), 757 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
752 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 758 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
753 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 759 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
754 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 760 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
755 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 761 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
756 Median.CDR3.l=median(.SD$CDR3.Length)), 762 Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
757 by=c("Sample")]) 763 by=c("Sample")])
758 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 764 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
759 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 765 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
760 766
761 newData = data.frame(data.table(UNPROD)[,list(unique=.N, 767 newData = data.frame(data.table(UNPROD)[,list(unique=.N,
770 P4=mean(.SD$P5J.nt.nb, na.rm=T), 776 P4=mean(.SD$P5J.nt.nb, na.rm=T),
771 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 777 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
772 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 778 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
773 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 779 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
774 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 780 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
775 Median.CDR3.l=median(.SD$CDR3.Length)), 781 Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
776 by=c("Sample")]) 782 by=c("Sample")])
777 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 783 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
778 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 784 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
779 785
780 newData = data.frame(data.table(UNPROD)[,list(unique=.N, 786 newData = data.frame(data.table(UNPROD)[,list(unique=.N,
789 P4=num_median(.SD$P5J.nt.nb, na.rm=T), 795 P4=num_median(.SD$P5J.nt.nb, na.rm=T),
790 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), 796 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T),
791 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), 797 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
792 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), 798 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
793 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), 799 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
794 Median.CDR3.l=median(.SD$CDR3.Length)), 800 Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
795 by=c("Sample")]) 801 by=c("Sample")])
796 802
797 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) 803 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
798 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 804 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
799 } 805 }