Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 13:d3ebaa2d2fe0 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 20 Dec 2016 06:02:44 -0500 |
parents | efa1f5a17b6e |
children | 15961ca8d9ce |
comparison
equal
deleted
inserted
replaced
12:5f5d29c5e711 | 13:d3ebaa2d2fe0 |
---|---|
45 | 45 |
46 # ---------------------- Data preperation ---------------------- | 46 # ---------------------- Data preperation ---------------------- |
47 | 47 |
48 print("Report Clonality - Data preperation") | 48 print("Report Clonality - Data preperation") |
49 | 49 |
50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") | 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="", stringsAsFactors=F) |
51 | 51 |
52 print(paste("nrows: ", nrow(inputdata))) | 52 print(paste("nrows: ", nrow(inputdata))) |
53 | 53 |
54 setwd(outdir) | 54 setwd(outdir) |
55 | 55 |
88 UNPROD = inputdata[inputdata$Functionality %in% c("unproductive (see comment)","unproductive"), ] | 88 UNPROD = inputdata[inputdata$Functionality %in% c("unproductive (see comment)","unproductive"), ] |
89 } else { | 89 } else { |
90 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] | 90 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] |
91 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ] | 91 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ] |
92 } | 92 } |
93 } | |
94 | |
95 for(i in 1:nrow(UNPROD)){ | |
96 if(!is.numeric(UNPROD[i,"CDR3.Length"])){ | |
97 UNPROD[i,"CDR3.Length"] = 0 | |
98 } | |
93 } | 99 } |
94 | 100 |
95 prod.sample.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample")]) | 101 prod.sample.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample")]) |
96 prod.rep.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample", "Replicate")]) | 102 prod.rep.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample", "Replicate")]) |
97 | 103 |
589 } | 595 } |
590 | 596 |
591 res[is.na(res)] = 0 | 597 res[is.na(res)] = 0 |
592 infer.result = infer.clonality(as.matrix(res[,2:ncol(res)])) | 598 infer.result = infer.clonality(as.matrix(res[,2:ncol(res)])) |
593 | 599 |
594 print(infer.result) | 600 #print(infer.result) |
595 | 601 |
596 write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F) | 602 write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F) |
597 | 603 |
598 res$type = rowSums(res[,2:ncol(res)]) | 604 res$type = rowSums(res[,2:ncol(res)]) |
599 | 605 |
732 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 738 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
733 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 739 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
734 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 740 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
735 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 741 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
736 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 742 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
737 Median.CDR3.l=median(.SD$CDR3.Length)), | 743 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), |
738 by=c("Sample")]) | 744 by=c("Sample")]) |
739 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 745 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
740 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 746 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
741 | 747 |
742 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 748 newData = data.frame(data.table(PRODF)[,list(unique=.N, |
751 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 757 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
752 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 758 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
753 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 759 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
754 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 760 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
755 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 761 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
756 Median.CDR3.l=median(.SD$CDR3.Length)), | 762 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), |
757 by=c("Sample")]) | 763 by=c("Sample")]) |
758 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 764 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
759 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 765 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
760 | 766 |
761 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 767 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
770 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 776 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
771 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 777 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
772 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 778 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
773 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 779 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
774 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 780 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
775 Median.CDR3.l=median(.SD$CDR3.Length)), | 781 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), |
776 by=c("Sample")]) | 782 by=c("Sample")]) |
777 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 783 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
778 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 784 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
779 | 785 |
780 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 786 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
789 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 795 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
790 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 796 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
791 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 797 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), |
792 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 798 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), |
793 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 799 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), |
794 Median.CDR3.l=median(.SD$CDR3.Length)), | 800 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), |
795 by=c("Sample")]) | 801 by=c("Sample")]) |
796 | 802 |
797 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 803 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
798 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 804 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
799 } | 805 } |