Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 13:d3ebaa2d2fe0 draft
Uploaded
| author | davidvanzessen | 
|---|---|
| date | Tue, 20 Dec 2016 06:02:44 -0500 | 
| parents | efa1f5a17b6e | 
| children | 15961ca8d9ce | 
   comparison
  equal
  deleted
  inserted
  replaced
| 12:5f5d29c5e711 | 13:d3ebaa2d2fe0 | 
|---|---|
| 45 | 45 | 
| 46 # ---------------------- Data preperation ---------------------- | 46 # ---------------------- Data preperation ---------------------- | 
| 47 | 47 | 
| 48 print("Report Clonality - Data preperation") | 48 print("Report Clonality - Data preperation") | 
| 49 | 49 | 
| 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") | 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="", stringsAsFactors=F) | 
| 51 | 51 | 
| 52 print(paste("nrows: ", nrow(inputdata))) | 52 print(paste("nrows: ", nrow(inputdata))) | 
| 53 | 53 | 
| 54 setwd(outdir) | 54 setwd(outdir) | 
| 55 | 55 | 
| 88 UNPROD = inputdata[inputdata$Functionality %in% c("unproductive (see comment)","unproductive"), ] | 88 UNPROD = inputdata[inputdata$Functionality %in% c("unproductive (see comment)","unproductive"), ] | 
| 89 } else { | 89 } else { | 
| 90 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] | 90 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] | 
| 91 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ] | 91 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ] | 
| 92 } | 92 } | 
| 93 } | |
| 94 | |
| 95 for(i in 1:nrow(UNPROD)){ | |
| 96 if(!is.numeric(UNPROD[i,"CDR3.Length"])){ | |
| 97 UNPROD[i,"CDR3.Length"] = 0 | |
| 98 } | |
| 93 } | 99 } | 
| 94 | 100 | 
| 95 prod.sample.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample")]) | 101 prod.sample.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample")]) | 
| 96 prod.rep.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample", "Replicate")]) | 102 prod.rep.count = data.frame(data.table(PRODF)[, list(Productive=.N), by=c("Sample", "Replicate")]) | 
| 97 | 103 | 
| 589 } | 595 } | 
| 590 | 596 | 
| 591 res[is.na(res)] = 0 | 597 res[is.na(res)] = 0 | 
| 592 infer.result = infer.clonality(as.matrix(res[,2:ncol(res)])) | 598 infer.result = infer.clonality(as.matrix(res[,2:ncol(res)])) | 
| 593 | 599 | 
| 594 print(infer.result) | 600 #print(infer.result) | 
| 595 | 601 | 
| 596 write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F) | 602 write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F) | 
| 597 | 603 | 
| 598 res$type = rowSums(res[,2:ncol(res)]) | 604 res$type = rowSums(res[,2:ncol(res)]) | 
| 599 | 605 | 
| 732 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 738 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 
| 733 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 739 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 
| 734 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 740 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 
| 735 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 741 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 
| 736 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 742 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 
| 737 Median.CDR3.l=median(.SD$CDR3.Length)), | 743 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 
| 738 by=c("Sample")]) | 744 by=c("Sample")]) | 
| 739 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 745 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 
| 740 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 746 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 
| 741 | 747 | 
| 742 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 748 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 
| 751 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 757 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 
| 752 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 758 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 
| 753 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 759 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 
| 754 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 760 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 
| 755 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 761 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 
| 756 Median.CDR3.l=median(.SD$CDR3.Length)), | 762 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 
| 757 by=c("Sample")]) | 763 by=c("Sample")]) | 
| 758 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 764 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 
| 759 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 765 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 
| 760 | 766 | 
| 761 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 767 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 
| 770 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 776 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 
| 771 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 777 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 
| 772 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 778 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 
| 773 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 779 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 
| 774 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 780 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 
| 775 Median.CDR3.l=median(.SD$CDR3.Length)), | 781 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 
| 776 by=c("Sample")]) | 782 by=c("Sample")]) | 
| 777 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 783 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 
| 778 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 784 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 
| 779 | 785 | 
| 780 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 786 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 
| 789 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 795 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 
| 790 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 796 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 
| 791 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 797 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), | 
| 792 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 798 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), | 
| 793 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 799 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), | 
| 794 Median.CDR3.l=median(.SD$CDR3.Length)), | 800 Median.CDR3.l=as.double(median(.SD$CDR3.Length))), | 
| 795 by=c("Sample")]) | 801 by=c("Sample")]) | 
| 796 | 802 | 
| 797 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 803 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 
| 798 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 804 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 
| 799 } | 805 } | 
