# HG changeset patch # User davidvanzessen # Date 1482157381 18000 # Node ID efa1f5a17b6e31f135399456efae838edb66a7e5 # Parent 8cbc1a8d27ae6f582a8865e91c8a0f6c0f3b71c5 Uploaded diff -r 8cbc1a8d27ae -r efa1f5a17b6e report_clonality/RScript.r --- a/report_clonality/RScript.r Mon Dec 19 08:50:31 2016 -0500 +++ b/report_clonality/RScript.r Mon Dec 19 09:23:01 2016 -0500 @@ -363,12 +363,12 @@ print("Report Clonality - CDR3 length plot") -CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length.DNA")]) +CDR3Length = data.frame(data.table(PRODF)[, list(Count=.N), by=c("Sample", "CDR3.Length")]) TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample]) CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample") CDR3Length$Frequency = CDR3Length$Count * 100 / CDR3Length$total CDR3LengthPlot = ggplot(CDR3Length) -CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length.DNA, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + +CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ggtitle("Length distribution of CDR3") + xlab("CDR3 Length") + ylab("Percentage of sequences") + @@ -400,7 +400,8 @@ scale_fill_gradient(low="gold", high="blue", na.value="white") + ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + xlab("D genes") + - ylab("V Genes") + ylab("V Genes") + + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major = element_line(colour = "gainsboro")) png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) print(img) @@ -449,7 +450,8 @@ scale_fill_gradient(low="gold", high="blue", na.value="white") + ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + xlab("J genes") + - ylab("V Genes") + ylab("V Genes") + + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major = element_line(colour = "gainsboro")) png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) print(img) @@ -497,7 +499,8 @@ scale_fill_gradient(low="gold", high="blue", na.value="white") + ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + xlab("J genes") + - ylab("D Genes") + ylab("D Genes") + + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major = element_line(colour = "gainsboro")) png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) print(img) @@ -731,7 +734,7 @@ Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=median(.SD$CDR3.Length.DNA)), + Median.CDR3.l=median(.SD$CDR3.Length)), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) @@ -750,7 +753,7 @@ Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=median(.SD$CDR3.Length.DNA)), + Median.CDR3.l=median(.SD$CDR3.Length)), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) @@ -769,7 +772,7 @@ Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=median(.SD$CDR3.Length.DNA)), + Median.CDR3.l=median(.SD$CDR3.Length)), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) @@ -788,7 +791,7 @@ Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)), Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)), Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)), - Median.CDR3.l=median(.SD$CDR3.Length.DNA)), + Median.CDR3.l=median(.SD$CDR3.Length)), by=c("Sample")]) newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) @@ -863,6 +866,6 @@ # ---------------------- AA median CDR3 length ---------------------- -median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length.DNA))), by=c("Sample")]) +median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")]) write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)