comparison report_clonality/RScript.r @ 18:5d11c9139a55 draft

Uploaded
author davidvanzessen
date Wed, 21 Dec 2016 11:53:03 -0500
parents da95be204ebc
children 9185c3dfc679
comparison
equal deleted inserted replaced
17:da95be204ebc 18:5d11c9139a55
157 print(sample.colors) 157 print(sample.colors)
158 158
159 159
160 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive 160 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive
161 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) 161 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T)
162 write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) 162 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T)
163 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) 163 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T)
164 164
165 #write the samples to a file 165 #write the samples to a file
166 sampleFile <- file("samples.txt") 166 sampleFile <- file("samples.txt")
167 un = unique(inputdata$Sample) 167 un = unique(inputdata$Sample)
295 295
296 pV = ggplot(PRODFV) 296 pV = ggplot(PRODFV)
297 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 297 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
298 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") + scale_fill_manual(values=sample.colors) 298 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") + scale_fill_manual(values=sample.colors)
299 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 299 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
300 write.table(x=PRODFV, file="VFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) 300 write.table(x=PRODFV, file="VFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
301 301
302 png("VPlot.png",width = 1280, height = 720) 302 png("VPlot.png",width = 1280, height = 720)
303 pV 303 pV
304 dev.off(); 304 dev.off();
305 305
306 if(useD){ 306 if(useD){
307 pD = ggplot(PRODFD) 307 pD = ggplot(PRODFD)
308 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 308 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
309 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) 309 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors)
310 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 310 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
311 write.table(x=PRODFD, file="DFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) 311 write.table(x=PRODFD, file="DFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
312 312
313 png("DPlot.png",width = 800, height = 600) 313 png("DPlot.png",width = 800, height = 600)
314 print(pD) 314 print(pD)
315 dev.off(); 315 dev.off();
316 } 316 }
317 317
318 pJ = ggplot(PRODFJ) 318 pJ = ggplot(PRODFJ)
319 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 319 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
320 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) 320 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors)
321 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 321 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
322 write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) 322 write.table(x=PRODFJ, file="JFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
323 323
324 png("JPlot.png",width = 800, height = 600) 324 png("JPlot.png",width = 800, height = 600)
325 pJ 325 pJ
326 dev.off(); 326 dev.off();
327 327
342 scale_fill_manual(values=sample.colors) + 342 scale_fill_manual(values=sample.colors) +
343 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 343 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
344 png("VFPlot.png") 344 png("VFPlot.png")
345 VPlot 345 VPlot
346 dev.off(); 346 dev.off();
347 write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) 347 write.table(x=VGenes, file="VFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
348 348
349 if(useD){ 349 if(useD){
350 DGenes = PRODF[,c("Sample", "Top.D.Gene")] 350 DGenes = PRODF[,c("Sample", "Top.D.Gene")]
351 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) 351 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene)
352 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) 352 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")])
360 scale_fill_manual(values=sample.colors) + 360 scale_fill_manual(values=sample.colors) +
361 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 361 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
362 png("DFPlot.png") 362 png("DFPlot.png")
363 print(DPlot) 363 print(DPlot)
364 dev.off(); 364 dev.off();
365 write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) 365 write.table(x=DGenes, file="DFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T)
366 } 366 }
367 367
368 # ---------------------- Plotting the cdr3 length ---------------------- 368 # ---------------------- Plotting the cdr3 length ----------------------
369 369
370 print("Report Clonality - CDR3 length plot") 370 print("Report Clonality - CDR3 length plot")
577 # ---------------------- calculating the clonality score ---------------------- 577 # ---------------------- calculating the clonality score ----------------------
578 578
579 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available 579 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available
580 { 580 {
581 print("Report Clonality - Clonality") 581 print("Report Clonality - Clonality")
582 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) 582 write.table(clonalityFrame, "clonalityComplete.txt", sep="\t",quote=F,row.names=F,col.names=T)
583 if(clonality_method == "boyd"){ 583 if(clonality_method == "boyd"){
584 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) 584 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T)
585 585
586 for (sample in samples){ 586 for (sample in samples){
587 res = data.frame(paste=character(0)) 587 res = data.frame(paste=character(0))
869 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) 869 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank())
870 870
871 png("AAComposition.png",width = 1280, height = 720) 871 png("AAComposition.png",width = 1280, height = 720)
872 AAfreqplot 872 AAfreqplot
873 dev.off() 873 dev.off()
874 write.table(AAfreq, "AAComposition.csv" , sep=",",quote=F,na="-",row.names=F,col.names=T) 874 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T)
875 875
876 # ---------------------- AA median CDR3 length ---------------------- 876 # ---------------------- AA median CDR3 length ----------------------
877 877
878 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")]) 878 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")])
879 write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 879 write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)