Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 18:5d11c9139a55 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 21 Dec 2016 11:53:03 -0500 |
parents | da95be204ebc |
children | 9185c3dfc679 |
comparison
equal
deleted
inserted
replaced
17:da95be204ebc | 18:5d11c9139a55 |
---|---|
157 print(sample.colors) | 157 print(sample.colors) |
158 | 158 |
159 | 159 |
160 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive | 160 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive |
161 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) | 161 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T) |
162 write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) | 162 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) |
163 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) | 163 write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T) |
164 | 164 |
165 #write the samples to a file | 165 #write the samples to a file |
166 sampleFile <- file("samples.txt") | 166 sampleFile <- file("samples.txt") |
167 un = unique(inputdata$Sample) | 167 un = unique(inputdata$Sample) |
295 | 295 |
296 pV = ggplot(PRODFV) | 296 pV = ggplot(PRODFV) |
297 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 297 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
298 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") + scale_fill_manual(values=sample.colors) | 298 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") + scale_fill_manual(values=sample.colors) |
299 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 299 pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
300 write.table(x=PRODFV, file="VFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 300 write.table(x=PRODFV, file="VFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
301 | 301 |
302 png("VPlot.png",width = 1280, height = 720) | 302 png("VPlot.png",width = 1280, height = 720) |
303 pV | 303 pV |
304 dev.off(); | 304 dev.off(); |
305 | 305 |
306 if(useD){ | 306 if(useD){ |
307 pD = ggplot(PRODFD) | 307 pD = ggplot(PRODFD) |
308 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 308 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
309 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) | 309 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) |
310 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 310 pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
311 write.table(x=PRODFD, file="DFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 311 write.table(x=PRODFD, file="DFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
312 | 312 |
313 png("DPlot.png",width = 800, height = 600) | 313 png("DPlot.png",width = 800, height = 600) |
314 print(pD) | 314 print(pD) |
315 dev.off(); | 315 dev.off(); |
316 } | 316 } |
317 | 317 |
318 pJ = ggplot(PRODFJ) | 318 pJ = ggplot(PRODFJ) |
319 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 319 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
320 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) | 320 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) |
321 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 321 pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
322 write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 322 write.table(x=PRODFJ, file="JFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
323 | 323 |
324 png("JPlot.png",width = 800, height = 600) | 324 png("JPlot.png",width = 800, height = 600) |
325 pJ | 325 pJ |
326 dev.off(); | 326 dev.off(); |
327 | 327 |
342 scale_fill_manual(values=sample.colors) + | 342 scale_fill_manual(values=sample.colors) + |
343 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 343 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
344 png("VFPlot.png") | 344 png("VFPlot.png") |
345 VPlot | 345 VPlot |
346 dev.off(); | 346 dev.off(); |
347 write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 347 write.table(x=VGenes, file="VFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
348 | 348 |
349 if(useD){ | 349 if(useD){ |
350 DGenes = PRODF[,c("Sample", "Top.D.Gene")] | 350 DGenes = PRODF[,c("Sample", "Top.D.Gene")] |
351 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) | 351 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) |
352 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) | 352 DGenes = data.frame(data.table(DGenes)[, list(Count=.N), by=c("Sample", "Top.D.Gene")]) |
360 scale_fill_manual(values=sample.colors) + | 360 scale_fill_manual(values=sample.colors) + |
361 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 361 theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
362 png("DFPlot.png") | 362 png("DFPlot.png") |
363 print(DPlot) | 363 print(DPlot) |
364 dev.off(); | 364 dev.off(); |
365 write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 365 write.table(x=DGenes, file="DFFrequency.txt", sep="\t",quote=F,row.names=F,col.names=T) |
366 } | 366 } |
367 | 367 |
368 # ---------------------- Plotting the cdr3 length ---------------------- | 368 # ---------------------- Plotting the cdr3 length ---------------------- |
369 | 369 |
370 print("Report Clonality - CDR3 length plot") | 370 print("Report Clonality - CDR3 length plot") |
577 # ---------------------- calculating the clonality score ---------------------- | 577 # ---------------------- calculating the clonality score ---------------------- |
578 | 578 |
579 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available | 579 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available |
580 { | 580 { |
581 print("Report Clonality - Clonality") | 581 print("Report Clonality - Clonality") |
582 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) | 582 write.table(clonalityFrame, "clonalityComplete.txt", sep="\t",quote=F,row.names=F,col.names=T) |
583 if(clonality_method == "boyd"){ | 583 if(clonality_method == "boyd"){ |
584 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) | 584 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) |
585 | 585 |
586 for (sample in samples){ | 586 for (sample in samples){ |
587 res = data.frame(paste=character(0)) | 587 res = data.frame(paste=character(0)) |
869 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | 869 AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
870 | 870 |
871 png("AAComposition.png",width = 1280, height = 720) | 871 png("AAComposition.png",width = 1280, height = 720) |
872 AAfreqplot | 872 AAfreqplot |
873 dev.off() | 873 dev.off() |
874 write.table(AAfreq, "AAComposition.csv" , sep=",",quote=F,na="-",row.names=F,col.names=T) | 874 write.table(AAfreq, "AAComposition.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T) |
875 | 875 |
876 # ---------------------- AA median CDR3 length ---------------------- | 876 # ---------------------- AA median CDR3 length ---------------------- |
877 | 877 |
878 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")]) | 878 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length))), by=c("Sample")]) |
879 write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 879 write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |