Mercurial > repos > davidvanzessen > argalaxy_tools
diff report_clonality/RScript.r @ 8:8cbc1a8d27ae draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 19 Dec 2016 08:50:31 -0500 |
parents | d001d0c05dbe |
children | efa1f5a17b6e |
line wrap: on
line diff
--- a/report_clonality/RScript.r Fri Dec 16 09:01:40 2016 -0500 +++ b/report_clonality/RScript.r Mon Dec 19 08:50:31 2016 -0500 @@ -135,6 +135,20 @@ } } +#make a names list with sample -> color +naive.colors = c('blue4', 'darkred', 'olivedrab3', 'red', 'gray74', 'darkviolet', 'lightblue1', 'gold', 'chartreuse2', 'pink', 'Paleturquoise3', 'Chocolate1', 'Yellow', 'Deeppink3', 'Mediumorchid1', 'Darkgreen', 'Blue', 'Gray36', 'Hotpink', 'Yellow4') +unique.samples = unique(PRODF$Sample) + +if(length(unique.samples) <= length(naive.colors)){ + sample.colors = naive.colors[1:length(unique.samples)] +} else { + sample.colors = rainbow(length(unique.samples)) +} + +names(sample.colors) = unique.samples + +print("Sample.colors") +print(sample.colors) #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive @@ -275,7 +289,8 @@ pV = ggplot(PRODFV) pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) -pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") +pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") + scale_fill_manual(values=sample.colors) +pV = pV + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) write.table(x=PRODFV, file="VFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) png("VPlot.png",width = 1280, height = 720) @@ -285,7 +300,8 @@ if(useD){ pD = ggplot(PRODFD) pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) - pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") + scale_fill_manual(values=sample.colors) + pD = pD + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) write.table(x=PRODFD, file="DFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) png("DPlot.png",width = 800, height = 600) @@ -295,16 +311,8 @@ pJ = ggplot(PRODFJ) pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) -pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") -write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) - -png("JPlot.png",width = 800, height = 600) -pJ -dev.off(); - -pJ = ggplot(PRODFJ) -pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) -pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") +pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + scale_fill_manual(values=sample.colors) +pJ = pJ + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) png("JPlot.png",width = 800, height = 600) @@ -324,7 +332,9 @@ VPlot = ggplot(VGenes) VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ggtitle("Distribution of V gene families") + - ylab("Percentage of sequences") + ylab("Percentage of sequences") + + scale_fill_manual(values=sample.colors) + + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) png("VFPlot.png") VPlot dev.off(); @@ -340,7 +350,9 @@ DPlot = ggplot(DGenes) DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ggtitle("Distribution of D gene families") + - ylab("Percentage of sequences") + ylab("Percentage of sequences") + + scale_fill_manual(values=sample.colors) + + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) png("DFPlot.png") print(DPlot) dev.off(); @@ -359,7 +371,9 @@ CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length.DNA, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ggtitle("Length distribution of CDR3") + xlab("CDR3 Length") + - ylab("Percentage of sequences") + ylab("Percentage of sequences") + + scale_fill_manual(values=sample.colors) + + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) png("CDR3LengthPlot.png",width = 1280, height = 720) CDR3LengthPlot dev.off() @@ -786,16 +800,21 @@ # ---------------------- D reading frame ---------------------- -D.REGION.reading.frame = PRODF$D.REGION.reading.frame +D.REGION.reading.frame = PRODF[,c("Sample", "D.REGION.reading.frame")] -D.REGION.reading.frame[is.na(D.REGION.reading.frame)] = "No D" +chck = is.na(D.REGION.reading.frame$D.REGION.reading.frame) +if(any(chck)){ + D.REGION.reading.frame[chck,"D.REGION.reading.frame"] = "No D" +} -D.REGION.reading.frame = data.frame(table(D.REGION.reading.frame)) +D.REGION.reading.frame = data.frame(data.table(D.REGION.reading.frame)[, list(Freq=.N), by=c("Sample", "D.REGION.reading.frame")]) write.table(D.REGION.reading.frame, "DReadingFrame.csv" , sep="\t",quote=F,row.names=F,col.names=T) D.REGION.reading.frame = ggplot(D.REGION.reading.frame) -D.REGION.reading.frame = D.REGION.reading.frame + geom_bar(aes( x = D.REGION.reading.frame, y = Freq), stat='identity', position='dodge' ) + ggtitle("D reading frame") + xlab("Frequency") + ylab("Frame") +D.REGION.reading.frame = D.REGION.reading.frame + geom_bar(aes( x = D.REGION.reading.frame, y = Freq, fill=Sample), stat='identity', position='dodge' ) + ggtitle("D reading frame") + xlab("Frequency") + ylab("Frame") +D.REGION.reading.frame = D.REGION.reading.frame + scale_fill_manual(values=sample.colors) +D.REGION.reading.frame = D.REGION.reading.frame + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) png("DReadingFrame.png") D.REGION.reading.frame @@ -834,11 +853,16 @@ AAfreqplot = AAfreqplot + annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) AAfreqplot = AAfreqplot + annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = "blue", alpha = 0.2) AAfreqplot = AAfreqplot + annotate("rect", xmin = 6.5, xmax = 7.5, ymin = 0, ymax = Inf, fill = "red", alpha = 0.2) -AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") +AAfreqplot = AAfreqplot + ggtitle("Amino Acid Composition in the CDR3") + xlab("Amino Acid, from Hydrophilic (left) to Hydrophobic (right)") + ylab("Percentage") + scale_fill_manual(values=sample.colors) +AAfreqplot = AAfreqplot + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) png("AAComposition.png",width = 1280, height = 720) AAfreqplot dev.off() write.table(AAfreq, "AAComposition.csv" , sep=",",quote=F,na="-",row.names=F,col.names=T) +# ---------------------- AA median CDR3 length ---------------------- +median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(.SD$CDR3.Length.DNA))), by=c("Sample")]) +write.table(median.aa.l, "AAMedianBySample.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) +