Mercurial > repos > davidvanzessen > shm_csr
comparison aa_histogram.r @ 23:81453585dfc3 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 01 Dec 2016 09:32:06 -0500 |
parents | 0bea8c187a90 |
children | 80c4eebf7bc9 |
comparison
equal
deleted
inserted
replaced
22:0bea8c187a90 | 23:81453585dfc3 |
---|---|
13 | 13 |
14 mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="") | 14 mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="") |
15 absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") | 15 absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") |
16 | 16 |
17 for(gene in genes){ | 17 for(gene in genes){ |
18 if(gene == ""){ | |
19 mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] | |
20 absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] | |
21 } else { | |
22 mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] | |
23 absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] | |
24 } | |
25 print(paste("nrow", gene, nrow(absent.aa.by.id.gene))) | |
26 if(nrow(mutations.by.id.gene) == 0){ | |
27 next | |
28 } | |
18 | 29 |
19 if(gene == ""){ | 30 mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) |
20 mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] | 31 aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) |
21 absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] | |
22 } else { | |
23 mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] | |
24 absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] | |
25 } | |
26 print(paste("nrow", gene, nrow(absent.aa.by.id.gene))) | |
27 if(nrow(mutations.by.id.gene) == 0){ | |
28 next | |
29 } | |
30 | 32 |
31 mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) | 33 dat_freq = mutations.at.position / aa.at.position |
32 aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) | 34 dat_freq[is.na(dat_freq)] = 0 |
35 dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) | |
33 | 36 |
34 dat_freq = mutations.at.position / aa.at.position | 37 print("---------------- plot ----------------") |
35 dat_freq[is.na(dat_freq)] = 0 | |
36 dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) | |
37 | 38 |
38 print("---------------- plot ----------------") | 39 m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black")) |
40 m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i) | |
41 m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") | |
42 m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") | |
43 m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") | |
44 m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") | |
45 m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") | |
46 m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) | |
47 m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | |
48 #m = m + scale_colour_manual(values=c("black")) | |
39 | 49 |
40 m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black")) | 50 print("---------------- write/print ----------------") |
41 m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i) | |
42 m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") | |
43 m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") | |
44 m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") | |
45 m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") | |
46 m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") | |
47 m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) | |
48 m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) | |
49 m = m + scale_colour_manual(values=c("black")) | |
50 | |
51 print("---------------- write/print ----------------") | |
52 | 51 |
53 | 52 |
54 dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position) | 53 dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position) |
55 | 54 |
56 write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 55 write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
57 write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 56 write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
58 write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 57 write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
59 write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 58 write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
60 | 59 |
61 png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) | 60 png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) |
62 print(m) | 61 print(m) |
63 dev.off() | 62 dev.off() |
64 } | 63 } |