Mercurial > repos > davidvanzessen > shm_csr
annotate aa_histogram.r @ 94:84e9e5c8c101 draft
"planemo upload commit d4be85014b638f1d50b318d4b735be7f6e973140"
author | rhpvorderman |
---|---|
date | Fri, 24 Mar 2023 16:58:28 +0000 |
parents | 729738462297 |
children |
rev | line source |
---|---|
83
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
1 library(ggplot2) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
2 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
3 args <- commandArgs(trailingOnly = TRUE) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
4 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
5 mutations.by.id.file = args[1] |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
6 absent.aa.by.id.file = args[2] |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
7 genes = strsplit(args[3], ",")[[1]] |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
8 genes = c(genes, "") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
9 outdir = args[4] |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
10 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
11 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
12 print("---------------- read input ----------------") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
13 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
14 mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
15 absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
16 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
17 for(gene in genes){ |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
18 graph.title = paste(gene, "AA mutation frequency") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
19 if(gene == ""){ |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
20 mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
21 absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
22 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
23 graph.title = "AA mutation frequency all" |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
24 } else { |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
25 mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
26 absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
27 } |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
28 print(paste("nrow", gene, nrow(absent.aa.by.id.gene))) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
29 if(nrow(mutations.by.id.gene) == 0){ |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
30 next |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
31 } |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
32 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
33 mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
34 aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
35 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
36 dat_freq = mutations.at.position / aa.at.position |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
37 dat_freq[is.na(dat_freq)] = 0 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
38 dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
39 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
40 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
41 print("---------------- plot ----------------") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
42 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
43 m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1), text = element_text(size=13, colour="black")) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
44 m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
45 m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
46 m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
47 m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
48 m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
49 m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
50 m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(graph.title) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
51 m = m + theme(panel.background = element_rect(fill = "white", colour="black"), panel.grid.major.y = element_line(colour = "black"), panel.grid.major.x = element_blank()) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
52 #m = m + scale_colour_manual(values=c("black")) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
53 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
54 print("---------------- write/print ----------------") |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
55 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
56 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
57 dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
58 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
59 write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
60 write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
61 write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
62 write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
63 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
64 png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
65 print(m) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
66 dev.off() |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
67 |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
68 ggsave(paste(outdir, "/aa_histogram_", gene, ".pdf", sep=""), m, width=14, height=7) |
729738462297
"planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents:
81
diff
changeset
|
69 } |