Mercurial > repos > davidvanzessen > shm_csr
annotate nt_overview.r @ 93:8fcf31272f6e draft
planemo upload commit a43893724cc769bed8a1f19a5b19ec1ba20cb63c
author | rhpvorderman |
---|---|
date | Mon, 06 Mar 2023 11:36:32 +0000 |
parents | cf8ad181628f |
children | 385dea3c6cb5 |
rev | line source |
---|---|
92
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
1 args <- commandArgs(trailingOnly = TRUE) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
2 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
3 merged.file = args[1] |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
4 outputdir = args[2] |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
5 gene.classes = unlist(strsplit(args[3], ",")) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
6 hotspot.analysis.sum.file = args[4] |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
7 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
8 empty.region.filter = args[5] |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
9 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
10 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
11 setwd(outputdir) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
12 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
13 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
14 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
15 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
16 #ACGT overview |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
17 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
18 NToverview = merged |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
19 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
20 if(empty.region.filter == "leader"){ |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
21 NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
22 } else if(empty.region.filter == "FR1"){ |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
23 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
24 } else if(empty.region.filter == "CDR1"){ |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
25 NToverview$seq = paste(NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
26 } else if(empty.region.filter == "FR2"){ |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
27 NToverview$seq = paste(NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
28 } |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
29 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
30 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
31 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
32 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
33 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
34 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
35 #Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
36 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
37 #NToverview = rbind(NToverview, NTsum) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
38 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
39 NTresult = data.frame(nt=c("A", "C", "T", "G")) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
40 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
41 for(clazz in gene.classes){ |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
42 print(paste("class:", clazz)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
43 NToverview.sub = NToverview[grepl(paste("^", clazz, sep=""), NToverview$best_match),] |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
44 print(paste("nrow:", nrow(NToverview.sub))) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
45 new.col.x = c(sum(NToverview.sub$A), sum(NToverview.sub$C), sum(NToverview.sub$T), sum(NToverview.sub$G)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
46 new.col.y = sum(new.col.x) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
47 new.col.z = round(new.col.x / new.col.y * 100, 2) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
48 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
49 tmp = names(NTresult) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
50 NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
51 names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep="")) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
52 } |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
53 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
54 NToverview.tmp = NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")] |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
55 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
56 names(NToverview.tmp) = c("Sequence.ID", "best_match", "Sequence of the analysed region", "A", "C", "G", "T") |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
57 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
58 write.table(NToverview.tmp, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
59 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
60 NToverview = NToverview[!grepl("unmatched", NToverview$best_match),] |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
61 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
62 new.col.x = c(sum(NToverview$A), sum(NToverview$C), sum(NToverview$T), sum(NToverview$G)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
63 new.col.y = sum(new.col.x) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
64 new.col.z = round(new.col.x / new.col.y * 100, 2) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
65 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
66 tmp = names(NTresult) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
67 NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z)) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
68 names(NTresult) = c(tmp, paste("all", c("x", "y", "z"), sep="")) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
69 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
70 names(hotspot.analysis.sum) = names(NTresult) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
71 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
72 hotspot.analysis.sum = rbind(hotspot.analysis.sum, NTresult) |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
73 |
cf8ad181628f
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff
changeset
|
74 write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0") |