Mercurial > repos > davidvanzessen > shm_csr
comparison nt_overview.r @ 92:cf8ad181628f draft
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
author | rhpvorderman |
---|---|
date | Mon, 12 Dec 2022 12:32:44 +0000 |
parents | |
children | 385dea3c6cb5 |
comparison
equal
deleted
inserted
replaced
91:f387cc1580c6 | 92:cf8ad181628f |
---|---|
# Nucleotide (A/C/G/T) overview.
#
# Usage:
#   Rscript nt_overview.r <merged.file> <outputdir> <gene.classes> \
#                         <hotspot.analysis.sum.file> <empty.region.filter>
#
#   merged.file                tab-separated table with a header row; the
#                              columns Sequence.ID, best_match and the
#                              *.IMGT.seq region columns are read from it
#   outputdir                  directory that receives ntoverview.txt
#   gene.classes               comma-separated prefixes matched against the
#                              start of best_match
#   hotspot.analysis.sum.file  header-less CSV; the nucleotide rows computed
#                              here are appended and the file is rewritten
#   empty.region.filter        one of "leader", "FR1", "CDR1", "FR2"; selects
#                              which regions make up the analysed sequence
#
# Outputs:
#   <outputdir>/ntoverview.txt     per-sequence analysed region and counts
#   <hotspot.analysis.sum.file>    original rows plus one row per nucleotide

args <- commandArgs(trailingOnly = TRUE)

if (length(args) < 5) {
  stop(
    "expected 5 arguments: merged.file outputdir gene.classes ",
    "hotspot.analysis.sum.file empty.region.filter",
    call. = FALSE
  )
}

merged.file <- args[1]
outputdir <- args[2]
gene.classes <- unlist(strsplit(args[3], ","))
hotspot.analysis.sum.file <- args[4]
NToverview.file <- paste(outputdir, "ntoverview.txt", sep = "/")
empty.region.filter <- args[5]

# Regions that make up the analysed sequence for each filter value. An
# unrecognised value used to leave `seq` undefined and crash further down with
# an unrelated-looking error, so it is rejected here instead.
region.columns <- switch(
  empty.region.filter,
  leader = c("FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq",
             "CDR2.IMGT.seq", "FR3.IMGT.seq"),
  FR1 = c("CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq"),
  CDR1 = c("FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq"),
  FR2 = c("CDR2.IMGT.seq", "FR3.IMGT.seq"),
  stop(
    "unknown empty.region.filter '", empty.region.filter,
    "'; expected one of: leader, FR1, CDR1, FR2",
    call. = FALSE
  )
)

# Kept from the original script. Note that relative input paths are resolved
# against outputdir from here on.
setwd(outputdir)

merged <- read.table(
  merged.file,
  header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE, quote = ""
)
hotspot.analysis.sum <- read.table(
  hotspot.analysis.sum.file,
  header = FALSE, sep = ",", fill = TRUE, stringsAsFactors = FALSE, quote = ""
)

# Number of occurrences of nucleotide `nt` (either case) in each sequence.
count.nt <- function(sequences, nt) {
  pattern <- paste0("[^", toupper(nt), tolower(nt), "]")
  nchar(gsub(pattern, "", sequences))
}

# Three summary columns for a set of sequences, rows ordered A, C, T, G:
# per-nucleotide total, grand total (recycled), and percentage of the total.
summarise.nt <- function(overview) {
  x <- c(sum(overview$A), sum(overview$C), sum(overview$T), sum(overview$G))
  y <- sum(x)
  z <- round(x / y * 100, 2)
  data.frame(x, y, z)
}

# Append the x/y/z columns for `overview` to `result`, named <label>x/y/z.
append.summary <- function(result, overview, label) {
  previous.names <- names(result)
  result <- cbind(result, summarise.nt(overview))
  names(result) <- c(previous.names, paste0(label, c("x", "y", "z")))
  result
}

#ACGT overview

NToverview <- merged

# Region columns absent from the input are skipped, as before, but now with a
# warning so the gap is visible in the log.
present.columns <- intersect(region.columns, names(NToverview))
absent.columns <- setdiff(region.columns, present.columns)
if (length(absent.columns) > 0) {
  warning(
    "region column(s) not found in ", merged.file, ": ",
    paste(absent.columns, collapse = ", "),
    call. = FALSE
  )
}
if (length(present.columns) == 0 && nrow(NToverview) > 0) {
  stop(
    "none of the region columns required for empty.region.filter '",
    empty.region.filter, "' are present in ", merged.file,
    call. = FALSE
  )
}

# A region that is blank for every row is read as logical NA. Pasting NA
# yields the literal text "NA", whose "A" would be counted as an adenine, so
# missing regions are blanked before the regions are joined.
regions <- lapply(NToverview[present.columns], function(region) {
  region <- as.character(region)
  region[is.na(region)] <- ""
  region
})
# Regions are joined with a single space (paste's default separator), which
# is what ends up in the "Sequence of the analysed region" output column.
NToverview$seq <- do.call(paste, unname(regions))

for (nt in c("A", "C", "G", "T")) {
  NToverview[[nt]] <- count.nt(NToverview$seq, nt)
}

# One row per nucleotide; three columns are appended per gene class and
# finally three more for all matched sequences.
NTresult <- data.frame(nt = c("A", "C", "T", "G"), stringsAsFactors = FALSE)

for (clazz in gene.classes) {
  print(paste("class:", clazz))
  in.class <- grepl(paste0("^", clazz), NToverview$best_match)
  NToverview.sub <- NToverview[in.class, ]
  print(paste("nrow:", nrow(NToverview.sub)))
  NTresult <- append.summary(NTresult, NToverview.sub, clazz)
}

NToverview.tmp <- NToverview[
  , c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")
]
names(NToverview.tmp) <- c(
  "Sequence.ID", "best_match", "Sequence of the analysed region",
  "A", "C", "G", "T"
)

write.table(
  NToverview.tmp, NToverview.file,
  quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE
)

# The "all" columns exclude sequences whose best_match contains "unmatched".
NToverview <- NToverview[!grepl("unmatched", NToverview$best_match), ]
NTresult <- append.summary(NTresult, NToverview, "all")

# rbind needs both tables to have the same columns; fail with a readable
# message rather than the low-level names/rbind error.
if (ncol(hotspot.analysis.sum) != ncol(NTresult)) {
  stop(
    hotspot.analysis.sum.file, " has ", ncol(hotspot.analysis.sum),
    " column(s) but the nucleotide summary has ", ncol(NTresult),
    "; check that gene.classes matches the hotspot analysis",
    call. = FALSE
  )
}
names(hotspot.analysis.sum) <- names(NTresult)

hotspot.analysis.sum <- rbind(hotspot.analysis.sum, NTresult)

# na = "0" also covers the NaN percentages produced when a class has no
# nucleotides at all (0 / 0).
write.table(
  hotspot.analysis.sum, hotspot.analysis.sum.file,
  quote = FALSE, sep = ",", row.names = FALSE, col.names = FALSE, na = "0"
)