annotate new_imgt.r @ 58:1a8e1dd21b16 draft

Uploaded
author davidvanzessen
date Tue, 18 Jul 2017 04:58:22 -0400
parents cb779a45537b
children dc06e94bc1e3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
# Filter every IMGT result table in a directory down to the sequences that are
# present in a merged summary file, rewriting each table in place.
#
# Positional command-line arguments:
#   1. imgt.dir    - directory containing the tab-separated ".txt" tables
#   2. merged.file - tab-separated file with a header row; its "Sequence.ID"
#                    column lists the sequences to keep
#   3. gene        - regular-expression prefix that "best_match" must start
#                    with, or "-" to skip the gene filter
args <- commandArgs(trailingOnly = TRUE)

if (length(args) < 3) {
  stop("usage: new_imgt.r <imgt.dir> <merged.file> <gene>", call. = FALSE)
}

imgt.dir <- args[1]
merged.file <- args[2]
gene <- args[3]

merged <- read.table(
  merged.file,
  header = TRUE,
  sep = "\t",
  fill = TRUE,
  stringsAsFactors = FALSE,
  comment.char = ""
)

if (gene != "-") {
  # NOTE(review): when "best_match" is absent, grepl() receives NULL and this
  # selects zero rows, so every table below ends up empty -- confirm intended.
  merged <- merged[grepl(paste0("^", gene), merged$best_match), ]
}

if ("best_match" %in% names(merged)) {
  merged <- merged[!grepl("unmatched", merged$best_match), ]
}

# Row count of the most recently processed table; it is overwritten on every
# iteration, so the final message reports the last file only.
nrow_dat <- 0

# list.files() takes a regular expression, not a glob: escape the dot so only
# names that really end in ".txt" are picked up.
for (f in list.files(imgt.dir, pattern = "\\.txt$")) {
  path <- file.path(imgt.dir, f)
  dat <- read.table(
    path,
    header = TRUE,
    sep = "\t",
    fill = TRUE,
    quote = "",
    stringsAsFactors = FALSE,
    check.names = FALSE,  # keep the original header text, e.g. "Sequence ID"
    comment.char = ""
  )

  # [[ ]] avoids the partial name matching of `$`; drop = FALSE keeps a
  # one-column table as a data frame so nrow() stays meaningful.
  keep <- dat[["Sequence ID"]] %in% merged[["Sequence.ID"]]
  dat <- dat[keep, , drop = FALSE]

  nrow_dat <- nrow(dat)

  # change the FR1 columns to 0 in the "8_..." file
  if (nrow(dat) > 0 && grepl("^8_", f)) {
    dat[, grepl("^FR1", names(dat))] <- 0
  }

  # Overwrite the input table with the filtered rows.
  write.table(
    dat,
    path,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE,
    col.names = TRUE,
    na = ""
  )
}

print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences"))