# Filter IMGT result tables down to the sequences present in a merged file.
#
# Usage: Rscript <this script> <imgt.dir> <merged.file> <gene>
#   imgt.dir    directory containing the tab-separated "*.txt" tables
#   merged.file tab-separated table with a "Sequence.ID" column (or a
#               "SEQUENCE_ID" column, which is renamed to "Sequence.ID")
#   gene        prefix that "best_match" must start with, or "-" for no
#               gene filtering
#
# Every matching .txt file in imgt.dir is overwritten in place.

args <- commandArgs(trailingOnly = TRUE)

if (length(args) < 3) {
  stop("Expected 3 arguments: <imgt.dir> <merged.file> <gene>", call. = FALSE)
}

imgt.dir <- args[1]
merged.file <- args[2]
gene <- args[3]

merged <- read.table(
  merged.file,
  header = TRUE, sep = "\t", fill = TRUE,
  stringsAsFactors = FALSE, comment.char = "", quote = ""
)

if (!("Sequence.ID" %in% names(merged))) { # change-o db
  print("Change-O DB changing 'SEQUENCE_ID' to 'Sequence.ID'")
  # names() must be *called* here; subscripting the function itself
  # (names[merged]) raises "object of type 'closure' is not subsettable".
  names(merged)[names(merged) == "SEQUENCE_ID"] <- "Sequence.ID"
}

if (gene != "-") {
  # The gene argument is used as a regular expression anchored at the start
  # of best_match.
  # NOTE(review): if the merged file has no "best_match" column this keeps
  # zero rows -- confirm that is the intended behaviour.
  merged <- merged[grepl(paste("^", gene, sep = ""), merged$best_match), ]
}

if ("best_match" %in% names(merged)) {
  merged <- merged[!grepl("unmatched", merged$best_match), ]
}

# Row count of the most recently processed file (stays 0 when no file matches).
nrow_dat <- 0

# list.files() takes a regular expression, not a glob, so the dot is escaped
# to match only names that really end in ".txt".
for (f in list.files(imgt.dir, pattern = "\\.txt$")) {
  path <- file.path(imgt.dir, f)
  # check.names = FALSE keeps headers such as "Sequence ID" verbatim.
  dat <- read.table(
    path,
    header = TRUE, sep = "\t", fill = TRUE, quote = "",
    stringsAsFactors = FALSE, check.names = FALSE, comment.char = ""
  )

  # Keep only rows whose sequence is still present in the merged table;
  # drop = FALSE keeps a data frame even for a single-column table.
  dat <- dat[dat[, "Sequence ID"] %in% merged$Sequence.ID, , drop = FALSE]

  nrow_dat <- nrow(dat)

  # change the FR1 columns to 0 in the "8_..." file
  if (nrow(dat) > 0 && grepl("^8_", f)) {
    dat[, grepl("^FR1", names(dat))] <- 0
  }

  # Overwrite the input file with the filtered table.
  write.table(
    dat, path,
    quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE, na = ""
  )
}

print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences"))