Mercurial > repos > davidvanzessen > shm_csr
comparison new_imgt.r @ 81:b6f9a640e098 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 19 Feb 2021 15:10:54 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
80:a4617f1d1d89 | 81:b6f9a640e098 |
---|---|
# Filter the IMGT result tables in a directory down to the sequences that
# are present in a merged table, rewriting each IMGT file in place.
#
# Command-line arguments (positional):
#   1. imgt.dir    - directory containing the IMGT "*.txt" tables
#   2. merged.file - tab-separated merged table with a header row
#   3. gene        - prefix matched against the start of `best_match`;
#                    "-" disables the gene filter
args <- commandArgs(trailingOnly = TRUE)

if (length(args) < 3) {
  stop("usage: Rscript new_imgt.r <imgt.dir> <merged.file> <gene>",
       call. = FALSE)
}

imgt.dir <- args[1]
merged.file <- args[2]
gene <- args[3]

merged <- read.table(merged.file, header = TRUE, sep = "\t", fill = TRUE,
                     stringsAsFactors = FALSE, comment.char = "", quote = "")

if (!("Sequence.ID" %in% names(merged))) { # change-o db
  print("Change-O DB changing 'SEQUENCE_ID' to 'Sequence.ID'")
  # `names` must be called, not subset: `names[merged]` raises
  # "object of type 'closure' is not subsettable".
  names(merged)[names(merged) == "SEQUENCE_ID"] <- "Sequence.ID"
}

if (gene != "-") {
  # Keep only rows whose best_match starts with the requested gene.
  # NOTE(review): if `best_match` is absent this selects zero rows, same as
  # the original behaviour - confirm whether that is intended.
  merged <- merged[grepl(paste0("^", gene), merged$best_match), ]
}

if ("best_match" %in% names(merged)) {
  merged <- merged[!grepl("unmatched", merged$best_match), ]
}

# Row count of the most recently processed IMGT file (0 if none was found).
nrow_dat <- 0

# `pattern` is a regular expression, not a glob, so the dot is escaped.
for (f in list.files(imgt.dir, pattern = "\\.txt$")) {
  path <- file.path(imgt.dir, f)
  # check.names = FALSE keeps headers such as "Sequence ID" verbatim.
  dat <- read.table(path, header = TRUE, sep = "\t", fill = TRUE, quote = "",
                    stringsAsFactors = FALSE, check.names = FALSE,
                    comment.char = "")

  # drop = FALSE keeps a data.frame even if the table has a single column.
  keep <- dat[, "Sequence ID"] %in% merged$Sequence.ID
  dat <- dat[keep, , drop = FALSE]

  nrow_dat <- nrow(dat)

  # Change the FR1 columns to 0 in the "8_..." file.
  if (nrow(dat) > 0 && grepl("^8_", f)) {
    dat[, grepl("^FR1", names(dat))] <- 0
  }

  # Overwrite the IMGT file with the filtered rows.
  write.table(dat, path, quote = FALSE, sep = "\t", row.names = FALSE,
              col.names = TRUE, na = "")
}

print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences"))