diff new_imgt.r @ 0:c33d93683a09 draft

Uploaded
author davidvanzessen
date Thu, 13 Oct 2016 10:52:24 -0400
parents
children fe44a905aee9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/new_imgt.r	Thu Oct 13 10:52:24 2016 -0400
@@ -0,0 +1,29 @@
+# Restrict every IMGT "*.txt" table in imgt.dir, in place, to sequences present in merged.file.
+# Usage: Rscript new_imgt.r <imgt.dir> <merged.file> <gene>
+#   gene is a regex anchored at the start of best_match; "-" disables the gene filter.
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 3) {
+	stop("usage: new_imgt.r <imgt.dir> <merged.file> <gene>", call. = FALSE)
+}
+imgt.dir <- args[1]
+merged.file <- args[2]
+gene <- args[3]
+
+merged <- read.table(merged.file, header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE)
+# Rows whose best_match contains "unmatched" are dropped in every mode.
+merged <- merged[!grepl("unmatched", merged$best_match), , drop = FALSE]
+if (gene != "-") {
+	merged <- merged[grepl(paste0("^", gene), merged$best_match), , drop = FALSE]
+}
+
+# "\\.txt$" is a proper regex; file.path() works whether or not imgt.dir ends in "/".
+for (f in list.files(imgt.dir, pattern = "\\.txt$")) {
+	path <- file.path(imgt.dir, f)
+	dat <- read.table(path, header = TRUE, sep = "\t", fill = TRUE, quote = "", stringsAsFactors = FALSE, check.names = FALSE)
+	# merged was read with check.names on, so its ID column is Sequence.ID, not "Sequence ID".
+	dat <- dat[dat[, "Sequence ID"] %in% merged$Sequence.ID, , drop = FALSE]
+	if (nrow(dat) > 0 && grepl("^8_", f)) { # change the FR1 columns to 0 in the "8_..." file
+		dat[, grepl("^FR1", names(dat))] <- 0
+	}
+	write.table(dat, path, quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE, na = "")
+}