# HG changeset patch
# User davidvanzessen
# Date 1479386001 18000
# Node ID 61d0a6318711d3b52e248a6f4f787ee033628df0
# Parent 59765d2c8890bf8a5ae82fe78374052b80c5a36d
Uploaded
diff -r 59765d2c8890 -r 61d0a6318711 merge_and_filter.r
--- a/merge_and_filter.r Fri Nov 11 07:31:48 2016 -0500
+++ b/merge_and_filter.r Thu Nov 17 07:33:21 2016 -0500
@@ -175,14 +175,12 @@
result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
}
- if(grepl("keep", filter.unique)){
- result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes
- result = result[!duplicated(result$unique.def),]
- } else {
+ if(filter.unique == "remove"){
result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
- result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it
- result = result[!duplicated(result$unique.def),]
}
+ result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it
+
+ result = result[!duplicated(result$unique.def),]
}
write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T)
@@ -197,8 +195,21 @@
result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":"))
+
+
+
+result.matched = result[!grepl("unmatched", result$best_match),]
+result.unmatched = result[grepl("unmatched", result$best_match),]
+
+result = rbind(result.matched, result.unmatched)
+
result = result[!(duplicated(result$past)), ]
+
+
+
+
+
result = result[,!(names(result) %in% c("past", "best_match_class"))]
print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result)))
diff -r 59765d2c8890 -r 61d0a6318711 shm_csr.xml
--- a/shm_csr.xml Fri Nov 11 07:31:48 2016 -0500
+++ b/shm_csr.xml Thu Nov 17 07:33:21 2016 -0500
@@ -75,27 +75,115 @@
10.1093/bioinformatics/btv359
- Takes an IMGT zip (http://www.imgt.org/HighV-QUEST/search.action) file and creates a summarization of the mutation analysis.
-
- +--------------------------+
- | unique filter |
- +--------+--------+--------+
- | values | remove | keep |
- +--------+--------+--------+
- | A | A | A |
- +--------+--------+--------+
- | A | B | B |
- +--------+--------+--------+
- | B | D | C |
- +--------+--------+--------+
- | B | | D |
- +--------+--------+--------+
- | C | | |
- +--------+--------+--------+
- | D | | |
- +--------+--------+--------+
- | D | | |
- +--------+--------+--------+
-
+