diff merge_and_filter.r @ 38:05c62efdc393 draft

Uploaded
author davidvanzessen
date Tue, 20 Dec 2016 09:03:15 -0500
parents 33a7c49d48a7
children ca2512e1e3ab
line wrap: on
line diff
--- a/merge_and_filter.r	Fri Dec 16 10:17:16 2016 -0500
+++ b/merge_and_filter.r	Tue Dec 20 09:03:15 2016 -0500
@@ -47,6 +47,9 @@
 filtering.steps[,2] = as.character(filtering.steps[,2])
 #filtering.steps[,3] = as.numeric(filtering.steps[,3])
 
+print("summary files columns")
+print(names(summ))
+
 summ = merge(summ, gene_identification, by="Sequence.ID")
 
 summ = summ[summ$Functionality != "No results",]
@@ -67,14 +70,23 @@
 
 filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ)))
 
+#print("mutation analysis files columns")
+#print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])]))
+
 result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with mutation analysis file:", nrow(result)))
 
+#print("mutation stats files columns")
+#print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])]))
+
 result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with mutation stats file:", nrow(result)))
 
+#print("hotspots files columns")
+#print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])]))
+
 result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with hotspots file:", nrow(result)))