diff merge_and_filter.r @ 40:ca2512e1e3ab draft

Uploaded
author davidvanzessen
date Thu, 29 Dec 2016 07:05:45 -0500
parents 05c62efdc393
children b8ac74723ab0
line wrap: on
line diff
--- a/merge_and_filter.r	Thu Dec 22 09:39:27 2016 -0500
+++ b/merge_and_filter.r	Thu Dec 29 07:05:45 2016 -0500
@@ -47,8 +47,8 @@
 filtering.steps[,2] = as.character(filtering.steps[,2])
 #filtering.steps[,3] = as.numeric(filtering.steps[,3])
 
-print("summary files columns")
-print(names(summ))
+#print("summary files columns")
+#print(names(summ))
 
 summ = merge(summ, gene_identification, by="Sequence.ID")
 
@@ -171,7 +171,6 @@
 
 if(filter.unique != "no"){
 	clmns = names(result)
-	
 	if(empty.region.filter == "leader"){
 		result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
 	} else if(empty.region.filter == "FR1"){
@@ -185,6 +184,7 @@
 	if(filter.unique == "remove"){
 		result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
 	}
+	
 	result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it
 	
 	result = result[!duplicated(result$unique.def),]
@@ -194,16 +194,21 @@
 
 filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result)))
 
+print(paste("Number of sequences in result after unique filtering:", nrow(result)))
+
 if(nrow(summ) == 0){
 	stop("No data remaining after filter")
 }
 
 result$best_match_class = gsub(",.*", "", result$best_match) #gsub so the unmatched don't have a class after it
 
-result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":"))
+#result$past = ""
+#cls = unlist(strsplit(unique.type, ","))
+#for (i in 1:nrow(result)){
+#	result[i,"past"] = paste(result[i,cls], collapse=":")
+#}
 
-
-
+result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":"))
 
 result.matched = result[!grepl("unmatched", result$best_match),]
 result.unmatched = result[grepl("unmatched", result$best_match),]