# HG changeset patch # User davidvanzessen # Date 1494939168 14400 # Node ID 8fa8836bd6054aea8a4f2a163952c67074d5c795 # Parent 75ee66a691a0803bb50d221dea6f2aafda4d83b4 Uploaded diff -r 75ee66a691a0 -r 8fa8836bd605 merge_and_filter.r --- a/merge_and_filter.r Mon May 15 03:13:16 2017 -0400 +++ b/merge_and_filter.r Tue May 16 08:52:48 2017 -0400 @@ -224,16 +224,13 @@ if(filter.unique == "remove"){ result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] + unique.defs = data.frame(table(result$unique.def)) + unique.defs = unique.defs[unique.defs$Freq >= filter.unique.count,] + result = result[result$unique.def %in% unique.defs$Var1,] } - + result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it - - if(filter.unique == "remove"){ - unique.defs = data.frame(table(result$unique.def)) - unique.defs = unique.defs[unique.defs$Freq >= filter.unique.count,] - result = result[result$unique.def %in% unique.defs$Var1,] - } - + result = result[!duplicated(result$unique.def),] } diff -r 75ee66a691a0 -r 8fa8836bd605 shm_csr.xml --- a/shm_csr.xml Mon May 15 03:13:16 2017 -0400 +++ b/shm_csr.xml Tue May 16 08:52:48 2017 -0400 @@ -20,7 +20,7 @@ - +