Mercurial > repos > davidvanzessen > shm_csr
diff merge_and_filter.r @ 40:ca2512e1e3ab draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 29 Dec 2016 07:05:45 -0500 |
parents | 05c62efdc393 |
children | b8ac74723ab0 |
line wrap: on
line diff
```diff
--- a/merge_and_filter.r Thu Dec 22 09:39:27 2016 -0500
+++ b/merge_and_filter.r Thu Dec 29 07:05:45 2016 -0500
@@ -47,8 +47,8 @@
 filtering.steps[,2] = as.character(filtering.steps[,2])
 #filtering.steps[,3] = as.numeric(filtering.steps[,3])
-print("summary files columns")
-print(names(summ))
+#print("summary files columns")
+#print(names(summ))
 summ = merge(summ, gene_identification, by="Sequence.ID")
@@ -171,7 +171,6 @@
 if(filter.unique != "no"){
 clmns = names(result)
-
 if(empty.region.filter == "leader"){
 result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
 } else if(empty.region.filter == "FR1"){
@@ -185,6 +184,7 @@
 if(filter.unique == "remove"){
 result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),]
 }
+
 result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it
 result = result[!duplicated(result$unique.def),]
@@ -194,16 +194,21 @@
 filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result)))
+print(paste("Number of sequences in result after unique filtering:", nrow(result)))
+
 if(nrow(summ) == 0){
 stop("No data remaining after filter")
 }
 result$best_match_class = gsub(",.*", "", result$best_match) #gsub so the unmatched don't have a class after it
-result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":"))
+#result$past = ""
+#cls = unlist(strsplit(unique.type, ","))
+#for (i in 1:nrow(result)){
+# result[i,"past"] = paste(result[i,cls], collapse=":")
+#}
-
-
+result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":"))
 result.matched = result[!grepl("unmatched", result$best_match),]
 result.unmatched = result[grepl("unmatched", result$best_match),]
```