Mercurial > repos > davidvanzessen > shm_csr
diff merge_and_filter.r @ 30:33a7c49d48a7 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 12 Dec 2016 05:19:58 -0500 |
parents | 949a30f04d9b |
children | 05c62efdc393 |
line wrap: on
line diff
```diff
--- a/merge_and_filter.r Thu Dec 08 04:51:09 2016 -0500
+++ b/merge_and_filter.r Mon Dec 12 05:19:58 2016 -0500
@@ -119,22 +119,17 @@
 if(empty.region.filter == "leader"){
 	result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty FR1, CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty FR1, CDR1, FR2, CDR2, FR3 filter", nrow(result)))
 } else if(empty.region.filter == "FR1"){
 	result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty CDR1, FR2, CDR2, FR3 filter", nrow(result)))
 } else if(empty.region.filter == "CDR1"){
 	result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty FR2, CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty FR2, CDR2, FR3 filter", nrow(result)))
 } else if(empty.region.filter == "FR2"){
 	result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result)))
 }
 
+print(paste("After removal sequences that are missing a gene region:", nrow(result)))
+filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result)))
+
 if(empty.region.filter == "leader"){
 	result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
 } else if(empty.region.filter == "FR1"){
```