# HG changeset patch # User davidvanzessen # Date 1480326082 18000 # Node ID 949a30f04d9b7c1d1061bee693b633d782cd3eca # Parent b95fa7e426c3a634e7f7e54f0ce7d502424bb185 Uploaded diff -r b95fa7e426c3 -r 949a30f04d9b merge_and_filter.r --- a/merge_and_filter.r Thu Nov 24 10:24:19 2016 -0500 +++ b/merge_and_filter.r Mon Nov 28 04:41:22 2016 -0500 @@ -96,8 +96,6 @@ result$JGene = gsub("^Homsap ", "", result$J.GENE.and.allele) result$JGene = gsub("[*].*", "", result$JGene) -print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) - splt = strsplit(class.filter, "_")[[1]] chunk_hit_threshold = as.numeric(splt[1]) nt_hit_threshold = as.numeric(splt[2]) @@ -112,8 +110,6 @@ result$best_match = "all" } -print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) - write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T) print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == ""))) @@ -139,8 +135,6 @@ filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result))) } -print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) - if(empty.region.filter == "leader"){ result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] } else if(empty.region.filter == "FR1"){ @@ -151,9 +145,6 @@ result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] } -print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) -print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")]) - print(paste("Number of sequences in result after n filtering:", nrow(result))) filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result))) @@ -192,9 +183,6 @@ result = result[!duplicated(result$unique.def),] } -print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) -print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")]) - write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T) filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) @@ -217,11 +205,6 @@ result = result[!(duplicated(result$past)), ] - - -print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) - - result = result[,!(names(result) %in% c("past", "best_match_class"))] print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result))) diff -r b95fa7e426c3 -r 949a30f04d9b shm_csr.xml --- a/shm_csr.xml Thu Nov 24 10:24:19 2016 -0500 +++ b/shm_csr.xml Mon Nov 28 04:41:22 2016 -0500 @@ -33,13 +33,15 @@ - - - - - - - + + + + + + + + + @@ -59,15 +61,23 @@ naive_output_cond['naive_output'] == "yes" + class_filter_cond['class_filter'] != "101_101" naive_output_cond['naive_output'] == "yes" + class_filter_cond['class_filter'] != "101_101" naive_output_cond['naive_output'] == "yes" + class_filter_cond['class_filter'] != "101_101" naive_output_cond['naive_output'] == "yes" + class_filter_cond['class_filter'] != "101_101" + + + naive_output_cond['naive_output'] == "yes" + class_filter_cond['class_filter'] == "101_101" diff -r b95fa7e426c3 -r 949a30f04d9b wrapper.sh --- a/wrapper.sh Thu Nov 24 10:24:19 2016 -0500 +++ b/wrapper.sh Mon Nov 28 04:41:22 2016 -0500 @@ -651,10 +651,15 @@ if [[ "$naive_output" == "yes" ]] then - cp $outdir/new_IMGT_IGA.txz ${naive_output_ca} - cp $outdir/new_IMGT_IGG.txz ${naive_output_cg} - cp $outdir/new_IMGT_IGM.txz ${naive_output_cm} - cp $outdir/new_IMGT_IGE.txz ${naive_output_ce} + if [[ "${class_filter}" == "101_101" ]] + then + cp $outdir/new_IMGT.txz ${naive_output_ca} + else + cp $outdir/new_IMGT_IGA.txz ${naive_output_ca} + cp $outdir/new_IMGT_IGG.txz ${naive_output_cg} + cp $outdir/new_IMGT_IGM.txz ${naive_output_cm} + cp $outdir/new_IMGT_IGE.txz ${naive_output_ce} + fi fi echo "" >> $outdir/base_overview.html