# HG changeset patch
# User davidvanzessen
# Date 1480326082 18000
# Node ID 949a30f04d9b7c1d1061bee693b633d782cd3eca
# Parent b95fa7e426c3a634e7f7e54f0ce7d502424bb185
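Uploaded: remove leftover debug print() calls from merge_and_filter.r; handle class_filter "101_101" for the naive output in shm_csr.xml and wrapper.sh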
diff -r b95fa7e426c3 -r 949a30f04d9b merge_and_filter.r
--- a/merge_and_filter.r Thu Nov 24 10:24:19 2016 -0500
+++ b/merge_and_filter.r Mon Nov 28 04:41:22 2016 -0500
@@ -96,8 +96,6 @@
result$JGene = gsub("^Homsap ", "", result$J.GENE.and.allele)
result$JGene = gsub("[*].*", "", result$JGene)
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-
splt = strsplit(class.filter, "_")[[1]]
chunk_hit_threshold = as.numeric(splt[1])
nt_hit_threshold = as.numeric(splt[2])
@@ -112,8 +110,6 @@
result$best_match = "all"
}
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-
write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T)
print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == "")))
@@ -139,8 +135,6 @@
filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result)))
}
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-
if(empty.region.filter == "leader"){
result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
} else if(empty.region.filter == "FR1"){
@@ -151,9 +145,6 @@
result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
}
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")])
-
print(paste("Number of sequences in result after n filtering:", nrow(result)))
filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result)))
@@ -192,9 +183,6 @@
result = result[!duplicated(result$unique.def),]
}
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")])
-
write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T)
filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result)))
@@ -217,11 +205,6 @@
result = result[!(duplicated(result$past)), ]
-
-
-print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
-
-
result = result[,!(names(result) %in% c("past", "best_match_class"))]
print(paste("Number of sequences in result after", unique.type, "filtering:", nrow(result)))
diff -r b95fa7e426c3 -r 949a30f04d9b shm_csr.xml
--- a/shm_csr.xml Thu Nov 24 10:24:19 2016 -0500
+++ b/shm_csr.xml Mon Nov 28 04:41:22 2016 -0500
@@ -33,13 +33,15 @@
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
@@ -59,15 +61,23 @@
naive_output_cond['naive_output'] == "yes"
+ class_filter_cond['class_filter'] != "101_101"
naive_output_cond['naive_output'] == "yes"
+ class_filter_cond['class_filter'] != "101_101"
naive_output_cond['naive_output'] == "yes"
+ class_filter_cond['class_filter'] != "101_101"
naive_output_cond['naive_output'] == "yes"
+ class_filter_cond['class_filter'] != "101_101"
+
+
+ naive_output_cond['naive_output'] == "yes"
+ class_filter_cond['class_filter'] == "101_101"
diff -r b95fa7e426c3 -r 949a30f04d9b wrapper.sh
--- a/wrapper.sh Thu Nov 24 10:24:19 2016 -0500
+++ b/wrapper.sh Mon Nov 28 04:41:22 2016 -0500
@@ -651,10 +651,15 @@
if [[ "$naive_output" == "yes" ]]
then
- cp $outdir/new_IMGT_IGA.txz ${naive_output_ca}
- cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}
- cp $outdir/new_IMGT_IGM.txz ${naive_output_cm}
- cp $outdir/new_IMGT_IGE.txz ${naive_output_ce}
+ if [[ "${class_filter}" == "101_101" ]]
+ then
+ cp $outdir/new_IMGT.txz ${naive_output_ca}
+ else
+ cp $outdir/new_IMGT_IGA.txz ${naive_output_ca}
+ cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}
+ cp $outdir/new_IMGT_IGM.txz ${naive_output_cm}
+ cp $outdir/new_IMGT_IGE.txz ${naive_output_ce}
+ fi
fi
echo "" >> $outdir/base_overview.html