# HG changeset patch
# User davidvanzessen
# Date 1481537998 18000
# Node ID 33a7c49d48a77dd20b6437ea160bbf8f99e662f2
# Parent  80c4eebf7bc91cb2684986873882daa0a153e0eb
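Uploaded: report the missing-region filter once for all modes, un-indent the unique-filter help text, test for baseline PDFs instead of PNGs, fix the IGA2 transition label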

diff -r 80c4eebf7bc9 -r 33a7c49d48a7 merge_and_filter.r
--- a/merge_and_filter.r	Thu Dec 08 04:51:09 2016 -0500
+++ b/merge_and_filter.r	Mon Dec 12 05:19:58 2016 -0500
@@ -119,22 +119,17 @@
 
 if(empty.region.filter == "leader"){
 	result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty FR1, CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty FR1, CDR1, FR2, CDR2, FR3 filter", nrow(result)))
 } else if(empty.region.filter == "FR1"){
 	result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty CDR1, FR2, CDR2, FR3 filter", nrow(result)))
 } else if(empty.region.filter == "CDR1"){
 	result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty FR2, CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty FR2, CDR2, FR3 filter", nrow(result)))
 } else if(empty.region.filter == "FR2"){
 	result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result)))
 }
 
+print(paste("After removal of sequences that are missing a gene region:", nrow(result)))
+filtering.steps = rbind(filtering.steps, c("After removal of sequences that are missing a gene region", nrow(result))) # label mirrors the message printed above
+
 if(empty.region.filter == "leader"){
 	result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
 } else if(empty.region.filter == "FR1"){
diff -r 80c4eebf7bc9 -r 33a7c49d48a7 shm_csr.xml
--- a/shm_csr.xml	Thu Dec 08 04:51:09 2016 -0500
+++ b/shm_csr.xml	Mon Dec 12 05:19:58 2016 -0500
@@ -122,19 +122,20 @@
 
 *Remove unique:*
 
-  This filter consists of two different steps.
 
-  Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
+This filter consists of two different steps.
+
+Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
 
-  Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
+Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
 
-  .. class:: infomark
+.. class:: infomark
 
-  Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
+Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
 
 *Keep unique:*
 
-  Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
+Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
 
 Example of the sequences that are included using either the “remove unique filter” or the “keep unique filter”
 
diff -r 80c4eebf7bc9 -r 33a7c49d48a7 wrapper.sh
--- a/wrapper.sh	Thu Dec 08 04:51:09 2016 -0500
+++ b/wrapper.sh	Mon Dec 12 05:19:58 2016 -0500
@@ -410,27 +410,27 @@
 fi
 
 
-if [ -e $outdir/baseline.png ]
+if [ -e $outdir/baseline.pdf ]
 then
 	echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output
 fi
 
-if [ -e $outdir/baseline_IGA.png ]
+if [ -e $outdir/baseline_IGA.pdf ]
 then
 	echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output
 fi
 
-if [ -e $outdir/baseline_IGG.png ]
+if [ -e $outdir/baseline_IGG.pdf ]
 then
 	echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output
 fi
 
-if [ -e $outdir/baseline_IGM.png ]
+if [ -e $outdir/baseline_IGM.pdf ]
 then
 	echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output
 fi
 
-if [ -e $outdir/baseline_IGE.png ]
+if [ -e $outdir/baseline_IGE.pdf ]
 then
 	echo "<embed src='baseline_IGE.pdf' width='700px' height='1000px'>" >> $output
 fi
@@ -582,7 +582,7 @@
 echo "<tr><td>The data for the 'all' transition plot</td><td><a href='transitions_all_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transitions_IGA_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transitions_IGA_sum.txt' download='transitions_IGA_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA1_sum.txt' download='transitions_IGA1_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA2_sum.txt' download='transitions_IGA2_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA2' transition plot</td><td><a href='transitions_IGA2_sum.txt' download='transitions_IGA2_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for the 'IGG' transition plot</td><td><a href='transitions_IGG_sum.txt' download='transitions_IGG_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for the 'IGG1' transition plot</td><td><a href='transitions_IGG1_sum.txt' download='transitions_IGG1_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for the 'IGG2' transition plot</td><td><a href='transitions_IGG2_sum.txt' download='transitions_IGG2_sum.txt' >Download</a></td></tr>" >> $output