Mercurial > repos > davidvanzessen > shm_csr

--- a/merge_and_filter.r	Thu Dec 08 04:51:09 2016 -0500
+++ b/merge_and_filter.r	Mon Dec 12 05:19:58 2016 -0500
@@ -119,22 +119,17 @@

 if(empty.region.filter == "leader"){
 	result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty FR1, CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty FR1, CDR1, FR2, CDR2, FR3 filter", nrow(result)))
 } else if(empty.region.filter == "FR1"){
 	result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty CDR1, FR2, CDR2, FR3 filter", nrow(result)))
 } else if(empty.region.filter == "CDR1"){
 	result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty FR2, CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty FR2, CDR2, FR3 filter", nrow(result)))
 } else if(empty.region.filter == "FR2"){
 	result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
-	print(paste("Number of sequences after empty CDR2 and FR3 column filter:", nrow(result)))
-	filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result)))
 }

+print(paste("After removal sequences that are missing a gene region:", nrow(result)))
+filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result)))
+
 if(empty.region.filter == "leader"){
 	result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
 } else if(empty.region.filter == "FR1"){
--- a/shm_csr.xml	Thu Dec 08 04:51:09 2016 -0500
+++ b/shm_csr.xml	Mon Dec 12 05:19:58 2016 -0500
@@ -122,19 +122,20 @@

 *Remove unique:*

-  This filter consists of two different steps.

-  Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
+This filter consists of two different steps.
+
+Step 1: removes all sequences whose nucleotide sequence in the “analysed region” (see the “sequence starts at” filter) occurs only once. (Sub)classes are not taken into account in this filter step.

-  Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
+Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).

-  .. class:: infomark
+.. class:: infomark

-  Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
+Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.

 *Keep unique:*

-  Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
+Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).

 Example of the sequences that are included using either the “remove unique filter” or the “keep unique filter”
--- a/wrapper.sh	Thu Dec 08 04:51:09 2016 -0500
+++ b/wrapper.sh	Mon Dec 12 05:19:58 2016 -0500
@@ -410,27 +410,27 @@
 fi


-if [ -e $outdir/baseline.png ]
+if [ -e $outdir/baseline.pdf ]
 then
 	echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output
 fi

-if [ -e $outdir/baseline_IGA.png ]
+if [ -e $outdir/baseline_IGA.pdf ]
 then
 	echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output
 fi

-if [ -e $outdir/baseline_IGG.png ]
+if [ -e $outdir/baseline_IGG.pdf ]
 then
 	echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output
 fi

-if [ -e $outdir/baseline_IGM.png ]
+if [ -e $outdir/baseline_IGM.pdf ]
 then
 	echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output
 fi

-if [ -e $outdir/baseline_IGE.png ]
+if [ -e $outdir/baseline_IGE.pdf ]
 then
 	echo "<embed src='baseline_IGE.pdf' width='700px' height='1000px'>" >> $output
 fi
@@ -582,7 +582,7 @@
 echo "<tr><td>The data for the 'all' transition plot</td><td><a href='transitions_all_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transitions_IGA_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA1_sum.txt' download='transitions_IGA1_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA2_sum.txt' download='transitions_IGA2_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA2' transition plot</td><td><a href='transitions_IGA2_sum.txt' download='transitions_IGA2_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for the 'IGG' transition plot</td><td><a href='transitions_IGG_sum.txt' download='transitions_IGG_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for the 'IGG1' transition plot</td><td><a href='transitions_IGG1_sum.txt' download='transitions_IGG1_sum.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for the 'IGG2' transition plot</td><td><a href='transitions_IGG2_sum.txt' download='transitions_IGG2_sum.txt' >Download</a></td></tr>" >> $output