Mercurial > repos > davidvanzessen > shm_csr
changeset 30:33a7c49d48a7 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 12 Dec 2016 05:19:58 -0500 |
parents | 80c4eebf7bc9 |
children | fe44a905aee9 |
files | merge_and_filter.r shm_csr.xml wrapper.sh |
diffstat | 3 files changed, 16 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/merge_and_filter.r Thu Dec 08 04:51:09 2016 -0500 +++ b/merge_and_filter.r Mon Dec 12 05:19:58 2016 -0500 @@ -119,22 +119,17 @@ if(empty.region.filter == "leader"){ result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] - print(paste("Number of sequences after empty FR1, CDR1, FR2, CDR2 and FR3 column filter:", nrow(result))) - filtering.steps = rbind(filtering.steps, c("After empty FR1, CDR1, FR2, CDR2, FR3 filter", nrow(result))) } else if(empty.region.filter == "FR1"){ result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] - print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result))) - filtering.steps = rbind(filtering.steps, c("After empty CDR1, FR2, CDR2, FR3 filter", nrow(result))) } else if(empty.region.filter == "CDR1"){ result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] - print(paste("Number of sequences after empty FR2, CDR2 and FR3 column filter:", nrow(result))) - filtering.steps = rbind(filtering.steps, c("After empty FR2, CDR2, FR3 filter", nrow(result))) } else if(empty.region.filter == "FR2"){ result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] - print(paste("Number of sequences after empty CDR2 and FR3 column filter:", nrow(result))) - filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result))) } +print(paste("After removal sequences that are missing a gene region:", nrow(result))) +filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result))) + if(empty.region.filter == "leader"){ result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] } else if(empty.region.filter == "FR1"){
--- a/shm_csr.xml Thu Dec 08 04:51:09 2016 -0500 +++ b/shm_csr.xml Mon Dec 12 05:19:58 2016 -0500 @@ -122,19 +122,20 @@ *Remove unique:* - This filter consists of two different steps. - Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step. +This filter consists of two different steps. + +Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step. - Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). +Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). - .. class:: infomark +.. class:: infomark - Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes. +Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes. *Keep unique:* - Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). +Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). Example of the sequences that are included using either the “remove unique filter” or the “keep unique filter”
--- a/wrapper.sh Thu Dec 08 04:51:09 2016 -0500 +++ b/wrapper.sh Mon Dec 12 05:19:58 2016 -0500 @@ -410,27 +410,27 @@ fi -if [ -e $outdir/baseline.png ] +if [ -e $outdir/baseline.pdf ] then echo "<embed src='baseline.pdf' width='700px' height='1000px'>" >> $output fi -if [ -e $outdir/baseline_IGA.png ] +if [ -e $outdir/baseline_IGA.pdf ] then echo "<embed src='baseline_IGA.pdf' width='700px' height='1000px'>" >> $output fi -if [ -e $outdir/baseline_IGG.png ] +if [ -e $outdir/baseline_IGG.pdf ] then echo "<embed src='baseline_IGG.pdf' width='700px' height='1000px'>" >> $output fi -if [ -e $outdir/baseline_IGM.png ] +if [ -e $outdir/baseline_IGM.pdf ] then echo "<embed src='baseline_IGM.pdf' width='700px' height='1000px'>" >> $output fi -if [ -e $outdir/baseline_IGE.png ] +if [ -e $outdir/baseline_IGE.pdf ] then echo "<embed src='baseline_IGE.pdf' width='700px' height='1000px'>" >> $output fi @@ -582,7 +582,7 @@ echo "<tr><td>The data for the 'all' transition plot</td><td><a href='transitions_all_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transitions_IGA_sum.txt' download='transitions_all_sum.txt' >Download</a></td></tr>" >> $output echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA1_sum.txt' download='transitions_IGA1_sum.txt' >Download</a></td></tr>" >> $output -echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transitions_IGA2_sum.txt' download='transitions_IGA2_sum.txt' >Download</a></td></tr>" >> $output +echo "<tr><td>The data for the 'IGA2' transition plot</td><td><a href='transitions_IGA2_sum.txt' download='transitions_IGA2_sum.txt' >Download</a></td></tr>" >> $output echo "<tr><td>The data for the 'IGG' transition plot</td><td><a href='transitions_IGG_sum.txt' download='transitions_IGG_sum.txt' >Download</a></td></tr>" >> $output echo "<tr><td>The data for the 'IGG1' transition plot</td><td><a href='transitions_IGG1_sum.txt' download='transitions_IGG1_sum.txt' >Download</a></td></tr>" >> $output echo "<tr><td>The data for the 'IGG2' transition plot</td><td><a href='transitions_IGG2_sum.txt' download='transitions_IGG2_sum.txt' >Download</a></td></tr>" >> $output