# HG changeset patch # User davidvanzessen # Date 1481537998 18000 # Node ID 33a7c49d48a77dd20b6437ea160bbf8f99e662f2 # Parent 80c4eebf7bc91cb2684986873882daa0a153e0eb Uploaded diff -r 80c4eebf7bc9 -r 33a7c49d48a7 merge_and_filter.r --- a/merge_and_filter.r Thu Dec 08 04:51:09 2016 -0500 +++ b/merge_and_filter.r Mon Dec 12 05:19:58 2016 -0500 @@ -119,22 +119,17 @@ if(empty.region.filter == "leader"){ result = result[result$FR1.IMGT.seq != "" & result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] - print(paste("Number of sequences after empty FR1, CDR1, FR2, CDR2 and FR3 column filter:", nrow(result))) - filtering.steps = rbind(filtering.steps, c("After empty FR1, CDR1, FR2, CDR2, FR3 filter", nrow(result))) } else if(empty.region.filter == "FR1"){ result = result[result$CDR1.IMGT.seq != "" & result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] - print(paste("Number of sequences after empty CDR1, FR2, CDR2 and FR3 column filter:", nrow(result))) - filtering.steps = rbind(filtering.steps, c("After empty CDR1, FR2, CDR2, FR3 filter", nrow(result))) } else if(empty.region.filter == "CDR1"){ result = result[result$FR2.IMGT.seq != "" & result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] - print(paste("Number of sequences after empty FR2, CDR2 and FR3 column filter:", nrow(result))) - filtering.steps = rbind(filtering.steps, c("After empty FR2, CDR2, FR3 filter", nrow(result))) } else if(empty.region.filter == "FR2"){ result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] - print(paste("Number of sequences after empty CDR2 and FR3 column filter:", nrow(result))) - filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result))) } +print(paste("After removal sequences that are missing a gene region:", nrow(result))) +filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result))) + if(empty.region.filter == "leader"){ result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] } else if(empty.region.filter == "FR1"){ diff -r 80c4eebf7bc9 -r 33a7c49d48a7 shm_csr.xml --- a/shm_csr.xml Thu Dec 08 04:51:09 2016 -0500 +++ b/shm_csr.xml Mon Dec 12 05:19:58 2016 -0500 @@ -122,19 +122,20 @@ *Remove unique:* - This filter consists of two different steps. - Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step. +This filter consists of two different steps. + +Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step. - Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). +Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). - .. class:: infomark +.. class:: infomark - Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes. +Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes. *Keep unique:* - Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). +Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class). Example of the sequences that are included using either the “remove unique filter” or the “keep unique filter” diff -r 80c4eebf7bc9 -r 33a7c49d48a7 wrapper.sh --- a/wrapper.sh Thu Dec 08 04:51:09 2016 -0500 +++ b/wrapper.sh Mon Dec 12 05:19:58 2016 -0500 @@ -410,27 +410,27 @@ fi -if [ -e $outdir/baseline.png ] +if [ -e $outdir/baseline.pdf ] then echo "" >> $output fi -if [ -e $outdir/baseline_IGA.png ] +if [ -e $outdir/baseline_IGA.pdf ] then echo "" >> $output fi -if [ -e $outdir/baseline_IGG.png ] +if [ -e $outdir/baseline_IGG.pdf ] then echo "" >> $output fi -if [ -e $outdir/baseline_IGM.png ] +if [ -e $outdir/baseline_IGM.pdf ] then echo "" >> $output fi -if [ -e $outdir/baseline_IGE.png ] +if [ -e $outdir/baseline_IGE.pdf ] then echo "" >> $output fi @@ -582,7 +582,7 @@ echo "The data for the 'all' transition plotDownload" >> $output echo "The data for the 'IGA' transition plotDownload" >> $output echo "The data for the 'IGA1' transition plotDownload" >> $output -echo "The data for the 'IGA1' transition plotDownload" >> $output +echo "The data for the 'IGA2' transition plotDownload" >> $output echo "The data for the 'IGG' transition plotDownload" >> $output echo "The data for the 'IGG1' transition plotDownload" >> $output echo "The data for the 'IGG2' transition plotDownload" >> $output