Mercurial > repos > bgruening > bismark

--- a/bismark_bowtie2_wrapper.xml	Wed Aug 21 12:59:48 2019 -0400
+++ b/bismark_bowtie2_wrapper.xml	Wed Aug 28 07:08:45 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="bismark_bowtie2" name="Bismark Mapper" version="0.22.1+galaxy3" profile="18.01">
+<tool id="bismark_bowtie2" name="Bismark Mapper" version="0.22.1+galaxy4" profile="18.01">
     <description>Bisulfite reads mapper</description>
     <requirements>
         <requirement type="package" version="0.22.1">bismark</requirement>
@@ -12,11 +12,20 @@
             #if $singlePaired.input_singles.ext == "fasta":
                 #set read1 = 'input_1.fa'
             #elif $singlePaired.input_singles.ext in ["fastq.gz", "fastqsanger.gz"]:
-                #set read1 = 'input_1.fq.gz'
+                #if $params.pbat == "--pbat"
+                    #set read1 = 'input_1.fq'
+                #else
+                    #set read1 = 'input_1.fq.gz'
+                #end if
             #else
                 #set read1 = 'input_1.fq'
             #end if
-            ln -s '${singlePaired.input_singles}' '${read1}' &&
+
+            #if $params.pbat == "--pbat" and $singlePaired.input_singles.ext in ["fastq.gz", "fastqsanger.gz"]:
+                zcat '${singlePaired.input_singles}' > '${read1}' &&
+            #else
+                ln -s '${singlePaired.input_singles}' '${read1}' &&
+            #end if
         #else:
             #set $mate1 = list()
             #set $mate2 = list()
@@ -25,20 +34,38 @@
                 #if $mate_pair.input_mate1.ext == "fasta":
                     #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fa'
                 #elif $mate_pair.input_mate1.ext in ["fastq.gz", "fastqsanger.gz"]:
-                    #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq.gz'
+                    #if $params.pbat == "--pbat"
+                        #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq'
+                    #else
+                        #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq.gz'
+                    #end if
                 #else
                     #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq'
                 #end if
-                ln -s '${mate_pair.input_mate1}' '${read1}' &&
+
+                #if $params.pbat == "--pbat" and $mate_pair.input_mate1.ext in ["fastq.gz", "fastqsanger.gz"]:
+                    zcat '${mate_pair.input_mate1}' > '${read1}' &&
+                #else
+                    ln -s '${mate_pair.input_mate1}' '${read1}' &&
+                #end if

                 #if $mate_pair.input_mate2.ext == "fasta":
                     #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fa'
                 #elif $mate_pair.input_mate2.ext in ["fastq.gz", "fastqsanger.gz"]:
-                    #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq.gz'
+                    #if $params.pbat == "--pbat"
+                        #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq'
+                    #else
+                        #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq.gz'
+                    #end if
                 #else
                     #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq'
                 #end if
-                ln -s '${mate_pair.input_mate2}' '${read2}' &&
+
+                #if $params.pbat == "--pbat" and $mate_pair.input_mate2.ext in ["fastq.gz", "fastqsanger.gz"]:
+                    zcat '${mate_pair.input_mate2}' > '${read2}' &&
+                #else
+                    ln -s '${mate_pair.input_mate2}' '${read2}' &&
+                #end if

                 $mate1.append( str($read1) )
                 $mate2.append( str($read2) )
@@ -138,6 +165,8 @@

             $params.non_directional

+            $params.pbat
+
             #if $params.bismark_stdout:
                 --stdout '$output_stdout'
             #end if
@@ -289,6 +318,11 @@
                 <param argument="--non-directional" name="non_directional" type="boolean"
                        truevalue="--non-directional" falsevalue="" checked="false"
                        label="The sequencing library was constructed in a non strand-specific manner, alignments to all four bisulfite strands will be reported."/>
+                <param argument="--pbat" name="pbat" type="boolean"
+                       truevalue="--pbat" falsevalue="" checked="false"
+                       label="Treat input data as PBAT-Seq libraries"
+                       help="Use this option only if you are certain that your libraries were constructed following a PBAT protocol. If you don't know what PBAT-Seq is you should not specify this option." />
+

                 <!--end output options -->
             </when>  <!-- full -->
@@ -544,6 +578,38 @@
                 <has_text text="--score-min 'L,0,-0.8'" />
             </assert_command>
         </test>
+        <test>
+            <param name="genomeSource" value="history"/>
+            <param name="own_file" value="mm10.tiny.fa.gz" />
+            <param name="sPaired" value="single"/>
+            <param name="input_singles" value="input1.fq.gzip" ftype="fastqsanger.gz"/>
+            <param name="sort_bam" value="false"/>
+            <param name="settingsType" value="custom"/>
+            <param name="suppressed_read_file" value="true"/>
+            <param name="unmapped_read_file" value="true"/>
+            <param name="bismark_stdout" value="true"/>
+            <param name="isReportOutput" value="true"/>
+            <conditional name="params">
+                <param name="settingsType" value="custom" />
+                <param name="pbat" value="true" />
+            </conditional>
+
+            <output name="output_stdout" file="summary_pbat.txt" ftype="txt" lines_diff="94">
+                 <assert_contents>
+                     <has_text text="Sequences analysed in total:" />
+                     <has_text text="44115" />
+                     <has_text text="Mapping efficiency:" />
+                     <has_text text="0.0%" />
+                     <has_text text="Bismark run complete" />
+                 </assert_contents>
+            </output>
+            <output name="report_file" file="mapping_report_pbat.txt" ftype="txt" lines_diff="6"/>
+            <output name="output" file="mapped_reads_pbat.bam" ftype="qname_input_sorted.bam" lines_diff="14"/>
+
+            <assert_command>
+                <has_text text="--pbat" />
+            </assert_command>
+        </test>
     </tests>

     <help>
--- a/bismark_wrapper.py	Wed Aug 21 12:59:48 2019 -0400
+++ b/bismark_wrapper.py	Wed Aug 28 07:08:45 2019 -0400
@@ -69,6 +69,7 @@
     parser.add_argument('--fasta', action='store_true', help='Query filetype is in FASTA format')
     parser.add_argument('--phred64-quals', dest='phred64', action="store_true")
     parser.add_argument('--non-directional', dest='non_directional', action="store_true")
+    parser.add_argument('--pbat', dest='pbat', action="store_true")

     parser.add_argument('--skip-reads', dest='skip_reads', type=int)
     parser.add_argument('--score-min', dest='score_min', type=str)
@@ -153,7 +154,9 @@
     # Build bismark command
     tmp_bismark_dir = tempfile.mkdtemp()
     output_dir = os.path.join(tmp_bismark_dir, 'results')
-    cmd = ['bismark', '--bam', '--gzip', '--temp_dir', tmp_bismark_dir, '-o', output_dir, '--quiet']
+    cmd = ['bismark', '--bam', '--temp_dir', tmp_bismark_dir, '-o', output_dir, '--quiet']
+    if not args.pbat:
+        cmd.append('--gzip')

     if args.fasta:
         # the query input files (specified as mate1,mate2 or singles) are FastA
@@ -187,6 +190,8 @@
         cmd.append('--phred64-quals')
     if args.non_directional:
        cmd.append('--non-directional')
+    if args.pbat:
+       cmd.append('--pbat')
     if args.suppress_header:
         cmd.append('--sam-no-hd')
     if args.output_unmapped_reads or (args.output_unmapped_reads_l and args.output_unmapped_reads_r):
@@ -223,24 +228,24 @@
         output_report_file.close()

     if args.output_suppressed_reads:
-        if glob(os.path.join(output_dir, '*ambiguous_reads.txt')):
-            shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads.txt'))[0], args.output_suppressed_reads)
+        if glob(os.path.join(output_dir, '*ambiguous_reads.*')):
+            shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads.*'))[0], args.output_suppressed_reads)
     if args.output_suppressed_reads_l:
-        if glob(os.path.join(output_dir, '*ambiguous_reads_1.txt')):
-            shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_1.txt'))[0], args.output_suppressed_reads_l)
+        if glob(os.path.join(output_dir, '*ambiguous_reads_1.*')):
+            shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_1.*'))[0], args.output_suppressed_reads_l)
     if args.output_suppressed_reads_r:
-        if glob(os.path.join(output_dir, '*ambiguous_reads_2.txt')):
-            shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_2.txt'))[0], args.output_suppressed_reads_r)
+        if glob(os.path.join(output_dir, '*ambiguous_reads_2.*')):
+            shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_2.*'))[0], args.output_suppressed_reads_r)

     if args.output_unmapped_reads:
-        if glob(os.path.join(output_dir, '*unmapped_reads.txt')):
-            shutil.move(glob(os.path.join(output_dir, '*unmapped_reads.txt'))[0], args.output_unmapped_reads)
+        if glob(os.path.join(output_dir, '*unmapped_reads.*')):
+            shutil.move(glob(os.path.join(output_dir, '*unmapped_reads.*'))[0], args.output_unmapped_reads)
     if args.output_unmapped_reads_l:
-        if glob(os.path.join(output_dir, '*unmapped_reads_1.txt')):
-            shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_1.txt'))[0], args.output_unmapped_reads_l)
+        if glob(os.path.join(output_dir, '*unmapped_reads_1.*')):
+            shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_1.*'))[0], args.output_unmapped_reads_l)
     if args.output_unmapped_reads_r:
-        if glob(os.path.join(output_dir, '*unmapped_reads_2.txt')):
-            shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_2.txt'))[0], args.output_unmapped_reads_r)
+        if glob(os.path.join(output_dir, '*unmapped_reads_2.*')):
+            shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_2.*'))[0], args.output_unmapped_reads_r)

     try:
         # merge all bam files
Binary file test-data/mapped_reads_pbat.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mapping_report_pbat.txt	Wed Aug 28 07:08:45 2019 -0400
@@ -0,0 +1,40 @@
+Bismark report for: input_1.fq (version: v0.22.1)
+Option '--pbat' specified: alignments to original strands (OT and OB) strands were ignored (i.e. not performed)
+Bismark was run with Bowtie 2 against the bisulfite genome of /tmp/tmpuqE7r1/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet
+
+Final Alignment report
+======================
+Sequences analysed in total:	44115
+Number of alignments with a unique best hit from the different alignments:	13
+Mapping efficiency:	0.0%
+Sequences with no alignments under any condition:	44059
+Sequences did not map uniquely:	43
+Sequences which were discarded because genomic sequence could not be extracted:	0
+
+Number of sequences with unique best (first) alignment came from the bowtie output:
+CT/CT:	0	((converted) top strand)
+CT/GA:	0	((converted) bottom strand)
+GA/CT:	11	(complementary to (converted) top strand)
+GA/GA:	2	(complementary to (converted) bottom strand)
+
+Final Cytosine Methylation Report
+=================================
+Total number of C's analysed:	307
+
+Total methylated C's in CpG context:	1
+Total methylated C's in CHG context:	3
+Total methylated C's in CHH context:	227
+Total methylated C's in Unknown context:	0
+
+Total unmethylated C's in CpG context:	1
+Total unmethylated C's in CHG context:	4
+Total unmethylated C's in CHH context:	71
+Total unmethylated C's in Unknown context:	0
+
+C methylated in CpG context:	50.0%
+C methylated in CHG context:	42.9%
+C methylated in CHH context:	76.2%
+Can't determine percentage of methylated Cs in Unknown context (CN or CHN) if value was 0
+
+
+Bismark completed in 0d 0h 0m 7s
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/summary_pbat.txt	Wed Aug 28 07:08:45 2019 -0400
@@ -0,0 +1,493 @@
+Create a temporary index with the offered files from the user. Utilizing the script: bismark_genome_preparation
+Generating index with: 'bismark_genome_preparation --bowtie2 /tmp/tmpuqE7r1'
+Writing bisulfite genomes out into a single MFA (multi FastA) file
+
+Bisulfite Genome Indexer version v0.22.1 (last modified: 14 April 2019)
+
+Step I - Prepare genome folders - completed
+
+
+
+Total number of conversions performed:
+C->T:	146875
+G->A:	150504
+
+Step II - Genome bisulfite conversions - completed
+
+
+Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer
+Please be aware that this process can - depending on genome size - take several hours!
+Settings:
+  Output files: "BS_CT.*.bt2"
+  Line rate: 6 (line is 64 bytes)
+  Lines per side: 1 (side is 64 bytes)
+  Offset rate: 4 (one in 16)
+  FTable chars: 10
+  Strings: unpacked
+  Max bucket size: default
+  Max bucket size, sqrt multiplier: default
+  Max bucket size, len divisor: 4
+  Difference-cover sample period: 1024
+  Endianness: little
+  Actual local endianness: little
+  Sanity checking: disabled
+  Assertions: disabled
+  Random seed: 0
+  Sizeofs: void*:8, int:4, long:8, size_t:8
+Input files DNA, FASTA:
+  genome_mfa.CT_conversion.fa
+Building a SMALL index
+Reading reference sizes
+  Time reading reference sizes: 00:00:00
+Calculating joined length
+Writing header
+Reserving space for joined string
+Joining reference sequences
+  Time to join reference sequences: 00:00:00
+bmax according to bmaxDivN setting: 189039
+Using parameters --bmax 141780 --dcv 1024
+  Doing ahead-of-time memory usage test
+  Passed!  Constructing with these parameters: --bmax 141780 --dcv 1024
+Constructing suffix-array element generator
+Building DifferenceCoverSample
+  Building sPrime
+  Building sPrimeOrder
+  V-Sorting samples
+  V-Sorting samples time: 00:00:00
+  Allocating rank array
+  Ranking v-sort output
+  Ranking v-sort output time: 00:00:00
+  Invoking Larsson-Sadakane on ranks
+  Invoking Larsson-Sadakane on ranks time: 00:00:00
+  Sanity-checking and returning
+Building samples
+Reserving space for 12 sample suffixes
+Generating random suffixes
+QSorting 12 sample offsets, eliminating duplicates
+QSorting sample offsets, eliminating duplicates time: 00:00:00
+Multikey QSorting 12 samples
+  (Using difference cover)
+  Multikey QSorting samples time: 00:00:00
+Calculating bucket sizes
+Splitting and merging
+  Splitting and merging time: 00:00:00
+Avg bucket size: 756159 (target: 141779)
+Converting suffix-array elements to index image
+Allocating ftab, absorbFtab
+Entering Ebwt loop
+Getting block 1 of 1
+  No samples; assembling all-inclusive block
+  Sorting block of length 756159 for bucket 1
+  (Using difference cover)
+  Sorting block time: xxxx
+Returning block of 756160 for bucket 1
+Exited Ebwt loop
+fchr[A]: 0
+fchr[C]: 235897
+fchr[G]: 235897
+fchr[T]: 386401
+fchr[$]: 756159
+Exiting Ebwt::buildToDisk()
+Returning from initFromVector
+Wrote 4446745 bytes to primary EBWT file: BS_CT.1.bt2
+Wrote 189044 bytes to secondary EBWT file: BS_CT.2.bt2
+Re-opening _in1 and _in2 as input streams
+Returning from Ebwt constructor
+Headers:
+    len: 756159
+    bwtLen: 756160
+    sz: 189040
+    bwtSz: 189040
+    lineRate: 6
+    offRate: 4
+    offMask: 0xfffffff0
+    ftabChars: 10
+    eftabLen: 20
+    eftabSz: 80
+    ftabLen: 1048577
+    ftabSz: 4194308
+    offsLen: 47260
+    offsSz: 189040
+    lineSz: 64
+    sideSz: 64
+    sideBwtSz: 48
+    sideBwtLen: 192
+    numSides: 3939
+    numLines: 3939
+    ebwtTotLen: 252096
+    ebwtTotSz: 252096
+    color: 0
+    reverse: 0
+Total time for call to driver() for forward index: xxxx
+Reading reference sizes
+  Time reading reference sizes: 00:00:00
+Calculating joined length
+Writing header
+Reserving space for joined string
+Joining reference sequences
+  Time to join reference sequences: 00:00:00
+  Time to reverse reference sequence: 00:00:00
+bmax according to bmaxDivN setting: 189039
+Using parameters --bmax 141780 --dcv 1024
+  Doing ahead-of-time memory usage test
+  Passed!  Constructing with these parameters: --bmax 141780 --dcv 1024
+Constructing suffix-array element generator
+Building DifferenceCoverSample
+  Building sPrime
+  Building sPrimeOrder
+  V-Sorting samples
+  V-Sorting samples time: 00:00:00
+  Allocating rank array
+  Ranking v-sort output
+  Ranking v-sort output time: 00:00:00
+  Invoking Larsson-Sadakane on ranks
+  Invoking Larsson-Sadakane on ranks time: 00:00:00
+  Sanity-checking and returning
+Building samples
+Reserving space for 12 sample suffixes
+Generating random suffixes
+QSorting 12 sample offsets, eliminating duplicates
+QSorting sample offsets, eliminating duplicates time: 00:00:00
+Multikey QSorting 12 samples
+  (Using difference cover)
+  Multikey QSorting samples time: 00:00:00
+Calculating bucket sizes
+Splitting and merging
+  Splitting and merging time: 00:00:00
+Avg bucket size: 756159 (target: 141779)
+Converting suffix-array elements to index image
+Allocating ftab, absorbFtab
+Entering Ebwt loop
+Getting block 1 of 1
+  No samples; assembling all-inclusive block
+  Sorting block of length 756159 for bucket 1
+  (Using difference cover)
+  Sorting block time: xxxx
+Returning block of 756160 for bucket 1
+Exited Ebwt loop
+fchr[A]: 0
+fchr[C]: 235897
+fchr[G]: 235897
+fchr[T]: 386401
+fchr[$]: 756159
+Exiting Ebwt::buildToDisk()
+Returning from initFromVector
+Wrote 4446745 bytes to primary EBWT file: BS_CT.rev.1.bt2
+Wrote 189044 bytes to secondary EBWT file: BS_CT.rev.2.bt2
+Re-opening _in1 and _in2 as input streams
+Returning from Ebwt constructor
+Headers:
+    len: 756159
+    bwtLen: 756160
+    sz: 189040
+    bwtSz: 189040
+    lineRate: 6
+    offRate: 4
+    offMask: 0xfffffff0
+    ftabChars: 10
+    eftabLen: 20
+    eftabSz: 80
+    ftabLen: 1048577
+    ftabSz: 4194308
+    offsLen: 47260
+    offsSz: 189040
+    lineSz: 64
+    sideSz: 64
+    sideBwtSz: 48
+    sideBwtLen: 192
+    numSides: 3939
+    numLines: 3939
+    ebwtTotLen: 252096
+    ebwtTotSz: 252096
+    color: 0
+    reverse: 1
+Total time for backward call to driver() for mirror index: 00:00:01
+Settings:
+  Output files: "BS_GA.*.bt2"
+  Line rate: 6 (line is 64 bytes)
+  Lines per side: 1 (side is 64 bytes)
+  Offset rate: 4 (one in 16)
+  FTable chars: 10
+  Strings: unpacked
+  Max bucket size: default
+  Max bucket size, sqrt multiplier: default
+  Max bucket size, len divisor: 4
+  Difference-cover sample period: 1024
+  Endianness: little
+  Actual local endianness: little
+  Sanity checking: disabled
+  Assertions: disabled
+  Random seed: 0
+  Sizeofs: void*:8, int:4, long:8, size_t:8
+Input files DNA, FASTA:
+  genome_mfa.GA_conversion.fa
+Building a SMALL index
+Reading reference sizes
+  Time reading reference sizes: 00:00:00
+Calculating joined length
+Writing header
+Reserving space for joined string
+Joining reference sequences
+  Time to join reference sequences: 00:00:00
+bmax according to bmaxDivN setting: 189039
+Using parameters --bmax 141780 --dcv 1024
+  Doing ahead-of-time memory usage test
+  Passed!  Constructing with these parameters: --bmax 141780 --dcv 1024
+Constructing suffix-array element generator
+Building DifferenceCoverSample
+  Building sPrime
+  Building sPrimeOrder
+  V-Sorting samples
+  V-Sorting samples time: 00:00:00
+  Allocating rank array
+  Ranking v-sort output
+  Ranking v-sort output time: 00:00:00
+  Invoking Larsson-Sadakane on ranks
+  Invoking Larsson-Sadakane on ranks time: 00:00:00
+  Sanity-checking and returning
+Building samples
+Reserving space for 12 sample suffixes
+Generating random suffixes
+QSorting 12 sample offsets, eliminating duplicates
+QSorting sample offsets, eliminating duplicates time: 00:00:00
+Multikey QSorting 12 samples
+  (Using difference cover)
+  Multikey QSorting samples time: 00:00:00
+Calculating bucket sizes
+Splitting and merging
+  Splitting and merging time: 00:00:00
+Avg bucket size: 756159 (target: 141779)
+Converting suffix-array elements to index image
+Allocating ftab, absorbFtab
+Entering Ebwt loop
+Getting block 1 of 1
+  No samples; assembling all-inclusive block
+  Sorting block of length 756159 for bucket 1
+  (Using difference cover)
+  Sorting block time: xxxx
+Returning block of 756160 for bucket 1
+Exited Ebwt loop
+fchr[A]: 0
+fchr[C]: 386401
+fchr[G]: 533276
+fchr[T]: 533276
+fchr[$]: 756159
+Exiting Ebwt::buildToDisk()
+Returning from initFromVector
+Wrote 4446745 bytes to primary EBWT file: BS_GA.1.bt2
+Wrote 189044 bytes to secondary EBWT file: BS_GA.2.bt2
+Re-opening _in1 and _in2 as input streams
+Returning from Ebwt constructor
+Headers:
+    len: 756159
+    bwtLen: 756160
+    sz: 189040
+    bwtSz: 189040
+    lineRate: 6
+    offRate: 4
+    offMask: 0xfffffff0
+    ftabChars: 10
+    eftabLen: 20
+    eftabSz: 80
+    ftabLen: 1048577
+    ftabSz: 4194308
+    offsLen: 47260
+    offsSz: 189040
+    lineSz: 64
+    sideSz: 64
+    sideBwtSz: 48
+    sideBwtLen: 192
+    numSides: 3939
+    numLines: 3939
+    ebwtTotLen: 252096
+    ebwtTotSz: 252096
+    color: 0
+    reverse: 0
+Total time for call to driver() for forward index: xxxx
+Reading reference sizes
+  Time reading reference sizes: 00:00:00
+Calculating joined length
+Writing header
+Reserving space for joined string
+Joining reference sequences
+  Time to join reference sequences: 00:00:00
+  Time to reverse reference sequence: 00:00:00
+bmax according to bmaxDivN setting: 189039
+Using parameters --bmax 141780 --dcv 1024
+  Doing ahead-of-time memory usage test
+  Passed!  Constructing with these parameters: --bmax 141780 --dcv 1024
+Constructing suffix-array element generator
+Building DifferenceCoverSample
+  Building sPrime
+  Building sPrimeOrder
+  V-Sorting samples
+  V-Sorting samples time: 00:00:00
+  Allocating rank array
+  Ranking v-sort output
+  Ranking v-sort output time: 00:00:00
+  Invoking Larsson-Sadakane on ranks
+  Invoking Larsson-Sadakane on ranks time: 00:00:00
+  Sanity-checking and returning
+Building samples
+Reserving space for 12 sample suffixes
+Generating random suffixes
+QSorting 12 sample offsets, eliminating duplicates
+QSorting sample offsets, eliminating duplicates time: 00:00:00
+Multikey QSorting 12 samples
+  (Using difference cover)
+  Multikey QSorting samples time: 00:00:00
+Calculating bucket sizes
+Splitting and merging
+  Splitting and merging time: 00:00:00
+Avg bucket size: 756159 (target: 141779)
+Converting suffix-array elements to index image
+Allocating ftab, absorbFtab
+Entering Ebwt loop
+Getting block 1 of 1
+  No samples; assembling all-inclusive block
+  Sorting block of length 756159 for bucket 1
+  (Using difference cover)
+  Sorting block time: xxxx
+Returning block of 756160 for bucket 1
+Exited Ebwt loop
+fchr[A]: 0
+fchr[C]: 386401
+fchr[G]: 533276
+fchr[T]: 533276
+fchr[$]: 756159
+Exiting Ebwt::buildToDisk()
+Returning from initFromVector
+Wrote 4446745 bytes to primary EBWT file: BS_GA.rev.1.bt2
+Wrote 189044 bytes to secondary EBWT file: BS_GA.rev.2.bt2
+Re-opening _in1 and _in2 as input streams
+Returning from Ebwt constructor
+Headers:
+    len: 756159
+    bwtLen: 756160
+    sz: 189040
+    bwtSz: 189040
+    lineRate: 6
+    offRate: 4
+    offMask: 0xfffffff0
+    ftabChars: 10
+    eftabLen: 20
+    eftabSz: 80
+    ftabLen: 1048577
+    ftabSz: 4194308
+    offsLen: 47260
+    offsSz: 189040
+    lineSz: 64
+    sideSz: 64
+    sideBwtSz: 48
+    sideBwtLen: 192
+    numSides: 3939
+    numLines: 3939
+    ebwtTotLen: 252096
+    ebwtTotSz: 252096
+    color: 0
+    reverse: 1
+Total time for backward call to driver() for mirror index: 00:00:01
+Running bismark with: 'bismark --bam --temp_dir /tmp/tmpi3V3GI -o /tmp/tmpi3V3GI/results --quiet --fastq -L 20 -D 15 -R 2 --pbat --un --ambiguous /tmp/tmpuqE7r1 input_1.fq'
+Bowtie 2 seems to be working fine (tested command 'bowtie2 --version' [2.3.5])
+Output format is BAM (default)
+Alignments will be written out in BAM format. Samtools found here: '/home/abretaud/miniconda3/envs/mulled-v1-9f2317dbfb405ed6926c55752e5c11678eee3256a6ea680d1c0f912251153030/bin/samtools'
+Reference genome folder provided is /tmp/tmpuqE7r1/	(absolute path is '/tmp/tmpuqE7r1/)'
+FastQ format specified
+
+Input files to be analysed (in current folder '/tmp/tmpU_oiEI/job_working_directory/000/4/working'):
+input_1.fq
+Library was specified as PBAT-Seq (Post-Bisulfite Adapter Tagging), only performing alignments to the complementary strands (CTOT and CTOB)
+Created output directory /tmp/tmpi3V3GI/results/!
+
+Output will be written into the directory: /tmp/tmpi3V3GI/results/
+
+Using temp directory: /tmp/tmpi3V3GI
+Temporary files will be written into the directory: /tmp/tmpi3V3GI/
+Setting parallelization to single-threaded (default)
+
+Summary of all aligner options:	-q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet
+Current working directory is: /tmp/tmpU_oiEI/job_working_directory/000/4/working
+
+Now reading in and storing sequence information of the genome specified in: /tmp/tmpuqE7r1/
+
+chr chrY_JH584300_random (182347 bp)
+chr chrY_JH584301_random (259875 bp)
+chr chrY_JH584302_random (155838 bp)
+chr chrY_JH584303_random (158099 bp)
+
+Single-core mode: setting pid to 1
+
+Single-end alignments will be performed
+=======================================
+
+Input file is in FastQ format
+Writing a G -> A converted version of the input file input_1.fq to /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq
+
+Created G -> A converted version of the FastQ file input_1.fq (44115 sequences in total)
+
+Input file is input_1.fq_G_to_A.fastq (FastQ)
+
+Now running 2 instances of Bowtie 2 against the bisulfite genome of /tmp/tmpuqE7r1/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet
+
+Now starting the Bowtie 2 aligner for GAreadCTgenome (reading in sequences from /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq with options -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet --nofw)
+Using Bowtie 2 index: /tmp/tmpuqE7r1/Bisulfite_Genome/CT_conversion/BS_CT
+
+Found first alignment:	1_1	4	*	0	0	*	*	0	0	TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA	AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE	YT:Z:UU
+Now starting the Bowtie 2 aligner for GAreadGAgenome (reading in sequences from /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq with options -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet --norc)
+Using Bowtie 2 index: /tmp/tmpuqE7r1/Bisulfite_Genome/GA_conversion/BS_GA
+
+Found first alignment:	1_1	4	*	0	0	*	*	0	0	TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA	AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE	YT:Z:UU
+
+>>> Writing bisulfite mapping results to /tmp/tmpi3V3GI/results/input_1_bismark_bt2.bam <<<
+
+Unmapped sequences will be written to /tmp/tmpi3V3GI/results/input_1.fq_unmapped_reads.fq.gz
+Ambiguously mapping sequences will be written to /tmp/tmpi3V3GI/results/input_1.fq_ambiguous_reads.fq.gz
+
+Reading in the sequence file input_1.fq
+Processed 44115 sequences in total
+
+
+Successfully deleted the temporary file /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq
+
+Final Alignment report
+======================
+Sequences analysed in total:	44115
+Number of alignments with a unique best hit from the different alignments:	13
+Mapping efficiency:	0.0%
+
+Sequences with no alignments under any condition:	44059
+Sequences did not map uniquely:	43
+Sequences which were discarded because genomic sequence could not be extracted:	0
+
+Number of sequences with unique best (first) alignment came from the bowtie output:
+CT/CT:	0	((converted) top strand)
+CT/GA:	0	((converted) bottom strand)
+GA/CT:	11	(complementary to (converted) top strand)
+GA/GA:	2	(complementary to (converted) bottom strand)
+
+Final Cytosine Methylation Report
+=================================
+Total number of C's analysed:	307
+
+Total methylated C's in CpG context:	1
+Total methylated C's in CHG context:	3
+Total methylated C's in CHH context:	227
+Total methylated C's in Unknown context:	0
+
+Total unmethylated C's in CpG context:	1
+Total unmethylated C's in CHG context:	4
+Total unmethylated C's in CHH context:	71
+Total unmethylated C's in Unknown context:	0
+
+C methylated in CpG context:	50.0%
+C methylated in CHG context:	42.9%
+C methylated in CHH context:	76.2%
+Can't determine percentage of methylated Cs in Unknown context (CN or CHN) if value was 0
+
+
+Bismark completed in xxxx
+
+====================
+Bismark run complete
+====================
+