Mercurial > repos > bgruening > bismark
changeset 17:aa9bf0f29a9f draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit d85012b50faac3234496bb51e2a29c2d8c113bde"
author | bgruening |
---|---|
date | Wed, 28 Aug 2019 07:08:45 -0400 |
parents | a4504327c890 |
children | bb74e17e47d7 |
files | bismark_bowtie2_wrapper.xml bismark_wrapper.py test-data/mapped_reads_pbat.bam test-data/mapping_report_pbat.txt test-data/summary_pbat.txt |
diffstat | 5 files changed, 624 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/bismark_bowtie2_wrapper.xml Wed Aug 21 12:59:48 2019 -0400 +++ b/bismark_bowtie2_wrapper.xml Wed Aug 28 07:08:45 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="bismark_bowtie2" name="Bismark Mapper" version="0.22.1+galaxy3" profile="18.01"> +<tool id="bismark_bowtie2" name="Bismark Mapper" version="0.22.1+galaxy4" profile="18.01"> <description>Bisulfite reads mapper</description> <requirements> <requirement type="package" version="0.22.1">bismark</requirement> @@ -12,11 +12,20 @@ #if $singlePaired.input_singles.ext == "fasta": #set read1 = 'input_1.fa' #elif $singlePaired.input_singles.ext in ["fastq.gz", "fastqsanger.gz"]: - #set read1 = 'input_1.fq.gz' + #if $params.pbat == "--pbat" + #set read1 = 'input_1.fq' + #else + #set read1 = 'input_1.fq.gz' + #end if #else #set read1 = 'input_1.fq' #end if - ln -s '${singlePaired.input_singles}' '${read1}' && + + #if $params.pbat == "--pbat" and $singlePaired.input_singles.ext in ["fastq.gz", "fastqsanger.gz"]: + zcat '${singlePaired.input_singles}' > '${read1}' && + #else + ln -s '${singlePaired.input_singles}' '${read1}' && + #end if #else: #set $mate1 = list() #set $mate2 = list() @@ -25,20 +34,38 @@ #if $mate_pair.input_mate1.ext == "fasta": #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fa' #elif $mate_pair.input_mate1.ext in ["fastq.gz", "fastqsanger.gz"]: - #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq.gz' + #if $params.pbat == "--pbat" + #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq' + #else + #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq.gz' + #end if #else #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq' #end if - ln -s '${mate_pair.input_mate1}' '${read1}' && + + #if $params.pbat == "--pbat" and $mate_pair.input_mate1.ext in ["fastq.gz", "fastqsanger.gz"]: + zcat '${mate_pair.input_mate1}' > '${read1}' && + #else + ln -s '${mate_pair.input_mate1}' '${read1}' && + #end if #if $mate_pair.input_mate2.ext == "fasta": #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fa' #elif $mate_pair.input_mate2.ext in ["fastq.gz", "fastqsanger.gz"]: - #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq.gz' + #if $params.pbat == "--pbat" + #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq' + #else + #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq.gz' + #end if #else #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq' #end if - ln -s '${mate_pair.input_mate2}' '${read2}' && + + #if $params.pbat == "--pbat" and $mate_pair.input_mate2.ext in ["fastq.gz", "fastqsanger.gz"]: + zcat '${mate_pair.input_mate2}' > '${read2}' && + #else + ln -s '${mate_pair.input_mate2}' '${read2}' && + #end if $mate1.append( str($read1) ) $mate2.append( str($read2) ) @@ -138,6 +165,8 @@ $params.non_directional + $params.pbat + #if $params.bismark_stdout: --stdout '$output_stdout' #end if @@ -289,6 +318,11 @@ <param argument="--non-directional" name="non_directional" type="boolean" truevalue="--non-directional" falsevalue="" checked="false" label="The sequencing library was constructed in a non strand-specific manner, alignments to all four bisulfite strands will be reported."/> + <param argument="--pbat" name="pbat" type="boolean" + truevalue="--pbat" falsevalue="" checked="false" + label="Treat input data as PBAT-Seq libraries" + help="Use this option only if you are certain that your libraries were constructed following a PBAT protocol. If you don't know what PBAT-Seq is you should not specify this option." /> + <!--end output options --> </when> <!-- full --> @@ -544,6 +578,38 @@ <has_text text="--score-min 'L,0,-0.8'" /> </assert_command> </test> + <test> + <param name="genomeSource" value="history"/> + <param name="own_file" value="mm10.tiny.fa.gz" /> + <param name="sPaired" value="single"/> + <param name="input_singles" value="input1.fq.gzip" ftype="fastqsanger.gz"/> + <param name="sort_bam" value="false"/> + <param name="settingsType" value="custom"/> + <param name="suppressed_read_file" value="true"/> + <param name="unmapped_read_file" value="true"/> + <param name="bismark_stdout" value="true"/> + <param name="isReportOutput" value="true"/> + <conditional name="params"> + <param name="settingsType" value="custom" /> + <param name="pbat" value="true" /> + </conditional> + + <output name="output_stdout" file="summary_pbat.txt" ftype="txt" lines_diff="94"> + <assert_contents> + <has_text text="Sequences analysed in total:" /> + <has_text text="44115" /> + <has_text text="Mapping efficiency:" /> + <has_text text="0.0%" /> + <has_text text="Bismark run complete" /> + </assert_contents> + </output> + <output name="report_file" file="mapping_report_pbat.txt" ftype="txt" lines_diff="6"/> + <output name="output" file="mapped_reads_pbat.bam" ftype="qname_input_sorted.bam" lines_diff="14"/> + + <assert_command> + <has_text text="--pbat" /> + </assert_command> + </test> </tests> <help>
--- a/bismark_wrapper.py Wed Aug 21 12:59:48 2019 -0400 +++ b/bismark_wrapper.py Wed Aug 28 07:08:45 2019 -0400 @@ -69,6 +69,7 @@ parser.add_argument('--fasta', action='store_true', help='Query filetype is in FASTA format') parser.add_argument('--phred64-quals', dest='phred64', action="store_true") parser.add_argument('--non-directional', dest='non_directional', action="store_true") + parser.add_argument('--pbat', dest='pbat', action="store_true") parser.add_argument('--skip-reads', dest='skip_reads', type=int) parser.add_argument('--score-min', dest='score_min', type=str) @@ -153,7 +154,9 @@ # Build bismark command tmp_bismark_dir = tempfile.mkdtemp() output_dir = os.path.join(tmp_bismark_dir, 'results') - cmd = ['bismark', '--bam', '--gzip', '--temp_dir', tmp_bismark_dir, '-o', output_dir, '--quiet'] + cmd = ['bismark', '--bam', '--temp_dir', tmp_bismark_dir, '-o', output_dir, '--quiet'] + if not args.pbat: + cmd.append('--gzip') if args.fasta: # the query input files (specified as mate1,mate2 or singles) are FastA @@ -187,6 +190,8 @@ cmd.append('--phred64-quals') if args.non_directional: cmd.append('--non-directional') + if args.pbat: + cmd.append('--pbat') if args.suppress_header: cmd.append('--sam-no-hd') if args.output_unmapped_reads or (args.output_unmapped_reads_l and args.output_unmapped_reads_r): @@ -223,24 +228,24 @@ output_report_file.close() if args.output_suppressed_reads: - if glob(os.path.join(output_dir, '*ambiguous_reads.txt')): - shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads.txt'))[0], args.output_suppressed_reads) + if glob(os.path.join(output_dir, '*ambiguous_reads.*')): + shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads.*'))[0], args.output_suppressed_reads) if args.output_suppressed_reads_l: - if glob(os.path.join(output_dir, '*ambiguous_reads_1.txt')): - shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_1.txt'))[0], args.output_suppressed_reads_l) + if glob(os.path.join(output_dir, '*ambiguous_reads_1.*')): + shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_1.*'))[0], args.output_suppressed_reads_l) if args.output_suppressed_reads_r: - if glob(os.path.join(output_dir, '*ambiguous_reads_2.txt')): - shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_2.txt'))[0], args.output_suppressed_reads_r) + if glob(os.path.join(output_dir, '*ambiguous_reads_2.*')): + shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_2.*'))[0], args.output_suppressed_reads_r) if args.output_unmapped_reads: - if glob(os.path.join(output_dir, '*unmapped_reads.txt')): - shutil.move(glob(os.path.join(output_dir, '*unmapped_reads.txt'))[0], args.output_unmapped_reads) + if glob(os.path.join(output_dir, '*unmapped_reads.*')): + shutil.move(glob(os.path.join(output_dir, '*unmapped_reads.*'))[0], args.output_unmapped_reads) if args.output_unmapped_reads_l: - if glob(os.path.join(output_dir, '*unmapped_reads_1.txt')): - shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_1.txt'))[0], args.output_unmapped_reads_l) + if glob(os.path.join(output_dir, '*unmapped_reads_1.*')): + shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_1.*'))[0], args.output_unmapped_reads_l) if args.output_unmapped_reads_r: - if glob(os.path.join(output_dir, '*unmapped_reads_2.txt')): - shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_2.txt'))[0], args.output_unmapped_reads_r) + if glob(os.path.join(output_dir, '*unmapped_reads_2.*')): + shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_2.*'))[0], args.output_unmapped_reads_r) try: # merge all bam files
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mapping_report_pbat.txt Wed Aug 28 07:08:45 2019 -0400 @@ -0,0 +1,40 @@ +Bismark report for: input_1.fq (version: v0.22.1) +Option '--pbat' specified: alignments to original strands (OT and OB) strands were ignored (i.e. not performed) +Bismark was run with Bowtie 2 against the bisulfite genome of /tmp/tmpuqE7r1/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet + +Final Alignment report +====================== +Sequences analysed in total: 44115 +Number of alignments with a unique best hit from the different alignments: 13 +Mapping efficiency: 0.0% +Sequences with no alignments under any condition: 44059 +Sequences did not map uniquely: 43 +Sequences which were discarded because genomic sequence could not be extracted: 0 + +Number of sequences with unique best (first) alignment came from the bowtie output: +CT/CT: 0 ((converted) top strand) +CT/GA: 0 ((converted) bottom strand) +GA/CT: 11 (complementary to (converted) top strand) +GA/GA: 2 (complementary to (converted) bottom strand) + +Final Cytosine Methylation Report +================================= +Total number of C's analysed: 307 + +Total methylated C's in CpG context: 1 +Total methylated C's in CHG context: 3 +Total methylated C's in CHH context: 227 +Total methylated C's in Unknown context: 0 + +Total unmethylated C's in CpG context: 1 +Total unmethylated C's in CHG context: 4 +Total unmethylated C's in CHH context: 71 +Total unmethylated C's in Unknown context: 0 + +C methylated in CpG context: 50.0% +C methylated in CHG context: 42.9% +C methylated in CHH context: 76.2% +Can't determine percentage of methylated Cs in Unknown context (CN or CHN) if value was 0 + + +Bismark completed in 0d 0h 0m 7s
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/summary_pbat.txt Wed Aug 28 07:08:45 2019 -0400 @@ -0,0 +1,493 @@ +Create a temporary index with the offered files from the user. Utilizing the script: bismark_genome_preparation +Generating index with: 'bismark_genome_preparation --bowtie2 /tmp/tmpuqE7r1' +Writing bisulfite genomes out into a single MFA (multi FastA) file + +Bisulfite Genome Indexer version v0.22.1 (last modified: 14 April 2019) + +Step I - Prepare genome folders - completed + + + +Total number of conversions performed: +C->T: 146875 +G->A: 150504 + +Step II - Genome bisulfite conversions - completed + + +Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer +Please be aware that this process can - depending on genome size - take several hours! +Settings: + Output files: "BS_CT.*.bt2" + Line rate: 6 (line is 64 bytes) + Lines per side: 1 (side is 64 bytes) + Offset rate: 4 (one in 16) + FTable chars: 10 + Strings: unpacked + Max bucket size: default + Max bucket size, sqrt multiplier: default + Max bucket size, len divisor: 4 + Difference-cover sample period: 1024 + Endianness: little + Actual local endianness: little + Sanity checking: disabled + Assertions: disabled + Random seed: 0 + Sizeofs: void*:8, int:4, long:8, size_t:8 +Input files DNA, FASTA: + genome_mfa.CT_conversion.fa +Building a SMALL index +Reading reference sizes + Time reading reference sizes: 00:00:00 +Calculating joined length +Writing header +Reserving space for joined string +Joining reference sequences + Time to join reference sequences: 00:00:00 +bmax according to bmaxDivN setting: 189039 +Using parameters --bmax 141780 --dcv 1024 + Doing ahead-of-time memory usage test + Passed! Constructing with these parameters: --bmax 141780 --dcv 1024 +Constructing suffix-array element generator +Building DifferenceCoverSample + Building sPrime + Building sPrimeOrder + V-Sorting samples + V-Sorting samples time: 00:00:00 + Allocating rank array + Ranking v-sort output + Ranking v-sort output time: 00:00:00 + Invoking Larsson-Sadakane on ranks + Invoking Larsson-Sadakane on ranks time: 00:00:00 + Sanity-checking and returning +Building samples +Reserving space for 12 sample suffixes +Generating random suffixes +QSorting 12 sample offsets, eliminating duplicates +QSorting sample offsets, eliminating duplicates time: 00:00:00 +Multikey QSorting 12 samples + (Using difference cover) + Multikey QSorting samples time: 00:00:00 +Calculating bucket sizes +Splitting and merging + Splitting and merging time: 00:00:00 +Avg bucket size: 756159 (target: 141779) +Converting suffix-array elements to index image +Allocating ftab, absorbFtab +Entering Ebwt loop +Getting block 1 of 1 + No samples; assembling all-inclusive block + Sorting block of length 756159 for bucket 1 + (Using difference cover) + Sorting block time: xxxx +Returning block of 756160 for bucket 1 +Exited Ebwt loop +fchr[A]: 0 +fchr[C]: 235897 +fchr[G]: 235897 +fchr[T]: 386401 +fchr[$]: 756159 +Exiting Ebwt::buildToDisk() +Returning from initFromVector +Wrote 4446745 bytes to primary EBWT file: BS_CT.1.bt2 +Wrote 189044 bytes to secondary EBWT file: BS_CT.2.bt2 +Re-opening _in1 and _in2 as input streams +Returning from Ebwt constructor +Headers: + len: 756159 + bwtLen: 756160 + sz: 189040 + bwtSz: 189040 + lineRate: 6 + offRate: 4 + offMask: 0xfffffff0 + ftabChars: 10 + eftabLen: 20 + eftabSz: 80 + ftabLen: 1048577 + ftabSz: 4194308 + offsLen: 47260 + offsSz: 189040 + lineSz: 64 + sideSz: 64 + sideBwtSz: 48 + sideBwtLen: 192 + numSides: 3939 + numLines: 3939 + ebwtTotLen: 252096 + ebwtTotSz: 252096 + color: 0 + reverse: 0 +Total time for call to driver() for forward index: xxxx +Reading reference sizes + Time reading reference sizes: 00:00:00 +Calculating joined length +Writing header +Reserving space for joined string +Joining reference sequences + Time to join reference sequences: 00:00:00 + Time to reverse reference sequence: 00:00:00 +bmax according to bmaxDivN setting: 189039 +Using parameters --bmax 141780 --dcv 1024 + Doing ahead-of-time memory usage test + Passed! Constructing with these parameters: --bmax 141780 --dcv 1024 +Constructing suffix-array element generator +Building DifferenceCoverSample + Building sPrime + Building sPrimeOrder + V-Sorting samples + V-Sorting samples time: 00:00:00 + Allocating rank array + Ranking v-sort output + Ranking v-sort output time: 00:00:00 + Invoking Larsson-Sadakane on ranks + Invoking Larsson-Sadakane on ranks time: 00:00:00 + Sanity-checking and returning +Building samples +Reserving space for 12 sample suffixes +Generating random suffixes +QSorting 12 sample offsets, eliminating duplicates +QSorting sample offsets, eliminating duplicates time: 00:00:00 +Multikey QSorting 12 samples + (Using difference cover) + Multikey QSorting samples time: 00:00:00 +Calculating bucket sizes +Splitting and merging + Splitting and merging time: 00:00:00 +Avg bucket size: 756159 (target: 141779) +Converting suffix-array elements to index image +Allocating ftab, absorbFtab +Entering Ebwt loop +Getting block 1 of 1 + No samples; assembling all-inclusive block + Sorting block of length 756159 for bucket 1 + (Using difference cover) + Sorting block time: xxxx +Returning block of 756160 for bucket 1 +Exited Ebwt loop +fchr[A]: 0 +fchr[C]: 235897 +fchr[G]: 235897 +fchr[T]: 386401 +fchr[$]: 756159 +Exiting Ebwt::buildToDisk() +Returning from initFromVector +Wrote 4446745 bytes to primary EBWT file: BS_CT.rev.1.bt2 +Wrote 189044 bytes to secondary EBWT file: BS_CT.rev.2.bt2 +Re-opening _in1 and _in2 as input streams +Returning from Ebwt constructor +Headers: + len: 756159 + bwtLen: 756160 + sz: 189040 + bwtSz: 189040 + lineRate: 6 + offRate: 4 + offMask: 0xfffffff0 + ftabChars: 10 + eftabLen: 20 + eftabSz: 80 + ftabLen: 1048577 + ftabSz: 4194308 + offsLen: 47260 + offsSz: 189040 + lineSz: 64 + sideSz: 64 + sideBwtSz: 48 + sideBwtLen: 192 + numSides: 3939 + numLines: 3939 + ebwtTotLen: 252096 + ebwtTotSz: 252096 + color: 0 + reverse: 1 +Total time for backward call to driver() for mirror index: 00:00:01 +Settings: + Output files: "BS_GA.*.bt2" + Line rate: 6 (line is 64 bytes) + Lines per side: 1 (side is 64 bytes) + Offset rate: 4 (one in 16) + FTable chars: 10 + Strings: unpacked + Max bucket size: default + Max bucket size, sqrt multiplier: default + Max bucket size, len divisor: 4 + Difference-cover sample period: 1024 + Endianness: little + Actual local endianness: little + Sanity checking: disabled + Assertions: disabled + Random seed: 0 + Sizeofs: void*:8, int:4, long:8, size_t:8 +Input files DNA, FASTA: + genome_mfa.GA_conversion.fa +Building a SMALL index +Reading reference sizes + Time reading reference sizes: 00:00:00 +Calculating joined length +Writing header +Reserving space for joined string +Joining reference sequences + Time to join reference sequences: 00:00:00 +bmax according to bmaxDivN setting: 189039 +Using parameters --bmax 141780 --dcv 1024 + Doing ahead-of-time memory usage test + Passed! Constructing with these parameters: --bmax 141780 --dcv 1024 +Constructing suffix-array element generator +Building DifferenceCoverSample + Building sPrime + Building sPrimeOrder + V-Sorting samples + V-Sorting samples time: 00:00:00 + Allocating rank array + Ranking v-sort output + Ranking v-sort output time: 00:00:00 + Invoking Larsson-Sadakane on ranks + Invoking Larsson-Sadakane on ranks time: 00:00:00 + Sanity-checking and returning +Building samples +Reserving space for 12 sample suffixes +Generating random suffixes +QSorting 12 sample offsets, eliminating duplicates +QSorting sample offsets, eliminating duplicates time: 00:00:00 +Multikey QSorting 12 samples + (Using difference cover) + Multikey QSorting samples time: 00:00:00 +Calculating bucket sizes +Splitting and merging + Splitting and merging time: 00:00:00 +Avg bucket size: 756159 (target: 141779) +Converting suffix-array elements to index image +Allocating ftab, absorbFtab +Entering Ebwt loop +Getting block 1 of 1 + No samples; assembling all-inclusive block + Sorting block of length 756159 for bucket 1 + (Using difference cover) + Sorting block time: xxxx +Returning block of 756160 for bucket 1 +Exited Ebwt loop +fchr[A]: 0 +fchr[C]: 386401 +fchr[G]: 533276 +fchr[T]: 533276 +fchr[$]: 756159 +Exiting Ebwt::buildToDisk() +Returning from initFromVector +Wrote 4446745 bytes to primary EBWT file: BS_GA.1.bt2 +Wrote 189044 bytes to secondary EBWT file: BS_GA.2.bt2 +Re-opening _in1 and _in2 as input streams +Returning from Ebwt constructor +Headers: + len: 756159 + bwtLen: 756160 + sz: 189040 + bwtSz: 189040 + lineRate: 6 + offRate: 4 + offMask: 0xfffffff0 + ftabChars: 10 + eftabLen: 20 + eftabSz: 80 + ftabLen: 1048577 + ftabSz: 4194308 + offsLen: 47260 + offsSz: 189040 + lineSz: 64 + sideSz: 64 + sideBwtSz: 48 + sideBwtLen: 192 + numSides: 3939 + numLines: 3939 + ebwtTotLen: 252096 + ebwtTotSz: 252096 + color: 0 + reverse: 0 +Total time for call to driver() for forward index: xxxx +Reading reference sizes + Time reading reference sizes: 00:00:00 +Calculating joined length +Writing header +Reserving space for joined string +Joining reference sequences + Time to join reference sequences: 00:00:00 + Time to reverse reference sequence: 00:00:00 +bmax according to bmaxDivN setting: 189039 +Using parameters --bmax 141780 --dcv 1024 + Doing ahead-of-time memory usage test + Passed! Constructing with these parameters: --bmax 141780 --dcv 1024 +Constructing suffix-array element generator +Building DifferenceCoverSample + Building sPrime + Building sPrimeOrder + V-Sorting samples + V-Sorting samples time: 00:00:00 + Allocating rank array + Ranking v-sort output + Ranking v-sort output time: 00:00:00 + Invoking Larsson-Sadakane on ranks + Invoking Larsson-Sadakane on ranks time: 00:00:00 + Sanity-checking and returning +Building samples +Reserving space for 12 sample suffixes +Generating random suffixes +QSorting 12 sample offsets, eliminating duplicates +QSorting sample offsets, eliminating duplicates time: 00:00:00 +Multikey QSorting 12 samples + (Using difference cover) + Multikey QSorting samples time: 00:00:00 +Calculating bucket sizes +Splitting and merging + Splitting and merging time: 00:00:00 +Avg bucket size: 756159 (target: 141779) +Converting suffix-array elements to index image +Allocating ftab, absorbFtab +Entering Ebwt loop +Getting block 1 of 1 + No samples; assembling all-inclusive block + Sorting block of length 756159 for bucket 1 + (Using difference cover) + Sorting block time: xxxx +Returning block of 756160 for bucket 1 +Exited Ebwt loop +fchr[A]: 0 +fchr[C]: 386401 +fchr[G]: 533276 +fchr[T]: 533276 +fchr[$]: 756159 +Exiting Ebwt::buildToDisk() +Returning from initFromVector +Wrote 4446745 bytes to primary EBWT file: BS_GA.rev.1.bt2 +Wrote 189044 bytes to secondary EBWT file: BS_GA.rev.2.bt2 +Re-opening _in1 and _in2 as input streams +Returning from Ebwt constructor +Headers: + len: 756159 + bwtLen: 756160 + sz: 189040 + bwtSz: 189040 + lineRate: 6 + offRate: 4 + offMask: 0xfffffff0 + ftabChars: 10 + eftabLen: 20 + eftabSz: 80 + ftabLen: 1048577 + ftabSz: 4194308 + offsLen: 47260 + offsSz: 189040 + lineSz: 64 + sideSz: 64 + sideBwtSz: 48 + sideBwtLen: 192 + numSides: 3939 + numLines: 3939 + ebwtTotLen: 252096 + ebwtTotSz: 252096 + color: 0 + reverse: 1 +Total time for backward call to driver() for mirror index: 00:00:01 +Running bismark with: 'bismark --bam --temp_dir /tmp/tmpi3V3GI -o /tmp/tmpi3V3GI/results --quiet --fastq -L 20 -D 15 -R 2 --pbat --un --ambiguous /tmp/tmpuqE7r1 input_1.fq' +Bowtie 2 seems to be working fine (tested command 'bowtie2 --version' [2.3.5]) +Output format is BAM (default) +Alignments will be written out in BAM format. Samtools found here: '/home/abretaud/miniconda3/envs/mulled-v1-9f2317dbfb405ed6926c55752e5c11678eee3256a6ea680d1c0f912251153030/bin/samtools' +Reference genome folder provided is /tmp/tmpuqE7r1/ (absolute path is '/tmp/tmpuqE7r1/)' +FastQ format specified + +Input files to be analysed (in current folder '/tmp/tmpU_oiEI/job_working_directory/000/4/working'): +input_1.fq +Library was specified as PBAT-Seq (Post-Bisulfite Adapter Tagging), only performing alignments to the complementary strands (CTOT and CTOB) +Created output directory /tmp/tmpi3V3GI/results/! + +Output will be written into the directory: /tmp/tmpi3V3GI/results/ + +Using temp directory: /tmp/tmpi3V3GI +Temporary files will be written into the directory: /tmp/tmpi3V3GI/ +Setting parallelization to single-threaded (default) + +Summary of all aligner options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet +Current working directory is: /tmp/tmpU_oiEI/job_working_directory/000/4/working + +Now reading in and storing sequence information of the genome specified in: /tmp/tmpuqE7r1/ + +chr chrY_JH584300_random (182347 bp) +chr chrY_JH584301_random (259875 bp) +chr chrY_JH584302_random (155838 bp) +chr chrY_JH584303_random (158099 bp) + +Single-core mode: setting pid to 1 + +Single-end alignments will be performed +======================================= + +Input file is in FastQ format +Writing a G -> A converted version of the input file input_1.fq to /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq + +Created G -> A converted version of the FastQ file input_1.fq (44115 sequences in total) + +Input file is input_1.fq_G_to_A.fastq (FastQ) + +Now running 2 instances of Bowtie 2 against the bisulfite genome of /tmp/tmpuqE7r1/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet + +Now starting the Bowtie 2 aligner for GAreadCTgenome (reading in sequences from /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq with options -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet --nofw) +Using Bowtie 2 index: /tmp/tmpuqE7r1/Bisulfite_Genome/CT_conversion/BS_CT + +Found first alignment: 1_1 4 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UU +Now starting the Bowtie 2 aligner for GAreadGAgenome (reading in sequences from /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq with options -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet --norc) +Using Bowtie 2 index: /tmp/tmpuqE7r1/Bisulfite_Genome/GA_conversion/BS_GA + +Found first alignment: 1_1 4 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UU + +>>> Writing bisulfite mapping results to /tmp/tmpi3V3GI/results/input_1_bismark_bt2.bam <<< + +Unmapped sequences will be written to /tmp/tmpi3V3GI/results/input_1.fq_unmapped_reads.fq.gz +Ambiguously mapping sequences will be written to /tmp/tmpi3V3GI/results/input_1.fq_ambiguous_reads.fq.gz + +Reading in the sequence file input_1.fq +Processed 44115 sequences in total + + +Successfully deleted the temporary file /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq + +Final Alignment report +====================== +Sequences analysed in total: 44115 +Number of alignments with a unique best hit from the different alignments: 13 +Mapping efficiency: 0.0% + +Sequences with no alignments under any condition: 44059 +Sequences did not map uniquely: 43 +Sequences which were discarded because genomic sequence could not be extracted: 0 + +Number of sequences with unique best (first) alignment came from the bowtie output: +CT/CT: 0 ((converted) top strand) +CT/GA: 0 ((converted) bottom strand) +GA/CT: 11 (complementary to (converted) top strand) +GA/GA: 2 (complementary to (converted) bottom strand) + +Final Cytosine Methylation Report +================================= +Total number of C's analysed: 307 + +Total methylated C's in CpG context: 1 +Total methylated C's in CHG context: 3 +Total methylated C's in CHH context: 227 +Total methylated C's in Unknown context: 0 + +Total unmethylated C's in CpG context: 1 +Total unmethylated C's in CHG context: 4 +Total unmethylated C's in CHH context: 71 +Total unmethylated C's in Unknown context: 0 + +C methylated in CpG context: 50.0% +C methylated in CHG context: 42.9% +C methylated in CHH context: 76.2% +Can't determine percentage of methylated Cs in Unknown context (CN or CHN) if value was 0 + + +Bismark completed in xxxx + +==================== +Bismark run complete +==================== +