Mercurial > repos > devteam > samtools_stats
changeset 8:e28839a4b932 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_stats commit 0f75269223c0821c6c82acf98fde947d0f816f2b"
author | iuc |
---|---|
date | Tue, 28 Sep 2021 16:17:39 +0000 |
parents | 145f6d74ff5e |
children | 1cc79f49b8d5 |
files | macros.xml samtools_stats.xml test-data/1.stats.expected test-data/11.stats.expected test-data/11.stats.g4.expected test-data/12.2reads.nooverlap.expected test-data/12.2reads.overlap.expected test-data/12.3reads.nooverlap.expected test-data/12.3reads.overlap.expected test-data/2.stats.expected test-data/6.stats.expected test-data/samtools_stats_out1.tab test-data/samtools_stats_out1__sn.tab |
diffstat | 13 files changed, 1084 insertions(+), 92 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Thu Oct 17 02:21:23 2019 -0400 +++ b/macros.xml Tue Sep 28 16:17:39 2021 +0000 @@ -5,10 +5,16 @@ <yield/> </requirements> </xml> - <token name="@TOOL_VERSION@">1.9</token> - <token name="@FLAGS@">#set $flags = sum(map(int, str($filter).split(',')))</token> + <token name="@TOOL_VERSION@">1.13</token> + <token name="@PROFILE@">20.05</token> + <token name="@FLAGS@"><![CDATA[ + #set $flags = 0 + #if $filter + #set $flags = sum(map(int, str($filter).split(','))) + #end if + ]]></token> <token name="@PREPARE_IDX@"><![CDATA[ - ##prepare input and indices + ##prepare input and indices ln -s '$input' infile && #if $input.is_of_type('bam'): #if str( $input.metadata.bam_index ) != "None": @@ -25,7 +31,7 @@ #end if ]]></token> <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[ - ##prepare input and indices + ##prepare input and indices #for $i, $bam in enumerate( $input_bams ): ln -s '$bam' '${i}' && #if $bam.is_of_type('bam'): @@ -63,6 +69,51 @@ #set reffai=None #end if ]]></token> + + <xml name="optional_reference"> + <conditional name="addref_cond"> + <param name="addref_select" type="select" label="Use a reference sequence"> + <help>@HELP@</help> + <option value="no">No</option> + <option value="history">Use a genome/index from the history</option> + <option value="cached">Use a built-in genome</option> + </param> + <when value="no"/> + <when value="history"> + <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/> + </when> + <when value="cached"> + <param name="ref" argument="@ARGUMENT@" type="select" label="Reference"> + <options from_data_table="fasta_indexes"> + <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/> + </options> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/> + </param> + </when> + </conditional> + </xml> + <xml name="mandatory_reference" token_help="" token_argument=""> + <conditional 
name="addref_cond"> + <param name="addref_select" type="select" label="Use a reference sequence"> + <help>@HELP@</help> + <option value="history">Use a genome/index from the history</option> + <option value="cached">Use a built-in genome</option> + </param> + <when value="history"> + <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/> + </when> + <when value="cached"> + <param name="ref" argument="@ARGUMENT@" type="select" label="Reference"> + <options from_data_table="fasta_indexes"> + <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/> + <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" /> + </options> + </param> + </when> + </conditional> + </xml> + + <token name="@ADDTHREADS@"><![CDATA[ ##compute the number of ADDITIONAL threads to be used by samtools (-@) addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) && @@ -70,28 +121,28 @@ <token name="@ADDMEMORY@"><![CDATA[ ##compute the number of memory available to samtools sort (-m) ##use only 75% of available: https://github.com/samtools/samtools/issues/831 - addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && + addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && ((addmemory=addmemory*75/100)) && ]]></token> <xml name="seed_input"> - <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." /> + <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." 
/> </xml> - <xml name="flag_options"> - <option value="1">read is paired</option> - <option value="2">read is mapped in a proper pair</option> - <option value="4">read is unmapped</option> - <option value="8">mate is unmapped</option> - <option value="16">read reverse strand</option> - <option value="32">mate reverse strand</option> - <option value="64">read is the first in a pair</option> - <option value="128">read is the second in a pair</option> - <option value="256">alignment or read is not primary</option> - <option value="512">read fails platform/vendor quality checks</option> - <option value="1024">read is a PCR or optical duplicate</option> - <option value="2048">supplementary alignment</option> + <xml name="flag_options" token_s1="false" token_s2="false" token_s4="false" token_s8="false" token_s16="false" token_s32="false" token_s64="false" token_s128="false" token_s256="false" token_s512="false" token_s1024="false" token_s2048="false"> + <option value="1" selected="@S1@">Read is paired</option> + <option value="2" selected="@S2@">Read is mapped in a proper pair</option> + <option value="4" selected="@S4@">Read is unmapped</option> + <option value="8" selected="@S8@">Mate is unmapped</option> + <option value="16" selected="@S16@">Read is mapped to the reverse strand of the reference</option> + <option value="32" selected="@S32@">Mate is mapped to the reverse strand of the reference</option> + <option value="64" selected="@S64@">Read is the first in a pair</option> + <option value="128" selected="@S128@">Read is the second in a pair</option> + <option value="256" selected="@S256@">Alignment of the read is not primary</option> + <option value="512" selected="@S512@">Read fails platform/vendor quality checks</option> + <option value="1024" selected="@S1024@">Read is a PCR or optical duplicate</option> + <option value="2048" selected="@S2048@">Alignment is supplementary</option> </xml> - <!-- region specification macros and tokens for tools that allow the 
specification + <!-- region specification macros and tokens for tools that allow the specification of region by bed file / space separated list of regions --> <token name="@REGIONS_FILE@"><![CDATA[ #if $cond_region.select_region == 'tab':
--- a/samtools_stats.xml Thu Oct 17 02:21:23 2019 -0400 +++ b/samtools_stats.xml Tue Sep 28 16:17:39 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="samtools_stats" name="Samtools stats" version="2.0.2+galaxy2"> +<tool id="samtools_stats" name="Samtools stats" version="2.0.3" profile="@PROFILE@"> <description>generate statistics for BAM dataset</description> <macros> <import>macros.xml</import> @@ -16,16 +16,12 @@ #end if ${remove_dups} #if str( $filter_by_flags.filter_flags ) == "filter": - #if $filter_by_flags.require_flags: - #set $filter = $filter_by_flags.require_flags - @FLAGS@ - --required-flag $flags - #end if - #if $filter_by_flags.exclude_flags: - #set $filter = $filter_by_flags.exclude_flags - @FLAGS@ - --filtering-flag $flags - #end if + #set $filter = $filter_by_flags.require_flags + @FLAGS@ + --required-flag $flags + #set $filter = $filter_by_flags.exclude_flags + @FLAGS@ + --filtering-flag $flags #end if #if str($gc_depth): --GC-depth ${gc_depth} @@ -33,12 +29,9 @@ #if str($insert_size): --insert-size ${insert_size} #end if - ## The code below is commented out because using -I/--id options causes - ## in samtools up to 1.9 the following exception - ## Samtools-htslib: init_group_id() header parsing not yet implemented - ##if str($read_group) != "": - ## -I "${read_group}" - ##end if + ## #if $read_group + ## -I '$read_group' + ## #end if #if str($read_length): --read-length ${read_length} #end if @@ -63,7 +56,7 @@ #if str($cov_threshold): -g $cov_threshold #end if - -@ \$addthreads + -@ \$addthreads infile @REGIONS_MANUAL@ > '$output' @@ -139,40 +132,19 @@ <!-- TODO I would like to set the default values of float and int parameters as on the samtools stats help page, but then the tests don't work. Hence I leave the optional and give the defaults in the help --> <param name="gc_depth" argument="--GC-depth" type="float" optional="True" label="Size of GC-depth bins" help="Decreasing bin size increases memory requirement. 
default=2e4" /> <param name="insert_size" argument="--insert-size" type="integer" optional="True" label="Maximum insert size" help="default=8000" /> - <!-- - The -I option of samtools stats returns the following message up to version 1.9: - Samtools-htslib: init_group_id() header parsing not yet implemented - Because of this the section below is commented out until this stats bug is fixed - <param name="read_group" type="select" optional="true" label="Limit to a specific read group name" > - <options> - <filter type="data_meta" ref="input" key="read_groups" /> - </options> - </param> - --> + <!-- TODO https://github.com/samtools/samtools/issues/1489 --> + <param name="read_group" argument="--id" type="select" optional="true" label="Limit to a specific read group name" > + <options> + <filter type="data_meta" ref="input" key="read_groups" /> + </options> + </param> <param name="read_length" argument="--read-length" type="integer" optional="true" label="Minimum read length to generate statistics for" help="No cutoff if left empty" /> <param name="most_inserts" argument="--most-inserts" type="float" optional="true" label="Report only the main part of inserts" help="default=0.99" /> <param name="trim_quality" argument="--trim-quality" type="integer" optional="true" label="BWA trim parameter" help="default=0" /> - <conditional name="addref_cond"> - <param name="addref_select" type="select" label="Use a reference sequence" help="Required for GC-depth and mismatches-per-cycle calculation"> - <option value="no">No</option> - <option value="cached">Locally cached</option> - <option value="history">History</option> - </param> - <when value="no"/> - <when value="cached"> - <param name="ref" type="select" label="Using genome"> - <options from_data_table="fasta_indexes"> - <filter type="data_meta" ref="input" key="dbkey" column="dbkey" /> - </options> - </param> - </when> - <when value="history"> - <param name="ref" type="data" format="fasta" label="Using file" /> - </when> - 
</conditional> + <expand macro="optional_reference" argument="--ref-seq" help="Required for GC-depth and mismatches-per-cycle calculation"/> <!-- unfortunately -t takes tabular and not bed like view (otherwise a macro might have simplified this) --> - + <expand macro="regions_macro"/> <param name="sparse" argument="-x/--sparse" type="boolean" truevalue="-x" falsevalue="" checked="False" label="Suppress absence of insertions" help="Suppress outputting IS rows where there are no insertions."/> @@ -198,7 +170,7 @@ <param name="addref_select" value="history" /> <param name="ref" value="test.fa" ftype="fasta" /> </conditional> - <output name="output" file="1.stats.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="1.stats.expected" ftype="tabular" lines_diff="2" /> </test> <!-- test_cmd($opts,out=>'stat/1.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/1_map_cigar.sam | tail -n+4", exp_fix=>$efix);--> <test> @@ -207,7 +179,7 @@ <param name="addref_select" value="history" /> <param name="ref" value="test.fa" ftype="fasta" /> </conditional> - <output name="output" file="2.stats.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="2.stats.expected" ftype="tabular" lines_diff="2" /> </test> <!-- test_cmd($opts,out=>'stat/2.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/2_equal_cigar_full_seq.sam | tail -n+4", exp_fix=>$efix);--> <!-- test_cmd($opts,out=>'stat/3.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/3_map_cigar_equal_seq.sam | tail -n+4", exp_fix=>$efix);--> @@ -220,7 +192,7 @@ <param name="addref_select" value="history" /> <param name="ref" value="test.fa" ftype="fasta" /> </conditional> - <output name="output" file="6.stats.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="6.stats.expected" ftype="tabular" lines_diff="2" /> </test> <!-- 
test_cmd($opts,out=>'stat/6.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa -i 0 $$opts{path}/stat/5_insert_cigar.sam | tail -n+4", exp_fix=>$efix); --> <!-- test_cmd($opts,out=>'stat/7.stats.expected',cmd=>"$$opts{bin}/samtools stats -r $$opts{path}/stat/test.fa $$opts{path}/stat/7_supp.sam | tail -n+4", exp_fix=>$efix); --> @@ -236,7 +208,7 @@ <param name="select_region" value="tab"/> <param name="targetregions" value="11.stats.targets" ftype="tabular" /> </conditional> - <output name="output" file="11.stats.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="11.stats.expected" ftype="tabular" lines_diff="4" /> </test> <!-- test_cmd($opts,out=>'stat/11.stats.expected',cmd=>"$$opts{bin}/samtools stats -t $$opts{path}/stat/11.stats.targets $$opts{path}/stat/11_target.sam | tail -n+4", exp_fix=>$efix); --> @@ -251,7 +223,7 @@ <param name="regions_repeat_1|region" value="ref1:30-46"/> <param name="regions_repeat_2|region" value="ref1:39-56"/> </conditional> - <output name="output" file="11.stats.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="11.stats.expected" ftype="tabular" lines_diff="2" /> </test> <!-- test_cmd($opts,out=>'stat/11.stats.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/11_target.bam ref1:10-24 ref1:30-46 ref1:39-56 | tail -n+4", exp_fix=>$efix); --> @@ -265,7 +237,7 @@ <param name="targetregions" value="11.stats.targets" ftype="tabular" /> </conditional> <param name="cov_threshold" value="4" /> - <output name="output" file="11.stats.g4.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="11.stats.g4.expected" ftype="tabular" lines_diff="4" /> </test> <!-- test_cmd($opts,out=>'stat/11.stats.g4.expected',cmd=>"$$opts{bin}/samtools stats -g 4 -t $$opts{path}/stat/11.stats.targets $$opts{path}/stat/11_target.sam | tail -n+4", exp_fix=>$efix);--> <test> @@ -280,7 +252,7 @@ <param name="regions_repeat_2|region" value="ref1:39-56"/> 
</conditional> <param name="cov_threshold" value="4" /> - <output name="output" file="11.stats.g4.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="11.stats.g4.expected" ftype="tabular" lines_diff="2" /> </test> <!-- test_cmd($opts,out=>'stat/11.stats.g4.expected',cmd=>"$$opts{bin}/samtools stats -g 4 $$opts{path}/stat/11_target.bam ref1:10-24 ref1:30-46 ref1:39-56 | tail -n+4", exp_fix=>$efix); --> <test> @@ -292,7 +264,7 @@ <param name="select_region" value="tab"/> <param name="targetregions" value="12_3reads.bed" ftype="tabular" /> </conditional> - <output name="output" file="12.3reads.overlap.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="12.3reads.overlap.expected" ftype="tabular" lines_diff="4" /> </test> <!-- test_cmd($opts,out=>'stat/12.3reads.overlap.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/12_overlaps.bam -t $$opts{path}/stat/12_3reads.bed | tail -n+4", exp_fix=>$efix);--> <test> @@ -305,7 +277,7 @@ <param name="targetregions" value="12_3reads.bed" ftype="tabular" /> </conditional> <param name="remove_overlaps" value="-p"/> - <output name="output" file="12.3reads.nooverlap.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="12.3reads.nooverlap.expected" ftype="tabular" lines_diff="4" /> </test> <!-- test_cmd($opts,out=>'stat/12.3reads.nooverlap.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/12_overlaps.bam -p -t $$opts{path}/stat/12_3reads.bed | tail -n+4", exp_fix=>$efix);--> <test> @@ -317,7 +289,7 @@ <param name="select_region" value="tab"/> <param name="targetregions" value="12_2reads.bed" ftype="tabular" /> </conditional> - <output name="output" file="12.2reads.overlap.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="12.2reads.overlap.expected" ftype="tabular" lines_diff="4" /> </test> <!-- test_cmd($opts,out=>'stat/12.2reads.overlap.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/12_overlaps.bam -t 
$$opts{path}/stat/12_2reads.bed | tail -n+4", exp_fix=>$efix);--> <test> @@ -330,7 +302,7 @@ <param name="targetregions" value="12_2reads.bed" ftype="tabular" /> </conditional> <param name="remove_overlaps" value="-p"/> - <output name="output" file="12.2reads.nooverlap.expected" ftype="tabular" lines_diff="3" /> + <output name="output" file="12.2reads.nooverlap.expected" ftype="tabular" lines_diff="4" /> </test> <!-- test_cmd($opts,out=>'stat/12.2reads.nooverlap.expected',cmd=>"$$opts{bin}/samtools stats $$opts{path}/stat/12_overlaps.bam -p -t $$opts{path}/stat/12_2reads.bed | tail -n+4", exp_fix=>$efix);--> <test> @@ -357,6 +329,18 @@ <element name="Summary Numbers" ftype="tabular" file="samtools_stats_out1__sn.tab" /> </output_collection> </test> + <!-- test filtering by read group --> + <!-- <test> + <param name="input" value="11_target.sam" ftype="sam" /> + <conditional name="addref_cond"> + <param name="addref_select" value="no" /> + </conditional> + <param name="read_group" value="grp1" /> + <output name="output" file="11.stats.read_groups.expected" ftype="tabular" lines_diff="2" /> + <assert_command> + <has_text text="-I 'grp1'"/> + </assert_command> + </test> --> </tests> <help><![CDATA[ **What it does** @@ -367,4 +351,3 @@ ]]></help> <expand macro="citations"/> </tool> -
--- a/test-data/1.stats.expected Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/1.stats.expected Tue Sep 28 16:17:39 2021 +0000 @@ -1,8 +1,11 @@ +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file contains statistics for all reads. +# The command line was: stats --ref-seq reference.fa -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 1a1c1362 29c426ae 7bab45da # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 2 +SN raw total sequences: 2 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 2 SN is sorted: 1 @@ -17,6 +20,7 @@ SN reads MQ0: 0 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 70 # ignores clipping SN total first fragment length: 35 # ignores clipping SN total last fragment length: 35 # ignores clipping @@ -195,6 +199,42 @@ GCC 33 50.00 0.00 0.00 50.00 0.00 0.00 GCC 34 50.00 0.00 50.00 0.00 0.00 0.00 GCC 35 0.00 0.00 50.00 50.00 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 0.00 50.00 0.00 50.00 +GCT 2 50.00 0.00 50.00 0.00 +GCT 3 0.00 0.00 50.00 50.00 +GCT 4 0.00 0.00 100.00 0.00 +GCT 5 0.00 0.00 50.00 50.00 +GCT 6 0.00 50.00 0.00 50.00 +GCT 7 0.00 0.00 50.00 50.00 +GCT 8 0.00 0.00 0.00 100.00 +GCT 9 0.00 50.00 50.00 0.00 +GCT 10 50.00 0.00 50.00 0.00 +GCT 11 50.00 0.00 0.00 50.00 +GCT 12 50.00 0.00 50.00 0.00 +GCT 13 50.00 0.00 50.00 0.00 +GCT 14 0.00 0.00 0.00 100.00 +GCT 15 100.00 0.00 0.00 0.00 +GCT 16 50.00 0.00 0.00 50.00 +GCT 17 0.00 0.00 50.00 50.00 +GCT 18 0.00 50.00 50.00 0.00 +GCT 19 0.00 100.00 0.00 0.00 +GCT 20 0.00 0.00 50.00 50.00 +GCT 21 0.00 0.00 100.00 0.00 +GCT 22 0.00 50.00 0.00 50.00 +GCT 23 50.00 0.00 0.00 50.00 +GCT 24 50.00 0.00 50.00 0.00 +GCT 25 50.00 0.00 50.00 0.00 +GCT 26 0.00 0.00 100.00 0.00 +GCT 27 50.00 0.00 0.00 50.00 +GCT 28 0.00 0.00 50.00 50.00 +GCT 29 0.00 50.00 0.00 50.00 +GCT 30 0.00 50.00 0.00 50.00 +GCT 31 0.00 50.00 50.00 0.00 +GCT 32 0.00 0.00 100.00 0.00 +GCT 33 100.00 0.00 0.00 0.00 +GCT 34 0.00 0.00 50.00 50.00 +GCT 35 50.00 0.00 50.00 0.00 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 0.00 0.00 0.00 100.00 0.00 0.00 FBC 2 0.00 0.00 100.00 0.00 0.00 0.00 @@ -231,6 +271,8 @@ FBC 33 100.00 0.00 0.00 0.00 0.00 0.00 FBC 34 0.00 0.00 100.00 0.00 0.00 0.00 FBC 35 0.00 0.00 100.00 0.00 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 6 5 15 9 0 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 0.00 0.00 100.00 0.00 0.00 0.00 LBC 2 0.00 0.00 0.00 100.00 0.00 0.00 @@ -267,6 +309,8 @@ LBC 33 0.00 0.00 0.00 100.00 0.00 0.00 LBC 34 100.00 0.00 0.00 0.00 0.00 0.00 LBC 35 0.00 0.00 0.00 100.00 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 11 10 5 9 0 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs IS 0 0 0 0 0 IS 1 0 0 0 0
--- a/test-data/11.stats.expected Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/11.stats.expected Tue Sep 28 16:17:39 2021 +0000 @@ -1,8 +1,11 @@ +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file contains statistics for all reads. +# The command line was: stats -@ 0 infile ref1:10-24 ref1:30-46 ref1:39-56 # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK cb2d2d82 bcd83869 62ec814e # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 26 +SN raw total sequences: 26 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 26 SN is sorted: 1 @@ -17,6 +20,7 @@ SN reads MQ0: 1 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 260 # ignores clipping SN total first fragment length: 140 # ignores clipping SN total last fragment length: 120 # ignores clipping @@ -89,6 +93,17 @@ GCC 8 26.92 23.08 38.46 11.54 0.00 0.00 GCC 9 23.08 26.92 26.92 23.08 0.00 0.00 GCC 10 23.08 23.08 38.46 15.38 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 26.92 26.92 30.77 15.38 +GCT 2 7.69 38.46 26.92 26.92 +GCT 3 26.92 26.92 30.77 15.38 +GCT 4 11.54 34.62 26.92 26.92 +GCT 5 23.08 26.92 38.46 11.54 +GCT 6 11.54 34.62 23.08 30.77 +GCT 7 19.23 23.08 38.46 19.23 +GCT 8 11.54 38.46 23.08 26.92 +GCT 9 23.08 19.23 34.62 23.08 +GCT 10 11.54 34.62 26.92 26.92 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 21.43 42.86 21.43 14.29 0.00 0.00 FBC 2 7.14 28.57 42.86 21.43 0.00 0.00 @@ -100,6 +115,8 @@ FBC 8 21.43 28.57 28.57 21.43 0.00 0.00 FBC 9 21.43 21.43 35.71 21.43 0.00 0.00 FBC 10 14.29 28.57 35.71 21.43 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 24 43 45 28 0 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 16.67 41.67 8.33 33.33 0.00 0.00 LBC 2 33.33 8.33 50.00 8.33 0.00 0.00 @@ -111,6 +128,19 @@ LBC 8 33.33 16.67 50.00 0.00 0.00 0.00 LBC 9 25.00 33.33 16.67 25.00 0.00 0.00 LBC 10 33.33 16.67 41.67 8.33 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 30 33 36 21 0 +# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%] +BCC1 1 50.00 0.00 0.00 50.00 0.00 +BCC1 2 0.00 50.00 50.00 0.00 0.00 +BCC1 3 0.00 50.00 50.00 0.00 0.00 +BCC1 4 50.00 0.00 0.00 50.00 0.00 +# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part. +# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number. +QTQ1 1 +QTQ1 2 +QTQ1 3 +QTQ1 4 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs IS 0 0 0 0 0 IS 1 0 0 0 0
--- a/test-data/11.stats.g4.expected Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/11.stats.g4.expected Tue Sep 28 16:17:39 2021 +0000 @@ -1,8 +1,11 @@ +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file contains statistics for all reads. +# The command line was: stats -g 4 -@ 0 infile ref1:10-24 ref1:30-46 ref1:39-56 # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK cb2d2d82 bcd83869 62ec814e # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 26 +SN raw total sequences: 26 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 26 SN is sorted: 1 @@ -17,6 +20,7 @@ SN reads MQ0: 1 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 260 # ignores clipping SN total first fragment length: 140 # ignores clipping SN total last fragment length: 120 # ignores clipping @@ -89,6 +93,17 @@ GCC 8 26.92 23.08 38.46 11.54 0.00 0.00 GCC 9 23.08 26.92 26.92 23.08 0.00 0.00 GCC 10 23.08 23.08 38.46 15.38 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 26.92 26.92 30.77 15.38 +GCT 2 7.69 38.46 26.92 26.92 +GCT 3 26.92 26.92 30.77 15.38 +GCT 4 11.54 34.62 26.92 26.92 +GCT 5 23.08 26.92 38.46 11.54 +GCT 6 11.54 34.62 23.08 30.77 +GCT 7 19.23 23.08 38.46 19.23 +GCT 8 11.54 38.46 23.08 26.92 +GCT 9 23.08 19.23 34.62 23.08 +GCT 10 11.54 34.62 26.92 26.92 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 21.43 42.86 21.43 14.29 0.00 0.00 FBC 2 7.14 28.57 42.86 21.43 0.00 0.00 @@ -100,6 +115,8 @@ FBC 8 21.43 28.57 28.57 21.43 0.00 0.00 FBC 9 21.43 21.43 35.71 21.43 0.00 0.00 FBC 10 14.29 28.57 35.71 21.43 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 24 43 45 28 0 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 16.67 41.67 8.33 33.33 0.00 0.00 LBC 2 33.33 8.33 50.00 8.33 0.00 0.00 @@ -111,6 +128,19 @@ LBC 8 33.33 16.67 50.00 0.00 0.00 0.00 LBC 9 25.00 33.33 16.67 25.00 0.00 0.00 LBC 10 33.33 16.67 41.67 8.33 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 30 33 36 21 0 +# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%] +BCC1 1 50.00 0.00 0.00 50.00 0.00 +BCC1 2 0.00 50.00 50.00 0.00 0.00 +BCC1 3 0.00 50.00 50.00 0.00 0.00 +BCC1 4 50.00 0.00 0.00 50.00 0.00 +# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part. +# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number. +QTQ1 1 +QTQ1 2 +QTQ1 3 +QTQ1 4 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs IS 0 0 0 0 0 IS 1 0 0 0 0
--- a/test-data/12.2reads.nooverlap.expected Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/12.2reads.nooverlap.expected Tue Sep 28 16:17:39 2021 +0000 @@ -1,8 +1,11 @@ +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file contains statistics for all reads. +# The command line was: stats -t /tmp/tmp0r5zs075/files/b/2/1/dataset_b2175431-044e-449d-8f60-1bfd33679b61.dat -p -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 5b31676a b0edee94 471895da # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 2 +SN raw total sequences: 2 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 2 SN is sorted: 1 @@ -17,6 +20,7 @@ SN reads MQ0: 0 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 200 # ignores clipping SN total first fragment length: 100 # ignores clipping SN total last fragment length: 100 # ignores clipping @@ -353,6 +357,107 @@ GCC 98 50.00 50.00 0.00 0.00 0.00 0.00 GCC 99 50.00 50.00 0.00 0.00 0.00 0.00 GCC 100 100.00 0.00 0.00 0.00 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 50.00 0.00 50.00 0.00 +GCT 2 0.00 50.00 50.00 0.00 +GCT 3 0.00 50.00 0.00 50.00 +GCT 4 0.00 50.00 50.00 0.00 +GCT 5 0.00 0.00 50.00 50.00 +GCT 6 0.00 0.00 50.00 50.00 +GCT 7 0.00 50.00 0.00 50.00 +GCT 8 0.00 0.00 50.00 50.00 +GCT 9 0.00 50.00 0.00 50.00 +GCT 10 50.00 0.00 50.00 0.00 +GCT 11 0.00 0.00 100.00 0.00 +GCT 12 50.00 0.00 0.00 50.00 +GCT 13 0.00 50.00 50.00 0.00 +GCT 14 50.00 0.00 0.00 50.00 +GCT 15 0.00 0.00 50.00 50.00 +GCT 16 0.00 50.00 50.00 0.00 +GCT 17 0.00 50.00 0.00 50.00 +GCT 18 100.00 0.00 0.00 0.00 +GCT 19 50.00 0.00 0.00 50.00 +GCT 20 0.00 50.00 50.00 0.00 +GCT 21 50.00 0.00 0.00 50.00 +GCT 22 0.00 50.00 0.00 50.00 +GCT 23 0.00 0.00 0.00 100.00 +GCT 24 0.00 50.00 50.00 0.00 +GCT 25 0.00 0.00 0.00 100.00 +GCT 26 0.00 0.00 50.00 50.00 +GCT 27 0.00 100.00 0.00 0.00 +GCT 28 0.00 0.00 0.00 100.00 +GCT 29 50.00 0.00 50.00 0.00 +GCT 30 0.00 50.00 0.00 50.00 +GCT 31 0.00 50.00 0.00 50.00 +GCT 32 0.00 0.00 50.00 50.00 +GCT 33 0.00 50.00 0.00 50.00 +GCT 34 50.00 0.00 0.00 50.00 +GCT 35 0.00 50.00 0.00 50.00 +GCT 36 0.00 100.00 0.00 0.00 +GCT 37 0.00 50.00 0.00 50.00 +GCT 38 50.00 50.00 0.00 0.00 +GCT 39 100.00 0.00 0.00 0.00 +GCT 40 0.00 0.00 100.00 0.00 +GCT 41 0.00 50.00 0.00 50.00 +GCT 42 50.00 50.00 0.00 0.00 +GCT 43 0.00 0.00 50.00 50.00 +GCT 44 50.00 0.00 50.00 0.00 +GCT 45 50.00 0.00 50.00 0.00 +GCT 46 50.00 0.00 50.00 0.00 +GCT 47 50.00 0.00 50.00 0.00 +GCT 48 0.00 50.00 50.00 0.00 +GCT 49 50.00 0.00 0.00 50.00 +GCT 50 0.00 0.00 50.00 50.00 +GCT 51 0.00 0.00 100.00 0.00 +GCT 52 50.00 0.00 50.00 0.00 +GCT 53 0.00 50.00 0.00 50.00 +GCT 54 50.00 0.00 50.00 0.00 +GCT 55 50.00 0.00 0.00 50.00 +GCT 56 0.00 50.00 50.00 0.00 +GCT 57 100.00 0.00 0.00 0.00 +GCT 58 50.00 0.00 50.00 0.00 +GCT 59 100.00 0.00 0.00 0.00 +GCT 60 50.00 0.00 50.00 0.00 +GCT 61 0.00 0.00 0.00 100.00 +GCT 62 0.00 50.00 50.00 0.00 +GCT 63 50.00 0.00 0.00 50.00 +GCT 64 0.00 0.00 100.00 0.00 
+GCT 65 50.00 50.00 0.00 0.00 +GCT 66 0.00 50.00 0.00 50.00 +GCT 67 0.00 0.00 0.00 100.00 +GCT 68 0.00 50.00 0.00 50.00 +GCT 69 0.00 50.00 0.00 50.00 +GCT 70 50.00 0.00 0.00 50.00 +GCT 71 0.00 0.00 0.00 100.00 +GCT 72 0.00 50.00 0.00 50.00 +GCT 73 0.00 0.00 0.00 100.00 +GCT 74 0.00 50.00 0.00 50.00 +GCT 75 50.00 0.00 0.00 50.00 +GCT 76 0.00 50.00 0.00 50.00 +GCT 77 50.00 0.00 0.00 50.00 +GCT 78 50.00 0.00 0.00 50.00 +GCT 79 50.00 0.00 0.00 50.00 +GCT 80 50.00 0.00 0.00 50.00 +GCT 81 50.00 0.00 0.00 50.00 +GCT 82 50.00 0.00 0.00 50.00 +GCT 83 50.00 0.00 0.00 50.00 +GCT 84 50.00 0.00 0.00 50.00 +GCT 85 50.00 0.00 0.00 50.00 +GCT 86 50.00 0.00 0.00 50.00 +GCT 87 50.00 0.00 0.00 50.00 +GCT 88 50.00 0.00 0.00 50.00 +GCT 89 50.00 0.00 0.00 50.00 +GCT 90 50.00 0.00 0.00 50.00 +GCT 91 50.00 0.00 50.00 0.00 +GCT 92 50.00 0.00 50.00 0.00 +GCT 93 50.00 0.00 50.00 0.00 +GCT 94 50.00 0.00 0.00 50.00 +GCT 95 50.00 0.00 50.00 0.00 +GCT 96 50.00 0.00 50.00 0.00 +GCT 97 50.00 0.00 50.00 0.00 +GCT 98 50.00 0.00 50.00 0.00 +GCT 99 50.00 0.00 50.00 0.00 +GCT 100 50.00 0.00 0.00 50.00 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 0.00 0.00 0.00 100.00 0.00 0.00 FBC 2 0.00 0.00 100.00 0.00 0.00 0.00 @@ -454,6 +559,8 @@ FBC 98 0.00 100.00 0.00 0.00 0.00 0.00 FBC 99 0.00 100.00 0.00 0.00 0.00 0.00 FBC 100 100.00 0.00 0.00 0.00 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 50 21 14 15 0 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 0.00 0.00 100.00 0.00 0.00 0.00 LBC 2 0.00 0.00 100.00 0.00 0.00 0.00 @@ -555,6 +662,26 @@ LBC 98 100.00 0.00 0.00 0.00 0.00 0.00 LBC 99 100.00 0.00 0.00 0.00 0.00 0.00 LBC 100 100.00 0.00 0.00 0.00 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 41 20 23 16 0 +# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%] +BCC1 2 0.00 0.00 100.00 0.00 0.00 +BCC1 3 0.00 0.00 0.00 100.00 0.00 +BCC1 4 0.00 100.00 0.00 0.00 0.00 +BCC1 5 0.00 0.00 0.00 100.00 0.00 +BCC1 6 100.00 0.00 0.00 0.00 0.00 +BCC1 7 0.00 0.00 0.00 100.00 0.00 +BCC1 8 0.00 100.00 0.00 0.00 0.00 +# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part. +# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number. +QTQ1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +QTQ1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +QTQ1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 +QTQ1 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +QTQ1 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +QTQ1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. 
The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs IS 0 0 0 0 0 IS 1 0 0 0 0
--- a/test-data/12.2reads.overlap.expected Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/12.2reads.overlap.expected Tue Sep 28 16:17:39 2021 +0000 @@ -1,8 +1,11 @@ +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file contains statistics for all reads. +# The command line was: stats -t /tmp/tmp0r5zs075/files/2/a/2/dataset_2a2a3ee9-3133-4880-a37c-50b6354c9000.dat -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 5b31676a b0edee94 471895da # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 2 +SN raw total sequences: 2 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 2 SN is sorted: 1 @@ -17,6 +20,7 @@ SN reads MQ0: 0 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 200 # ignores clipping SN total first fragment length: 100 # ignores clipping SN total last fragment length: 100 # ignores clipping @@ -353,6 +357,107 @@ GCC 98 50.00 50.00 0.00 0.00 0.00 0.00 GCC 99 50.00 50.00 0.00 0.00 0.00 0.00 GCC 100 100.00 0.00 0.00 0.00 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 50.00 0.00 50.00 0.00 +GCT 2 0.00 50.00 50.00 0.00 +GCT 3 0.00 50.00 0.00 50.00 +GCT 4 0.00 50.00 50.00 0.00 +GCT 5 0.00 0.00 50.00 50.00 +GCT 6 0.00 0.00 50.00 50.00 +GCT 7 0.00 50.00 0.00 50.00 +GCT 8 0.00 0.00 50.00 50.00 +GCT 9 0.00 50.00 0.00 50.00 +GCT 10 50.00 0.00 50.00 0.00 +GCT 11 0.00 0.00 100.00 0.00 +GCT 12 50.00 0.00 0.00 50.00 +GCT 13 0.00 50.00 50.00 0.00 +GCT 14 50.00 0.00 0.00 50.00 +GCT 15 0.00 0.00 50.00 50.00 +GCT 16 0.00 50.00 50.00 0.00 +GCT 17 0.00 50.00 0.00 50.00 +GCT 18 100.00 0.00 0.00 0.00 +GCT 19 50.00 0.00 0.00 50.00 +GCT 20 0.00 50.00 50.00 0.00 +GCT 21 50.00 0.00 0.00 50.00 +GCT 22 0.00 50.00 0.00 50.00 +GCT 23 0.00 0.00 0.00 100.00 +GCT 24 0.00 50.00 50.00 0.00 +GCT 25 0.00 0.00 0.00 100.00 +GCT 26 0.00 0.00 50.00 50.00 +GCT 27 0.00 100.00 0.00 0.00 +GCT 28 0.00 0.00 0.00 100.00 +GCT 29 50.00 0.00 50.00 0.00 +GCT 30 0.00 50.00 0.00 50.00 +GCT 31 0.00 50.00 0.00 50.00 +GCT 32 0.00 0.00 50.00 50.00 +GCT 33 0.00 50.00 0.00 50.00 +GCT 34 50.00 0.00 0.00 50.00 +GCT 35 0.00 50.00 0.00 50.00 +GCT 36 0.00 100.00 0.00 0.00 +GCT 37 0.00 50.00 0.00 50.00 +GCT 38 50.00 50.00 0.00 0.00 +GCT 39 100.00 0.00 0.00 0.00 +GCT 40 0.00 0.00 100.00 0.00 +GCT 41 0.00 50.00 0.00 50.00 +GCT 42 50.00 50.00 0.00 0.00 +GCT 43 0.00 0.00 50.00 50.00 +GCT 44 50.00 0.00 50.00 0.00 +GCT 45 50.00 0.00 50.00 0.00 +GCT 46 50.00 0.00 50.00 0.00 +GCT 47 50.00 0.00 50.00 0.00 +GCT 48 0.00 50.00 50.00 0.00 +GCT 49 50.00 0.00 0.00 50.00 +GCT 50 0.00 0.00 50.00 50.00 +GCT 51 0.00 0.00 100.00 0.00 +GCT 52 50.00 0.00 50.00 0.00 +GCT 53 0.00 50.00 0.00 50.00 +GCT 54 50.00 0.00 50.00 0.00 +GCT 55 50.00 0.00 0.00 50.00 +GCT 56 0.00 50.00 50.00 0.00 +GCT 57 100.00 0.00 0.00 0.00 +GCT 58 50.00 0.00 50.00 0.00 +GCT 59 100.00 0.00 0.00 0.00 +GCT 60 50.00 0.00 50.00 0.00 +GCT 61 0.00 0.00 0.00 100.00 +GCT 62 0.00 50.00 50.00 0.00 +GCT 63 50.00 0.00 0.00 50.00 +GCT 64 0.00 0.00 100.00 0.00 
+GCT 65 50.00 50.00 0.00 0.00 +GCT 66 0.00 50.00 0.00 50.00 +GCT 67 0.00 0.00 0.00 100.00 +GCT 68 0.00 50.00 0.00 50.00 +GCT 69 0.00 50.00 0.00 50.00 +GCT 70 50.00 0.00 0.00 50.00 +GCT 71 0.00 0.00 0.00 100.00 +GCT 72 0.00 50.00 0.00 50.00 +GCT 73 0.00 0.00 0.00 100.00 +GCT 74 0.00 50.00 0.00 50.00 +GCT 75 50.00 0.00 0.00 50.00 +GCT 76 0.00 50.00 0.00 50.00 +GCT 77 50.00 0.00 0.00 50.00 +GCT 78 50.00 0.00 0.00 50.00 +GCT 79 50.00 0.00 0.00 50.00 +GCT 80 50.00 0.00 0.00 50.00 +GCT 81 50.00 0.00 0.00 50.00 +GCT 82 50.00 0.00 0.00 50.00 +GCT 83 50.00 0.00 0.00 50.00 +GCT 84 50.00 0.00 0.00 50.00 +GCT 85 50.00 0.00 0.00 50.00 +GCT 86 50.00 0.00 0.00 50.00 +GCT 87 50.00 0.00 0.00 50.00 +GCT 88 50.00 0.00 0.00 50.00 +GCT 89 50.00 0.00 0.00 50.00 +GCT 90 50.00 0.00 0.00 50.00 +GCT 91 50.00 0.00 50.00 0.00 +GCT 92 50.00 0.00 50.00 0.00 +GCT 93 50.00 0.00 50.00 0.00 +GCT 94 50.00 0.00 0.00 50.00 +GCT 95 50.00 0.00 50.00 0.00 +GCT 96 50.00 0.00 50.00 0.00 +GCT 97 50.00 0.00 50.00 0.00 +GCT 98 50.00 0.00 50.00 0.00 +GCT 99 50.00 0.00 50.00 0.00 +GCT 100 50.00 0.00 0.00 50.00 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 0.00 0.00 0.00 100.00 0.00 0.00 FBC 2 0.00 0.00 100.00 0.00 0.00 0.00 @@ -454,6 +559,8 @@ FBC 98 0.00 100.00 0.00 0.00 0.00 0.00 FBC 99 0.00 100.00 0.00 0.00 0.00 0.00 FBC 100 100.00 0.00 0.00 0.00 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 50 21 14 15 0 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 0.00 0.00 100.00 0.00 0.00 0.00 LBC 2 0.00 0.00 100.00 0.00 0.00 0.00 @@ -555,6 +662,26 @@ LBC 98 100.00 0.00 0.00 0.00 0.00 0.00 LBC 99 100.00 0.00 0.00 0.00 0.00 0.00 LBC 100 100.00 0.00 0.00 0.00 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 41 20 23 16 0 +# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%] +BCC1 2 0.00 0.00 100.00 0.00 0.00 +BCC1 3 0.00 0.00 0.00 100.00 0.00 +BCC1 4 0.00 100.00 0.00 0.00 0.00 +BCC1 5 0.00 0.00 0.00 100.00 0.00 +BCC1 6 100.00 0.00 0.00 0.00 0.00 +BCC1 7 0.00 0.00 0.00 100.00 0.00 +BCC1 8 0.00 100.00 0.00 0.00 0.00 +# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part. +# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number. +QTQ1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +QTQ1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +QTQ1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 +QTQ1 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +QTQ1 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +QTQ1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. 
The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs IS 0 0 0 0 0 IS 1 0 0 0 0
--- a/test-data/12.3reads.nooverlap.expected Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/12.3reads.nooverlap.expected Tue Sep 28 16:17:39 2021 +0000 @@ -1,8 +1,11 @@ +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file contains statistics for all reads. +# The command line was: stats -t /tmp/tmp0r5zs075/files/1/3/0/dataset_13082855-efe1-437e-8a91-ff7d013770db.dat -p -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 09f8b87f 140798ec 2b989f07 # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 3 +SN raw total sequences: 3 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 3 SN is sorted: 1 @@ -17,6 +20,7 @@ SN reads MQ0: 0 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 300 # ignores clipping SN total first fragment length: 100 # ignores clipping SN total last fragment length: 200 # ignores clipping @@ -355,6 +359,107 @@ GCC 98 0.00 66.67 0.00 33.33 0.00 0.00 GCC 99 0.00 100.00 0.00 0.00 0.00 0.00 GCC 100 0.00 0.00 0.00 100.00 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 33.33 0.00 0.00 66.67 +GCT 2 0.00 33.33 33.33 33.33 +GCT 3 0.00 33.33 33.33 33.33 +GCT 4 33.33 66.67 0.00 0.00 +GCT 5 0.00 66.67 33.33 0.00 +GCT 6 33.33 0.00 0.00 66.67 +GCT 7 33.33 0.00 0.00 66.67 +GCT 8 0.00 33.33 33.33 33.33 +GCT 9 0.00 66.67 33.33 0.00 +GCT 10 33.33 33.33 0.00 33.33 +GCT 11 33.33 33.33 0.00 33.33 +GCT 12 0.00 33.33 33.33 33.33 +GCT 13 0.00 0.00 33.33 66.67 +GCT 14 0.00 66.67 33.33 0.00 +GCT 15 33.33 66.67 0.00 0.00 +GCT 16 33.33 0.00 0.00 66.67 +GCT 17 33.33 0.00 0.00 66.67 +GCT 18 0.00 33.33 33.33 33.33 +GCT 19 0.00 33.33 33.33 33.33 +GCT 20 0.00 66.67 33.33 0.00 +GCT 21 33.33 66.67 0.00 0.00 +GCT 22 33.33 0.00 0.00 66.67 +GCT 23 0.00 0.00 33.33 66.67 +GCT 24 0.00 33.33 33.33 33.33 +GCT 25 33.33 66.67 0.00 0.00 +GCT 26 33.33 33.33 0.00 33.33 +GCT 27 0.00 33.33 33.33 33.33 +GCT 28 0.00 33.33 33.33 33.33 +GCT 29 0.00 0.00 33.33 66.67 +GCT 30 33.33 66.67 0.00 0.00 +GCT 31 33.33 66.67 0.00 0.00 +GCT 32 33.33 0.00 0.00 66.67 +GCT 33 0.00 0.00 33.33 66.67 +GCT 34 0.00 33.33 33.33 33.33 +GCT 35 33.33 33.33 0.00 33.33 +GCT 36 0.00 66.67 33.33 0.00 +GCT 37 33.33 66.67 0.00 0.00 +GCT 38 33.33 0.00 0.00 66.67 +GCT 39 0.00 0.00 33.33 66.67 +GCT 40 0.00 33.33 33.33 33.33 +GCT 41 33.33 66.67 0.00 0.00 +GCT 42 33.33 33.33 0.00 33.33 +GCT 43 0.00 33.33 33.33 33.33 +GCT 44 0.00 33.33 33.33 33.33 +GCT 45 0.00 33.33 33.33 33.33 +GCT 46 33.33 66.67 0.00 0.00 +GCT 47 33.33 33.33 0.00 33.33 +GCT 48 33.33 0.00 0.00 66.67 +GCT 49 0.00 0.00 33.33 66.67 +GCT 50 0.00 66.67 33.33 0.00 +GCT 51 33.33 66.67 0.00 0.00 +GCT 52 0.00 66.67 33.33 0.00 +GCT 53 33.33 0.00 0.00 66.67 +GCT 54 33.33 0.00 0.00 66.67 +GCT 55 0.00 33.33 33.33 33.33 +GCT 56 0.00 66.67 33.33 0.00 +GCT 57 33.33 33.33 0.00 33.33 +GCT 58 33.33 33.33 0.00 33.33 +GCT 59 0.00 33.33 33.33 33.33 +GCT 60 0.00 0.00 33.33 66.67 +GCT 61 0.00 66.67 33.33 0.00 +GCT 62 33.33 66.67 0.00 0.00 +GCT 63 33.33 0.00 0.00 66.67 +GCT 64 
33.33 0.00 0.00 66.67 +GCT 65 0.00 33.33 33.33 33.33 +GCT 66 0.00 33.33 33.33 33.33 +GCT 67 33.33 66.67 0.00 0.00 +GCT 68 0.00 66.67 33.33 0.00 +GCT 69 33.33 0.00 0.00 66.67 +GCT 70 33.33 0.00 0.00 66.67 +GCT 71 0.00 33.33 33.33 33.33 +GCT 72 0.00 66.67 33.33 0.00 +GCT 73 33.33 33.33 0.00 33.33 +GCT 74 33.33 33.33 0.00 33.33 +GCT 75 0.00 33.33 33.33 33.33 +GCT 76 0.00 0.00 33.33 66.67 +GCT 77 0.00 66.67 33.33 0.00 +GCT 78 33.33 66.67 0.00 0.00 +GCT 79 33.33 0.00 0.00 66.67 +GCT 80 33.33 0.00 0.00 66.67 +GCT 81 0.00 33.33 33.33 33.33 +GCT 82 0.00 66.67 33.33 0.00 +GCT 83 0.00 66.67 33.33 0.00 +GCT 84 33.33 33.33 0.00 33.33 +GCT 85 33.33 0.00 0.00 66.67 +GCT 86 0.00 0.00 33.33 66.67 +GCT 87 0.00 66.67 33.33 0.00 +GCT 88 33.33 66.67 0.00 0.00 +GCT 89 33.33 33.33 0.00 33.33 +GCT 90 0.00 0.00 33.33 66.67 +GCT 91 0.00 33.33 33.33 33.33 +GCT 92 0.00 66.67 33.33 0.00 +GCT 93 33.33 66.67 0.00 0.00 +GCT 94 33.33 33.33 0.00 33.33 +GCT 95 33.33 0.00 0.00 66.67 +GCT 96 0.00 33.33 33.33 33.33 +GCT 97 33.33 0.00 33.33 33.33 +GCT 98 33.33 66.67 0.00 0.00 +GCT 99 0.00 66.67 33.33 0.00 +GCT 100 33.33 0.00 0.00 66.67 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 0.00 0.00 0.00 100.00 0.00 0.00 FBC 2 0.00 100.00 0.00 0.00 0.00 0.00 @@ -456,6 +561,8 @@ FBC 98 0.00 0.00 0.00 100.00 0.00 0.00 FBC 99 0.00 100.00 0.00 0.00 0.00 0.00 FBC 100 0.00 0.00 0.00 100.00 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 0 51 0 49 0 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 0.00 0.00 0.00 100.00 0.00 0.00 LBC 2 0.00 50.00 0.00 50.00 0.00 0.00 @@ -557,6 +664,26 @@ LBC 98 0.00 100.00 0.00 0.00 0.00 0.00 LBC 99 0.00 100.00 0.00 0.00 0.00 0.00 LBC 100 0.00 0.00 0.00 100.00 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 1 102 0 97 0 +# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%] +BCC1 2 0.00 0.00 100.00 0.00 0.00 +BCC1 3 0.00 0.00 0.00 100.00 0.00 +BCC1 4 0.00 100.00 0.00 0.00 0.00 +BCC1 5 0.00 0.00 0.00 100.00 0.00 +BCC1 6 100.00 0.00 0.00 0.00 0.00 +BCC1 7 0.00 0.00 0.00 100.00 0.00 +BCC1 8 0.00 100.00 0.00 0.00 0.00 +# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part. +# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number. +QTQ1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +QTQ1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +QTQ1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 +QTQ1 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +QTQ1 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. 
The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs IS 0 0 0 0 0 IS 1 0 0 0 0
--- a/test-data/12.3reads.overlap.expected Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/12.3reads.overlap.expected Tue Sep 28 16:17:39 2021 +0000 @@ -1,8 +1,11 @@ +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file contains statistics for all reads. +# The command line was: stats -t /tmp/tmp0r5zs075/files/6/5/e/dataset_65ea4e4d-a70d-4001-911d-9d81ff2829a6.dat -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 09f8b87f 140798ec 2b989f07 # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 3 +SN raw total sequences: 3 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 3 SN is sorted: 1 @@ -17,6 +20,7 @@ SN reads MQ0: 0 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 300 # ignores clipping SN total first fragment length: 100 # ignores clipping SN total last fragment length: 200 # ignores clipping @@ -355,6 +359,107 @@ GCC 98 0.00 66.67 0.00 33.33 0.00 0.00 GCC 99 0.00 100.00 0.00 0.00 0.00 0.00 GCC 100 0.00 0.00 0.00 100.00 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 33.33 0.00 0.00 66.67 +GCT 2 0.00 33.33 33.33 33.33 +GCT 3 0.00 33.33 33.33 33.33 +GCT 4 33.33 66.67 0.00 0.00 +GCT 5 0.00 66.67 33.33 0.00 +GCT 6 33.33 0.00 0.00 66.67 +GCT 7 33.33 0.00 0.00 66.67 +GCT 8 0.00 33.33 33.33 33.33 +GCT 9 0.00 66.67 33.33 0.00 +GCT 10 33.33 33.33 0.00 33.33 +GCT 11 33.33 33.33 0.00 33.33 +GCT 12 0.00 33.33 33.33 33.33 +GCT 13 0.00 0.00 33.33 66.67 +GCT 14 0.00 66.67 33.33 0.00 +GCT 15 33.33 66.67 0.00 0.00 +GCT 16 33.33 0.00 0.00 66.67 +GCT 17 33.33 0.00 0.00 66.67 +GCT 18 0.00 33.33 33.33 33.33 +GCT 19 0.00 33.33 33.33 33.33 +GCT 20 0.00 66.67 33.33 0.00 +GCT 21 33.33 66.67 0.00 0.00 +GCT 22 33.33 0.00 0.00 66.67 +GCT 23 0.00 0.00 33.33 66.67 +GCT 24 0.00 33.33 33.33 33.33 +GCT 25 33.33 66.67 0.00 0.00 +GCT 26 33.33 33.33 0.00 33.33 +GCT 27 0.00 33.33 33.33 33.33 +GCT 28 0.00 33.33 33.33 33.33 +GCT 29 0.00 0.00 33.33 66.67 +GCT 30 33.33 66.67 0.00 0.00 +GCT 31 33.33 66.67 0.00 0.00 +GCT 32 33.33 0.00 0.00 66.67 +GCT 33 0.00 0.00 33.33 66.67 +GCT 34 0.00 33.33 33.33 33.33 +GCT 35 33.33 33.33 0.00 33.33 +GCT 36 0.00 66.67 33.33 0.00 +GCT 37 33.33 66.67 0.00 0.00 +GCT 38 33.33 0.00 0.00 66.67 +GCT 39 0.00 0.00 33.33 66.67 +GCT 40 0.00 33.33 33.33 33.33 +GCT 41 33.33 66.67 0.00 0.00 +GCT 42 33.33 33.33 0.00 33.33 +GCT 43 0.00 33.33 33.33 33.33 +GCT 44 0.00 33.33 33.33 33.33 +GCT 45 0.00 33.33 33.33 33.33 +GCT 46 33.33 66.67 0.00 0.00 +GCT 47 33.33 33.33 0.00 33.33 +GCT 48 33.33 0.00 0.00 66.67 +GCT 49 0.00 0.00 33.33 66.67 +GCT 50 0.00 66.67 33.33 0.00 +GCT 51 33.33 66.67 0.00 0.00 +GCT 52 0.00 66.67 33.33 0.00 +GCT 53 33.33 0.00 0.00 66.67 +GCT 54 33.33 0.00 0.00 66.67 +GCT 55 0.00 33.33 33.33 33.33 +GCT 56 0.00 66.67 33.33 0.00 +GCT 57 33.33 33.33 0.00 33.33 +GCT 58 33.33 33.33 0.00 33.33 +GCT 59 0.00 33.33 33.33 33.33 +GCT 60 0.00 0.00 33.33 66.67 +GCT 61 0.00 66.67 33.33 0.00 +GCT 62 33.33 66.67 0.00 0.00 +GCT 63 33.33 0.00 0.00 66.67 +GCT 64 
33.33 0.00 0.00 66.67 +GCT 65 0.00 33.33 33.33 33.33 +GCT 66 0.00 33.33 33.33 33.33 +GCT 67 33.33 66.67 0.00 0.00 +GCT 68 0.00 66.67 33.33 0.00 +GCT 69 33.33 0.00 0.00 66.67 +GCT 70 33.33 0.00 0.00 66.67 +GCT 71 0.00 33.33 33.33 33.33 +GCT 72 0.00 66.67 33.33 0.00 +GCT 73 33.33 33.33 0.00 33.33 +GCT 74 33.33 33.33 0.00 33.33 +GCT 75 0.00 33.33 33.33 33.33 +GCT 76 0.00 0.00 33.33 66.67 +GCT 77 0.00 66.67 33.33 0.00 +GCT 78 33.33 66.67 0.00 0.00 +GCT 79 33.33 0.00 0.00 66.67 +GCT 80 33.33 0.00 0.00 66.67 +GCT 81 0.00 33.33 33.33 33.33 +GCT 82 0.00 66.67 33.33 0.00 +GCT 83 0.00 66.67 33.33 0.00 +GCT 84 33.33 33.33 0.00 33.33 +GCT 85 33.33 0.00 0.00 66.67 +GCT 86 0.00 0.00 33.33 66.67 +GCT 87 0.00 66.67 33.33 0.00 +GCT 88 33.33 66.67 0.00 0.00 +GCT 89 33.33 33.33 0.00 33.33 +GCT 90 0.00 0.00 33.33 66.67 +GCT 91 0.00 33.33 33.33 33.33 +GCT 92 0.00 66.67 33.33 0.00 +GCT 93 33.33 66.67 0.00 0.00 +GCT 94 33.33 33.33 0.00 33.33 +GCT 95 33.33 0.00 0.00 66.67 +GCT 96 0.00 33.33 33.33 33.33 +GCT 97 33.33 0.00 33.33 33.33 +GCT 98 33.33 66.67 0.00 0.00 +GCT 99 0.00 66.67 33.33 0.00 +GCT 100 33.33 0.00 0.00 66.67 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 0.00 0.00 0.00 100.00 0.00 0.00 FBC 2 0.00 100.00 0.00 0.00 0.00 0.00 @@ -456,6 +561,8 @@ FBC 98 0.00 0.00 0.00 100.00 0.00 0.00 FBC 99 0.00 100.00 0.00 0.00 0.00 0.00 FBC 100 0.00 0.00 0.00 100.00 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 0 51 0 49 0 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 0.00 0.00 0.00 100.00 0.00 0.00 LBC 2 0.00 50.00 0.00 50.00 0.00 0.00 @@ -557,6 +664,26 @@ LBC 98 0.00 100.00 0.00 0.00 0.00 0.00 LBC 99 0.00 100.00 0.00 0.00 0.00 0.00 LBC 100 0.00 0.00 0.00 100.00 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 1 102 0 97 0 +# ACGT content per cycle for barcodes. Use `grep ^BCC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N counts as a percentage of all A/C/G/T bases [%] +BCC1 2 0.00 0.00 100.00 0.00 0.00 +BCC1 3 0.00 0.00 0.00 100.00 0.00 +BCC1 4 0.00 100.00 0.00 0.00 0.00 +BCC1 5 0.00 0.00 0.00 100.00 0.00 +BCC1 6 100.00 0.00 0.00 0.00 0.00 +BCC1 7 0.00 0.00 0.00 100.00 0.00 +BCC1 8 0.00 100.00 0.00 0.00 0.00 +# Barcode Qualities. Use `grep ^QTQ | cut -f 2-` to extract this part. +# Columns correspond to qualities and rows to barcode cycles. First column is the cycle number. +QTQ1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +QTQ1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +QTQ1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 +QTQ1 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +QTQ1 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +QTQ1 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. 
The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs IS 0 0 0 0 0 IS 1 0 0 0 0
--- a/test-data/2.stats.expected Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/2.stats.expected Tue Sep 28 16:17:39 2021 +0000 @@ -1,8 +1,11 @@ +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file contains statistics for all reads. +# The command line was: stats --ref-seq reference.fa -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 1a1c1362 29c426ae 7bab45da # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 2 +SN raw total sequences: 2 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 2 SN is sorted: 1 @@ -17,6 +20,7 @@ SN reads MQ0: 0 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 70 # ignores clipping SN total first fragment length: 35 # ignores clipping SN total last fragment length: 35 # ignores clipping @@ -195,6 +199,42 @@ GCC 33 50.00 0.00 0.00 50.00 0.00 0.00 GCC 34 50.00 0.00 50.00 0.00 0.00 0.00 GCC 35 0.00 0.00 50.00 50.00 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 0.00 50.00 0.00 50.00 +GCT 2 50.00 0.00 50.00 0.00 +GCT 3 0.00 0.00 50.00 50.00 +GCT 4 0.00 0.00 100.00 0.00 +GCT 5 0.00 0.00 50.00 50.00 +GCT 6 0.00 50.00 0.00 50.00 +GCT 7 0.00 0.00 50.00 50.00 +GCT 8 0.00 0.00 0.00 100.00 +GCT 9 0.00 50.00 50.00 0.00 +GCT 10 50.00 0.00 50.00 0.00 +GCT 11 50.00 0.00 0.00 50.00 +GCT 12 50.00 0.00 50.00 0.00 +GCT 13 50.00 0.00 50.00 0.00 +GCT 14 0.00 0.00 0.00 100.00 +GCT 15 100.00 0.00 0.00 0.00 +GCT 16 50.00 0.00 0.00 50.00 +GCT 17 0.00 0.00 50.00 50.00 +GCT 18 0.00 50.00 50.00 0.00 +GCT 19 0.00 100.00 0.00 0.00 +GCT 20 0.00 0.00 50.00 50.00 +GCT 21 0.00 0.00 100.00 0.00 +GCT 22 0.00 50.00 0.00 50.00 +GCT 23 50.00 0.00 0.00 50.00 +GCT 24 50.00 0.00 50.00 0.00 +GCT 25 50.00 0.00 50.00 0.00 +GCT 26 0.00 0.00 100.00 0.00 +GCT 27 50.00 0.00 0.00 50.00 +GCT 28 0.00 0.00 50.00 50.00 +GCT 29 0.00 50.00 0.00 50.00 +GCT 30 0.00 50.00 0.00 50.00 +GCT 31 0.00 50.00 50.00 0.00 +GCT 32 0.00 0.00 100.00 0.00 +GCT 33 100.00 0.00 0.00 0.00 +GCT 34 0.00 0.00 50.00 50.00 +GCT 35 50.00 0.00 50.00 0.00 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 0.00 0.00 0.00 100.00 0.00 0.00 FBC 2 0.00 0.00 100.00 0.00 0.00 0.00 @@ -231,6 +271,8 @@ FBC 33 100.00 0.00 0.00 0.00 0.00 0.00 FBC 34 0.00 0.00 100.00 0.00 0.00 0.00 FBC 35 0.00 0.00 100.00 0.00 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 6 5 15 9 0 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 0.00 0.00 100.00 0.00 0.00 0.00 LBC 2 0.00 0.00 0.00 100.00 0.00 0.00 @@ -267,6 +309,8 @@ LBC 33 0.00 0.00 0.00 100.00 0.00 0.00 LBC 34 100.00 0.00 0.00 0.00 0.00 0.00 LBC 35 0.00 0.00 0.00 100.00 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 11 10 5 9 0 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs IS 0 0 0 0 0 IS 1 0 0 0 0
--- a/test-data/6.stats.expected Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/6.stats.expected Tue Sep 28 16:17:39 2021 +0000 @@ -1,8 +1,11 @@ +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats +# This file contains statistics for all reads. +# The command line was: stats --insert-size 0 --ref-seq reference.fa -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 1a1c1362 32507d92 7bab45da # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 2 +SN raw total sequences: 2 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 2 SN is sorted: 1 @@ -17,6 +20,7 @@ SN reads MQ0: 0 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 70 # ignores clipping SN total first fragment length: 35 # ignores clipping SN total last fragment length: 35 # ignores clipping @@ -195,6 +199,42 @@ GCC 33 0.00 0.00 50.00 50.00 0.00 0.00 GCC 34 100.00 0.00 0.00 0.00 0.00 0.00 GCC 35 0.00 0.00 50.00 50.00 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 0.00 50.00 0.00 50.00 +GCT 2 50.00 0.00 50.00 0.00 +GCT 3 0.00 0.00 50.00 50.00 +GCT 4 0.00 0.00 100.00 0.00 +GCT 5 0.00 0.00 50.00 50.00 +GCT 6 0.00 50.00 50.00 0.00 +GCT 7 0.00 0.00 0.00 100.00 +GCT 8 0.00 0.00 50.00 50.00 +GCT 9 0.00 0.00 50.00 50.00 +GCT 10 0.00 50.00 50.00 0.00 +GCT 11 100.00 0.00 0.00 0.00 +GCT 12 0.00 0.00 50.00 50.00 +GCT 13 100.00 0.00 0.00 0.00 +GCT 14 0.00 0.00 50.00 50.00 +GCT 15 50.00 0.00 0.00 50.00 +GCT 16 50.00 0.00 0.00 50.00 +GCT 17 50.00 0.00 50.00 0.00 +GCT 18 0.00 0.00 50.00 50.00 +GCT 19 0.00 100.00 0.00 0.00 +GCT 20 0.00 50.00 0.00 50.00 +GCT 21 0.00 0.00 100.00 0.00 +GCT 22 0.00 50.00 50.00 0.00 +GCT 23 50.00 0.00 0.00 50.00 +GCT 24 50.00 0.00 0.00 50.00 +GCT 25 50.00 0.00 50.00 0.00 +GCT 26 0.00 0.00 100.00 0.00 +GCT 27 0.00 0.00 50.00 50.00 +GCT 28 50.00 0.00 0.00 50.00 +GCT 29 0.00 50.00 50.00 0.00 +GCT 30 0.00 0.00 0.00 100.00 +GCT 31 0.00 50.00 50.00 0.00 +GCT 32 0.00 50.00 50.00 0.00 +GCT 33 50.00 0.00 50.00 0.00 +GCT 34 50.00 0.00 0.00 50.00 +GCT 35 50.00 0.00 50.00 0.00 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 0.00 0.00 0.00 100.00 0.00 0.00 FBC 2 0.00 0.00 100.00 0.00 0.00 0.00 @@ -231,6 +271,8 @@ FBC 33 0.00 0.00 100.00 0.00 0.00 0.00 FBC 34 100.00 0.00 0.00 0.00 0.00 0.00 FBC 35 0.00 0.00 100.00 0.00 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 6 5 15 9 0 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 0.00 0.00 100.00 0.00 0.00 0.00 LBC 2 0.00 0.00 0.00 100.00 0.00 0.00 @@ -267,6 +309,8 @@ LBC 33 0.00 0.00 0.00 100.00 0.00 0.00 LBC 34 100.00 0.00 0.00 0.00 0.00 0.00 LBC 35 0.00 0.00 0.00 100.00 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 11 10 5 9 0 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs IS 0 0 0 0 0 IS 1 0 0 0 0
--- a/test-data/samtools_stats_out1.tab Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/samtools_stats_out1.tab Tue Sep 28 16:17:39 2021 +0000 @@ -1,11 +1,11 @@ -# This file was produced by samtools stats (1.9+htslib-1.9) and can be plotted using plot-bamstats +# This file was produced by samtools stats (1.12+htslib-1.12) and can be plotted using plot-bamstats # This file contains statistics for all reads. -# The command line was: stats --ref-seq reference.fa infile +# The command line was: stats --ref-seq reference.fa -@ 0 infile # CHK, Checksum [2]Read Names [3]Sequences [4]Qualities # CHK, CRC32 of reads which passed filtering followed by addition (32bit overflow) CHK 1bd20fd8 58ad2167 29883386 # Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part. -SN raw total sequences: 200 +SN raw total sequences: 200 # excluding supplementary and secondary reads SN filtered sequences: 0 SN sequences: 200 SN is sorted: 1 @@ -20,6 +20,7 @@ SN reads MQ0: 6 # mapped and MQ=0 SN reads QC failed: 0 SN non-primary alignments: 0 +SN supplementary alignments: 0 SN total length: 50200 # ignores clipping SN total first fragment length: 25100 # ignores clipping SN total last fragment length: 25100 # ignores clipping @@ -1094,6 +1095,258 @@ GCC 249 25.00 23.00 21.00 31.00 0.00 0.00 GCC 250 27.50 22.50 17.50 32.50 0.00 0.00 GCC 251 13.50 20.50 36.50 29.50 0.00 0.00 +# ACGT content per cycle, read oriented. Use `grep ^GCT | cut -f 2-` to extract this part. 
The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%] +GCT 1 22.45 32.65 31.63 13.27 +GCT 2 26.00 17.00 10.00 47.00 +GCT 3 34.00 25.00 9.00 32.00 +GCT 4 13.00 32.00 10.00 45.00 +GCT 5 37.00 24.00 10.00 29.00 +GCT 6 44.00 26.00 17.00 13.00 +GCT 7 28.00 13.00 32.00 27.00 +GCT 8 49.00 16.00 18.00 17.00 +GCT 9 19.00 35.00 16.00 30.00 +GCT 10 35.00 13.00 22.00 30.00 +GCT 11 18.00 15.00 29.00 38.00 +GCT 12 28.00 24.00 22.00 26.00 +GCT 13 29.00 17.00 19.00 35.00 +GCT 14 21.21 24.24 24.24 30.30 +GCT 15 23.00 20.00 25.00 32.00 +GCT 16 35.00 15.00 20.00 30.00 +GCT 17 29.00 26.00 13.00 32.00 +GCT 18 31.00 30.00 18.00 21.00 +GCT 19 30.00 26.00 21.00 23.00 +GCT 20 23.00 21.00 20.00 36.00 +GCT 21 36.00 18.00 19.00 27.00 +GCT 22 39.00 31.00 13.00 17.00 +GCT 23 30.30 21.21 11.11 37.37 +GCT 24 36.00 14.00 20.00 30.00 +GCT 25 38.00 23.00 17.00 22.00 +GCT 26 36.00 21.00 13.00 30.00 +GCT 27 32.00 18.00 22.00 28.00 +GCT 28 30.00 13.00 25.00 32.00 +GCT 29 30.00 28.00 23.00 19.00 +GCT 30 34.00 21.00 13.00 32.00 +GCT 31 33.00 29.00 14.00 24.00 +GCT 32 38.00 20.00 20.00 22.00 +GCT 33 27.00 23.00 15.00 35.00 +GCT 34 43.00 17.00 20.00 20.00 +GCT 35 35.00 23.00 22.00 20.00 +GCT 36 41.00 25.00 16.00 18.00 +GCT 37 38.00 18.00 18.00 26.00 +GCT 38 23.00 26.00 25.00 26.00 +GCT 39 22.00 41.00 19.00 18.00 +GCT 40 29.00 14.00 23.00 34.00 +GCT 41 29.00 27.00 20.00 24.00 +GCT 42 29.00 27.00 18.00 26.00 +GCT 43 38.00 28.00 16.00 18.00 +GCT 44 21.00 26.00 23.00 30.00 +GCT 45 38.00 25.00 18.00 19.00 +GCT 46 27.00 21.00 22.00 30.00 +GCT 47 40.00 28.00 18.00 14.00 +GCT 48 25.00 22.00 20.00 33.00 +GCT 49 26.00 33.00 10.00 31.00 +GCT 50 49.00 20.00 10.00 21.00 +GCT 51 29.00 29.00 13.00 29.00 +GCT 52 43.00 22.00 22.00 13.00 +GCT 53 19.00 27.00 27.00 27.00 +GCT 54 25.00 18.00 25.00 32.00 +GCT 55 32.00 22.00 17.00 29.00 +GCT 56 36.00 20.00 16.00 28.00 +GCT 57 28.00 24.00 23.00 25.00 +GCT 58 23.00 39.00 17.00 21.00 +GCT 59 33.00 21.00 26.00 20.00 +GCT 60 30.00 26.00 19.00 25.00 +GCT 
61 23.00 23.00 25.00 29.00 +GCT 62 35.00 19.00 20.00 26.00 +GCT 63 26.00 25.00 29.00 20.00 +GCT 64 30.00 20.00 27.00 23.00 +GCT 65 38.00 17.00 21.00 24.00 +GCT 66 33.00 26.00 14.00 27.00 +GCT 67 35.00 21.00 21.00 23.00 +GCT 68 39.00 28.00 15.00 18.00 +GCT 69 38.00 18.00 22.00 22.00 +GCT 70 40.00 13.00 22.00 25.00 +GCT 71 29.00 41.00 19.00 11.00 +GCT 72 33.00 26.00 28.00 13.00 +GCT 73 31.00 18.00 24.00 27.00 +GCT 74 31.00 32.00 13.00 24.00 +GCT 75 46.00 10.00 21.00 23.00 +GCT 76 23.00 28.00 25.00 24.00 +GCT 77 35.00 20.00 19.00 26.00 +GCT 78 29.00 30.00 26.00 15.00 +GCT 79 24.00 25.00 27.00 24.00 +GCT 80 40.00 18.00 29.00 13.00 +GCT 81 18.00 26.00 26.00 30.00 +GCT 82 30.00 25.00 28.00 17.00 +GCT 83 36.00 25.00 14.00 25.00 +GCT 84 22.00 39.00 23.00 16.00 +GCT 85 42.00 20.00 21.00 17.00 +GCT 86 12.00 47.00 16.00 25.00 +GCT 87 45.00 15.00 19.00 21.00 +GCT 88 26.00 24.00 31.00 19.00 +GCT 89 26.00 25.00 29.00 20.00 +GCT 90 24.00 22.00 26.00 28.00 +GCT 91 29.00 30.00 24.00 17.00 +GCT 92 22.00 13.00 32.00 33.00 +GCT 93 29.00 40.00 13.00 18.00 +GCT 94 30.00 16.00 25.00 29.00 +GCT 95 31.31 35.35 17.17 16.16 +GCT 96 34.00 27.00 15.00 24.00 +GCT 97 32.00 27.00 25.00 16.00 +GCT 98 21.21 31.31 24.24 23.23 +GCT 99 27.00 37.00 21.00 15.00 +GCT 100 29.00 15.00 26.00 30.00 +GCT 101 27.00 34.00 25.00 14.00 +GCT 102 20.00 14.00 36.00 30.00 +GCT 103 32.00 26.00 16.00 26.00 +GCT 104 30.00 28.00 22.00 20.00 +GCT 105 35.00 35.00 19.00 11.00 +GCT 106 22.00 32.00 25.00 21.00 +GCT 107 19.19 33.33 26.26 21.21 +GCT 108 27.27 33.33 21.21 18.18 +GCT 109 19.00 29.00 24.00 28.00 +GCT 110 20.20 31.31 28.28 20.20 +GCT 111 25.25 33.33 22.22 19.19 +GCT 112 39.39 28.28 16.16 16.16 +GCT 113 21.21 24.24 31.31 23.23 +GCT 114 19.00 31.00 35.00 15.00 +GCT 115 32.00 23.00 25.00 20.00 +GCT 116 20.00 29.00 35.00 16.00 +GCT 117 31.00 21.00 21.00 27.00 +GCT 118 25.00 14.00 35.00 26.00 +GCT 119 21.00 31.00 24.00 24.00 +GCT 120 19.00 24.00 27.00 30.00 +GCT 121 24.24 36.36 20.20 19.19 +GCT 122 18.00 25.00 23.00 
34.00 +GCT 123 25.00 48.00 10.00 17.00 +GCT 124 10.00 31.00 35.00 24.00 +GCT 125 27.00 24.00 22.00 27.00 +GCT 126 24.00 19.00 26.00 31.00 +GCT 127 26.00 16.00 22.00 36.00 +GCT 128 16.00 35.00 30.00 19.00 +GCT 129 24.00 27.00 26.00 23.00 +GCT 130 25.00 19.00 23.00 33.00 +GCT 131 17.00 40.00 24.00 19.00 +GCT 132 20.00 27.00 24.00 29.00 +GCT 133 15.00 34.00 33.00 18.00 +GCT 134 22.00 19.00 31.00 28.00 +GCT 135 17.00 28.00 26.00 29.00 +GCT 136 25.00 33.00 14.00 28.00 +GCT 137 21.00 22.00 30.00 27.00 +GCT 138 15.00 30.00 29.00 26.00 +GCT 139 19.00 32.00 22.00 27.00 +GCT 140 19.00 24.00 29.00 28.00 +GCT 141 21.00 24.00 25.00 30.00 +GCT 142 17.00 32.00 35.00 16.00 +GCT 143 23.00 23.00 28.00 26.00 +GCT 144 20.00 33.00 24.00 23.00 +GCT 145 20.00 26.00 24.00 30.00 +GCT 146 26.00 15.00 40.00 19.00 +GCT 147 23.00 23.00 30.00 24.00 +GCT 148 12.00 32.00 33.00 23.00 +GCT 149 27.00 24.00 23.00 26.00 +GCT 150 12.00 32.00 21.00 35.00 +GCT 151 22.00 29.00 34.00 15.00 +GCT 152 16.00 21.00 26.00 37.00 +GCT 153 24.00 20.00 34.00 22.00 +GCT 154 25.00 22.00 28.00 25.00 +GCT 155 21.00 28.00 17.00 34.00 +GCT 156 16.00 23.00 36.00 25.00 +GCT 157 23.00 34.00 20.00 23.00 +GCT 158 21.00 33.00 21.00 25.00 +GCT 159 19.00 15.00 44.00 22.00 +GCT 160 24.00 31.00 22.00 23.00 +GCT 161 13.00 28.00 40.00 19.00 +GCT 162 33.00 19.00 20.00 28.00 +GCT 163 23.00 26.00 31.00 20.00 +GCT 164 33.00 19.00 20.00 28.00 +GCT 165 13.00 16.00 37.00 34.00 +GCT 166 20.00 36.00 30.00 14.00 +GCT 167 17.00 28.00 25.00 30.00 +GCT 168 16.00 24.00 31.00 29.00 +GCT 169 21.00 23.00 21.00 35.00 +GCT 170 22.00 24.00 27.00 27.00 +GCT 171 23.00 30.00 31.00 16.00 +GCT 172 24.00 22.00 30.00 24.00 +GCT 173 24.00 17.00 33.00 26.00 +GCT 174 21.00 24.00 33.00 22.00 +GCT 175 34.00 19.00 34.00 13.00 +GCT 176 17.00 37.00 24.00 22.00 +GCT 177 24.00 24.00 25.00 27.00 +GCT 178 19.00 26.00 30.00 25.00 +GCT 179 24.00 20.00 20.00 36.00 +GCT 180 22.00 25.00 31.00 22.00 +GCT 181 21.00 17.00 38.00 24.00 +GCT 182 35.00 21.00 20.00 24.00 +GCT 183 
28.00 15.00 27.00 30.00 +GCT 184 17.00 30.00 29.00 24.00 +GCT 185 16.00 32.00 33.00 19.00 +GCT 186 21.00 22.00 36.00 21.00 +GCT 187 24.00 13.00 41.00 22.00 +GCT 188 29.00 19.00 30.00 22.00 +GCT 189 19.00 23.00 33.00 25.00 +GCT 190 24.00 28.00 25.00 23.00 +GCT 191 28.00 21.00 29.00 22.00 +GCT 192 37.00 25.00 13.00 25.00 +GCT 193 18.00 26.00 25.00 31.00 +GCT 194 21.00 34.00 24.00 21.00 +GCT 195 16.00 28.00 21.00 35.00 +GCT 196 23.00 23.00 27.00 27.00 +GCT 197 18.00 20.00 24.00 38.00 +GCT 198 15.00 19.00 39.00 27.00 +GCT 199 23.00 17.00 27.00 33.00 +GCT 200 25.00 27.00 23.00 25.00 +GCT 201 26.00 19.00 25.00 30.00 +GCT 202 23.00 23.00 21.00 33.00 +GCT 203 21.00 19.00 32.00 28.00 +GCT 204 26.00 29.00 24.00 21.00 +GCT 205 16.00 23.00 21.00 40.00 +GCT 206 28.00 25.00 21.00 26.00 +GCT 207 22.00 22.00 19.00 37.00 +GCT 208 12.00 32.00 32.00 24.00 +GCT 209 18.00 20.00 15.00 47.00 +GCT 210 18.00 15.00 45.00 22.00 +GCT 211 28.00 14.00 28.00 30.00 +GCT 212 23.00 27.00 22.00 28.00 +GCT 213 22.00 12.00 34.00 32.00 +GCT 214 16.00 24.00 26.00 34.00 +GCT 215 28.00 22.00 25.00 25.00 +GCT 216 18.00 17.00 26.00 39.00 +GCT 217 27.00 30.00 16.00 27.00 +GCT 218 18.00 24.00 24.00 34.00 +GCT 219 19.00 18.00 24.00 39.00 +GCT 220 28.00 23.00 24.00 25.00 +GCT 221 20.00 24.00 20.00 36.00 +GCT 222 22.00 21.00 19.00 38.00 +GCT 223 19.00 22.00 33.00 26.00 +GCT 224 19.00 20.00 27.00 34.00 +GCT 225 14.00 32.00 34.00 20.00 +GCT 226 20.00 20.00 29.00 31.00 +GCT 227 17.00 22.00 26.00 35.00 +GCT 228 34.00 20.00 25.00 21.00 +GCT 229 22.00 27.00 16.00 35.00 +GCT 230 23.00 33.00 15.00 29.00 +GCT 231 20.00 27.00 31.00 22.00 +GCT 232 25.00 23.00 11.00 41.00 +GCT 233 22.00 25.00 27.00 26.00 +GCT 234 14.00 27.00 23.00 36.00 +GCT 235 17.00 11.00 32.00 40.00 +GCT 236 29.00 20.00 24.00 27.00 +GCT 237 16.00 19.00 23.00 42.00 +GCT 238 22.00 25.00 29.00 24.00 +GCT 239 18.00 8.00 17.00 57.00 +GCT 240 27.00 26.00 14.00 33.00 +GCT 241 21.00 22.00 17.00 40.00 +GCT 242 26.00 28.00 10.00 36.00 +GCT 243 25.00 15.00 24.00 
36.00 +GCT 244 15.00 24.00 16.00 45.00 +GCT 245 24.00 23.00 19.00 34.00 +GCT 246 18.00 25.00 29.00 28.00 +GCT 247 30.00 13.00 14.00 43.00 +GCT 248 22.00 17.00 26.00 35.00 +GCT 249 25.00 22.00 22.00 31.00 +GCT 250 25.00 20.00 20.00 35.00 +GCT 251 13.00 17.00 40.00 30.00 # ACGT content per cycle for first fragments. Use `grep ^FBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] FBC 1 21.43 27.55 36.73 14.29 2.04 0.00 FBC 2 34.00 15.00 12.00 39.00 0.00 0.00 @@ -1346,6 +1599,8 @@ FBC 249 25.00 24.00 20.00 31.00 0.00 0.00 FBC 250 30.00 25.00 15.00 30.00 0.00 0.00 FBC 251 14.00 24.00 33.00 29.00 0.00 0.00 +# ACGT raw counters for first fragments. Use `grep ^FTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +FTC 6178 6247 5821 6841 13 # ACGT content per cycle for last fragments. Use `grep ^LBC | cut -f 2-` to extract this part. The columns are: cycle; A,C,G,T base counts as a percentage of all A/C/G/T bases [%]; and N and O counts as a percentage of all A/C/G/T bases [%] LBC 1 22.45 32.65 31.63 13.27 2.04 0.00 LBC 2 26.00 17.00 10.00 47.00 0.00 0.00 @@ -1598,6 +1853,8 @@ LBC 249 25.00 22.00 22.00 31.00 0.00 0.00 LBC 250 25.00 20.00 20.00 35.00 0.00 0.00 LBC 251 13.00 17.00 40.00 30.00 0.00 0.00 +# ACGT raw counters for last fragments. Use `grep ^LTC | cut -f 2-` to extract this part. The columns are: A,C,G,T,N base counters +LTC 6413 6102 5966 6606 13 # Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: insert size, pairs total, inward oriented pairs, outward oriented pairs, other pairs # Read lengths. Use `grep ^RL | cut -f 2-` to extract this part. The columns are: read length, count RL 251 200
--- a/test-data/samtools_stats_out1__sn.tab Thu Oct 17 02:21:23 2019 -0400 +++ b/test-data/samtools_stats_out1__sn.tab Tue Sep 28 16:17:39 2021 +0000 @@ -1,5 +1,5 @@ # Summary Numbers. -raw total sequences: 200 +raw total sequences: 200 # excluding supplementary and secondary reads filtered sequences: 0 sequences: 200 is sorted: 1 @@ -14,6 +14,7 @@ reads MQ0: 6 # mapped and MQ=0 reads QC failed: 0 non-primary alignments: 0 +supplementary alignments: 0 total length: 50200 # ignores clipping total first fragment length: 25100 # ignores clipping total last fragment length: 25100 # ignores clipping