changeset 0:76a4f74f3c09 draft

Uploaded
author jackcurragh
date Wed, 23 Mar 2022 12:47:57 +0000
parents
children cdb826d5314b
files samtools_sort_genome/macros.xml samtools_sort_genome/samtools_sort.xml samtools_sort_genome/test-data/1.bam samtools_sort_genome/test-data/1_sort.bam samtools_sort_genome/test-data/1_sort_read_names.bam samtools_sort_genome/test-data/minhash.expected.bam samtools_sort_genome/test-data/name.sort.expected.bam samtools_sort_genome/test-data/name.sort.expected.sam samtools_sort_genome/test-data/pos.sort.expected.bam samtools_sort_genome/test-data/pos.sort.expected.sam samtools_sort_genome/test-data/tag.as.sort.expected.bam samtools_sort_genome/test-data/tag.as.sort.expected.sam samtools_sort_genome/test-data/tag.fi.sort.expected.bam samtools_sort_genome/test-data/tag.fi.sort.expected.sam samtools_sort_genome/test-data/tag.rg.n.sort.expected.bam samtools_sort_genome/test-data/tag.rg.n.sort.expected.sam samtools_sort_genome/test-data/tag.rg.sort.expected.bam samtools_sort_genome/test-data/tag.rg.sort.expected.sam samtools_sort_genome/test-data/test_input_1_a.bam
diffstat 19 files changed, 556 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_sort_genome/macros.xml	Wed Mar 23 12:47:57 2022 +0000
@@ -0,0 +1,223 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">samtools</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@TOOL_VERSION@">1.13</token>
+    <token name="@PROFILE@">20.05</token>
+    <token name="@FLAGS@"><![CDATA[
+        #set $flags = 0
+        #if $filter
+            #set $flags = sum(map(int, str($filter).split(',')))
+        #end if
+    ]]></token>
+    <token name="@PREPARE_IDX@"><![CDATA[
+        ##prepare input and indices
+        ln -s '$input' infile &&
+        #if $input.is_of_type('bam'):
+            #if str( $input.metadata.bam_index ) != "None":
+                ln -s '${input.metadata.bam_index}' infile.bai &&
+            #else:
+                samtools index infile infile.bai &&
+            #end if
+        #elif $input.is_of_type('cram'):
+            #if str( $input.metadata.cram_index ) != "None":
+                ln -s '${input.metadata.cram_index}' infile.crai &&
+            #else:
+                samtools index infile infile.crai &&
+            #end if
+        #end if
+    ]]></token>
+    <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[
+        ##prepare input and indices
+        #for $i, $bam in enumerate( $input_bams ):
+            ln -s '$bam' '${i}' &&
+            #if $bam.is_of_type('bam'):
+                #if str( $bam.metadata.bam_index ) != "None":
+                    ln -s '${bam.metadata.bam_index}' '${i}.bai' &&
+                #else:
+                    samtools index '${i}' '${i}.bai' &&
+                #end if
+            #elif $bam.is_of_type('cram'):
+                #if str( $bam.metadata.cram_index ) != "None":
+                    ln -s '${bam.metadata.cram_index}' '${i}.crai' &&
+                #else:
+                    samtools index '${i}' '${i}.crai' &&
+                #end if
+            #end if
+        #end for
+    ]]></token>
+    <token name="@PREPARE_FASTA_IDX@"><![CDATA[
+        ##checks for reference data ($addref_cond.addref_select=="history" or =="cached")
+        ##and sets the -t/-T parameters accordingly:
+        ##- in case of history a symbolic link is used because samtools (view) will generate
+        ##  the index which might not be possible in the directory containing the fasta file
+        ##- in case of cached the absolute path is used which allows to read the cram file
+        ##  without specifying the reference
+        #if $addref_cond.addref_select == "history":
+            ln -s '${addref_cond.ref}' reference.fa &&
+            samtools faidx reference.fa &&
+            #set reffa="reference.fa"
+            #set reffai="reference.fa.fai"
+        #elif $addref_cond.addref_select == "cached":
+            #set reffa=str($addref_cond.ref.fields.path)
+            #set reffai=str($addref_cond.ref.fields.path)+".fai"
+        #else
+            #set reffa=None
+            #set reffai=None
+        #end if
+    ]]></token>
+    <!-- Optional reference selector: no reference, a FASTA dataset from the history, or an entry of the fasta_indexes data table. The help and argument tokens default to empty. -->
+    <xml name="optional_reference" token_help="" token_argument="">
+        <conditional name="addref_cond">
+            <param name="addref_select" type="select" label="Use a reference sequence">
+                <help>@HELP@</help>
+                <option value="no">No</option>
+                <option value="history">Use a genome/index from the history</option>
+                <option value="cached">Use a built-in genome</option>
+            </param>
+            <when value="no"/>
+            <when value="history">
+                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
+            </when>
+            <when value="cached">
+                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
+                    </options>
+                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Mandatory reference selector: a FASTA dataset from the history or an entry of the fasta_indexes data table. The no_options validator is a direct child of the select param. -->
+    <xml name="mandatory_reference" token_help="" token_argument="">
+        <conditional name="addref_cond">
+            <param name="addref_select" type="select" label="Use a reference sequence">
+                <help>@HELP@</help>
+                <option value="history">Use a genome/index from the history</option>
+                <option value="cached">Use a built-in genome</option>
+            </param>
+            <when value="history">
+                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
+            </when>
+            <when value="cached">
+                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
+                    </options>
+                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
+    <token name="@ADDTHREADS@"><![CDATA[
+        ##compute the number of ADDITIONAL threads to be used by samtools (-@)
+        addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
+    ]]></token>
+    <token name="@ADDMEMORY@"><![CDATA[
+        ##compute the number of memory available to samtools sort (-m)
+        ##use only 75% of available: https://github.com/samtools/samtools/issues/831
+        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
+        ((addmemory=addmemory*75/100)) &&
+    ]]></token>
+    <xml name="seed_input">
+       <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
+    </xml>
+    <xml name="flag_options" token_s1="false" token_s2="false" token_s4="false" token_s8="false" token_s16="false" token_s32="false" token_s64="false" token_s128="false" token_s256="false" token_s512="false" token_s1024="false" token_s2048="false">
+        <option value="1" selected="@S1@">Read is paired</option>
+        <option value="2" selected="@S2@">Read is mapped in a proper pair</option>
+        <option value="4" selected="@S4@">Read is unmapped</option>
+        <option value="8" selected="@S8@">Mate is unmapped</option>
+        <option value="16" selected="@S16@">Read is mapped to the reverse strand of the reference</option>
+        <option value="32" selected="@S32@">Mate is mapped to the reverse strand of the reference</option>
+        <option value="64" selected="@S64@">Read is the first in a pair</option>
+        <option value="128" selected="@S128@">Read is the second in a pair</option>
+        <option value="256" selected="@S256@">Alignment of the read is not primary</option>
+        <option value="512" selected="@S512@">Read fails platform/vendor quality checks</option>
+        <option value="1024" selected="@S1024@">Read is a PCR or optical duplicate</option>
+        <option value="2048" selected="@S2048@">Alignment is supplementary</option>
+    </xml>
+
+    <!-- region specification macros and tokens for tools that allow the specification
+         of region by bed file / space separated list of regions -->
+    <token name="@REGIONS_FILE@"><![CDATA[
+        #if $cond_region.select_region == 'tab':
+            -t '$cond_region.targetregions'
+        #end if
+    ]]></token>
+    <token name="@REGIONS_MANUAL@"><![CDATA[
+        #if $cond_region.select_region == 'text':
+            #for $i, $x in enumerate($cond_region.regions_repeat):
+               '${x.region}'
+            #end for
+        #end if
+    ]]></token>
+    <xml name="regions_macro">
+        <conditional name="cond_region">
+            <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)">
+                <option value="no" selected="True">No</option>
+                <option value="text">Manualy specify regions</option>
+                <option value="tab">Regions from tabular file</option>
+            </param>
+            <when value="no"/>
+            <when value="text">
+                <repeat name="regions_repeat" min="1" default="1" title="Regions">
+                    <param name="region" type="text" label="region" help="format chr:from-to">
+                        <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any integer">^[^\s'\":]+(:\d+(-\d+){0,1}){0,1}$</validator>
+                    </param>
+                </repeat>
+            </when>
+            <when value="tab">
+                <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. Tab-delimited file chr,from,to (1-based, inclusive)" />
+            </when>
+        </conditional>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{SAM_def,
+                title={Definition of SAM/BAM format},
+                url = {https://samtools.github.io/hts-specs/},}
+            </citation>
+            <citation type="doi">10.1093/bioinformatics/btp352</citation>
+            <citation type="doi">10.1093/bioinformatics/btr076</citation>
+            <citation type="doi">10.1093/bioinformatics/btr509</citation>
+            <citation type="bibtex">
+                @misc{Danecek_et_al,
+                Author={Danecek, P., Schiffels, S., Durbin, R.},
+                title={Multiallelic calling model in bcftools (-m)},
+                url = {http://samtools.github.io/bcftools/call-m.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Durbin_VCQC,
+                Author={Durbin, R.},
+                title={Segregation based metric for variant call QC},
+                url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Li_SamMath,
+                Author={Li, H.},
+                title={Mathematical Notes on SAMtools Algorithms},
+                url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{SamTools_github,
+                title={SAMTools GitHub page},
+                url = {https://github.com/samtools/samtools},}
+            </citation>
+        </citations>
+    </xml>
+    <xml name="version_command">
+        <version_command><![CDATA[samtools 2>&1 | grep Version]]></version_command>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" description="Error" />
+        </stdio>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_sort_genome/samtools_sort.xml	Wed Mar 23 12:47:57 2022 +0000
@@ -0,0 +1,173 @@
+<tool id="samtools_sort" name="Samtools sort" version="2.0.4" profile="@PROFILE@">
+    <description>order of storing aligned sequences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command><![CDATA[
+        @ADDTHREADS@
+        @ADDMEMORY@
+        samtools sort
+            -@ \$addthreads
+            -m \$addmemory"M"
+            ###if str(compression):
+            ##    -l '$compression'
+            ###end if
+            $prim_key_cond.prim_key_select
+            #if $prim_key_cond.prim_key_select == '-t':
+                '$prim_key_cond.tag'
+                $prim_key_cond.sec_key_select
+            #end if
+            $minhash
+            -O bam
+            -T "\${TMPDIR:-.}"
+            '${input1}'
+             > '${output1}'
+    ]]></command>
+    <inputs>
+        <param name="input1" type="data" format="sam,unsorted.bam,cram" label="BAM File" />
+        <conditional name="prim_key_cond">
+            <param name="prim_key_select" type="select" label="Primary sort key">
+                <option value="" selected="True">coordinate</option>
+                <option value="-n">name (-n)</option>
+                <option value="-t">tag (-t)</option>
+            </param>
+            <when value=""/>
+            <when value="-n"/>
+            <when value="-t">
+                <param name="tag" type="text" optional="false" argument="-t" label="Alignment tag"/>
+                <param name="sec_key_select" type="select" label="Secondary sort key">
+                    <option value="">coordinate</option>
+                    <option value="-n">name (-n)</option>
+                </param>
+           </when>
+        </conditional>
+        <param name="minhash" type="boolean" argument="-M" truevalue="-M" falsevalue="" checked="false" label="Minhash collation" help="Use minimiser for clustering unaligned/unplaced reads."/>
+        <!--<param name="compression" type="integer" argument="-l" optional="True" min="0" max="9" label="compression level" help="0 (uncompressed) to 9 (best)"/>-->
+    </inputs>
+    <outputs>
+       <data name="output1" format="bam">
+            <change_format>
+                <when input="prim_key_cond.prim_key_select" value="" format="bam" />
+                <when input="prim_key_cond.prim_key_select" value="-n" format="qname_sorted.bam" />
+                <when input="prim_key_cond.prim_key_select" value="-t" format="unsorted.bam" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- tests from https://github.com/samtools/samtools/blob/9ce8c64493f7ea3fa69bc5c1ac980b1a8e3dcf1f/test/test.pl#L2464 -->
+        <!-- 1) # Pos sort -->
+        <test>
+            <param name="input1" value="test_input_1_a.bam" ftype="bam" />
+            <output name="output1" file="pos.sort.expected.bam" ftype="bam" lines_diff="4" />
+        </test>
+    	<!-- test_cmd($opts, out=>"sort/pos.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads}  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -"); -->
+        <!-- 2) # Name sort -->
+        <test>
+            <param name="input1" value="test_input_1_a.bam" ftype="bam" />
+            <conditional name="prim_key_cond">
+                <param name="prim_key_select" value="-n"/>
+            </conditional>
+            <output name="output1" file="name.sort.expected.bam" ftype="qname_sorted.bam" lines_diff="4"/>
+        </test>
+	    <!-- test_cmd($opts, out=>"sort/name.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
+        <!-- 3) # Tag sort (RG) (considers output and name sorted) -->
+        <test>
+            <param name="input1" value="test_input_1_a.bam" ftype="bam" />
+            <conditional name="prim_key_cond">
+                <param name="prim_key_select" value="-t"/>
+                <param name="tag" value="RG"/>
+            </conditional>
+            <output name="output1" file="tag.rg.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
+        </test>
+        <!--test_cmd($opts, out=>"sort/tag.rg.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t RG  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
+        <!-- 4) # Tag sort (RG); secondary by name -->
+        <test>
+            <param name="input1" value="test_input_1_a.bam" ftype="bam" />
+            <conditional name="prim_key_cond">
+                <param name="prim_key_select" value="-t"/>
+                <param name="tag" value="RG"/>
+                <param name="sec_key_select" value="-n"/>
+            </conditional>
+            <output name="output1" file="tag.rg.n.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
+        </test>
+        <!--test_cmd($opts, out=>"sort/tag.rg.n.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n -t RG  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
+        <!-- 5) # Tag sort (AS) -->
+        <test>
+            <param name="input1" value="test_input_1_a.bam" ftype="bam" />
+            <conditional name="prim_key_cond">
+                <param name="prim_key_select" value="-t"/>
+                <param name="tag" value="AS"/>
+                <param name="sec_key_select" value=""/>
+            </conditional>
+            <output name="output1" file="tag.as.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
+        </test>
+        <!--test_cmd($opts, out=>"sort/tag.as.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t AS $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
+        <!-- 6) # Tag sort (FI) -->
+        <test>
+            <param name="input1" value="test_input_1_a.bam" ftype="bam" />
+            <conditional name="prim_key_cond">
+                <param name="prim_key_select" value="-t"/>
+                <param name="tag" value="FI"/>
+            </conditional>
+            <output name="output1" file="tag.fi.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
+        </test>
+        <!--test_cmd($opts, out=>"sort/tag.fi.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t FI $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
+        <!-- 7) tests from old version -->
+        <test>
+            <param name="input1" value="1.bam" ftype="bam" />
+            <output name="output1" file="1_sort.bam" ftype="bam" sort="True" lines_diff="4"/>
+        </test>
+        <test>
+            <param name="input1" value="1.bam" ftype="bam" />
+            <conditional name="prim_key_cond">
+                <param name="prim_key_select" value="-n"/>
+            </conditional>
+            <output name="output1" file="1_sort_read_names.bam" ftype="qname_sorted.bam" lines_diff="4"/>
+        </test>
+        <!-- 8) test minhash sorting -->
+        <test>
+            <param name="input1" value="test_input_1_a.bam" ftype="bam" />
+            <param name="minhash" value="true" />
+            <output name="output1" file="minhash.expected.bam" ftype="bam" lines_diff="4" />
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Sort alignments by leftmost coordinates, or by read name when -n is used.
+An appropriate @HD-SO sort order header tag will be added or an existing
+one updated if necessary.
+
+**Ordering Rules**
+
+The following rules are used for ordering records.
+
+If option -t is in use, records are first sorted by the value of the given
+alignment tag, and then by position or name (if using -n). For example, “-t RG”
+will make read group the primary sort key. The rules for ordering by tag are:
+
+- Records that do not have the tag are sorted before ones that do.
+- If the types of the tags are different, they will be sorted so that single
+  character tags (type A) come before array tags (type B), then string tags
+  (types H and Z), then numeric tags (types f and i).
+- Numeric tags (types f and i) are compared by value. Note that comparisons of
+  floating-point values are subject to issues of rounding and precision.
+- String tags (types H and Z) are compared based on the binary contents of the
+  tag using the C strcmp(3) function.
+- Character tags (type A) are compared by binary character value.
+- No attempt is made to compare tags of other types — notably type B array values will not be compared.
+
+When the -n option is present, records are sorted by name. Names are compared so as to give a “natural” ordering — i.e. sections consisting of digits are compared numerically while all other sections are compared based on their binary representation. This means “a1” will come before “b1” and “a9” will come before “a10”. Records with the same name will be ordered according to the values of the READ1 and READ2 flags (see flags).
+
+When the -n option is not present, reads are sorted by reference (according to the order of the @SQ header records), then by position in the reference, and then by the REVERSE flag.
+
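+Historically, samtools sort also accepted a less flexible way of specifying the final and temporary output filenames (samtools sort [-f] [-o] in.bam out.prefix). This has now been removed. The previous out.prefix argument (and -f option, if any) should be changed to an appropriate combination of -T PREFIX and -o FILE. The previous -o option should be removed, as output defaults to standard output.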
+
+When the -M (minhash collation) option is present, samtools sort groups unmapped reads with similar sequences together. This can sometimes significantly reduce the file size.
+
+    </help>
+    <expand macro="citations"/>
+</tool>
Binary file samtools_sort_genome/test-data/1.bam has changed
Binary file samtools_sort_genome/test-data/1_sort.bam has changed
Binary file samtools_sort_genome/test-data/1_sort_read_names.bam has changed
Binary file samtools_sort_genome/test-data/minhash.expected.bam has changed
Binary file samtools_sort_genome/test-data/name.sort.expected.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_sort_genome/test-data/name.sort.expected.sam	Wed Mar 23 12:47:57 2022 +0000
@@ -0,0 +1,28 @@
+@HD	VN:1.4	SO:queryname
+@SQ	SN:insert	LN:599
+@SQ	SN:ref1	LN:45
+@SQ	SN:ref2	LN:40
+@SQ	SN:ref3	LN:4
+@RG	ID:fish	PG:donkey
+@RG	ID:cow	PU:13_&^&&*(:332
+@RG	PU:*9u8jkjjkjd:	ID:colt
+@PG	ID:bull	PP:donkey
+@PG	ID:donkey
+@PG	ID:moose
+@PG	PP:moose	ID:cow
+@CO	
+r000	99	insert	50	30	10M	=	80	30	ATTTAGCTAC	AAAAAAAAAA	RG:Z:cow	PG:Z:bull
+r000	211	insert	80	30	10M	=	50	-30	CCCAATCATT	AAAAAAAAAA	RG:Z:cow	PG:Z:bull
+r001	83	ref1	37	30	9M	=	7	-39	CAGCGCCAT	*	RG:Z:fish	PG:Z:colt
+r001	163	ref1	7	30	8M4I4M1D3M	=	37	39	TTAGATAAAGAGGATACTG	*	XX:B:S,12561,2,20,112	YY:i:100	RG:Z:fish	PG:Z:colt
+r002	0	ref1	9	30	1S2I6M1P1I1P1I4M2I	*	0	0	AAAAGATAAGGGATAAA	*	XA:Z:abc	XB:i:-10	PG:Z:colt
+r003	0	ref1	9	30	5H6M	*	0	0	AGCTAA	*	RG:Z:cow
+r003	16	ref1	29	30	6H5M	*	0	0	TAGGC	*	RG:Z:cow	PG:Z:colt
+r004	0	ref1	16	30	6M14N1I5M	*	0	0	ATAGCTCTCAGC	*	RG:Z:colt	PG:Z:colt
+u1	4	*	0	30	23M	*	0	0	TAATTAAGTCTACAGAAAAAAAA	???????????????????????
+x1	0	ref2	1	30	20M	*	0	0	AGGTTTTATAAAACAAATAA	*	RG:Z:colt	PG:Z:bull
+x2	0	ref2	2	30	21M	*	0	0	GGTTTTATAAAACAAATAATT	?????????????????????	RG:Z:colt	PG:Z:bull
+x3	0	ref2	6	30	9M4I13M	*	0	0	TTATAAAACAAATAATTAAGTCTACA	??????????????????????????	RG:Z:fish	PG:Z:bull
+x4	0	ref2	10	30	25M	*	0	0	CAAATAATTAAGTCTACAGAGCAAC	?????????????????????????	RG:Z:fish	PG:Z:bull
+x5	0	ref2	12	30	24M	*	0	0	AATAATTAAGTCTACAGAGCAACT	????????????????????????	RG:Z:fish	PG:Z:bull
+x6	0	ref2	14	30	23M	*	0	0	TAATTAAGTCTACAGAGCAACTA	???????????????????????	RG:Z:cow
Binary file samtools_sort_genome/test-data/pos.sort.expected.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_sort_genome/test-data/pos.sort.expected.sam	Wed Mar 23 12:47:57 2022 +0000
@@ -0,0 +1,28 @@
+@HD	VN:1.4	SO:coordinate
+@SQ	SN:insert	LN:599
+@SQ	SN:ref1	LN:45
+@SQ	SN:ref2	LN:40
+@SQ	SN:ref3	LN:4
+@RG	ID:fish	PG:donkey
+@RG	ID:cow	PU:13_&^&&*(:332
+@RG	PU:*9u8jkjjkjd:	ID:colt
+@PG	ID:bull	PP:donkey
+@PG	ID:donkey
+@PG	ID:moose
+@PG	PP:moose	ID:cow
+@CO	
+r000	99	insert	50	30	10M	=	80	30	ATTTAGCTAC	AAAAAAAAAA	RG:Z:cow	PG:Z:bull
+r000	211	insert	80	30	10M	=	50	-30	CCCAATCATT	AAAAAAAAAA	RG:Z:cow	PG:Z:bull
+r001	163	ref1	7	30	8M4I4M1D3M	=	37	39	TTAGATAAAGAGGATACTG	*	XX:B:S,12561,2,20,112	YY:i:100	RG:Z:fish	PG:Z:colt
+r002	0	ref1	9	30	1S2I6M1P1I1P1I4M2I	*	0	0	AAAAGATAAGGGATAAA	*	XA:Z:abc	XB:i:-10	PG:Z:colt
+r003	0	ref1	9	30	5H6M	*	0	0	AGCTAA	*	RG:Z:cow
+r004	0	ref1	16	30	6M14N1I5M	*	0	0	ATAGCTCTCAGC	*	RG:Z:colt	PG:Z:colt
+r003	16	ref1	29	30	6H5M	*	0	0	TAGGC	*	RG:Z:cow	PG:Z:colt
+r001	83	ref1	37	30	9M	=	7	-39	CAGCGCCAT	*	RG:Z:fish	PG:Z:colt
+x1	0	ref2	1	30	20M	*	0	0	AGGTTTTATAAAACAAATAA	*	RG:Z:colt	PG:Z:bull
+x2	0	ref2	2	30	21M	*	0	0	GGTTTTATAAAACAAATAATT	?????????????????????	RG:Z:colt	PG:Z:bull
+x3	0	ref2	6	30	9M4I13M	*	0	0	TTATAAAACAAATAATTAAGTCTACA	??????????????????????????	RG:Z:fish	PG:Z:bull
+x4	0	ref2	10	30	25M	*	0	0	CAAATAATTAAGTCTACAGAGCAAC	?????????????????????????	RG:Z:fish	PG:Z:bull
+x5	0	ref2	12	30	24M	*	0	0	AATAATTAAGTCTACAGAGCAACT	????????????????????????	RG:Z:fish	PG:Z:bull
+x6	0	ref2	14	30	23M	*	0	0	TAATTAAGTCTACAGAGCAACTA	???????????????????????	RG:Z:cow
+u1	4	*	0	30	23M	*	0	0	TAATTAAGTCTACAGAAAAAAAA	???????????????????????
Binary file samtools_sort_genome/test-data/tag.as.sort.expected.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_sort_genome/test-data/tag.as.sort.expected.sam	Wed Mar 23 12:47:57 2022 +0000
@@ -0,0 +1,24 @@
+@HD	VN:1.4	SO:unknown
+@SQ	SN:insert	LN:599
+@SQ	SN:ref1	LN:45
+@SQ	SN:ref2	LN:40
+@SQ	SN:ref3	LN:4
+@PG	ID:llama
+@RG	ID:fish	PG:llama
+@RG	ID:cow	PU:13_&^&&*(:332	PG:donkey
+@RG	PU:*9u8jkjjkjd:	ID:colt
+@PG	ID:bull	PP:donkey
+@PG	ID:donkey
+@CO	Do you know?
+r006	16	ref1	29	30	6H5M	*	0	0	TAGGC	*	RG:Z:colt	PG:Z:donkey	FI:i:3
+x11	0	ref2	12	30	24M	*	0	0	AATAATTAAGTCTACAGAGCAACT	????????????????????????	RG:Z:cow	PG:Z:bull	FI:Z:a
+r007	0	ref1	16	30	6M14N1I5M	*	0	0	ATAGCTCTCAGC	*	RG:Z:colt	PG:Z:donkey	AS:i:-5	FI:f:3.5
+x10	0	ref2	10	30	25M	*	0	0	CAAATAATTAAGTCTACAGAGCAAC	?????????????????????????	RG:Z:cow	PG:Z:bull	AS:i:0	FI:A:b
+r007	0	ref1	9	30	5H6M	*	0	0	AGCTAA	*	RG:Z:colt	PG:Z:donkey	AS:i:1	FI:i:4
+r005	163	ref1	7	30	8M4I4M1D3M	=	37	39	TTAGATAAAGAGGATACTG	*	XX:B:S,12561,2,20,112	YY:i:100	RG:Z:colt	PG:Z:donkey	AS:i:10	FI:i:5
+x8	0	ref2	2	30	21M	*	0	0	GGTTTTATAAAACAAATAATT	?????????????????????	RG:Z:cow	PG:Z:bull	AS:i:10	FI:f:1.5
+r006	0	ref1	9	30	1S2I6M1P1I1P1I4M2I	*	0	0	AAAAGATAAGGGATAAA	*	XA:Z:abc	XB:i:-10	RG:Z:colt	PG:Z:donkey	AS:i:20	FI:f:4.5
+x9	0	ref2	6	30	9M4I13M	*	0	0	TTATAAAACAAATAATTAAGTCTACA	??????????????????????????	RG:Z:cow	PG:Z:bull	AS:i:20	FI:i:1
+x7	0	ref2	1	30	20M	*	0	0	AGGTTTTATAAAACAAATAA	*	RG:Z:cow	PG:Z:bull	AS:i:50	FI:i:2
+r005	83	ref1	37	30	9M	=	7	-39	CAGCGCCAT	*	RG:Z:colt	PG:Z:donkey	AS:i:100	FI:f:2.5
+x12	0	ref2	14	30	23M	*	0	0	TAATTAAGTCTACAGAGCAACTA	???????????????????????	RG:Z:cow	PG:Z:bull	AS:i:65100
Binary file samtools_sort_genome/test-data/tag.fi.sort.expected.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_sort_genome/test-data/tag.fi.sort.expected.sam	Wed Mar 23 12:47:57 2022 +0000
@@ -0,0 +1,24 @@
+@HD	VN:1.4	SO:unknown
+@SQ	SN:insert	LN:599
+@SQ	SN:ref1	LN:45
+@SQ	SN:ref2	LN:40
+@SQ	SN:ref3	LN:4
+@PG	ID:llama
+@RG	ID:fish	PG:llama
+@RG	ID:cow	PU:13_&^&&*(:332	PG:donkey
+@RG	PU:*9u8jkjjkjd:	ID:colt
+@PG	ID:bull	PP:donkey
+@PG	ID:donkey
+@CO	Do you know?
+x12	0	ref2	14	30	23M	*	0	0	TAATTAAGTCTACAGAGCAACTA	???????????????????????	RG:Z:cow	PG:Z:bull	AS:i:65100
+x10	0	ref2	10	30	25M	*	0	0	CAAATAATTAAGTCTACAGAGCAAC	?????????????????????????	RG:Z:cow	PG:Z:bull	AS:i:0	FI:A:b
+x11	0	ref2	12	30	24M	*	0	0	AATAATTAAGTCTACAGAGCAACT	????????????????????????	RG:Z:cow	PG:Z:bull	FI:Z:a
+x9	0	ref2	6	30	9M4I13M	*	0	0	TTATAAAACAAATAATTAAGTCTACA	??????????????????????????	RG:Z:cow	PG:Z:bull	AS:i:20	FI:i:1
+x8	0	ref2	2	30	21M	*	0	0	GGTTTTATAAAACAAATAATT	?????????????????????	RG:Z:cow	PG:Z:bull	AS:i:10	FI:f:1.5
+x7	0	ref2	1	30	20M	*	0	0	AGGTTTTATAAAACAAATAA	*	RG:Z:cow	PG:Z:bull	AS:i:50	FI:i:2
+r005	83	ref1	37	30	9M	=	7	-39	CAGCGCCAT	*	RG:Z:colt	PG:Z:donkey	AS:i:100	FI:f:2.5
+r006	16	ref1	29	30	6H5M	*	0	0	TAGGC	*	RG:Z:colt	PG:Z:donkey	FI:i:3
+r007	0	ref1	16	30	6M14N1I5M	*	0	0	ATAGCTCTCAGC	*	RG:Z:colt	PG:Z:donkey	AS:i:-5	FI:f:3.5
+r007	0	ref1	9	30	5H6M	*	0	0	AGCTAA	*	RG:Z:colt	PG:Z:donkey	AS:i:1	FI:i:4
+r006	0	ref1	9	30	1S2I6M1P1I1P1I4M2I	*	0	0	AAAAGATAAGGGATAAA	*	XA:Z:abc	XB:i:-10	RG:Z:colt	PG:Z:donkey	AS:i:20	FI:f:4.5
+r005	163	ref1	7	30	8M4I4M1D3M	=	37	39	TTAGATAAAGAGGATACTG	*	XX:B:S,12561,2,20,112	YY:i:100	RG:Z:colt	PG:Z:donkey	AS:i:10	FI:i:5
Binary file samtools_sort_genome/test-data/tag.rg.n.sort.expected.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_sort_genome/test-data/tag.rg.n.sort.expected.sam	Wed Mar 23 12:47:57 2022 +0000
@@ -0,0 +1,28 @@
+@HD	VN:1.4	SO:unknown
+@SQ	SN:insert	LN:599
+@SQ	SN:ref1	LN:45
+@SQ	SN:ref2	LN:40
+@SQ	SN:ref3	LN:4
+@RG	ID:fish	PG:donkey
+@RG	ID:cow	PU:13_&^&&*(:332
+@RG	PU:*9u8jkjjkjd:	ID:colt
+@PG	ID:bull	PP:donkey
+@PG	ID:donkey
+@PG	ID:moose
+@PG	PP:moose	ID:cow
+@CO	
+r002	0	ref1	9	30	1S2I6M1P1I1P1I4M2I	*	0	0	AAAAGATAAGGGATAAA	*	XA:Z:abc	XB:i:-10	PG:Z:colt
+u1	4	*	0	30	23M	*	0	0	TAATTAAGTCTACAGAAAAAAAA	???????????????????????
+r004	0	ref1	16	30	6M14N1I5M	*	0	0	ATAGCTCTCAGC	*	RG:Z:colt	PG:Z:colt
+x1	0	ref2	1	30	20M	*	0	0	AGGTTTTATAAAACAAATAA	*	RG:Z:colt	PG:Z:bull
+x2	0	ref2	2	30	21M	*	0	0	GGTTTTATAAAACAAATAATT	?????????????????????	RG:Z:colt	PG:Z:bull
+r000	99	insert	50	30	10M	=	80	30	ATTTAGCTAC	AAAAAAAAAA	RG:Z:cow	PG:Z:bull
+r000	211	insert	80	30	10M	=	50	-30	CCCAATCATT	AAAAAAAAAA	RG:Z:cow	PG:Z:bull
+r003	0	ref1	9	30	5H6M	*	0	0	AGCTAA	*	RG:Z:cow
+r003	16	ref1	29	30	6H5M	*	0	0	TAGGC	*	RG:Z:cow	PG:Z:colt
+x6	0	ref2	14	30	23M	*	0	0	TAATTAAGTCTACAGAGCAACTA	???????????????????????	RG:Z:cow
+r001	83	ref1	37	30	9M	=	7	-39	CAGCGCCAT	*	RG:Z:fish	PG:Z:colt
+r001	163	ref1	7	30	8M4I4M1D3M	=	37	39	TTAGATAAAGAGGATACTG	*	XX:B:S,12561,2,20,112	YY:i:100	RG:Z:fish	PG:Z:colt
+x3	0	ref2	6	30	9M4I13M	*	0	0	TTATAAAACAAATAATTAAGTCTACA	??????????????????????????	RG:Z:fish	PG:Z:bull
+x4	0	ref2	10	30	25M	*	0	0	CAAATAATTAAGTCTACAGAGCAAC	?????????????????????????	RG:Z:fish	PG:Z:bull
+x5	0	ref2	12	30	24M	*	0	0	AATAATTAAGTCTACAGAGCAACT	????????????????????????	RG:Z:fish	PG:Z:bull
Binary file samtools_sort_genome/test-data/tag.rg.sort.expected.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_sort_genome/test-data/tag.rg.sort.expected.sam	Wed Mar 23 12:47:57 2022 +0000
@@ -0,0 +1,28 @@
+@HD	VN:1.4	SO:unknown
+@SQ	SN:insert	LN:599
+@SQ	SN:ref1	LN:45
+@SQ	SN:ref2	LN:40
+@SQ	SN:ref3	LN:4
+@RG	ID:fish	PG:donkey
+@RG	ID:cow	PU:13_&^&&*(:332
+@RG	PU:*9u8jkjjkjd:	ID:colt
+@PG	ID:bull	PP:donkey
+@PG	ID:donkey
+@PG	ID:moose
+@PG	PP:moose	ID:cow
+@CO	
+r002	0	ref1	9	30	1S2I6M1P1I1P1I4M2I	*	0	0	AAAAGATAAGGGATAAA	*	XA:Z:abc	XB:i:-10	PG:Z:colt
+u1	4	*	0	30	23M	*	0	0	TAATTAAGTCTACAGAAAAAAAA	???????????????????????
+r004	0	ref1	16	30	6M14N1I5M	*	0	0	ATAGCTCTCAGC	*	RG:Z:colt	PG:Z:colt
+x1	0	ref2	1	30	20M	*	0	0	AGGTTTTATAAAACAAATAA	*	RG:Z:colt	PG:Z:bull
+x2	0	ref2	2	30	21M	*	0	0	GGTTTTATAAAACAAATAATT	?????????????????????	RG:Z:colt	PG:Z:bull
+r000	99	insert	50	30	10M	=	80	30	ATTTAGCTAC	AAAAAAAAAA	RG:Z:cow	PG:Z:bull
+r000	211	insert	80	30	10M	=	50	-30	CCCAATCATT	AAAAAAAAAA	RG:Z:cow	PG:Z:bull
+r003	0	ref1	9	30	5H6M	*	0	0	AGCTAA	*	RG:Z:cow
+r003	16	ref1	29	30	6H5M	*	0	0	TAGGC	*	RG:Z:cow	PG:Z:colt
+x6	0	ref2	14	30	23M	*	0	0	TAATTAAGTCTACAGAGCAACTA	???????????????????????	RG:Z:cow
+r001	163	ref1	7	30	8M4I4M1D3M	=	37	39	TTAGATAAAGAGGATACTG	*	XX:B:S,12561,2,20,112	YY:i:100	RG:Z:fish	PG:Z:colt
+r001	83	ref1	37	30	9M	=	7	-39	CAGCGCCAT	*	RG:Z:fish	PG:Z:colt
+x3	0	ref2	6	30	9M4I13M	*	0	0	TTATAAAACAAATAATTAAGTCTACA	??????????????????????????	RG:Z:fish	PG:Z:bull
+x4	0	ref2	10	30	25M	*	0	0	CAAATAATTAAGTCTACAGAGCAAC	?????????????????????????	RG:Z:fish	PG:Z:bull
+x5	0	ref2	12	30	24M	*	0	0	AATAATTAAGTCTACAGAGCAACT	????????????????????????	RG:Z:fish	PG:Z:bull
Binary file samtools_sort_genome/test-data/test_input_1_a.bam has changed