annotate samtools_sort_genome/samtools_sort.xml @ 2:e25b7da5ecaa draft default tip

Uploaded
author jackcurragh
date Tue, 07 Jun 2022 17:25:56 +0000
parents cdb826d5314b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
e25b7da5ecaa Uploaded
jackcurragh
parents: 1
diff changeset
1 <tool id="samtools_sort" name="Samtools Sort" version="2.0.5" profile="@PROFILE@">
1
cdb826d5314b Uploaded
jackcurragh
parents: 0
diff changeset
2 <description>Sort Alignment Files (SAM/BAM)</description>
0
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
3 <macros>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
4 <import>macros.xml</import>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
5 </macros>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
6 <expand macro="requirements"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
7 <expand macro="stdio"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
8 <expand macro="version_command"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
9 <command><![CDATA[
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
## Cheetah template that assembles the samtools sort command line from the tool parameters.
10 samtools sort
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
## Compression level (-l) is disabled here; the matching "compression" param is commented out in the inputs section.
11 ###if str(compression):
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
12 ## -l '$compression'
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
13 ###end if
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
## Primary sort key flag: empty string (coordinate), -n (name) or -t (tag).
14 $prim_key_cond.prim_key_select
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
15 #if $prim_key_cond.prim_key_select == '-t':
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
## The tag is free text: single-quote it so spaces or shell metacharacters cannot add arguments or break the command.
16 '$prim_key_cond.tag'
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
## Secondary sort key for tag sorting: empty string (coordinate) or -n (name).
17 $prim_key_cond.sec_key_select
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
18 #end if
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
## Boolean param: renders as -M when checked, otherwise as an empty string.
19 $minhash
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
20 -O bam
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
## Temporary files go to TMPDIR when it is set, otherwise to the current directory; the backslash leaves the expansion to the shell.
21 -T "\${TMPDIR:-.}"
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
22 '${input1}'
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
## Sorted BAM is written to standard output and redirected into the output dataset.
23 > '${output1}'
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
24 ]]></command>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
25 <inputs>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
26 <param name="input1" type="data" format="sam,unsorted.bam,cram" label="BAM File" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
27 <conditional name="prim_key_cond">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
28 <param name="prim_key_select" type="select" label="Primary sort key">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
29 <option value="" selected="True">coordinate</option>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
30 <option value="-n">name (-n)</option>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
31 <option value="-t">tag (-t)</option>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
32 </param>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
33 <when value=""/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
34 <when value="-n"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
35 <when value="-t">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
36 <param name="tag" type="text" optional="false" argument="-t" label="Alignment tag"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
37 <param name="sec_key_select" type="select" label="Secondary sort key">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
38 <option value="">coordinate</option>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
39 <option value="-n">name (-n)</option>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
40 </param>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
41 </when>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
42 </conditional>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
43 <param name="minhash" type="boolean" argument="-M" truevalue="-M" falsevalue="" checked="false" label="Minhash collation" help="Use minimiser for clustering unaligned/unplaced reads."/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
44 <!--<param name="compression" type="integer" argument="-l" optional="True" min="0" max="9" label="compression level" help="0 (uncompressed) to 9 (best)"/>-->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
45 </inputs>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
46 <outputs>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
47 <data name="output1" format="bam">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
48 <change_format>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
49 <when input="prim_key_cond.prim_key_select" value="" format="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
50 <when input="prim_key_cond.prim_key_select" value="-n" format="qname_sorted.bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
51 <when input="prim_key_cond.prim_key_select" value="-t" format="unsorted.bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
52 </change_format>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
53 </data>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
54 </outputs>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
55 <tests>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
56 <!-- tests from https://github.com/samtools/samtools/blob/9ce8c64493f7ea3fa69bc5c1ac980b1a8e3dcf1f/test/test.pl#L2464 -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
57 <!-- 1) # Pos sort -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
58 <test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
59 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
60 <output name="output1" file="pos.sort.expected.bam" ftype="bam" lines_diff="4" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
61 </test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
62 <!-- test_cmd($opts, out=>"sort/pos.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} $$opts{path}/dat/test_input_1_a.bam -O SAM -o -"); -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
63 <!-- 2) # Name sort -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
64 <test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
65 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
66 <conditional name="prim_key_cond">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
67 <param name="prim_key_select" value="-n"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
68 </conditional>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
69 <output name="output1" file="name.sort.expected.bam" ftype="qname_sorted.bam" lines_diff="4"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
70 </test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
71 <!-- test_cmd($opts, out=>"sort/name.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
72 <!-- 3) # Tag sort (RG) (considers output and name sorted) -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
73 <test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
74 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
75 <conditional name="prim_key_cond">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
76 <param name="prim_key_select" value="-t"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
77 <param name="tag" value="RG"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
78 </conditional>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
79 <output name="output1" file="tag.rg.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
80 </test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
81 <!--test_cmd($opts, out=>"sort/tag.rg.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t RG $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
82 <!-- 4) # Tag sort (RG); secondary by name -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
83 <test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
84 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
85 <conditional name="prim_key_cond">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
86 <param name="prim_key_select" value="-t"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
87 <param name="tag" value="RG"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
88 <param name="sec_key_select" value="-n"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
89 </conditional>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
90 <output name="output1" file="tag.rg.n.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
91 </test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
92 <!--test_cmd($opts, out=>"sort/tag.rg.n.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n -t RG $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
93 <!-- 5) # Tag sort (AS) -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
94 <test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
95 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
96 <conditional name="prim_key_cond">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
97 <param name="prim_key_select" value="-t"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
98 <param name="tag" value="AS"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
99 <param name="sec_key_select" value=""/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
100 </conditional>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
101 <output name="output1" file="tag.as.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
102 </test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
103 <!--test_cmd($opts, out=>"sort/tag.as.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t AS $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
104 <!-- 6) # Tag sort (FI) -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
105 <test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
106 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
107 <conditional name="prim_key_cond">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
108 <param name="prim_key_select" value="-t"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
109 <param name="tag" value="FI"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
110 </conditional>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
111 <output name="output1" file="tag.fi.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
112 </test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
113 <!--test_cmd($opts, out=>"sort/tag.fi.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t FI $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
114 <!-- 7) tests from old version -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
115 <test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
116 <param name="input1" value="1.bam" ftype="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
117 <output name="output1" file="1_sort.bam" ftype="bam" sort="True" lines_diff="4"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
118 </test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
119 <test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
120 <param name="input1" value="1.bam" ftype="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
121 <conditional name="prim_key_cond">
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
122 <param name="prim_key_select" value="-n"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
123 </conditional>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
124 <output name="output1" file="1_sort_read_names.bam" ftype="qname_sorted.bam" lines_diff="4"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
125 </test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
126 <!-- 8) test minhash sorting -->
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
127 <test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
128 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
129 <param name="minhash" value="true" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
130 <output name="output1" file="minhash.expected.bam" ftype="bam" lines_diff="4" />
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
131 </test>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
132 </tests>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
133 <help>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
134 **What it does**
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
135
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
136 Sort alignments by leftmost coordinates, by read name when -n is used, by tag contents when -t is used, or in a minimiser-based collation order when -M is used.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
137 An appropriate @HD-SO sort order header tag will be added or an existing
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
138 one updated if necessary.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
139
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
140 **Ordering Rules**
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
141
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
142 The following rules are used for ordering records.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
143
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
144 If option -t is in use, records are first sorted by the value of the given
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
145 alignment tag, and then by position or name (if using -n). For example, “-t RG”
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
146 will make read group the primary sort key. The rules for ordering by tag are:
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
147
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
148 - Records that do not have the tag are sorted before ones that do.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
149 - If the types of the tags are different, they will be sorted so that single
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
150 character tags (type A) come before array tags (type B), then string tags
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
151 (types H and Z), then numeric tags (types f and i).
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
152 - Numeric tags (types f and i) are compared by value. Note that comparisons of
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
153 floating-point values are subject to issues of rounding and precision.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
154 - String tags (types H and Z) are compared based on the binary contents of the
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
155 tag using the C strcmp(3) function.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
156 - Character tags (type A) are compared by binary character value.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
157 - No attempt is made to compare tags of other types — notably type B array values will not be compared.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
158
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
159 When the -n option is present, records are sorted by name. Names are compared so as to give a “natural” ordering — i.e. sections consisting of digits are compared numerically while all other sections are compared based on their binary representation. This means “a1” will come before “b1” and “a9” will come before “a10”. Records with the same name will be ordered according to the values of the READ1 and READ2 flags (see flags).
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
160
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
161 When the -n option is not present, reads are sorted by reference (according to the order of the @SQ header records), then by position in the reference, and then by the REVERSE flag.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
162
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
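163 Historically, samtools sort also accepted a less flexible way of specifying the final and temporary output filenames: ``samtools sort [-f] [-o] in.bam out.prefix``. This has now been removed. The previous out.prefix argument (and -f option, if any) should be changed to an appropriate combination of -T PREFIX and -o FILE. The previous -o option should be removed, as output defaults to standard output.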
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
164
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
165 When the -M (minhash collation) option is present, samtools sort groups unmapped reads with similar sequences together. This can sometimes significantly reduce the file size.
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
166
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
167 </help>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
168 <expand macro="citations"/>
76a4f74f3c09 Uploaded
jackcurragh
parents:
diff changeset
169 </tool>