annotate samtools_sort_transcriptome/samtools_sort.xml @ 5:21eceda8a1ae draft default tip

Uploaded
author jackcurragh
date Tue, 07 Jun 2022 12:32:37 +0000
parents 9445a308fb7e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
21eceda8a1ae Uploaded
jackcurragh
parents: 4
diff changeset
1 <tool id="samtools_transcriptome_sort" name="Samtools sort" version="2.0.6" profile="@PROFILE@">
3
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
2 <description>Sort Alignment Files (SAM/BAM)</description>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
3 <macros>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
4 <import>macros.xml</import>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
5 </macros>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
6 <expand macro="requirements"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
7 <expand macro="stdio"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
8 <expand macro="version_command"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
9 <command><![CDATA[
4
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
10 samtools sort
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
11 ###if str(compression):
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
12 ## -l '$compression'
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
13 ###end if
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
14 $prim_key_cond.prim_key_select
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
15 #if $prim_key_cond.prim_key_select == '-t':
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
16 $prim_key_cond.tag
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
17 $prim_key_cond.sec_key_select
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
18 #end if
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
19 $minhash
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
20 -O bam
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
21 -T "\${TMPDIR:-.}"
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
22 '${input1}'
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
23 > '${output1}'
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
24 ]]>
5
21eceda8a1ae Uploaded
jackcurragh
parents: 4
diff changeset
25 </command>
3
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
26 <inputs>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
27 <param name="input1" type="data" format="sam,unsorted.bam,cram" label="BAM File" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
28 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
29 <param name="prim_key_select" type="select" label="Primary sort key">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
30 <option value="">coordinate</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
31 <option value="-n" selected="True">name (-n)</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
32 <option value="-t">tag (-t)</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
33 </param>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
34 <when value=""/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
35 <when value="-n"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
36 <when value="-t">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
37 <param name="tag" type="text" optional="false" argument="-t" label="Alignment tag"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
38 <param name="sec_key_select" type="select" label="Secondary sort key">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
39 <option value="">coordinate</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
40 <option value="-n">name (-n)</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
41 </param>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
42 </when>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
43 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
44 <param name="minhash" type="boolean" argument="-M" truevalue="-M" falsevalue="" checked="false" label="Minhash collation" help="Use minimiser for clustering unaligned/unplaced reads."/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
45 <!--<param name="compression" type="integer" argument="-l" optional="True" min="0" max="9" label="compression level" help="0 (uncompressed) to 9 (best)"/>-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
46 </inputs>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
47 <outputs>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
48 <data name="output1" format="bam">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
49 <change_format>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
50 <when input="prim_key_cond.prim_key_select" value="" format="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
51 <when input="prim_key_cond.prim_key_select" value="-n" format="qname_sorted.bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
52 <when input="prim_key_cond.prim_key_select" value="-t" format="unsorted.bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
53 </change_format>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
54 </data>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
55 </outputs>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
56 <tests>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
57 <!-- tests from https://github.com/samtools/samtools/blob/9ce8c64493f7ea3fa69bc5c1ac980b1a8e3dcf1f/test/test.pl#L2464 -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
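58 <!-- 1) # Pos sort. NOTE(review): this test does not set prim_key_select, and the "-n" option is marked selected="True" in the inputs, so an explicit empty value may be needed here to exercise coordinate sort; confirm. -->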
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
59 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
60 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
61 <output name="output1" file="pos.sort.expected.bam" ftype="bam" lines_diff="4" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
62 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
63 <!-- test_cmd($opts, out=>"sort/pos.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} $$opts{path}/dat/test_input_1_a.bam -O SAM -o -"); -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
64 <!-- 2) # Name sort -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
65 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
66 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
67 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
68 <param name="prim_key_select" value="-n"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
69 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
70 <output name="output1" file="name.sort.expected.bam" ftype="qname_sorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
71 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
72 <!-- test_cmd($opts, out=>"sort/name.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
73 <!-- 3) # Tag sort (RG) (considers output and name sorted) -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
74 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
75 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
76 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
77 <param name="prim_key_select" value="-t"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
78 <param name="tag" value="RG"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
79 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
80 <output name="output1" file="tag.rg.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
81 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
82 <!--test_cmd($opts, out=>"sort/tag.rg.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t RG $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
83 <!-- 4) # Tag sort (RG); secondary by name -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
84 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
85 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
86 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
87 <param name="prim_key_select" value="-t"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
88 <param name="tag" value="RG"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
89 <param name="sec_key_select" value="-n"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
90 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
91 <output name="output1" file="tag.rg.n.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
92 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
93 <!--test_cmd($opts, out=>"sort/tag.rg.n.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n -t RG $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
94 <!-- 5) # Tag sort (AS) -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
95 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
96 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
97 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
98 <param name="prim_key_select" value="-t"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
99 <param name="tag" value="AS"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
100 <param name="sec_key_select" value=""/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
101 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
102 <output name="output1" file="tag.as.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
103 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
104 <!--test_cmd($opts, out=>"sort/tag.as.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t AS $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
105 <!-- 6) # Tag sort (FI) -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
106 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
107 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
108 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
109 <param name="prim_key_select" value="-t"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
110 <param name="tag" value="FI"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
111 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
112 <output name="output1" file="tag.fi.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
113 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
114 <!--test_cmd($opts, out=>"sort/tag.fi.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t FI $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
115 <!-- 7) tests from old version -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
116 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
117 <param name="input1" value="1.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
118 <output name="output1" file="1_sort.bam" ftype="bam" sort="True" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
119 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
120 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
121 <param name="input1" value="1.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
122 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
123 <param name="prim_key_select" value="-n"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
124 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
125 <output name="output1" file="1_sort_read_names.bam" ftype="qname_sorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
126 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
127 <!-- 8) test minhash sorting -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
128 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
129 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
130 <param name="minhash" value="true" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
131 <output name="output1" file="minhash.expected.bam" ftype="bam" lines_diff="4" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
132 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
133 </tests>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
134 <help>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
135 **What it does**
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
136
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
137 Sort alignments by leftmost coordinates, or by read name when -n is used.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
138 An appropriate @HD-SO sort order header tag will be added or an existing
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
139 one updated if necessary.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
140
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
141 **Ordering Rules**
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
142
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
143 The following rules are used for ordering records.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
144
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
145 If option -t is in use, records are first sorted by the value of the given
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
146 alignment tag, and then by position or name (if using -n). For example, “-t RG”
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
147 will make read group the primary sort key. The rules for ordering by tag are:
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
148
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
149 - Records that do not have the tag are sorted before ones that do.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
150 - If the types of the tags are different, they will be sorted so that single
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
151 character tags (type A) come before array tags (type B), then string tags
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
152 (types H and Z), then numeric tags (types f and i).
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
153 - Numeric tags (types f and i) are compared by value. Note that comparisons of
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
154 floating-point values are subject to issues of rounding and precision.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
155 - String tags (types H and Z) are compared based on the binary contents of the
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
156 tag using the C strcmp(3) function.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
157 - Character tags (type A) are compared by binary character value.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
158 - No attempt is made to compare tags of other types — notably type B array values will not be compared.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
159
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
160 When the -n option is present, records are sorted by name. Names are compared so as to give a “natural” ordering — i.e. sections consisting of digits are compared numerically while all other sections are compared based on their binary representation. This means “a1” will come before “b1” and “a9” will come before “a10”. Records with the same name will be ordered according to the values of the READ1 and READ2 flags (see flags).
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
161
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
162 When the -n option is not present, reads are sorted by reference (according to the order of the @SQ header records), then by position in the reference, and then by the REVERSE flag.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
163
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
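164 Historically, samtools sort also accepted a less flexible way of specifying the final and temporary filenames: samtools sort [-f] [-o] in.bam out.prefix. This has now been removed. The previous out.prefix argument (and -f option, if any) should be changed to an appropriate combination of -T PREFIX and -o FILE. The previous -o option should be removed, as output defaults to standard output.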
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
165
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
166 When the -M (minhash collation) option is present, samtools sort groups unmapped reads with similar sequence together. This can sometimes significantly reduce the file size.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
167
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
168 </help>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
169 <expand macro="citations"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
170 </tool>