annotate samtools_sort_transcriptome/samtools_sort.xml @ 4:9445a308fb7e draft

Uploaded
author jackcurragh
date Tue, 07 Jun 2022 11:33:01 +0000
parents 0421e96d203a
children 21eceda8a1ae
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
1 <tool id="samtools_transcriptome_sort" name="Samtools sort" version="2.0.5" profile="@PROFILE@">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
2 <description>Sort Alignment Files (SAM/BAM)</description>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
3 <macros>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
4 <import>macros.xml</import>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
5 </macros>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
6 <expand macro="requirements"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
7 <expand macro="stdio"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
8 <expand macro="version_command"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
9 <command><![CDATA[
4
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
10 samtools sort
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
11 ###if str(compression):
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
12 ## -l '$compression'
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
13 ###end if
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
14 $prim_key_cond.prim_key_select
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
15 #if $prim_key_cond.prim_key_select == '-t':
## Quote the free-text tag so it always reaches samtools as a single shell argument.
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
16 '$prim_key_cond.tag'
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
17 $prim_key_cond.sec_key_select
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
18 #end if
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
19 $minhash
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
20 -O bam
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
21 -T "\${TMPDIR:-.}"
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
22 '${input1}'
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
23 > '${output1}'
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
24 ]]>
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
25 <!-- <![CDATA[
3
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
26 @ADDTHREADS@
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
27 @ADDMEMORY@
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
28 samtools sort
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
29 -@ \$addthreads
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
30 -m \$addmemory"M"
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
31 ###if str(compression):
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
32 ## -l '$compression'
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
33 ###end if
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
34 $prim_key_cond.prim_key_select
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
35 #if $prim_key_cond.prim_key_select == '-t':
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
36 $prim_key_cond.tag
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
37 $prim_key_cond.sec_key_select
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
38 #end if
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
39 $minhash
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
40 -O bam
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
41 -T "\${TMPDIR:-.}"
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
42 '${input1}'
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
43 > '${output1}'
4
9445a308fb7e Uploaded
jackcurragh
parents: 3
diff changeset
44 ]]>--></command>
3
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
45 <inputs>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
46 <param name="input1" type="data" format="sam,unsorted.bam,cram" label="BAM File" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
47 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
48 <param name="prim_key_select" type="select" label="Primary sort key">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
49 <option value="">coordinate</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
50 <option value="-n" selected="True">name (-n)</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
51 <option value="-t">tag (-t)</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
52 </param>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
53 <when value=""/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
54 <when value="-n"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
55 <when value="-t">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
56 <param name="tag" type="text" optional="false" argument="-t" label="Alignment tag"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
57 <param name="sec_key_select" type="select" label="Secondary sort key">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
58 <option value="">coordinate</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
59 <option value="-n">name (-n)</option>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
60 </param>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
61 </when>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
62 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
63 <param name="minhash" type="boolean" argument="-M" truevalue="-M" falsevalue="" checked="false" label="Minhash collation" help="Use minimiser for clustering unaligned/unplaced reads."/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
64 <!--<param name="compression" type="integer" argument="-l" optional="True" min="0" max="9" label="compression level" help="0 (uncompressed) to 9 (best)"/>-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
65 </inputs>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
66 <outputs>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
67 <data name="output1" format="bam">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
68 <change_format>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
69 <when input="prim_key_cond.prim_key_select" value="" format="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
70 <when input="prim_key_cond.prim_key_select" value="-n" format="qname_sorted.bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
71 <when input="prim_key_cond.prim_key_select" value="-t" format="unsorted.bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
72 </change_format>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
73 </data>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
74 </outputs>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
75 <tests>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
76 <!-- tests from https://github.com/samtools/samtools/blob/9ce8c64493f7ea3fa69bc5c1ac980b1a8e3dcf1f/test/test.pl#L2464 -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
77 <!-- 1) # Pos sort -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
78 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
79 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
<!-- The primary sort key defaults to name (-n); pick coordinate sort explicitly so the output is position-sorted "bam". -->
<conditional name="prim_key_cond">
<param name="prim_key_select" value=""/>
</conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
80 <output name="output1" file="pos.sort.expected.bam" ftype="bam" lines_diff="4" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
81 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
82 <!-- test_cmd($opts, out=>"sort/pos.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} $$opts{path}/dat/test_input_1_a.bam -O SAM -o -"); -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
83 <!-- 2) # Name sort -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
84 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
85 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
86 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
87 <param name="prim_key_select" value="-n"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
88 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
89 <output name="output1" file="name.sort.expected.bam" ftype="qname_sorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
90 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
91 <!-- test_cmd($opts, out=>"sort/name.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
92 <!-- 3) # Tag sort (RG) (considers output and name sorted) -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
93 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
94 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
95 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
96 <param name="prim_key_select" value="-t"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
97 <param name="tag" value="RG"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
98 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
99 <output name="output1" file="tag.rg.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
100 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
101 <!--test_cmd($opts, out=>"sort/tag.rg.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t RG $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
102 <!-- 4) # Tag sort (RG); secondary by name -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
103 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
104 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
105 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
106 <param name="prim_key_select" value="-t"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
107 <param name="tag" value="RG"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
108 <param name="sec_key_select" value="-n"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
109 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
110 <output name="output1" file="tag.rg.n.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
111 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
112 <!--test_cmd($opts, out=>"sort/tag.rg.n.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n -t RG $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
113 <!-- 5) # Tag sort (AS) -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
114 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
115 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
116 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
117 <param name="prim_key_select" value="-t"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
118 <param name="tag" value="AS"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
119 <param name="sec_key_select" value=""/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
120 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
121 <output name="output1" file="tag.as.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
122 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
123 <!--test_cmd($opts, out=>"sort/tag.as.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t AS $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
124 <!-- 6) # Tag sort (FI) -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
125 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
126 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
127 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
128 <param name="prim_key_select" value="-t"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
129 <param name="tag" value="FI"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
130 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
131 <output name="output1" file="tag.fi.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
132 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
133 <!--test_cmd($opts, out=>"sort/tag.fi.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t FI $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
134 <!-- 7) tests from old version -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
135 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
136 <param name="input1" value="1.bam" ftype="bam" />
<!-- The primary sort key defaults to name (-n); pick coordinate sort explicitly so the output is position-sorted "bam". -->
<conditional name="prim_key_cond">
<param name="prim_key_select" value=""/>
</conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
137 <output name="output1" file="1_sort.bam" ftype="bam" sort="True" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
138 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
139 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
140 <param name="input1" value="1.bam" ftype="bam" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
141 <conditional name="prim_key_cond">
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
142 <param name="prim_key_select" value="-n"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
143 </conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
144 <output name="output1" file="1_sort_read_names.bam" ftype="qname_sorted.bam" lines_diff="4"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
145 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
146 <!-- 8) test minhash sorting -->
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
147 <test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
148 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
<!-- The primary sort key defaults to name (-n); pick coordinate sort explicitly so the minhash output is position-sorted "bam". -->
<conditional name="prim_key_cond">
<param name="prim_key_select" value=""/>
</conditional>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
149 <param name="minhash" value="true" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
150 <output name="output1" file="minhash.expected.bam" ftype="bam" lines_diff="4" />
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
151 </test>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
152 </tests>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
153 <help>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
154 **What it does**
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
155
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
156 Sort alignments by leftmost coordinates, or by read name when -n is used.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
157 An appropriate @HD-SO sort order header tag will be added or an existing
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
158 one updated if necessary.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
159
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
160 **Ordering Rules**
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
161
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
162 The following rules are used for ordering records.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
163
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
164 If option -t is in use, records are first sorted by the value of the given
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
165 alignment tag, and then by position or name (if using -n). For example, “-t RG”
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
166 will make read group the primary sort key. The rules for ordering by tag are:
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
167
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
168 - Records that do not have the tag are sorted before ones that do.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
169 - If the types of the tags are different, they will be sorted so that single
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
170 character tags (type A) come before array tags (type B), then string tags
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
171 (types H and Z), then numeric tags (types f and i).
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
172 - Numeric tags (types f and i) are compared by value. Note that comparisons of
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
173 floating-point values are subject to issues of rounding and precision.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
174 - String tags (types H and Z) are compared based on the binary contents of the
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
175 tag using the C strcmp(3) function.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
176 - Character tags (type A) are compared by binary character value.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
177 - No attempt is made to compare tags of other types — notably type B array values will not be compared.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
178
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
179 When the -n option is present, records are sorted by name. Names are compared so as to give a “natural” ordering — i.e. sections consisting of digits are compared numerically while all other sections are compared based on their binary representation. This means “a1” will come before “b1” and “a9” will come before “a10”. Records with the same name will be ordered according to the values of the READ1 and READ2 flags (see flags).
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
180
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
181 When the -n option is not present, reads are sorted by reference (according to the order of the @SQ header records), then by position in the reference, and then by the REVERSE flag.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
182
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
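183 Historically samtools sort also accepted a less flexible way of specifying the final and temporary output filenames: samtools sort [-f] [-o] in.bam out.prefix. This has now been removed. The previous out.prefix argument (and -f option, if any) should be changed to an appropriate combination of -T PREFIX and -o FILE. The previous -o option should be removed, as output defaults to standard output.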
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
184
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
185 When the -M (minhash collation) option is present, then samtools sort groups unmapped reads with similar sequence together. This can sometimes significantly reduce the file size.
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
186
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
187 </help>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
188 <expand macro="citations"/>
0421e96d203a Uploaded
jackcurragh
parents:
diff changeset
189 </tool>