annotate samtools_sort_transcriptome/samtools_sort.xml @ 0:a9b4a031a403 draft

Uploaded
author jackcurragh
date Wed, 23 Mar 2022 12:49:11 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
1 <tool id="samtools_sort" name="Samtools sort" version="2.0.4" profile="@PROFILE@">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
2 <description>order of storing aligned sequences</description>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
3 <macros>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
4 <import>macros.xml</import>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
5 </macros>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
6 <expand macro="requirements"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
7 <expand macro="stdio"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
8 <expand macro="version_command"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
9 <command><![CDATA[
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
10 @ADDTHREADS@
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
11 @ADDMEMORY@
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
12 samtools sort
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
13 -@ \$addthreads
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
14 -m \$addmemory"M"
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
15 ###if str(compression):
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
16 ## -l '$compression'
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
17 ###end if
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
18 $prim_key_cond.prim_key_select
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
19 #if $prim_key_cond.prim_key_select == '-t':
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
20 '$prim_key_cond.tag'
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
21 $prim_key_cond.sec_key_select
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
22 #end if
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
23 $minhash
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
24 -O bam
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
25 -T "\${TMPDIR:-.}"
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
26 '${input1}'
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
27 > '${output1}'
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
28 ]]></command>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
29 <inputs>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
30 <param name="input1" type="data" format="sam,unsorted.bam,cram" label="BAM File" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
31 <conditional name="prim_key_cond">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
32 <param name="prim_key_select" type="select" label="Primary sort key">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
33 <option value="">coordinate</option>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
34 <option value="-n" selected="True">name (-n)</option>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
35 <option value="-t">tag (-t)</option>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
36 </param>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
37 <when value=""/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
38 <when value="-n"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
39 <when value="-t">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
40 <param name="tag" type="text" optional="false" argument="-t" label="Alignment tag"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
41 <param name="sec_key_select" type="select" label="Secondary sort key">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
42 <option value="">coordinate</option>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
43 <option value="-n">name (-n)</option>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
44 </param>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
45 </when>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
46 </conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
47 <param name="minhash" type="boolean" argument="-M" truevalue="-M" falsevalue="" checked="false" label="Minhash collation" help="Use minimiser for clustering unaligned/unplaced reads."/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
48 <!--<param name="compression" type="integer" argument="-l" optional="True" min="0" max="9" label="compression level" help="0 (uncompressed) to 9 (best)"/>-->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
49 </inputs>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
50 <outputs>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
51 <data name="output1" format="bam">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
52 <change_format>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
53 <when input="prim_key_cond.prim_key_select" value="" format="bam" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
54 <when input="prim_key_cond.prim_key_select" value="-n" format="qname_sorted.bam" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
55 <when input="prim_key_cond.prim_key_select" value="-t" format="unsorted.bam" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
56 </change_format>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
57 </data>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
58 </outputs>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
59 <tests>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
60 <!-- tests from https://github.com/samtools/samtools/blob/9ce8c64493f7ea3fa69bc5c1ac980b1a8e3dcf1f/test/test.pl#L2464 -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
61 <!-- 1) # Pos sort -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
62 <test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
63 <param name="input1" value="test_input_1_a.bam" ftype="bam" /><!-- the tool's default primary key is name (-n), so coordinate order must be selected explicitly --><conditional name="prim_key_cond"><param name="prim_key_select" value=""/></conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
64 <output name="output1" file="pos.sort.expected.bam" ftype="bam" lines_diff="4" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
65 </test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
66 <!-- test_cmd($opts, out=>"sort/pos.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} $$opts{path}/dat/test_input_1_a.bam -O SAM -o -"); -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
67 <!-- 2) # Name sort -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
68 <test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
69 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
70 <conditional name="prim_key_cond">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
71 <param name="prim_key_select" value="-n"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
72 </conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
73 <output name="output1" file="name.sort.expected.bam" ftype="qname_sorted.bam" lines_diff="4"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
74 </test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
75 <!-- test_cmd($opts, out=>"sort/name.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
76 <!-- 3) # Tag sort (RG) (considers output and name sorted) -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
77 <test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
78 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
79 <conditional name="prim_key_cond">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
80 <param name="prim_key_select" value="-t"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
81 <param name="tag" value="RG"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
82 </conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
83 <output name="output1" file="tag.rg.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
84 </test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
85 <!--test_cmd($opts, out=>"sort/tag.rg.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t RG $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
86 <!-- 4) # Tag sort (RG); secondary by name -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
87 <test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
88 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
89 <conditional name="prim_key_cond">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
90 <param name="prim_key_select" value="-t"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
91 <param name="tag" value="RG"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
92 <param name="sec_key_select" value="-n"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
93 </conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
94 <output name="output1" file="tag.rg.n.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
95 </test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
96 <!--test_cmd($opts, out=>"sort/tag.rg.n.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n -t RG $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
97 <!-- 5) # Tag sort (AS) -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
98 <test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
99 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
100 <conditional name="prim_key_cond">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
101 <param name="prim_key_select" value="-t"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
102 <param name="tag" value="AS"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
103 <param name="sec_key_select" value=""/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
104 </conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
105 <output name="output1" file="tag.as.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
106 </test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
107 <!--test_cmd($opts, out=>"sort/tag.as.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t AS $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
108 <!-- 6) # Tag sort (FI) -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
109 <test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
110 <param name="input1" value="test_input_1_a.bam" ftype="bam" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
111 <conditional name="prim_key_cond">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
112 <param name="prim_key_select" value="-t"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
113 <param name="tag" value="FI"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
114 </conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
115 <output name="output1" file="tag.fi.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
116 </test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
117 <!--test_cmd($opts, out=>"sort/tag.fi.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t FI $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
118 <!-- 7) tests from old version -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
119 <test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
120 <param name="input1" value="1.bam" ftype="bam" /><!-- the tool's default primary key is name (-n), so coordinate order must be selected explicitly --><conditional name="prim_key_cond"><param name="prim_key_select" value=""/></conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
121 <output name="output1" file="1_sort.bam" ftype="bam" sort="True" lines_diff="4"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
122 </test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
123 <test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
124 <param name="input1" value="1.bam" ftype="bam" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
125 <conditional name="prim_key_cond">
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
126 <param name="prim_key_select" value="-n"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
127 </conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
128 <output name="output1" file="1_sort_read_names.bam" ftype="qname_sorted.bam" lines_diff="4"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
129 </test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
130 <!-- 8) test minhash sorting -->
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
131 <test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
132 <param name="input1" value="test_input_1_a.bam" ftype="bam" /><!-- the tool's default primary key is name (-n), so coordinate order must be selected explicitly --><conditional name="prim_key_cond"><param name="prim_key_select" value=""/></conditional>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
133 <param name="minhash" value="true" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
134 <output name="output1" file="minhash.expected.bam" ftype="bam" lines_diff="4" />
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
135 </test>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
136 </tests>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
137 <help>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
138 **What it does**
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
139
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
140 Sort alignments by leftmost coordinates, or by read name when -n is used.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
141 An appropriate @HD-SO sort order header tag will be added or an existing
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
142 one updated if necessary.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
143
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
144 **Ordering Rules**
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
145
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
146 The following rules are used for ordering records.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
147
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
148 If option -t is in use, records are first sorted by the value of the given
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
149 alignment tag, and then by position or name (if using -n). For example, “-t RG”
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
150 will make read group the primary sort key. The rules for ordering by tag are:
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
151
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
152 - Records that do not have the tag are sorted before ones that do.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
153 - If the types of the tags are different, they will be sorted so that single
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
154 character tags (type A) come before array tags (type B), then string tags
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
155 (types H and Z), then numeric tags (types f and i).
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
156 - Numeric tags (types f and i) are compared by value. Note that comparisons of
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
157 floating-point values are subject to issues of rounding and precision.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
158 - String tags (types H and Z) are compared based on the binary contents of the
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
159 tag using the C strcmp(3) function.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
160 - Character tags (type A) are compared by binary character value.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
161 - No attempt is made to compare tags of other types — notably type B array values will not be compared.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
162
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
163 When the -n option is present, records are sorted by name. Names are compared so as to give a “natural” ordering — i.e. sections consisting of digits are compared numerically while all other sections are compared based on their binary representation. This means “a1” will come before “b1” and “a9” will come before “a10”. Records with the same name will be ordered according to the values of the READ1 and READ2 flags (see flags).
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
164
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
165 When the -n option is not present, reads are sorted by reference (according to the order of the @SQ header records), then by position in the reference, and then by the REVERSE flag.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
166
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
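167 Historically, samtools sort also accepted a less flexible way of specifying the final and temporary filenames (samtools sort [-f] [-o] in.bam out.prefix). This has now been removed. The previous out.prefix argument (and -f option, if any) should be changed to an appropriate combination of -T PREFIX and -o FILE. The previous -o option should be removed, as output defaults to standard output.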
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
168
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
169 When the -M (minhash collation) option is present, samtools sort groups unmapped reads with similar sequences together. This can sometimes significantly reduce the file size.
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
170
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
171 </help>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
172 <expand macro="citations"/>
a9b4a031a403 Uploaded
jackcurragh
parents:
diff changeset
173 </tool>