comparison samtools_view.xml @ 16:2dce91e11ca7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit e3de8bc1123bf4ce56818f2b7ad4b53080cb3bd8
author iuc
date Fri, 30 Aug 2024 10:24:46 +0000
parents 6be888be75f9
children 32dc5f781059
comparison
equal deleted inserted replaced
15:6be888be75f9 16:2dce91e11ca7
1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> 1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy3" profile="@PROFILE@">
2 <description>- reformat, filter, or subsample SAM, BAM or CRAM</description> 2 <description>- reformat, filter, or subsample SAM, BAM or CRAM</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 <token name="@REF_DATA@"> 5 <token name="@REF_DATA@">
6 ## additional reference data 6 ## additional reference data
134 #set $std_filters = $std_filters + " --tag '%s'" % $mode.filter_config.tag 134 #set $std_filters = $std_filters + " --tag '%s'" % $mode.filter_config.tag
135 #end if 135 #end if
136 #if $mode.filter_config.qname_file: 136 #if $mode.filter_config.qname_file:
137 #set std_filters = $std_filters + " --qname-file '%s'" % $mode.filter_config.qname_file 137 #set std_filters = $std_filters + " --qname-file '%s'" % $mode.filter_config.qname_file
138 #end if 138 #end if
139 #if str($cond_expr.select_expr) == "yes":
140 #set std_filters = $std_filters + " -e '%s'" % $cond_expr.expression
141 #end if
139 #end if 142 #end if
140 143
141 #if $with_subsampling: 144 #if $with_subsampling:
142 ## handle seed and fraction calculation for subsampling 145 ## handle seed and fraction calculation for subsampling
143 #import random 146 #import random
168 -@ \$addthreads 171 -@ \$addthreads
169 $fmtopt 172 $fmtopt
170 173
171 ## filter options (except regions filter, which is the last parameter) 174 ## filter options (except regions filter, which is the last parameter)
172 $std_filters 175 $std_filters
173
174 #if $with_subsampling: 176 #if $with_subsampling:
175 --subsample-seed $seed 177 --subsample-seed $seed
176 #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target": 178 #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target":
177 ##this is calculated at execution time before the main samtools command 179 ##this is calculated at execution time before the main samtools command
178 --subsample \${sample_fragment} 180 --subsample \${sample_fragment}
296 <when value="text"> 298 <when value="text">
297 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." /> 299 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." />
298 </when> 300 </when>
299 <when value="file"> 301 <when value="file">
300 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." /> 302 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." />
303 </when>
304 </conditional>
305 <conditional name="cond_expr">
306 <param name="select_expr" type="select" label="Filter by expression">
307 <option value="no" selected="True">No</option>
308 <option value="yes">Filter using an expression (see manual)</option>
309 </param>
310 <when value="no"/>
311 <when value="yes">
312 <param name="expression" type="text" argument="-e" label="Filter by expression - for example sclen&gt;0 will filter all soft clipped reads" help="See Samtools manual for Filter expression syntax">
313 <sanitizer invalid_char="">
314 <valid initial="string.printable">
315 <remove value=" "/>
316 <remove value="'"/>
317 <remove value='"'/>
318 </valid>
319 </sanitizer>
320 </param>
301 </when> 321 </when>
302 </conditional> 322 </conditional>
303 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." /> 323 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." />
304 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" /> 324 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" />
305 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." /> 325 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." />
574 </conditional> 594 </conditional>
575 <conditional name="addref_cond"> 595 <conditional name="addref_cond">
576 <param name="addref_select" value="history" /> 596 <param name="addref_select" value="history" />
577 <param name="ref" value="test.fa" /> 597 <param name="ref" value="test.fa" />
578 </conditional> 598 </conditional>
579 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> 599 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="500" />
580 </test> 600 </test>
581 <!-- 16) --> 601 <!-- 16) -->
582 <test expect_num_outputs="1"> 602 <test expect_num_outputs="1">
583 <param name="input" value="in_test_14.bam" ftype="bam" /> 603 <param name="input" value="in_test_14.bam" ftype="bam" />
584 <conditional name="mode"> 604 <conditional name="mode">
906 <assert_command> 926 <assert_command>
907 <has_text text="--qname-file"/> 927 <has_text text="--qname-file"/>
908 </assert_command> 928 </assert_command>
909 <output name="outputsam" file="test_31.bam" ftype="bam" lines_diff="2" /> 929 <output name="outputsam" file="test_31.bam" ftype="bam" lines_diff="2" />
910 </test> 930 </test>
931 <!-- 32) testing expression filters -->
932 <test expect_num_outputs="1">
933 <param name="input" value="in_test_30.bam" ftype="bam"/>
934 <conditional name="mode">
935 <param name="outtype" value="selected_reads" />
936 <section name="filter_config">
937 <conditional name="cond_expr">
938 <param name="select_expr" value="yes"/>
939 <param name="expression" value="sclen>0"/>
940 </conditional>
941 </section>
942 <conditional name="output_options">
943 <conditional name="output_format">
944 <param name="oformat" value="bam" />
945 </conditional>
946 </conditional>
947 </conditional>
948 <assert_command>
949 <has_text text="-e 'sclen>0'"/>
950 </assert_command>
951 <output name="outputsam" file="test_32.bam" ftype="bam" lines_diff="2" />
952 </test>
953 <!-- 33) testing expression filters -->
954 <test expect_num_outputs="1">
955 <param name="input" value="in_test_30.bam" ftype="bam"/>
956 <conditional name="mode">
957 <param name="outtype" value="selected_reads" />
958 <section name="filter_config">
959 <conditional name="cond_expr">
960 <param name="select_expr" value="yes"/>
961 <param name="expression" value='rname!="chr13"'/>
962 </conditional>
963 </section>
964 <conditional name="output_options">
965 <conditional name="output_format">
966 <param name="oformat" value="bam" />
967 </conditional>
968 </conditional>
969 </conditional>
970 <assert_command>
971 <has_text text="-e 'rname!="/>
972 </assert_command>
973 <output name="outputsam" file="test_33.bam" ftype="bam" lines_diff="2" />
974 </test>
911 </tests> 975 </tests>
912 <help> 976 <help>
913 **What it does** 977 **What it does**
914 978
915 Samtools view can: 979 Samtools view can:
989 1053
990 **Filtering by quality** 1054 **Filtering by quality**
991 1055
992 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition. 1056 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition.
993 1057
994 ## Filtering by Tag ** 1058 **Filtering by Tag**
995 1059
996 This filter allows selecting reads based on tool- or user-specific tags, e.g., XS:i:-18, the alignment score tag of bowtie. 1060 This filter allows selecting reads based on tool- or user-specific tags, e.g., XS:i:-18, the alignment score tag of bowtie.
997 Thus, to filter for a specific value of the tag you need the format STR1:STR2, e.g., XS:-18 to filter reads with an alignment score of -18. 1061 Thus, to filter for a specific value of the tag you need the format STR1:STR2, e.g., XS:-18 to filter reads with an alignment score of -18.
998 You can also write just STR1 without the value STR2; the filter then selects all reads that carry the tag STR1, e.g., XS. 1062 You can also write just STR1 without the value STR2; the filter then selects all reads that carry the tag STR1, e.g., XS.
999 1063
1064 **Filtering by Expression**
1065
1066
1067 Filter expressions are used as an on-the-fly checking of incoming SAM, BAM or CRAM records, discarding records that do not match the specified expression.
1068
1069 The language used is primarily C style, but with a few differences in the precedence rules for bit operators and the inclusion of regular expression
1070 matching.
1071
1072 The operator precedence, from strongest binding to weakest, is
1073
1074 ::
1075
1076 Grouping (, ) E.g. &quot;(1+2)&#42;3&quot;
1077 Values: literals, vars Numbers, strings and variables
1078 Unary ops: +, -, !, ~ E.g. -10 +10, !10 (not), ~5 (bit not)
1079 Math ops: \*, /, % Multiply, division and (integer) modulo
1080 Math ops: +, - Addition / subtraction
1081 Bit-wise: &amp; Integer AND
1082 Bit-wise ^ Integer XOR
1083 Bit-wise | Integer OR
1084 Conditionals: &gt;, &gt;=, &lt;, &lt;=
1085 Equality: \=\=, !=, =~, !~ =~ and !~ match regular expressions
1086 Boolean: &amp;&amp;, || Logical AND / OR
1087
1088
1089 Expressions are computed using floating point mathematics, so &quot;10 / 4&quot; evaluates to 2.5 rather than 2. They may be written as integers in decimal or
1090 &quot;0x&quot; plus hexadecimal, and floating point with or without exponents. However, operations that require integers first do an implicit type conversion, so
1091 &quot;7.9 % 5&quot; is 2 and &quot;7.9 &amp; 4.1&quot; is equivalent to &quot;7 &amp; 4&quot;, which is 4. Strings are always specified using double quotes. To get a double quote in a
1092 string, use backslash. Similarly a double backslash is used to get a literal backslash. For example ab\&quot;c\\d is the string ab&quot;c\d.
1093
1094 Comparison operators are evaluated as a match being 1 and a mismatch being 0, thus &quot;(2 &gt; 1) + (3 &lt; 5)&quot; evaluates as 2. All comparisons involving undefined (null) values are deemed to be false.
1095
1096 The variables are where the file format specifics are accessed from the expression. The variables correspond to SAM fields, for example to find paired
1097 alignments with high mapping quality and a very large insert size, we may use the expression &quot;mapq &gt;= 30 &amp;&amp; (tlen &gt;= 100000 || tlen &lt;= -100000)&quot;. Valid
1098 variable names and their data types are:
1099
1100 ::
1101
1102 endpos int Alignment end position (1-based)
1103 flag int Combined FLAG field
1104 flag.paired int Single bit, 0 or 1
1105 flag.proper_pair int Single bit, 0 or 2
1106 flag.unmap int Single bit, 0 or 4
1107 flag.munmap int Single bit, 0 or 8
1108 flag.reverse int Single bit, 0 or 16
1109 flag.mreverse int Single bit, 0 or 32
1110 flag.read1 int Single bit, 0 or 64
1111 flag.read2 int Single bit, 0 or 128
1112 flag.secondary int Single bit, 0 or 256
1113 flag.qcfail int Single bit, 0 or 512
1114 flag.dup int Single bit, 0 or 1024
1115 flag.supplementary int Single bit, 0 or 2048
1116 hclen int Number of hard-clipped bases
1117 library string Library (LB header via RG)
1118 mapq int Mapping quality
1119 mpos int Synonym for pnext
1120 mrefid int Mate reference number (0 based)
1121 mrname string Synonym for rnext
1122 ncigar int Number of cigar operations
1123 pnext int Mate's alignment position (1-based)
1124 pos int Alignment position (1-based)
1125 qlen int Alignment length: no. query bases
1126 qname string Query name
1127 qual string Quality values (raw, 0 based)
1128 refid int Integer reference number (0 based)
1129 rlen int Alignment length: no. reference bases
1130 rname string Reference name
1131 rnext string Mate's reference name
1132 sclen int Number of soft-clipped bases
1133 seq string Sequence
1134 tlen int Template length (insert size)
1135 [XX] int / string XX tag value
1136
1137
1138 Flags are returned either as the whole flag value or by checking for a single bit. Hence the filter expression flag.dup is equivalent to flag &amp; 1024.
1139
1140 &quot;qlen&quot; and &quot;rlen&quot; are measured using the CIGAR string to count the number of query (sequence) and reference bases consumed. Note &quot;qlen&quot; may not exactly
1141 match the length of the &quot;seq&quot; field if the sequence is &quot;&#42;&quot;.
1142
1143 &quot;sclen&quot; and &quot;hclen&quot; are the number of soft and hard-clipped bases respectively. The formula &quot;qlen-sclen&quot; gives the number of sequence bases used in the
1144 alignment, distinguishing between global alignment and local alignment length.
1145
1146 &quot;endpos&quot; is the (1-based inclusive) position of the rightmost mapped base of the read, as measured using the CIGAR string, and for mapped reads is
1147 equivalent to &quot;pos+rlen-1&quot;. For unmapped reads, it is the same as &quot;pos&quot;.
1148
1149 Reference names may be matched either by their string forms (&quot;rname&quot; and &quot;mrname&quot;) or as the Nth @SQ line (counting from zero) as stored in BAM using
1150 &quot;refid&quot; and &quot;mrefid&quot; respectively.
1151
1152 Auxiliary tags are described in square brackets and these expand to either integer or string as defined by the tag itself (XX:Z:string or XX:i:int).
1153 For example [NM]&gt;=10 can be used to look for alignments with many mismatches and [RG]=~&quot;grp[ABC]-&quot; will match the read-group string.
1154
1155 If no comparison is used with an auxiliary tag it is taken simply to be a test for the existence of that tag. So [NM] will return any record containing
1156 an NM tag, even if that tag is zero (NM:i:0). In htslib &lt;= 1.15 negating this with ![NM] gave misleading results as it was true if the tag did not exist
1157 or did exist but was zero. Now this is strictly does-not-exist. An explicit exists([NM]) and !exists([NM]) function has also been added to make
1158 this intention clear.
1159
1160 Similarly in htslib &lt;= 1.15 using [NM]!=0 was true both when the tag existed and was not zero as well as when the tag did not exist. From 1.16 onwards
1161 all comparison operators are only true for tags that exist, so [NM]!=0 works as expected.
1162
1163 Some simple functions are available to operate on strings. These treat the strings as arrays of bytes, permitting their length, minimum, maximum and
1164 average values to be computed. These are useful for processing Quality Scores.
1165
1166 ::
1167
1168 length(x) Length of the string (excluding nul char)
1169 min(x) Minimum byte value in the string
1170 max(x) Maximum byte value in the string
1171 avg(x) Average byte value in the string
1172
1173
1174 Note that &quot;avg&quot; is a floating point value and it may be NAN for empty strings. This means that &quot;avg(qual)&quot; does not produce an error for records that
1175 have both seq and qual of &quot;&#42;&quot;. NAN values will fail any conditional checks, so e.g. &quot;avg(qual) &gt; 20&quot; works and will not report these records. NAN also
1176 fails all equality, &lt; and &gt; comparisons, and returns zero when given as an argument to the exists function. It can be negated with !x in which case it
1177 becomes true.
1178
1179 Functions that operate on both strings and numerics:
1180
1181 ::
1182
1183 exists(x) True if the value exists (or is explicitly true).
1184 default(x,d) Value x if it exists or d if not.
1185
1186 Functions that apply only to numeric values:
1187
1188 ::
1189
1190 sqrt(x) Square root of x
1191 log(x) Natural logarithm of x
1192 pow(x, y) Power function, x to the power of y
1193 exp(x) Base-e exponential, equivalent to pow(e,x)
1194
1000 </help> 1195 </help>
1001 <expand macro="citations"/> 1196 <expand macro="citations"/>
1002 </tool> 1197 </tool>