Mercurial > repos > iuc > samtools_view
comparison samtools_view.xml @ 16:2dce91e11ca7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit e3de8bc1123bf4ce56818f2b7ad4b53080cb3bd8
author | iuc |
---|---|
date | Fri, 30 Aug 2024 10:24:46 +0000 |
parents | 6be888be75f9 |
children | 32dc5f781059 |
comparison
equal
deleted
inserted
replaced
15:6be888be75f9 | 16:2dce91e11ca7 |
---|---|
1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | 1 <tool id="samtools_view" name="Samtools view" version="@TOOL_VERSION@+galaxy3" profile="@PROFILE@"> |
2 <description>- reformat, filter, or subsample SAM, BAM or CRAM</description> | 2 <description>- reformat, filter, or subsample SAM, BAM or CRAM</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 <token name="@REF_DATA@"> | 5 <token name="@REF_DATA@"> |
6 ## additional reference data | 6 ## additional reference data |
134 #set $std_filters = $std_filters + " --tag '%s'" % $mode.filter_config.tag | 134 #set $std_filters = $std_filters + " --tag '%s'" % $mode.filter_config.tag |
135 #end if | 135 #end if |
136 #if $mode.filter_config.qname_file: | 136 #if $mode.filter_config.qname_file: |
137 #set std_filters = $std_filters + " --qname-file '%s'" % $mode.filter_config.qname_file | 137 #set std_filters = $std_filters + " --qname-file '%s'" % $mode.filter_config.qname_file |
138 #end if | 138 #end if |
139 #if str($cond_expr.select_expr) == "yes": | |
140 #set std_filters = $std_filters + " -e '%s'" % $cond_expr.expression | |
141 #end if | |
139 #end if | 142 #end if |
140 | 143 |
141 #if $with_subsampling: | 144 #if $with_subsampling: |
142 ## handle seed and fraction calculation for subsampling | 145 ## handle seed and fraction calculation for subsampling |
143 #import random | 146 #import random |
168 -@ \$addthreads | 171 -@ \$addthreads |
169 $fmtopt | 172 $fmtopt |
170 | 173 |
171 ## filter options (except regions filter, which is the last parameter) | 174 ## filter options (except regions filter, which is the last parameter) |
172 $std_filters | 175 $std_filters |
173 | |
174 #if $with_subsampling: | 176 #if $with_subsampling: |
175 --subsample-seed $seed | 177 --subsample-seed $seed |
176 #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target": | 178 #if str($mode.subsample_config.subsampling_mode.select_subsample) == "target": |
177 ##this is calculated at execution time before the main samtools command | 179 ##this is calculated at execution time before the main samtools command |
178 --subsample \${sample_fragment} | 180 --subsample \${sample_fragment} |
296 <when value="text"> | 298 <when value="text"> |
297 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." /> | 299 <param name="readgr" type="text" argument="-r" label="Filter by read group" help="Only output alignments in read group." /> |
298 </when> | 300 </when> |
299 <when value="file"> | 301 <when value="file"> |
300 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." /> | 302 <param name="rgfile" type="data" format="tabular" argument="-R" label="Filter by read groups in file" help="Output alignments in read groups listed in FILE." /> |
303 </when> | |
304 </conditional> | |
305 <conditional name="cond_expr"> | |
306 <param name="select_expr" type="select" label="Filter by expression"> | |
307 <option value="no" selected="True">No</option> | |
308 <option value="yes">Filter using an expression (see manual)</option> | |
309 </param> | |
310 <when value="no"/> | |
311 <when value="yes"> | |
312 <param name="expression" type="text" argument="-e" label="Filter by expression - for example sclen>0 will filter all soft clipped reads" help="See Samtools manual for Filter expression syntax"> | |
313 <sanitizer invalid_char=""> | |
314 <valid initial="string.printable"> | |
315 <remove value=" "/> | |
316 <remove value="'"/> | |
317 <remove value='"'/> | |
318 </valid> | |
319 </sanitizer> | |
320 </param> | |
301 </when> | 321 </when> |
302 </conditional> | 322 </conditional> |
303 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." /> | 323 <param name="quality" type="integer" argument="-q" optional="true" min="0" label="Filter by quality" help="Skip alignments with MAPQ smaller than INT." /> |
304 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" /> | 324 <param name="library" type="text" argument="-l" optional="true" label="Filter by library" help="Only output alignments in library STR" /> |
305 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." /> | 325 <param name="cigarcons" type="integer" argument="-m" optional="true" min="0" label="Filter by number of CIGAR bases consuming query sequence" help="Only output alignments with number of CIGAR bases consuming query sequence greater than or equal INT." /> |
574 </conditional> | 594 </conditional> |
575 <conditional name="addref_cond"> | 595 <conditional name="addref_cond"> |
576 <param name="addref_select" value="history" /> | 596 <param name="addref_select" value="history" /> |
577 <param name="ref" value="test.fa" /> | 597 <param name="ref" value="test.fa" /> |
578 </conditional> | 598 </conditional> |
579 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="250" /> | 599 <output name="outputsam" file="test_15.cram" ftype="cram" compare="sim_size" delta="500" /> |
580 </test> | 600 </test> |
581 <!-- 16) --> | 601 <!-- 16) --> |
582 <test expect_num_outputs="1"> | 602 <test expect_num_outputs="1"> |
583 <param name="input" value="in_test_14.bam" ftype="bam" /> | 603 <param name="input" value="in_test_14.bam" ftype="bam" /> |
584 <conditional name="mode"> | 604 <conditional name="mode"> |
906 <assert_command> | 926 <assert_command> |
907 <has_text text="--qname-file"/> | 927 <has_text text="--qname-file"/> |
908 </assert_command> | 928 </assert_command> |
909 <output name="outputsam" file="test_31.bam" ftype="bam" lines_diff="2" /> | 929 <output name="outputsam" file="test_31.bam" ftype="bam" lines_diff="2" /> |
910 </test> | 930 </test> |
931 <!-- 32) testing expression filters --> | |
932 <test expect_num_outputs="1"> | |
933 <param name="input" value="in_test_30.bam" ftype="bam"/> | |
934 <conditional name="mode"> | |
935 <param name="outtype" value="selected_reads" /> | |
936 <section name="filter_config"> | |
937 <conditional name="cond_expr"> | |
938 <param name="select_expr" value="yes"/> | |
939 <param name="expression" value="sclen>0"/> | |
940 </conditional> | |
941 </section> | |
942 <conditional name="output_options"> | |
943 <conditional name="output_format"> | |
944 <param name="oformat" value="bam" /> | |
945 </conditional> | |
946 </conditional> | |
947 </conditional> | |
948 <assert_command> | |
949 <has_text text="-e 'sclen>0'"/> | |
950 </assert_command> | |
951 <output name="outputsam" file="test_32.bam" ftype="bam" lines_diff="2" /> | |
952 </test> | |
953 <!-- 33) testing expression filters --> | |
954 <test expect_num_outputs="1"> | |
955 <param name="input" value="in_test_30.bam" ftype="bam"/> | |
956 <conditional name="mode"> | |
957 <param name="outtype" value="selected_reads" /> | |
958 <section name="filter_config"> | |
959 <conditional name="cond_expr"> | |
960 <param name="select_expr" value="yes"/> | |
961 <param name="expression" value='rname!="chr13"'/> | |
962 </conditional> | |
963 </section> | |
964 <conditional name="output_options"> | |
965 <conditional name="output_format"> | |
966 <param name="oformat" value="bam" /> | |
967 </conditional> | |
968 </conditional> | |
969 </conditional> | |
970 <assert_command> | |
971 <has_text text="-e 'rname!="/> | |
972 </assert_command> | |
973 <output name="outputsam" file="test_33.bam" ftype="bam" lines_diff="2" /> | |
974 </test> | |
911 </tests> | 975 </tests> |
912 <help> | 976 <help> |
913 **What it does** | 977 **What it does** |
914 | 978 |
915 Samtools view can: | 979 Samtools view can: |
989 | 1053 |
990 **Filtering by quality** | 1054 **Filtering by quality** |
991 | 1055 |
992 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition. | 1056 This filters based on the MAPQ column of the SAM format which gives an estimate about the correct placement of the alignment. Note that aligners do not follow a consistent definition. |
993 | 1057 |
994 ## Filtering by Tag ** | 1058 **Filtering by Tag** |
995 | 1059 |
996 This filter allows to select reads based on tool or user specific tags, e.g., XS:i:-18 the alignment score tag of bowtie. | 1060 This filter allows to select reads based on tool or user specific tags, e.g., XS:i:-18 the alignment score tag of bowtie. |
997 Thus to filter for a specific value of the tag you need the format STR1:STR2, e.g., XS:-18 to filter reads with an alignment score of -18. | 1061 Thus to filter for a specific value of the tag you need the format STR1:STR2, e.g., XS:-18 to filter reads with an alignment score of -18. |
998 You can also just write STR1 without the value STR2 hence the filter selects all reads with the tag STR1, e.g., XS. | 1062 You can also just write STR1 without the value STR2 hence the filter selects all reads with the tag STR1, e.g., XS. |
999 | 1063 |
1064 **Filtering by Expression** | |
1065 | |
1066 | |
1067 Filter expressions are used as an on-the-fly checking of incoming SAM, BAM or CRAM records, discarding records that do not match the specified expression. | |
1068 | |
1069 The language used is primarily C style, but with a few differences in the precedence rules for bit operators and the inclusion of regular expression | |
1070 matching. | |
1071 | |
1072 The operator precedence, from strongest binding to weakest, is | |
1073 | |
1074 :: | |
1075 | |
1076 Grouping (, ) E.g. "(1+2)*3" | |
1077 Values: literals, vars Numbers, strings and variables | |
1078 Unary ops: +, -, !, ~ E.g. -10 +10, !10 (not), ~5 (bit not) | |
1079 Math ops: *, /, % Multiply, division and (integer) modulo | |
1080 Math ops: +, - Addition / subtraction | |
1081 Bit-wise: & Integer AND | |
1082 Bit-wise ^ Integer XOR | |
1083 Bit-wise | Integer OR | |
1084 Conditionals: >, >=, <, <= | |
1085 Equality: ==, !=, =~, !~ =~ and !~ match regular expressions | |
1086 Boolean: &&, || Logical AND / OR | |
1087 | |
1088 | |
1089 Expressions are computed using floating point mathematics, so "10 / 4" evaluates to 2.5 rather than 2. They may be written as integers in decimal or | |
1090 "0x" plus hexadecimal, and floating point with or without exponents. However, operations that require integers first do an implicit type conversion, so | |
1091 "7.9 % 5" is 2 and "7.9 & 4.1" is equivalent to "7 & 4", which is 4. Strings are always specified using double quotes. To get a double quote in a | |
1092 string, use backslash. Similarly a double backslash is used to get a literal backslash. For example ab\"c\\d is the string ab"c\d. | |
1093 | |
1094 Comparison operators are evaluated as a match being 1 and a mismatch being 0, thus "(2 > 1) + (3 < 5)" evaluates as 2. All comparisons involving undefined (null) values are deemed to be false. | |
1095 | |
1096 The variables are where the file format specifics are accessed from the expression. The variables correspond to SAM fields, for example to find paired | |
1097 alignments with high mapping quality and a very large insert size, we may use the expression "mapq >= 30 && (tlen >= 100000 || tlen <= -100000)". Valid | |
1098 variable names and their data types are: | |
1099 | |
1100 :: | |
1101 | |
1102 endpos int Alignment end position (1-based) | |
1103 flag int Combined FLAG field | |
1104 flag.paired int Single bit, 0 or 1 | |
1105 flag.proper_pair int Single bit, 0 or 2 | |
1106 flag.unmap int Single bit, 0 or 4 | |
1107 flag.munmap int Single bit, 0 or 8 | |
1108 flag.reverse int Single bit, 0 or 16 | |
1109 flag.mreverse int Single bit, 0 or 32 | |
1110 flag.read1 int Single bit, 0 or 64 | |
1111 flag.read2 int Single bit, 0 or 128 | |
1112 flag.secondary int Single bit, 0 or 256 | |
1113 flag.qcfail int Single bit, 0 or 512 | |
1114 flag.dup int Single bit, 0 or 1024 | |
1115 flag.supplementary int Single bit, 0 or 2048 | |
1116 hclen int Number of hard-clipped bases | |
1117 library string Library (LB header via RG) | |
1118 mapq int Mapping quality | |
1119 mpos int Synonym for pnext | |
1120 mrefid int Mate reference number (0 based) | |
1121 mrname string Synonym for rnext | |
1122 ncigar int Number of cigar operations | |
1123 pnext int Mate's alignment position (1-based) | |
1124 pos int Alignment position (1-based) | |
1125 qlen int Alignment length: no. query bases | |
1126 qname string Query name | |
1127 qual string Quality values (raw, 0 based) | |
1128 refid int Integer reference number (0 based) | |
1129 rlen int Alignment length: no. reference bases | |
1130 rname string Reference name | |
1131 rnext string Mate's reference name | |
1132 sclen int Number of soft-clipped bases | |
1133 seq string Sequence | |
1134 tlen int Template length (insert size) | |
1135 [XX] int / string XX tag value | |
1136 | |
1137 | |
1138 Flags are returned either as the whole flag value or by checking for a single bit. Hence the filter expression flag.dup is equivalent to flag & 1024. | |
1139 | |
1140 "qlen" and "rlen" are measured using the CIGAR string to count the number of query (sequence) and reference bases consumed. Note "qlen" may not exactly | |
1141 match the length of the "seq" field if the sequence is "*". | |
1142 | |
1143 "sclen" and "hclen" are the number of soft and hard-clipped bases respectively. The formula "qlen-sclen" gives the number of sequence bases used in the | |
1144 alignment, distinguishing between global alignment and local alignment length. | |
1145 | |
1146 "endpos" is the (1-based inclusive) position of the rightmost mapped base of the read, as measured using the CIGAR string, and for mapped reads is | |
1147 equivalent to "pos+rlen-1". For unmapped reads, it is the same as "pos". | |
1148 | |
1149 Reference names may be matched either by their string forms ("rname" and "mrname") or as the Nth @SQ line (counting from zero) as stored in BAM using | |
1150 "refid" and "mrefid" respectively. | |
1151 | |
1152 Auxiliary tags are described in square brackets and these expand to either integer or string as defined by the tag itself (XX:Z:string or XX:i:int). | |
1153 For example [NM]>=10 can be used to look for alignments with many mismatches and [RG]=~"grp[ABC]-" will match the read-group string. | |
1154 | |
1155 If no comparison is used with an auxiliary tag it is taken simply to be a test for the existence of that tag. So [NM] will return any record containing | |
1156 an NM tag, even if that tag is zero (NM:i:0). In htslib <= 1.15 negating this with ![NM] gave misleading results as it was true if the tag did not exist | |
1157 or did exist but was zero. Now this is strictly does-not-exist. An explicit exists([NM]) and !exists([NM]) function has also been added to make | |
1158 this intention clear. | |
1159 | |
1160 Similarly in htslib <= 1.15 using [NM]!=0 was true both when the tag existed and was not zero as well as when the tag did not exist. From 1.16 onwards | |
1161 all comparison operators are only true for tags that exist, so [NM]!=0 works as expected. | |
1162 | |
1163 Some simple functions are available to operate on strings. These treat the strings as arrays of bytes, permitting their length, minimum, maximum and | |
1164 average values to be computed. These are useful for processing Quality Scores. | |
1165 | |
1166 :: | |
1167 | |
1168 length(x) Length of the string (excluding nul char) | |
1169 min(x) Minimum byte value in the string | |
1170 max(x) Maximum byte value in the string | |
1171 avg(x) Average byte value in the string | |
1172 | |
1173 | |
1174 Note that "avg" is a floating point value and it may be NAN for empty strings. This means that "avg(qual)" does not produce an error for records that | |
1175 have both seq and qual of "*". NAN values will fail any conditional checks, so e.g. "avg(qual) > 20" works and will not report these records. NAN also | |
1176 fails all equality, < and > comparisons, and returns zero when given as an argument to the exists function. It can be negated with !x in which case it | |
1177 becomes true. | |
1178 | |
1179 Functions that operate on both strings and numerics: | |
1180 | |
1181 :: | |
1182 | |
1183 exists(x) True if the value exists (or is explicitly true). | |
1184 default(x,d) Value x if it exists or d if not. | |
1185 | |
1186 Functions that apply only to numeric values: | |
1187 | |
1188 :: | |
1189 | |
1190 sqrt(x) Square root of x | |
1191 log(x) Natural logarithm of x | |
1192 pow(x, y) Power function, x to the power of y | |
1193 exp(x) Base-e exponential, equivalent to pow(e,x) | |
1194 | |
1000 </help> | 1195 </help> |
1001 <expand macro="citations"/> | 1196 <expand macro="citations"/> |
1002 </tool> | 1197 </tool> |