comparison samtools_markdup.xml @ 3:d0a568e1904b draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_markdup commit 0f75269223c0821c6c82acf98fde947d0f816f2b"
author iuc
date Tue, 28 Sep 2021 16:14:29 +0000
parents a312a0fdaf31
children 8c440c3002bc
comparison
equal deleted inserted replaced
2:a312a0fdaf31 3:d0a568e1904b
1 <tool id="samtools_markdup" name="Samtools markdup" version="@TOOL_VERSION@+galaxy3"> 1 <tool id="samtools_markdup" name="Samtools markdup" version="@TOOL_VERSION@" profile="@PROFILE@" >
2 <description>marks duplicate alignments</description> 2 <description>marks duplicate alignments</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements"/>
7 <expand macro="stdio"/> 7 <expand macro="stdio"/>
8 <expand macro="version_command"/> 8 <expand macro="version_command"/>
9 <command><![CDATA[ 9 <command><![CDATA[
10 @ADDTHREADS@ 10 @ADDTHREADS@
11 ## coordinate sort input 11 ## coordinate sort input
12 #if not $bamfile.is_of_type('bam'): 12 #if not $bamfile.is_of_type('bam'):
13 samtools sort 13 samtools sort
14 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T "\${TMPDIR:-.}" 14 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T "\${TMPDIR:-.}"
15 -O sam 15 -O sam
16 -o coordsort.sam 16 -o coordsort.sam
17 '$bamfile' && 17 '$bamfile' &&
18 #else: 18 #else:
19 ln -s '$bamfile' coordsort.sam && 19 ln -s '$bamfile' coordsort.sam &&
20 #end if 20 #end if
21 21
22 samtools markdup 22 samtools markdup
23 23
24 -@ \$addthreads 24 -@ \$addthreads
25 #if str($maxlen) != '': 25 #if str($maxlen) != '':
26 -l $maxlen 26 -l $maxlen
27 #end if 27 #end if
28 $remove 28 $remove
29 $stats
30 $supp 29 $supp
31 coordsort.sam 30 #if $odist:
31 -d $odist
32 #end if
33 $existing_tags
34 -m $mode
35 $include_fails
36 #if $output_options.stats == 'yes'
37 -s
38 -f '$stats_output'
39 #end if
40 -O $output_options.output_format.select_oformat
41 #if $output_options.output_format.select_oformat == "CRAM"
42 --reference '$output_options.output_format.ref_file'
43 #end if
44 coordsort.sam
32 '$output' 45 '$output'
33 #if $stats
34 2> >(tee -a '$stats_output' >&2)
35 #end if
36 ]]></command> 46 ]]></command>
37 <inputs> 47 <inputs>
38 <param name="bamfile" type="data" format="sam,bam,cram" optional="false" label="Alignment" /> 48 <param name="bamfile" type="data" format="sam,bam,cram" optional="false" label="Alignment" />
39 <param name="remove" type="boolean" argument="-r" truevalue="-r" falsevalue="" label="Remove duplicate reads" /> 49 <param name="remove" type="boolean" argument="-r" truevalue="-r" falsevalue="" label="Remove duplicate reads" />
40 <param name="maxlen" type="integer" optional="true" argument="-l" min="0" label="Expected maximum read length of INT bases. (default 300)"/>
41 <param name="stats" type="boolean" argument="-s" truevalue="-s" falsevalue="" label="Print basic statistics" />
42 <param name="supp" type="boolean" argument="-S" truevalue="-S" falsevalue="" label="Mark supplementary reads of duplicates as duplicates" /> 50 <param name="supp" type="boolean" argument="-S" truevalue="-S" falsevalue="" label="Mark supplementary reads of duplicates as duplicates" />
51 <param name="existing_tags" type="boolean" argument="-c" truevalue="-c" falsevalue="" label="Clear previous duplicate settings and tags." />
52 <param name="maxlen" type="integer" optional="true" argument="-l" min="0" label="Expected maximum read length of INT bases. (default 300, min=0)"/>
53 <param name="odist" type="integer" optional="true" argument="-d" min="1" label="Optical distance (if set, marks with dt tag, min=1)"/>
54 <param argument="--mode" type="select" label="Duplicate decision method for paired reads.">
55 <option selected="true" value="t">(t) measure positions based on template start/end.</option>
56 <option value="s">(s) measure positions based on sequence start. </option>
57 </param>
58 <param argument="--include-fails" type="boolean" truevalue="--include-fails" falsevalue="" label="Include quality check failed reads." />
59 <section name="output_options" title="Output Options" expanded="true">
60 <param name="stats" type="select" argument="-s" label="Print basic statistics">
61 <option value="yes">Yes</option>
62 <option value="no" selected="True">No</option>
63 </param>
64 <conditional name="output_format">
65 <param name="select_oformat" type="select" label="Output format" help="Specify output format">
66 <option value="SAM">SAM</option>
67 <option value="BAM" selected="True">BAM</option>
68 <option value="CRAM">CRAM</option>
69 </param>
70 <when value="SAM" />
71 <when value="BAM" />
72 <when value="CRAM">
73 <param name="ref_file" type="data" format="fasta" label="Reference FASTA file" />
74 </when>
75 </conditional>
76 </section>
43 </inputs> 77 </inputs>
44 <outputs> 78 <outputs>
45 <!-- output bam, if input was name sorted then restore this sorting order --> 79 <!-- output bam, if input was name sorted then restore this sorting order -->
46 <data name="output" format="bam"/> 80 <data name="output" format="bam" from_work_dir="outfile" label="${tool.name} on ${on_string}">
81 <change_format>
82 <when input="output_format.select_oformat" value="SAM" format="sam" />
83 <when input="output_format.select_oformat" value="BAM" format="bam" />
84 <when input="output_format.select_oformat" value="CRAM" format="cram" />
85 </change_format>
86 </data>
47 <data name="stats_output" format="txt" label="${tool.name} on ${on_string}: statistics"> 87 <data name="stats_output" format="txt" label="${tool.name} on ${on_string}: statistics">
48 <filter>stats</filter> 88 <filter>(output_options['stats'] == 'yes')</filter>
49 </data> 89 </data>
50 </outputs> 90 </outputs>
51 <tests> 91 <tests>
52 <!-- tests and data extracted from 92 <!-- 1) -->
53 https://github.com/samtools/samtools/blob/6d79411685d8f0fbb34e123f52d72b63271f4dcb/test/test.pl#L2616
54 TODO the 1st 4 tests are negative, I do not know how to test for the error code
55 -->
56 <!-- test_cmd($opts, out=>'markdup/1_name_sort.expected.sam', err=>'1_name_sort.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/1_name_sort.sam -", expect_fail=>1);
57 test_cmd($opts, out=>'markdup/2_bad_order.expected.sam', err=>'2_bad_order.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/2_bad_order.sam -", expect_fail=>1);
58 test_cmd($opts, out=>'markdup/3_missing_mc.expected.sam', err=>'3_missing_mc.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/3_missing_mc.sam -", expect_fail=>1);
59 test_cmd($opts, out=>'markdup/4_missing_ms.expected.sam', err=>'4_missing_ms.expected.sam.err', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/4_missing_ms.sam -", expect_fail=>1);-->
60 <!-- test_cmd($opts, out=>'markdup/5_markdup.expected.sam', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam $$opts{path}/markdup/5_markdup.sam -");-->
61 <test expect_num_outputs="1"> 93 <test expect_num_outputs="1">
62 <param name="bamfile" value="5_markdup.sam" /> 94 <param name="bamfile" value="1_markdup.sam" />
63 <output name="output" file="5_markdup.expected.bam" /> 95 <output name="output" file="1_markdup.expected.bam" ftype="bam" lines_diff="4" />
64 </test> 96 </test>
65 <!-- test_cmd($opts, out=>'markdup/6_remove_dups.expected.sam', cmd=>"$$opts{bin}/samtools markdup${threads} -O sam -r $$opts{path}/markdup/6_remove_dups.sam -");--> 97 <!-- 2) -->
66 <test expect_num_outputs="1"> 98 <test expect_num_outputs="1">
67 <param name="bamfile" value="6_remove_dups.sam" /> 99 <param name="bamfile" value="2_remove_dups.sam" />
68 <param name="remove" value="-r" /> 100 <param name="remove" value="-r" />
69 <output name="output" file="6_remove_dups.expected.bam" /> 101 <output name="output" file="2_remove_dups.expected.bam" ftype="bam" lines_diff="4" />
70 </test> 102 </test>
71 <!-- test_cmd($opts, out=>'markdup/7_mark_supp_dup.expected.sam', cmd=>"$$opts{bin}/samtools markdup${threads} -S -O sam $$opts{path}/markdup/7_mark_supp_dup.sam -");--> 103 <!-- 3) -->
72 <test expect_num_outputs="1"> 104 <test expect_num_outputs="1">
73 <param name="bamfile" value="7_mark_supp_dup.bam" /> 105 <param name="bamfile" value="3_mark_supp_dup.bam" />
74 <param name="supp" value="-S" /> 106 <param name="supp" value="-S" />
75 <output name="output" file="7_mark_supp_dup.expected.bam" /> 107 <output name="output" file="3_mark_supp_dup.expected.bam" ftype="bam" lines_diff="4" />
76 </test> 108 </test>
77 <!-- test stats output --> 109 <!-- 4) test stats output -->
78 <test expect_num_outputs="2"> 110 <test expect_num_outputs="2">
79 <param name="bamfile" value="5_markdup.sam" /> 111 <param name="bamfile" value="1_markdup.sam" />
80 <param name="stats" value="-s" /> 112 <param name="stats" value="yes" />
81 <output name="output" file="5_markdup.expected.bam" /> 113 <output name="output" file="1_markdup.expected.bam" ftype="bam" lines_diff="4" />
82 <output name="stats_output" file="stats.txt" /> 114 <output name="stats_output" file="stats.txt" lines_diff="2" />
83 </test> 115 </test>
84 <!-- check that stderr is not swallowed, using test data from fixmate --> 116 <!-- 5) check that stderr is not swallowed, using test data from fixmate -->
85 <test expect_num_outputs="2" expect_exit_code="1" expect_failure="true"> 117 <test expect_num_outputs="2" expect_exit_code="1" expect_failure="true">
86 <param name="bamfile" value="7_two_read_mapped.sam" /> 118 <param name="bamfile" value="3_two_read_mapped.sam" />
87 <param name="stats" value="true"/> 119 <param name="stats" value="yes"/>
88 <!-- for some reason this is not possible at the moment 120 <!-- for some reason this is not possible at the moment
89 <output name="stats_output"> 121 <output name="stats_output">
90 <assert_contents> 122 <assert_contents>
91 <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/> 123 <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/>
92 </assert_contents> 124 </assert_contents>
93 </output> --> 125 </output> -->
94 <assert_stderr> 126 <assert_stderr>
95 <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/> 127 <has_line line="[markdup] error: no MC tag. Please run samtools fixmate on file first."/>
96 </assert_stderr> 128 </assert_stderr>
97 </test> 129 </test>
130 <!-- 6) check optical distance and check -c option -->
131 <test expect_num_outputs="1">
132 <param name="bamfile" value="1_markdup.sam" />
133 <param name="odist" value="10" />
134 <param name="existing_tags" value="-c" />
135 <output name="output" file="6_markdup.expected.bam" ftype="bam" lines_diff="4" />
136 </test>
137 <!-- 7) check new mode s -->
138 <test expect_num_outputs="1">
139 <param name="bamfile" value="1_markdup.sam" />
140 <param name="mode_selector" value="s" />
141 <output name="output" file="7_markdup.expected.bam" ftype="bam" lines_diff="4" />
142 </test>
143 <!-- 8) check include-fails -->
144 <test expect_num_outputs="1">
145 <param name="bamfile" value="1_markdup.sam" />
146 <param name="fails" value="- -include-fails" />
147 <output name="output" file="8_markdup.expected.bam" ftype="bam" lines_diff="4" />
148 </test>
149 <!-- 9) test sam format -->
150 <test expect_num_outputs="1">
151 <param name="bamfile" value="1_markdup.sam" />
152 <param name="select_oformat" value="SAM" />
153 <output name="output" file="9_markdup.expected.sam" lines_diff="4" />
154 </test>
155 <!-- 10) test cram format -->
156 <test expect_num_outputs="1">
157 <param name="bamfile" value="10_markdup.sam" />
158 <param name="select_oformat" value="CRAM" />
159 <param name="ref_file" value="test.fa" />
160 <output name="output" file="10_markdup.expected.cram" compare="sim_size" delta="250"/>
161 </test>
98 </tests> 162 </tests>
99 <help> 163 <help>
100 Mark duplicate alignments from a coordinate sorted file that has been run through fixmate with the -m option. This program relies on the MC and ms tags that fixmate provides. 164 Mark duplicate alignments from a coordinate sorted file that has been run through fixmate with the -m option. This program relies on the MC and ms tags that fixmate provides.
101 165
102 Note: The Galaxy tool sorts the data automatically if the input is SAM or query name sorted. 166 Note: The Galaxy tool sorts the data automatically if the input is SAM or query name sorted.
103 The output is BAM (which is query name sorted again if the input was query name sorted). 167 The output is BAM (which is query name sorted again if the input was query name sorted).
104 168
105 The optional basic statistics output of samtools markdup can be visualized with MultiQC. 169 The optional basic statistics output of samtools markdup can be visualized with MultiQC.
106 170
107 </help> 171 </help>