comparison Tasmanian.xml @ 0:bc0b40dec7d2 draft

"planemo upload for repository https://github.com/nebiolabs/tasmanian-mismatch commit a64a371ca1ed117c9bd8af743d847128fbce461c"
author iuc
date Wed, 20 May 2020 17:23:42 -0400
parents
children b15fbf90db53
comparison
equal deleted inserted replaced
-1:000000000000 0:bc0b40dec7d2
1 <tool id="tasmanian_mismatch" name="Analysis of artifacts with Tasmanian" version="0.1.3" profile="18.09">
2 <requirements><!-- Packages the command section depends on, pinned to exact versions -->
3 <requirement version="0.1.3" type="package">tasmanian-mismatch</requirement>
4 <requirement version="1.9" type="package">samtools</requirement>
5 </requirements>
6 <command detect_errors="exit_code">
7 <![CDATA[
8 ## Reference FASTA: link a history dataset to a local name, or use the path of the cached entry
9
10 #if str($reference_source.reference_source_selector) == "history":
11 #set $reference_fasta_filename = "localref.fa"
12 ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' &&
13 #else:
14 #set $reference_fasta_filename = str($reference_source.ref_file.fields.path)
15 #end if
16 ## Stream the alignments into the pipeline
17 samtools view '${bam_input}' |
18 ## Only when a bed file was supplied: pass the stream through run_intersections
19 #if $bed_filename:
20 run_intersections -b '${bed_filename}' |
21 #end if
22 ## Final stage: run_tasmanian writes its standard output to the output table
23 run_tasmanian
24 -q '${basequality}'
25 -s '${softclips}'
26 -m '${mapquality}'
27 -c '${confidence}'
28 -r '${reference_fasta_filename}' > '${output_table}'
29
30 ]]></command>
31 <inputs>
32 <!-- Alignment input; the parameter only accepts datasets of the bam datatype -->
33 <param type="data" name="bam_input" label="Bam/Sam alignment file" format="bam" help="Specify BAM/SAM dataset. If not using a bed file, this file MUST BE SORTED"/>
34 <!-- Reference genome: either an entry of the all_fasta data table (cached) or a FASTA dataset from the history -->
35 <conditional name="reference_source">
36 <param name="reference_source_selector" type="select" label="Reference genome" help="You can select a reference genome from your history or use a built-in index (Local cache)">
37 <option value="cached">Local cache</option>
38 <option value="history">History</option>
39 </param>
40 <when value="cached">
41 <param name="ref_file" type="select" label="Select the reference genome from the list">
42 <options from_data_table="all_fasta">
43 <filter type="sort_by" column="2" />
44 <validator type="no_options" message="No indexes are available" />
45 </options>
46 </param>
47 </when>
48 <when value="history">
49 <param name="ref_file" type="data" format="fasta" label="Use reference genome from history" help="You can first upload a FASTA sequence to the history" />
50 </when>
51 </conditional>
52
53 <!-- Optional bed file; when present the command pipes the reads through run_intersections -->
54 <param name="bed_filename" type="data" format="bed" optional="true" label="Select a bed file" help="The bed file should contain at least: &quot;chrN&quot;, &quot;start&quot; and &quot;stop&quot;, and is tab separated."/>
55
56 <!-- Options forwarded to run_tasmanian (-c, -s, -m, -q) and the HTML report switch -->
57 <param name="confidence" label="Boundary" type="integer" value="20" min="0" max="100"
58 help="Number of bases in boundary region, from 0 to length of the read (read help below). Default=20"/>
59 <param name="softclips" label="Choose an action with softclips" type="select" display="radio"
60 help="How softclips should be treated. Values include 0,1 or 2 (read the help below). Default=0">
61 <option value="1">Never use softclipped bases</option>
62 <option value="2">Always use softclipped bases</option>
63 <option value="0" selected="True">Automatic decision (Default)</option>
64 </param>
65 <param name="mapquality" label="Map quality" type="integer" min="0" max="70" value="20" help="Exclude reads with lower mapQ than this number. Default=20"/>
66 <param name="basequality" label="Base quality" type="integer" min="0" max="70" value="20" help="Exclude bases with lower Base quality than this number. Default=20"/>
67 <param name="keepHTML_conditional" type="select" label="keep HTML output file?">
68 <option value="yes">Yes</option>
69 <option value="no">No</option>
70 </param>
71 </inputs>
72
73 <outputs><!-- Mismatch table is always produced; the HTML report is kept only on request -->
74 <data format="txt" name="output_table"/>
75 <data name="html_file" format="html" label="tasmanian-mismatch results table" from_work_dir="Tasmanian_artifact_report.html">
76 <filter>keepHTML_conditional == "yes"</filter>
77 </data>
78 </outputs>
79
80 <tests>
81 <!-- Reference taken from the history, reads intersected with a bed file -->
82 <test>
83 <param ftype="bam" name="bam_input" value="test2.bam"/>
84 <param value="history" name="reference_source_selector"/>
85 <param value="small_region.fa" name="ref_file"/>
86 <param ftype="bed" name="bed_filename" value="test2.bed"/>
87 <output file="test2-bed.output" name="output_table" lines_diff="4"/>
88 </test>
89 <!-- Reference taken from the history, no bed file -->
90 <test>
91 <param ftype="bam" name="bam_input" value="test2.bam"/>
92 <param value="history" name="reference_source_selector"/>
93 <param value="small_region.fa" name="ref_file"/>
94 <output file="test2-nobed.output" name="output_table" lines_diff="4"/>
95 </test>
96 <!-- Reference taken from the cached data table (hg38 entry), no bed file -->
97 <test>
98 <param ftype="bam" dbkey="hg38" name="bam_input" value="test2.bam"/>
99 <param value="cached" name="reference_source_selector"/>
100 <param value="hg38" name="ref_file"/>
101 <output file="test2-nobed.output" name="output_table" lines_diff="4"/>
102 </test>
103 </tests>
104
105 <help>
106 <![CDATA[
107
108 **What it does**
109
110 This tool counts the number/proportion of mismatches per position along the read,
111 for each read (see figure below).
112
113 .. image:: ${static_path}/images/snapshot_good.jpg
114 :height: 350
115 :width: 650
116
117 -----
118
119 **What is special**
120
121 By providing a bed file, tasmanian-mismatch will count mismatches from all regions depicted in the figure below,
122 and will report them separately. Also, a parameter defined as *"confidence"* allows including reads with >= *confidence*
123 bases in the boundary region in a separate group. This is useful when the bed refers to repeat regions. Since these
124 regions might not have been correctly placed in the assembly or are not the same in different individuals, we can
125 include these *confidence* repeat regions where we have high confidence in the reference genome to which we mapped the reads.
126
127 .. image:: ${static_path}/images/intersections_tasmanian.jpg
128 :height: 150
129 :width: 650
130
131 Softclips are critical in FFPE (Formalin-fixed paraffin-embedded) experiments as mismatches tend to accumulate at the ends of the reads. Most often, softclips
132 are all accepted during the analysis and many real mismatches are indirectly excluded from the analysis. Hence, this tool
133 provides different ways to deal with this:
134
135 The *softclips* field allows for 3 different ways of treating softclips:
136 0) Exclude these regions if there is less than 2/3 identity with the reference genome
137 1) Exclude all softclipped bases
138 2) Include all softclipped bases
139
140 .. class:: warningmark
141
142 BAM/SAM file must be **sorted** if not using a bed file.
143
144 ]]>
145 </help>
146 <citations><!-- BibTeX separates author names with "and"; the "others" placeholder must come last -->
147 <citation type="bibtex">
148 @misc{githubtasmanian,
149 author = {Langhorst, B. and Erijman, A. and others},
150 year = {2020},
151 title = {TBD},
152 publisher = {GitHub},
153 journal = {GitHub repository},
154 url = {https://github.com/nebiolabs/tasmanian-mismatch},
155 }
156 </citation>
157 </citations>
158 </tool>