Mercurial > repos > iuc > tasmanian_mismatch
comparison Tasmanian.xml @ 0:bc0b40dec7d2 draft
"planemo upload for repository https://github.com/nebiolabs/tasmanian-mismatch commit a64a371ca1ed117c9bd8af743d847128fbce461c"
author | iuc |
---|---|
date | Wed, 20 May 2020 17:23:42 -0400 |
parents | |
children | b15fbf90db53 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bc0b40dec7d2 |
---|---|
1 <tool id="tasmanian_mismatch" name="Analysis of artifacts with Tasmanian" version="0.1.3" profile="18.09"> | |
2 <requirements> | |
3 <requirement type="package" version="0.1.3">tasmanian-mismatch</requirement> | |
4 <requirement type="package" version="1.9">samtools</requirement> | |
5 </requirements> | |
6 <command detect_errors="exit_code"> | |
7 <![CDATA[ | |
8 | |
9 #set $reference_fasta_filename = "localref.fa" | |
10 | |
11 #if str( $reference_source.reference_source_selector ) == "history": | |
12 ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' && | |
13 #else: | |
14 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) | |
15 #end if | |
16 | |
17 samtools view '${bam_input}' | | |
18 | |
19 #if $bed_filename | |
20 run_intersections -b '$bed_filename' | | |
21 #end if | |
22 | |
23 run_tasmanian | |
24 -q '${basequality}' | |
25 -s '${softclips}' | |
26 -m '${mapquality}' | |
27 -c '${confidence}' | |
28 -r '${reference_fasta_filename}' > '${output_table}' | |
29 | |
30 ]]></command> | |
31 <inputs> | |
32 <!-- Bam alignment file --> | |
33 <param type="data" name="bam_input" label="Bam/Sam alignment file" format="bam" help="Specify BAM/SAM dataset. If not using a bed file, this file MUST BE SORTED"/> | |
34 <!-- reference genome upload --> | |
35 <conditional name="reference_source"> | |
36 <param name="reference_source_selector" type="select" label="Reference genome" help="You can select a reference genome from your history or use a built-in index (Local cache)"> | |
37 <option value="cached">Local cache</option> | |
38 <option value="history">History</option> | |
39 </param> | |
40 <when value="cached"> | |
41 <param name="ref_file" type="select" label="Select the reference genome from the list"> | |
42 <options from_data_table="all_fasta"> | |
43 <filter type="sort_by" column="2" /> | |
44 <validator type="no_options" message="No indexes are available" /> | |
45 </options> | |
46 </param> | |
47 </when> | |
48 <when value="history"> | |
49 <param name="ref_file" type="data" format="fasta" label="Use reference genome from history" help="You can first upload a FASTA sequence to the history" /> | |
50 </when> | |
51 </conditional> | |
52 | |
53 <!-- bed file --> | |
54 <param name="bed_filename" type="data" format="bed" optional="true" label="Select a bed file" help="The bed file should contain at least: &quot;chrN&quot;, &quot;start&quot; and &quot;stop&quot;, and is tab separated."/> | |
55 | |
56 <!-- Additional parameters --> | |
57 <param name="confidence" label="Boundary" type="integer" value="20" min="0" max="100" | |
58 help="Number of bases in boundary region, from 0 to length of the read (read help below). Default=20"/> | |
59 <param name="softclips" label="Choose an action with softclips" type="select" display="radio" | |
60 help="How softclips should be treated. Values include 0,1 or 2 (read the help below). Default=0"> | |
61 <option value="1">Never use softclipped bases</option> | |
62 <option value="2">Always use softclipped bases</option> | |
63 <option value="0" selected="True">Automatic decision (Default)</option> | |
64 </param> | |
65 <param name="mapquality" label="Map quality" type="integer" min="0" max="70" value="20" help="Exclude reads with lower mapQ than this number. Default=20"/> | |
66 <param name="basequality" label="Base quality" type="integer" min="0" max="70" value="20" help="Exclude bases with lower Base quality than this number. Default=20"/> | |
67 <param name="keepHTML_conditional" type="select" label="keep HTML output file?"> | |
68 <option value="yes">Yes</option> | |
69 <option value="no">No</option> | |
70 </param> | |
71 </inputs> | |
72 | |
73 <outputs> | |
74 <data name="output_table" format="txt" /> | |
75 <data format="html" name="html_file" from_work_dir="Tasmanian_artifact_report.html" label="tasmanian-mismatch results table"> | |
76 <filter>keepHTML_conditional == "yes"</filter> | |
77 </data> | |
78 </outputs> | |
79 | |
80 <tests> | |
81 <!-- test when reference from history with bed--> | |
82 <test> | |
83 <param name="bam_input" value="test2.bam" ftype="bam"/> | |
84 <param name="reference_source_selector" value="history"/> | |
85 <param name="ref_file" value="small_region.fa"/> | |
86 <param name="bed_filename" value="test2.bed" ftype="bed"/> | |
87 <output name="output_table" file="test2-bed.output" lines_diff="4"/> | |
88 </test> | |
89 <!-- test when reference from history without bed--> | |
90 <test> | |
91 <param name="bam_input" value="test2.bam" ftype="bam"/> | |
92 <param name="reference_source_selector" value="history"/> | |
93 <param name="ref_file" value="small_region.fa"/> | |
94 <output name="output_table" file="test2-nobed.output" lines_diff="4"/> | |
95 </test> | |
96 <!-- test when reference from cached--> | |
97 <test> | |
98 <param name="bam_input" value="test2.bam" ftype="bam" dbkey="hg38"/> | |
99 <param name="reference_source_selector" value="cached"/> | |
100 <param name="ref_file" value="hg38"/> | |
101 <output name="output_table" file="test2-nobed.output" lines_diff="4"/> | |
102 </test> | |
103 </tests> | |
104 | |
105 <help> | |
106 <![CDATA[ | |
107 | |
108 **What it does** | |
109 | |
110 This tool counts the number/proportion of mismatches per position along the read, | |
111 for each read (see figure below). | |
112 | |
113 .. image:: ${static_path}/images/snapshot_good.jpg | |
114 :height: 350 | |
115 :width: 650 | |
116 | |
117 ----- | |
118 | |
119 **What is special** | |
120 | |
121 By providing a bed file, tasmanian-mismatch will count mismatches from all regions depicted in the figure below, | |
122 and will report them separately. Also, a parameter defined as *"confidence"* allows including reads with >= *confidence* | |
123 bases in the boundary region in a separate group. This is useful when the bed refers to repeat regions. Since these | |
124 regions might not have been correctly placed in the assembly or are not the same in different individuals, we can | |
125 include this *confidence* repeat regions where we have high confidence on the reference genome to which we mapped the reads. | |
126 | |
127 .. image:: ${static_path}/images/intersections_tasmanian.jpg | |
128 :height: 150 | |
129 :width: 650 | |
130 | |
131 Softclips are critical in FFPE (Formalin-fixed paraffin-embedded) experiments as mismatches tend to accumulate at the ends of the reads. Most often, softclips | |
132 are all accepted during the analysis and many real mismatches are indirectly excluded from the analysis. Hence, this tool | |
133 provides different ways to deal with this: | |
134 | |
135 The *softclips* field allows for 3 different ways of treating softclips: | |
136 0) Exclude these regions if there is less than 2/3 identity with the reference genome | |
137 1) Exclude all softclipped bases | |
138 2) Include all softclipped bases | |
139 | |
140 .. class:: warningmark | |
141 | |
142 BAM/SAM file must be **sorted** if not using a bed file. | |
143 | |
144 ]]> | |
145 </help> | |
146 <citations> | |
147 <citation type="bibtex"> | |
148 @misc{githubtasmanian, | |
149 author = {Langhorst B., Others, Erijman A.}, | |
150 year = {2020}, | |
151 title = {TBD}, | |
152 publisher = {GitHub}, | |
153 journal = {GitHub repository}, | |
154 url = {https://github.com/nebiolabs/tasmanian-mismatch}, | |
155 } | |
156 </citation> | |
157 </citations> | |
158 </tool> |