comparison integron_finder.xml @ 0:1ae00120dd24 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bakta/integron_finder commit 6e0d10965c02c249844f1eddd1c7442990695a6a
author iuc
date Thu, 22 Sep 2022 13:51:14 +0000
parents
children 4768f7f8e93f
comparison
equal deleted inserted replaced
-1:000000000000 0:1ae00120dd24
1 <tool id="integron_finder" name="Integron Finder" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description> is a program that detects integrons in DNA sequences</description>
3 <macros>
4 <import>macro.xml</import>
5 </macros>
6 <expand macro="edam_info"/>
7 <expand macro="xrefs"/>
8 <expand macro="requirements"/>
9 <command detect_errors="aggressive"><![CDATA[
10 integron_finder
11 '$sequence'
12 --cpu @THREADS@
13 --keep-tmp
14 $local_max
15 #if $type_replicon
16 $type_replicon
17 #end if
18 #if $topology_file
19 --topology-file '$topology_file'
20 #end if
21 $promoter_attI
22 --distance-thresh $settings.attc_settings.dist_thresh
23 --calin-threshold $settings.attc_settings.calin_threshold
24 --max-attc-size $settings.attc_settings.max_attc_size
25 --min-attc-size $settings.attc_settings.min_attc_size
26 $settings.attc_settings.keep_palindromes
27 #if $settings.attc_settings.covar_matrix
28 --attc-model '$settings.attc_settings.covar_matrix'
29 #end if
30 $settings.protein_settings.no_proteins
31 $settings.protein_settings.union_integrases
32 $settings.protein_settings.func_annot
33 $gbk
34 $pdf
35 && mv Results_Integron_Finder_* Results_Integron_Finder
36 ]]></command> <!-- Runs integron_finder on the selected replicon, then renames the generated Results_Integron_Finder_* directory to the fixed name Results_Integron_Finder; optional flags are emitted only when their parameter is set. -->
37 <inputs> <!-- User-facing parameters; the names here are referenced by the command template and by the tests. -->
38 <param type="data" name="sequence" format="fasta" label="Replicon file" help="Replicon can be an entire chromosome, contig, PCR fragments..." />
39 <param name="local_max" argument="--local-max" type="boolean" checked="false" truevalue="--local-max" falsevalue="" label="Thorough local detection" help="This option allows a more sensitive search. It will be slower (dependent on the number of hits) if integrons are found, but will be as fast if nothing is detected and will not increase the false positive rate." />
40 <param name="type_replicon" type="select" optional="true" label="Default replicons topology" help="Set the default topology for replicons, linear, circular (default: no topology)">
41 <option value="--linear">linear (--linear)</option>
42 <option value="--circ">circular (--circ)</option>
43 </param>
44 <param name="topology_file" argument="--topology-file" type="data" format="txt" optional="true" label="Select a topology file from your history"/>
45 <param name="promoter_attI" argument="--promoter-attI" type="boolean" checked="false" truevalue="--promoter-attI" falsevalue="" label="Search also for promoter and attI sites?" />
46 <param argument="--gbk" type="boolean" checked="false" truevalue="--gbk" falsevalue="" label="Genbank output?" help="Generate a GenBank file with the sequence annotated with the same annotations as the .integrons file."/>
47 <param argument="--pdf" type="boolean" checked="false" truevalue="--pdf" falsevalue="" label="pdf output?" help="For each complete integron, a simple graphic of the region is depicted (in pdf format)"/>
48 <section name="settings" title="Advanced Parameters" expanded="False">
49 <section name="attc_settings" title="Attc options" expanded="False">
50 <param name="dist_thresh" argument="--distance-thresh" type="integer" value="4000" label="Threshold for clustering (in base)" min="0" help="By default, to cluster an array of attC sites and an integron integrase, they must be less than 4 kb apart. You can here change this value." />
51 <param name="calin_threshold" argument="--calin-threshold" type="integer" value="2" label="Threshold to filter CALIN" min="0" help="Keep 'CALIN' only if attC sites number >= calin-threshold" />
52 <param name="max_attc_size" argument="--max-attc-size" type="integer" value="200" label="Maximum value for attC size" min="0"/>
53 <param name="min_attc_size" argument="--min-attc-size" type="integer" value="40" label="Minimum value for attC size" min="0" />
54 <param name="keep_palindromes" argument="--keep-palindromes" type="boolean" checked="false" truevalue="--keep-palindromes" falsevalue="" label="Keep palindromes with the highest evalue" help="For a given hit, if the palindromic version is found, don't remove the one with highest evalue"/>
55 <param name="covar_matrix" argument="--attc-model" type="data" optional="true" format="txt" label="Covariance Matrix" />
56 </section>
57 <section name="protein_settings" title="Protein options" expanded="False">
58 <param name="no_proteins" argument="--no-proteins" type="boolean" checked="false" truevalue="--no-proteins" falsevalue="" label="Just look for attC sites" help="When enabled, it does not annotate CDS and does not find integrase."/>
59 <param name="union_integrases" argument="--union-integrases" type="boolean" checked="false" truevalue="--union-integrases" falsevalue="" label="Use the union of the hits" help="Instead of taking intersection of hits from Phage_int profile (Tyr recombinases) and integron_integrase profile, use the union of the hits" />
60 <param name="func_annot" argument="--func-annot" type="boolean" checked="false" truevalue="--func-annot" falsevalue="" label="Annotate cassettes given HMM profiles" />
61 </section>
62 </section>
63 <param name="no_logfile" type="boolean" truevalue="true" falsevalue="false" label="Remove log file"/>
64 </inputs>
65 <outputs> <!-- Two fixed tabular outputs, an optional log, and two optional collections discovered in Results_Integron_Finder/. -->
66 <collection name="genbank_out" type="list" label="Genbank files from [$tool.name] on $[on_string]">
67 <discover_datasets directory="Results_Integron_Finder/" pattern="(?P&lt;designation&gt;.+)\.gbk" format="gbk" visible="false" />
68 <filter>gbk</filter>
69 </collection>
70 <data name="integron_log" format="txt" label="Log from [$tool.name] on $[on_string]" from_work_dir="Results_Integron_Finder/integron_finder.out">
71 <filter> no_logfile == False</filter>
72 </data>
73 <data name="integrons_table" format="tsv" label="Integrons annotations from [$tool.name] on $[on_string]" from_work_dir="Results_Integron_Finder/*.integrons"/>
74 <data name="summary" format="tsv" label="Summary from [$tool.name] on $[on_string]" from_work_dir="Results_Integron_Finder/*.summary"/>
75 <collection name="pdf_out" type="list" label="Graphic from [$tool.name] on $[on_string]">
76 <discover_datasets directory="Results_Integron_Finder/" pattern="(?P&lt;designation&gt;.+)\.pdf" format="pdf" visible="false" />
77 <filter>pdf</filter>
78 </collection>
79 </outputs>
80 <tests>
81 <test expect_num_outputs="3"> <!-- Only the replicon is supplied: log, integron table and summary are all compared. -->
82 <param name="sequence" value="input.fasta"/>
83 <output name="integron_log" value="integron_log" lines_diff="3" />
84 <output name="integrons_table" value="test1_integrons_table.tsv" lines_diff="3"/>
85 <output name="summary" value="summary.tsv" lines_diff="3"/>
86 </test>
87 <test expect_num_outputs="2"> <!-- local_max enabled with linear topology; the log output is switched off via no_logfile. -->
88 <param name="sequence" value="input.fasta"/>
89 <param name="local_max" value="true"/>
90 <param name="type_replicon" value="--linear"/>
91 <param name="no_logfile" value="true"/>
92 <output name="integrons_table" value="test2_integrons_table.tsv" lines_diff="3" />
93 <output name="summary" value="summary.tsv" lines_diff="4" />
94 </test>
95 <test expect_num_outputs="2"> <!-- Circular default topology. -->
96 <param name="sequence" value="input.fasta"/>
97 <param name="type_replicon" value="--circ"/>
98 <param name="no_logfile" value="true"/>
99 <output name="integrons_table" value="test3_integrons_table.tsv" lines_diff="3" />
100 <output name="summary" value="summary.tsv" lines_diff="3" />
101 </test>
102 <test expect_num_outputs="2"> <!-- Topology supplied through a topology file (topology.txt). -->
103 <param name="sequence" value="input.fasta"/>
104 <param name="topology_file" value="topology.txt"/>
105 <param name="no_logfile" value="true"/>
106 <output name="integrons_table" value="test4_integrons_table.tsv" lines_diff="3" />
107 <output name="summary" value="summary.tsv" lines_diff="5" />
108 </test>
109 <test expect_num_outputs="2"> <!-- promoter_attI enabled. -->
110 <param name="sequence" value="input.fasta"/>
111 <param name="promoter_attI" value="true"/>
112 <param name="no_logfile" value="true"/>
113 <output name="integrons_table" value="test5_integrons_table.tsv" lines_diff="3" />
114 <output name="summary" value="summary.tsv" lines_diff="3" />
115 </test>
116 <test expect_num_outputs="4"> <!-- gbk and pdf enabled: both collections are expected in addition to the two tabular outputs. -->
117 <param name="sequence" value="input.fasta"/>
118 <param name="gbk" value="true"/>
119 <param name="pdf" value="true"/>
120 <param name="no_logfile" value="true"/>
121 <output_collection name="genbank_out" type="list">
122 <element name="ACBA.007.P01_13">
123 <assert_contents>
124 <has_text text="MKTATAPLPPLRSVKVLDQLRERIRYLHYSLRTEQAYVNWVRAFI"/>
125 </assert_contents>
126 </element>
127 </output_collection>
128 <output_collection name="pdf_out" type="list">
129 <element name="ACBA.007.P01_13_1">
130 <assert_contents>
131 <has_text text=">"/>
132 </assert_contents>
133 </element>
134 </output_collection>
135 <output name="integrons_table" value="test6_integrons_table.tsv" lines_diff="3" />
136 <output name="summary" value="summary.tsv" lines_diff="3" />
137 </test>
138 <test expect_num_outputs="2"> <!-- Non-default attC settings: clustering distance, CALIN threshold and attC size bounds. NOTE(review): keep_palindromes is passed an empty value here; confirm whether the flag was meant to be enabled. -->
139 <param name="sequence" value="input.fasta"/>
140 <param name="no_logfile" value="true"/>
141 <section name="settings">
142 <section name="attc_settings">
143 <param name="dist_thresh" value="2000"/>
144 <param name="calin_threshold" value="3"/>
145 <param name="max_attc_size" value="188"/>
146 <param name="min_attc_size" value="30"/>
147 <param name="keep_palindromes" value=""/>
148 </section>
149 </section>
150 <output name="integrons_table" value="test7_integrons_table.tsv" lines_diff="3" />
151 <output name="summary" value="summary.tsv" lines_diff="3" />
152 </test>
153 <test expect_num_outputs="2"> <!-- User-supplied covariance matrix (covar.txt); the table comparison tolerates up to 10 differing lines. -->
154 <param name="sequence" value="input.fasta"/>
155 <param name="no_logfile" value="true"/>
156 <section name="settings">
157 <section name="attc_settings">
158 <param name="covar_matrix" value="covar.txt"/>
159 </section>
160 </section>
161 <output name="integrons_table" value="test8_integrons_table.tsv" lines_diff="10" />
162 <output name="summary" value="summary.tsv" lines_diff="3" />
163 </test>
164 <test expect_num_outputs="2"> <!-- no_proteins enabled; this is the only test compared against its own summary file. -->
165 <param name="sequence" value="input.fasta"/>
166 <param name="no_logfile" value="true"/>
167 <section name="settings">
168 <section name="protein_settings">
169 <param name="no_proteins" value="true"/>
170 </section>
171 </section>
172 <output name="integrons_table" value="test9_integrons_table.tsv" lines_diff="3" />
173 <output name="summary" value="test9_summary.tsv" lines_diff="3" />
174 </test>
175 <test expect_num_outputs="2"> <!-- union_integrases and func_annot both enabled. -->
176 <param name="sequence" value="input.fasta"/>
177 <param name="no_logfile" value="true"/>
178 <section name="settings">
179 <section name="protein_settings">
180 <param name="union_integrases" value="true" />
181 <param name="func_annot" value="true"/>
182 </section>
183 </section>
184 <output name="integrons_table" value="test10_integrons_table.tsv" lines_diff="3" />
185 <output name="summary" value="summary.tsv" lines_diff="3" />
186 </test>
187 </tests>
188 <help><![CDATA[
189
190 How does it work ?
191 ==================
192
193 - First, IntegronFinder annotates the DNA sequence's CDS with Prodigal.
194
195 - Second, IntegronFinder independently detects the integron integrase and *attC*
196 recombination sites. The integron integrase is detected by using the intersection
197 of two HMM profiles:
198
199 - one specific to tyrosine recombinases (PF00589)
200 - one specific to the integron integrase, near the patch III domain of tyrosine recombinases.
201
202 The *attC* recombination site is detected with a covariance model (CM), which
203 models the secondary structure in addition to the few conserved sequence
204 positions.
205
206
207 - Third, the results are integrated, and IntegronFinder distinguishes 3 types of
208 elements:
209
210 - complete integron
211 Integron with integron integrase nearby *attC* site(s)
212 - In0 element
213 Integron integrase only, without any *attC* site nearby
214 - CALIN element
215 Cluster of *attC* sites Lacking INtegrase nearby.
216 A rule of thumb to avoid false positives is to filter out singleton
217 *attC* sites.
218
219 IntegronFinder can also annotate gene cassettes (CDS nearby *attC* sites) using
220 Resfams, a database of HMM profiles aiming at annotating antibiotic resistance
221 genes. This database is provided, but users can add any other HMM profile
222 database of interest.
223
224 When available, IntegronFinder annotates the promoters and attI sites by pattern
225 matching.
226 ]]></help>
227 <expand macro="citations"/>
228 </tool>