Mercurial > repos > thanhlv > integron_finder
comparison integron_finder.xml @ 0:3a24265075bd draft default tip
planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/blob/master/tools/integron_finder commit 6b06711cfba45855d5a992ed1c73c472eaef644f
| author | thanhlv |
|---|---|
| date | Mon, 13 Feb 2023 13:53:43 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3a24265075bd |
|---|---|
| 1 <tool id="integron_finder" name="Integron Finder" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | |
| 2 <description> is a program that detects integrons in DNA sequences</description> | |
| 3 <macros> | |
| 4 <import>macro.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="edam_info"/> | |
| 7 <expand macro="xrefs"/> | |
| 8 <expand macro="requirements"/> | |
| 9 <command detect_errors="aggressive"><![CDATA[ | |
| 10 integron_finder | |
| 11 '$sequence' | |
| 12 --cpu @THREADS@ | |
| 13 --keep-tmp | |
| 14 $local_max | |
| 15 #if $type_replicon | |
| 16 $type_replicon | |
| 17 #end if | |
| 18 #if $topology_file | |
| 19 --topology-file '$topology_file' | |
| 20 #end if | |
| 21 $promoter_attI | |
| 22 -dt $settings.attc_settings.dist_thresh | |
| 23 --calin-threshold $settings.attc_settings.calin_threshold | |
| 24 --max-attc-size $settings.attc_settings.max_attc_size | |
| 25 --min-attc-size $settings.attc_settings.min_attc_size | |
| 26 $settings.attc_settings.keep_palindromes | |
| 27 #if $settings.attc_settings.covar_matrix | |
| 28 --attc-model '$settings.attc_settings.covar_matrix' | |
| 29 #end if | |
| 30 $settings.protein_settings.no_proteins | |
| 31 $settings.protein_settings.union_integrases | |
| 32 $settings.protein_settings.func_annot | |
| 33 $gbk | |
| 34 $pdf | |
| 35 && mv Results_Integron_Finder_* Results_Integron_Finder | |
| 36 ]]></command> | |
| 37 <inputs> | |
| 38 <param type="data" name="sequence" format="fasta" label="Replicon file" help="Replicon can be an entire chromosome, a contig, PCR fragments..." /> | |
| 39 <param name="local_max" argument="--local-max" type="boolean" checked="false" truevalue="--local-max" falsevalue="" label="Thorough local detection" help="This option allows a more sensitive search. It will be slower (depending on the number of hits) if integrons are found, but will be just as fast if nothing is detected and will not increase the false positive rate." /> | |
| 40 <param name="type_replicon" type="select" optional="true" label="Default replicons topology" help="Set the default topology for replicons: linear or circular (default: no topology)"> | |
| 41 <option value="--linear">linear (--linear)</option> | |
| 42 <option value="--circ">circular (--circ)</option> | |
| 43 </param> | |
| 44 <param name="topology_file" argument="--topology-file" type="data" format="txt" optional="true" label="Select a topology file from your history"/> | |
| 45 <param name="promoter_attI" argument="--promoter-attI" type="boolean" checked="false" truevalue="--promoter-attI" falsevalue="" label="Search also for promoter and attI sites?" /> | |
| 46 <param argument="--gbk" type="boolean" checked="false" truevalue="--gbk" falsevalue="" label="Genbank output?" help="Generate a GenBank file with the sequence annotated with the same annotations as the .integrons file."/> | |
| 47 <param argument="--pdf" type="boolean" checked="false" truevalue="--pdf" falsevalue="" label="pdf output?" help="For each complete integron, a simple graphic of the region is depicted (in pdf format)"/> | |
| 48 <section name="settings" title="Advanced Parameters" expanded="False"> | |
| 49 <section name="attc_settings" title="Attc options" expanded="False"> | |
| 50 <param name="dist_thresh" argument="--distance-thresh" type="integer" value="4000" label="Threshold for clustering (in base)" min="0" help="By default, to cluster an array of attC sites and an integron integrase, they must be less than 4 kb apart. You can here change this value." /> | |
| 51 <param name="calin_threshold" argument="--calin-threshold" type="integer" value="2" label="Threshold to filter CALIN" min="0" help="Keep 'CALIN' only if attC sites number >= calin-threshold" /> | |
| 52 <param name="max_attc_size" argument="--max-attc-size" type="integer" value="200" label="Maximum value for attC size" min="0"/> | |
| 53 <param name="min_attc_size" argument="--min-attc-size" type="integer" value="40" label="Minimum value for attC size" min="0" /> | |
| 54 <param name="keep_palindromes" argument="--keep-palindromes" type="boolean" checked="false" truevalue="--keep-palindromes" falsevalue="" label="Keep palindromes with the highest evalue" help="For a given hit, if the palindromic version is found, don't remove the one with highest evalue"/> | |
| 55 <param name="covar_matrix" argument="--attc-model" type="data" optional="true" format="txt" label="Covariance Matrix" /> | |
| 56 </section> | |
| 57 <section name="protein_settings" title="Protein options" expanded="False"> | |
| 58 <param name="no_proteins" argument="--no-proteins" type="boolean" checked="false" truevalue="--no-proteins" falsevalue="" label="Just look for attC sites" help="When enabled, it does not annotate CDS and does not find integrase."/> | |
| 59 <param name="union_integrases" argument="--union-integrases" type="boolean" checked="false" truevalue="--union-integrases" falsevalue="" label="Use the union of the hits" help="Instead of taking intersection of hits from Phage_int profile (Tyr recombinases) and integron_integrase profile, use the union of the hits" /> | |
| 60 <param name="func_annot" argument="--func-annot" type="boolean" checked="false" truevalue="--func-annot" falsevalue="" label="Annotate cassettes given HMM profiles" /> | |
| 61 </section> | |
| 62 </section> | |
| 63 <param name="no_logfile" type="boolean" truevalue="true" falsevalue="false" label="Remove log file"/> | |
| 64 </inputs> | |
| 65 <outputs> | |
| 66 <collection type="list" label="Genbank files from [$tool.name] on $[on_string]" name="genbank_out"> | |
| 67 <discover_datasets pattern="(?P<designation>.+)\.gbk" format="gbk" visible="false" directory="Results_Integron_Finder/" /> | |
| 68 <filter>gbk</filter> | |
| 69 </collection> | |
| 70 <data format="txt" name="integron_log" from_work_dir="Results_Integron_Finder/integron_finder.out" label="Log from [$tool.name] on $[on_string]"> | |
| 71 <filter> no_logfile == False</filter> | |
| 72 </data> | |
| 73 <data format="tsv" name="integrons_table" from_work_dir="Results_Integron_Finder/*.integrons" label="Integrons annotations from [$tool.name] on $[on_string]"/> | |
| 74 <data format="tsv" name="summary" from_work_dir="Results_Integron_Finder/*.summary" label="Summary from [$tool.name] on $[on_string]"/> | |
| 75 <collection type="list" label="Graphic from [$tool.name] on $[on_string]" name="pdf_out"> | |
| 76 <discover_datasets pattern="(?P<designation>.+)\.pdf" format="pdf" visible="false" directory="Results_Integron_Finder/" /> | |
| 77 <filter>pdf</filter> | |
| 78 </collection> | |
| 79 </outputs> | |
| 80 <tests> | |
| 81 <test expect_num_outputs="3"> | |
| 82 <param name="sequence" value="input.fasta"/> | |
| 83 <output name="integron_log" value="integron_log" lines_diff="3" /> | |
| 84 <output name="integrons_table" value="test1_integrons_table.tsv" lines_diff="3"/> | |
| 85 <output name="summary" value="summary.tsv" lines_diff="3"/> | |
| 86 </test> | |
| 87 <test expect_num_outputs="2"> | |
| 88 <param name="sequence" value="input.fasta"/> | |
| 89 <param name="local_max" value="true"/> | |
| 90 <param name="type_replicon" value="--linear"/> | |
| 91 <param name="no_logfile" value="true"/> | |
| 92 <output name="integrons_table" value="test2_integrons_table.tsv" lines_diff="3" /> | |
| 93 <output name="summary" value="summary.tsv" lines_diff="4" /> | |
| 94 </test> | |
| 95 <test expect_num_outputs="2"> | |
| 96 <param name="sequence" value="input.fasta"/> | |
| 97 <param name="type_replicon" value="--circ"/> | |
| 98 <param name="no_logfile" value="true"/> | |
| 99 <output name="integrons_table" value="test3_integrons_table.tsv" lines_diff="3" /> | |
| 100 <output name="summary" value="summary.tsv" lines_diff="3" /> | |
| 101 </test> | |
| 102 <test expect_num_outputs="2"> | |
| 103 <param name="sequence" value="input.fasta"/> | |
| 104 <param name="topology_file" value="topology.txt"/> | |
| 105 <param name="no_logfile" value="true"/> | |
| 106 <output name="integrons_table" value="test4_integrons_table.tsv" lines_diff="3" /> | |
| 107 <output name="summary" value="summary.tsv" lines_diff="5" /> | |
| 108 </test> | |
| 109 <test expect_num_outputs="2"> | |
| 110 <param name="sequence" value="input.fasta"/> | |
| 111 <param name="promoter_attI" value="true"/> | |
| 112 <param name="no_logfile" value="true"/> | |
| 113 <output name="integrons_table" value="test5_integrons_table.tsv" lines_diff="3" /> | |
| 114 <output name="summary" value="summary.tsv" lines_diff="3" /> | |
| 115 </test> | |
| 116 <test expect_num_outputs="4"> | |
| 117 <param name="sequence" value="input.fasta"/> | |
| 118 <param name="gbk" value="true"/> | |
| 119 <param name="pdf" value="true"/> | |
| 120 <param name="no_logfile" value="true"/> | |
| 121 <output_collection name="genbank_out" type="list"> | |
| 122 <element name="ACBA.007.P01_13"> | |
| 123 <assert_contents> | |
| 124 <has_text text="MKTATAPLPPLRSVKVLDQLRERIRYLHYSLRTEQAYVNWVRAFI"/> | |
| 125 </assert_contents> | |
| 126 </element> | |
| 127 </output_collection> | |
| 128 <output_collection name="pdf_out" type="list"> | |
| 129 <element name="ACBA.007.P01_13_1"> | |
| 130 <assert_contents> | |
| 131 <has_text text=">"/> | |
| 132 </assert_contents> | |
| 133 </element> | |
| 134 </output_collection> | |
| 135 <output name="integrons_table" value="test6_integrons_table.tsv" lines_diff="3" /> | |
| 136 <output name="summary" value="summary.tsv" lines_diff="3" /> | |
| 137 </test> | |
| 138 <test expect_num_outputs="2"> | |
| 139 <param name="sequence" value="input.fasta"/> | |
| 140 <param name="no_logfile" value="true"/> | |
| 141 <section name="settings"> | |
| 142 <section name="attc_settings"> | |
| 143 <param name="dist_thresh" value="2000"/> | |
| 144 <param name="calin_threshold" value="3"/> | |
| 145 <param name="max_attc_size" value="188"/> | |
| 146 <param name="min_attc_size" value="30"/> | |
| 147 <param name="keep_palindromes" value="false"/> | |
| 148 </section> | |
| 149 </section> | |
| 150 <output name="integrons_table" value="test7_integrons_table.tsv" lines_diff="3" /> | |
| 151 <output name="summary" value="summary.tsv" lines_diff="3" /> | |
| 152 </test> | |
| 153 <test expect_num_outputs="2"> | |
| 154 <param name="sequence" value="input.fasta"/> | |
| 155 <param name="no_logfile" value="true"/> | |
| 156 <section name="settings"> | |
| 157 <section name="attc_settings"> | |
| 158 <param name="covar_matrix" value="covar.txt"/> | |
| 159 </section> | |
| 160 </section> | |
| 161 <output name="integrons_table" value="test8_integrons_table.tsv" lines_diff="10" /> | |
| 162 <output name="summary" value="summary.tsv" lines_diff="3" /> | |
| 163 </test> | |
| 164 <test expect_num_outputs="2"> | |
| 165 <param name="sequence" value="input.fasta"/> | |
| 166 <param name="no_logfile" value="true"/> | |
| 167 <section name="settings"> | |
| 168 <section name="protein_settings"> | |
| 169 <param name="no_proteins" value="true"/> | |
| 170 </section> | |
| 171 </section> | |
| 172 <output name="integrons_table" value="test9_integrons_table.tsv" lines_diff="3" /> | |
| 173 <output name="summary" value="test9_summary.tsv" lines_diff="3" /> | |
| 174 </test> | |
| 175 <test expect_num_outputs="2"> | |
| 176 <param name="sequence" value="input.fasta"/> | |
| 177 <param name="no_logfile" value="true"/> | |
| 178 <section name="settings"> | |
| 179 <section name="protein_settings"> | |
| 180 <param name="union_integrases" value="true" /> | |
| 181 <param name="func_annot" value="true"/> | |
| 182 </section> | |
| 183 </section> | |
| 184 <output name="integrons_table" value="test10_integrons_table.tsv" lines_diff="3" /> | |
| 185 <output name="summary" value="summary.tsv" lines_diff="3" /> | |
| 186 </test> | |
| 187 </tests> | |
| 188 <help><![CDATA[ | |
| 189 | |
| 190 How does it work ? | |
| 191 ================== | |
| 192 | |
| 193 - First, IntegronFinder annotates the DNA sequence's CDS with Prodigal. | |
| 194 | |
| 195 - Second, IntegronFinder independently detects the integron integrase and *attC* | |
| 196 recombination sites. The Integron integrase is detected by using the intersection | |
| 197 of two HMM profiles: | |
| 198 | |
| 199 - one specific to tyrosine recombinases (PF00589) | |
| 200 - one specific to the integron integrase, near the patch III domain of tyrosine recombinases. | |
| 201 | |
| 202 The *attC* recombination site is detected with a covariance model (CM), which | |
| 203 models the secondary structure in addition to the few conserved sequence | |
| 204 positions. | |
| 205 | |
| 206 | |
| 207 - Third, the results are integrated, and IntegronFinder distinguishes 3 types of | |
| 208 elements: | |
| 209 | |
| 210 - complete integron | |
| 211 Integron with integron integrase nearby *attC* site(s) | |
| 212 - In0 element | |
| 213 Integron integrase only, without any *attC* site nearby | |
| 214 - CALIN element | |
| 215 Cluster of *attC* sites Lacking INtegrase nearby. | |
| 216 A rule of thumb to avoid false positives is to filter out singleton | |
| 217 *attC* sites. | |
| 218 | |
| 219 IntegronFinder can also annotate gene cassettes (CDS nearby *attC* sites) using | |
| 220 Resfams, a database of HMM profiles aiming at annotating antibiotic resistance | |
| 221 genes. This database is provided, but users can add any other HMM profile | |
| 222 database of their own interest. | |
| 223 | |
| 224 When available, IntegronFinder annotates the promoters and attI sites by pattern | |
| 225 matching. | |
| 226 ]]></help> | |
| 227 <expand macro="citations"/> | |
| 228 </tool> |
