comparison mob_recon.xml @ 10:2fd93022d5d7 draft default tip

planemo upload for repository https://github.com/phac-nml/galaxy_tools/tree/master/tools/mob-suite commit 8bd2d0ff4101e7e7e6a45247d616929593a03cd4
author nml
date Fri, 21 Feb 2025 18:41:14 +0000
parents 53f50adcb78e
children
comparison
equal deleted inserted replaced
9:93ba63eaf394 10:2fd93022d5d7
1 <tool id="mob_recon" name="MOB-Recon" version="@VERSION@+galaxy0"> 1 <tool id="mob_recon" name="MOB-Recon" version="@VERSION@+galaxy0">
2 <description>Type contigs and extract plasmid sequences</description> 2 <description>Type contigs and extract plasmid sequences</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="bio_tools"/>
6 <expand macro="requirements" /> 7 <expand macro="requirements" />
7 <version_command>mob_recon --version</version_command> 8 <version_command>mob_recon --version</version_command>
8 <command detect_errors="exit_code"> 9 <command detect_errors="exit_code">
9 <![CDATA[ 10 <![CDATA[
10 #import re 11 #import re
12 13
13 #set $named_input = re.sub(r'(\s|\(|\)|:|!)', '_', str($input.element_identifier)+'.fasta') 14 #set $named_input = re.sub(r'(\s|\(|\)|:|!)', '_', str($input.element_identifier)+'.fasta')
14 ln -s '$input' '$named_input' && 15 ln -s '$input' '$named_input' &&
15 16
16 17
17 mob_recon --num_threads \${GALAXY_SLOTS:-4} --infile '${named_input}' --run_typer 18 mob_recon --num_threads \${GALAXY_SLOTS:-4} --infile '${named_input}'
18 19
19 #if $adv_param.unicycler_contigs: 20 #if $adv_param.unicycler_contigs:
20 --unicycler_contigs 21 --unicycler_contigs
21 #end if 22 #end if
22 23
54 55
55 #if $adv_param.plasmid_orit 56 #if $adv_param.plasmid_orit
56 --plasmid_orit '$adv_param.plasmid_orit' 57 --plasmid_orit '$adv_param.plasmid_orit'
57 #end if 58 #end if
58 59
60 #if $adv_param.filter_db
61 --filter_db '$adv_param.filter_db'
62 #end if
63
64 --mash_genome_neighbor_threshold '${adv_param.mash_genome_neighbor_threshold}'
65 --primary_cluster_dist '${adv_param.primary_cluster_dist}'
66 --secondary_cluster_dist '${adv_param.secondary_cluster_dist}'
67 --max_contig_size '${adv_param.max_contig_size}'
68 --max_plasmid_size '${adv_param.max_plasmid_size}'
69
59 --min_length '${adv_param.min_length}' 70 --min_length '${adv_param.min_length}'
60 --min_rep_evalue '${adv_param.min_rep_evalue}' 71 --min_rep_evalue '${adv_param.min_rep_evalue}'
61 --min_rep_evalue '${adv_param.min_rep_evalue}' 72 --min_rep_evalue '${adv_param.min_rep_evalue}'
62 --min_mob_evalue '${adv_param.min_mob_evalue}' 73 --min_mob_evalue '${adv_param.min_mob_evalue}'
63 --min_con_evalue '${adv_param.min_con_evalue}' 74 --min_con_evalue '${adv_param.min_con_evalue}'
68 79
69 --min_rep_cov '${adv_param.min_rep_cov}' 80 --min_rep_cov '${adv_param.min_rep_cov}'
70 --min_mob_cov '${adv_param.min_mob_cov}' 81 --min_mob_cov '${adv_param.min_mob_cov}'
71 --min_con_cov '${adv_param.min_con_cov}' 82 --min_con_cov '${adv_param.min_con_cov}'
72 --min_rpp_cov '${adv_param.min_rpp_cov}' 83 --min_rpp_cov '${adv_param.min_rpp_cov}'
84
85
73 --outdir 'outdir' && 86 --outdir 'outdir' &&
74 mkdir ./outdir/plasmids && (mv outdir/plasmid*.fasta ./outdir/plasmids 2> /dev/null || true) 87 mkdir ./outdir/plasmids && (mv outdir/plasmid*.fasta ./outdir/plasmids 2> /dev/null || true)
75 ]]> 88 ]]>
76 </command> 89 </command>
77 <inputs> 90 <inputs>
78 <param name="input" type="data" format="fasta" label="Input" help="FASTA file with contig(s)"/> 91 <param name="input" type="data" format="fasta" label="Input" help="FASTA file with contig(s)"/>
79 <section name="adv_param" title="Advanced parameters" expanded="False"> 92 <section name="adv_param" title="Advanced parameters" expanded="False">
80 <param name="unicycler_contigs" type="boolean" truevalue="true" falsevalue="" checked="true" label="Check for circularity flag generated by unicycler in contigs fasta headers?"/>
81 <param name="run_overhang" type="boolean" truevalue="true" falsevalue="" checked="true" label="Detect circular contigs (i.e. potential plasmids) with assembly overhangs?"/>
82 <param name="debug" type="boolean" truevalue="true" falsevalue="" checked="false" label="Provide debug information?"/>
83 93
84 <param name="min_rep_evalue" label="Minimum evalue threshold for replicon blastn" type="float" min="0.00001" max="1" value="0.00001"/> 94 <param name="unicycler_contigs" type="boolean" truevalue="true" falsevalue="" checked="true" label="Check for circularity flag generated by unicycler in contigs fasta headers?" help="(--unicycler_contigs)"/>
85 <param name="min_mob_evalue" label="Minimum evalue threshold for relaxase tblastn" type="float" min="0.00001" max="1" value="0.00001"/> 95 <param name="run_overhang" type="boolean" truevalue="true" falsevalue="" checked="true" label="Detect circular contigs (i.e. potential plasmids) with assembly overhangs?" help="(--run_overhang)"/>
86 <param name="min_con_evalue" label="Minimum evalue threshold for contig blastn" type="float" min="0.00001" max="1" value="0.00001"/> 96 <param name="debug" type="boolean" truevalue="true" falsevalue="" checked="false" label="Provide debug information?" help="(--debug)"/>
87 <param name="min_rpp_evalue" label="Minimum evalue threshold for repetitve elements blastn" type="float" min="0.00001" max="1" value="0.00001"/> 97 <param name="mash_genome_neighbor_threshold" label="Mash distance selecting valid closed genomes to filter" type="float" min="0.00001" max="1" value="0.002" help="(--mash_genome_neighbor_threshold)"/>
88 <param name="min_length" label="Minimum length of contigs to classify" type="integer" value="1000"/> 98 <param name="primary_cluster_dist" label="MASH distance for assigning primary plasmid cluster id" type="float" min="0.00001" max="1" value="0.06" help="(--primary_cluster_dist)"/>
89 <param name="min_rep_ident" label="Minimum sequence identity for replicons" type="integer" min="0" max="100" value="80"/> 99 <param name="secondary_cluster_dist" label="MASH distance for assigning secondary plasmid cluster id" type="float" min="0.00001" max="1" value="0.025" help="(--secondary_cluster_dist)"/>
90 <param name="min_mob_ident" label="Minimum sequence identity for relaxases" type="integer" min="0" max="100" value="80"/> 100 <param name="max_contig_size" label="Maximum size of a contig to be considered a plasmid (bp)" type="integer" min="1" max="1000000" value="450000" help="(--max_contig_size)"/>
91 <param name="min_con_ident" label="Minimum sequence identity for contigs" type="integer" min="0" max="100" value="80"/> 101 <param name="max_plasmid_size" label="Maximum size of a reconstructed plasmid (bp)" type="integer" min="1" max="1000000" value="450000" help="(--max_plasmid_size)"/>
92 <param name="min_rpp_ident" label="Minimum sequence identity for repetitive elements" type="integer" min="0" max="100" value="80"/> 102
103 <param name="min_rep_evalue" label="Minimum evalue threshold for replicon blastn" type="float" min="0.00001" max="1" value="0.00001" help="(--min_rep_evalue)"/>
104 <param name="min_mob_evalue" label="Minimum evalue threshold for relaxase tblastn" type="float" min="0.00001" max="1" value="0.00001" help="(--min_mob_evalue)"/>
105 <param name="min_con_evalue" label="Minimum evalue threshold for contig blastn" type="float" min="0.00001" max="1" value="0.00001" help="(--min_con_evalue)"/>
106 <param name="min_rpp_evalue" label="Minimum evalue threshold for repetitive elements blastn" type="float" min="0.00001" max="1" value="0.00001" help="(--min_rpp_evalue)"/>
107 <param name="min_length" label="Minimum length of contigs to classify (bp)" type="integer" value="1000" help="(--min_length)"/>
108 <param name="min_rep_ident" label="Minimum sequence identity for replicons" type="integer" min="0" max="100" value="80" help="(--min_rep_ident)"/>
109 <param name="min_mob_ident" label="Minimum sequence identity for relaxases" type="integer" min="0" max="100" value="80" help="(--min_mob_ident)"/>
110 <param name="min_con_ident" label="Minimum sequence identity for contigs" type="integer" min="0" max="100" value="80" help="(--min_con_ident)"/>
111 <param name="min_rpp_ident" label="Minimum sequence identity for repetitive elements" type="integer" min="0" max="100" value="80" help="(--min_rpp_ident)"/>
93 112
94 <param name="min_rep_cov" label="Minimum percentage coverage of replicon query by input assembly" type="integer" min="0" max="100" value="80"/> 113 <param name="min_rep_cov" label="Minimum percentage coverage of replicon query by input assembly" type="integer" min="0" max="100" value="80" help="(--min_rep_cov)"/>
95 <param name="min_mob_cov" label="Minimum percentage coverage of relaxase query by input assembly" type="integer" min="0" max="100" value="80"/> 114 <param name="min_mob_cov" label="Minimum percentage coverage of relaxase query by input assembly" type="integer" min="0" max="100" value="80" help="(--min_mob_cov)"/>
96 <param name="min_con_cov" label="Minimum percentage coverage of assembly contig by the plasmid reference database to be considered" type="integer" min="0" max="100" value="60"/> 115 <param name="min_con_cov" label="Minimum percentage coverage of assembly contig by the plasmid reference database to be considered" type="integer" min="0" max="100" value="60" help="(--min_con_cov)"/>
97 <param name="min_rpp_cov" label="Minimum percentage coverage of contigs by repetitive elements" type="integer" min="0" max="100" value="80"/> 116 <param name="min_rpp_cov" label="Minimum percentage coverage of contigs by repetitive elements" type="integer" min="0" max="100" value="80" help="(--min_rpp_cov)"/>
98 117
99 <param name="plasmid_db" optional="true" type="data" format="fasta" label="Reference Database of complete plasmids" help=""/> 118 <param name="plasmid_db" optional="true" type="data" format="fasta" label="Reference custom database of complete plasmids" help="(--plasmid_db)"/>
100 <param name="plasmid_mash_db" optional="true" type="data" format="binary" label="Custom MASH database of plasmids" help="MASH sketch of the reference plasmids database"/> 119 <param name="plasmid_mash_db" optional="true" type="data" format="binary" label="Custom MASH database of plasmids" help="MASH sketch of the reference plasmids database (--plasmid_mash_db)"/>
101 <param name="plasmid_meta" type="data" optional="true" format="text" label="Plasmid cluster metadata file" help=""/> 120 <param name="plasmid_meta" type="data" optional="true" format="text" label="Plasmid cluster metadata file" help="(--plasmid_meta)"/>
102 <param name="plasmid_replicons" type="data" optional="true" format="fasta" label="FASTA file with plasmid replicons" help=""/> 121 <param name="plasmid_replicons" type="data" optional="true" format="fasta" label="FASTA file with plasmid replicons" help="(--plasmid_replicons)"/>
103 <param name="repetitive_mask" type="data" optional="true" format="fasta" label="FASTA of known repetitive elements" help=""/> 122 <param name="repetitive_mask" type="data" optional="true" format="fasta" label="FASTA of known repetitive elements" help="(--repetitive_mask)"/>
104 <param name="plasmid_mob" type="data" optional="true" format="fasta" label="FASTA of plasmid relaxases" help=""/> 123 <param name="plasmid_mob" type="data" optional="true" format="fasta" label="FASTA of plasmid relaxases" help="(--plasmid_mob)"/>
105 <param name="plasmid_mpf" type="data" optional="true" format="fasta" label="FASTA of known plasmid mate-pair proteins" help=""/> 124 <param name="plasmid_mpf" type="data" optional="true" format="fasta" label="FASTA of known plasmid mate-pair proteins" help="(--plasmid_mpf)"/>
106 <param name="plasmid_orit" type="data" optional="true" format="fasta" label="FASTA of known plasmid oriT dna sequences" help=""/> 125 <param name="plasmid_orit" type="data" optional="true" format="fasta" label="FASTA of known plasmid oriT dna sequences" help="(--plasmid_orit)"/>
126 <param name="filter_db" type="data" optional="true" format="fasta" label="Path to fasta file to mask sequences" help="(--filter_db)"/>
127
128
129
107 </section> 130 </section>
108 </inputs> 131 </inputs>
109 <outputs> 132 <outputs>
110 <data name="contig_report" format="tabular" from_work_dir="outdir/contig_report.txt" label="${tool.name} on ${input.element_identifier}: Overall contig MOB-recon report"/> 133 <data name="contig_report" format="tabular" from_work_dir="outdir/contig_report.txt" label="${tool.name} on ${input.element_identifier}: Overall contig MOB-recon report"/>
111 <data name="mobtyper_aggregate_report" format="tabular" from_work_dir="outdir/mobtyper_results.txt" label="${tool.name} on ${input.element_identifier}: Aggregate MOB-typer report for all contigs"/> 134 <data name="mobtyper_aggregate_report" format="tabular" from_work_dir="outdir/mobtyper_results.txt" label="${tool.name} on ${input.element_identifier}: Aggregate MOB-typer report for all contigs"/>
112 <data name="chromosome" format="fasta" from_work_dir="outdir/chromosome.fasta" label="${tool.name} on ${input.element_identifier}: Chromosomal sequences"/> 135 <data name="chromosome" format="fasta" from_work_dir="outdir/chromosome.fasta" label="${tool.name} on ${input.element_identifier}: CHROMOSOMAL contigs"/>
113 <collection name="plasmids" type="list" label="${tool.name} on ${input.element_identifier}: Plasmids"> 136 <collection name="plasmids" type="list" label="${tool.name} on ${input.element_identifier}: PLASMIDS RECONSTRUCTED">
114 <discover_datasets pattern="__name_and_ext__" directory="outdir/plasmids" /> 137 <discover_datasets pattern="__name_and_ext__" directory="outdir/plasmids" />
115 </collection> 138 </collection>
116 </outputs> 139 </outputs>
117 <tests> 140 <tests>
118 <test> 141 <test>
144 167
145 This tool reconstructs individual plasmid sequences from draft genome assemblies using the plasmid reference databases. 168 This tool reconstructs individual plasmid sequences from draft genome assemblies using the plasmid reference databases.
146 169
147 For more information please visit https://github.com/phac-nml/mob-suite/. 170 For more information please visit https://github.com/phac-nml/mob-suite/.
148 171
149 **Workflow**
150
151 This preliminary \"Mobilome and Resistome Analysis Workflow\" linking mob_recon with staramr provides reports on mobilome and resistome for a given isolate given a draft genome assembly. The workflow is located in Shared Data --> Workflows --> Mobilome and Resistome Analysis Workflow (MOB-Recon and STARAMR). The workflow file can also be manually downloaded from https://raw.githubusercontent.com/phac-nml/galaxy_tools/master/tools/mob_suite/workflows/AMRworkflow_STARAMR.ga.
152
153 ----- 172 -----
154 173
155 **Input:** 174 **Input:**
156 175
157 A FASTA file with a single or multiple contigs (e.g. a draft genome assembly): 176 A FASTA file with a single or multiple contigs (e.g. a draft genome assembly):
159 178
160 **Output:** 179 **Output:**
161 180
162 Tab-delimited report listing information for each input contig on its cluster number, possible replicon, relaxase, and repetitive elements types, etc. Refer to https://github.com/phac-nml/mob-suite#mob-recon-contig-report-format for the description of each column. 181 Tab-delimited report listing information for each input contig on its cluster number, possible replicon, relaxase, and repetitive elements types, etc. Refer to https://github.com/phac-nml/mob-suite#mob-recon-contig-report-format for the description of each column.
163 182
164 Note: Plasmid sequences will not be output if none are found. Some plasmid could be intergrated into a chromosome. 183 **Note:** Plasmid sequences will not be output if none are found. Some plasmids could be integrated into a chromosome.
165 184
166 185
167 </help> 186 </help>
168 <citations> 187 <citations>
169 <citation type="bibtex"> 188 <citation type="bibtex">