diff mob_recon.xml @ 10:2fd93022d5d7 draft default tip

planemo upload for repository https://github.com/phac-nml/galaxy_tools/tree/master/tools/mob-suite commit 8bd2d0ff4101e7e7e6a45247d616929593a03cd4
author nml
date Fri, 21 Feb 2025 18:41:14 +0000
parents 53f50adcb78e
children
line wrap: on
line diff
--- a/mob_recon.xml	Thu Aug 05 21:53:36 2021 +0000
+++ b/mob_recon.xml	Fri Feb 21 18:41:14 2025 +0000
@@ -2,7 +2,8 @@
   <description>Type contigs and extract plasmid sequences</description>
   <macros>
     <import>macros.xml</import>
-  </macros>  
+  </macros> 
+  <expand macro="bio_tools"/> 
   <expand macro="requirements" />
   <version_command>mob_recon --version</version_command>
   <command detect_errors="exit_code">
@@ -14,7 +15,7 @@
     ln -s '$input' '$named_input' &&
     
 
-    mob_recon  --num_threads \${GALAXY_SLOTS:-4}  --infile '${named_input}' --run_typer
+    mob_recon  --num_threads \${GALAXY_SLOTS:-4}  --infile '${named_input}'
 
     #if $adv_param.unicycler_contigs:
       --unicycler_contigs 
@@ -56,6 +57,16 @@
    --plasmid_orit '$adv_param.plasmid_orit'
    #end if
 
+   #if $adv_param.filter_db
+   --filter_db '$adv_param.filter_db'
+   #end if
+
+    --mash_genome_neighbor_threshold '${adv_param.mash_genome_neighbor_threshold}'
+    --primary_cluster_dist '${adv_param.primary_cluster_dist}'
+    --secondary_cluster_dist '${adv_param.secondary_cluster_dist}'
+    --max_contig_size '${adv_param.max_contig_size}'
+    --max_plasmid_size '${adv_param.max_plasmid_size}'
+
     --min_length '${adv_param.min_length}'  
     --min_rep_evalue '${adv_param.min_rep_evalue}'
     --min_rep_evalue '${adv_param.min_rep_evalue}'
@@ -70,6 +81,8 @@
     --min_mob_cov '${adv_param.min_mob_cov}'
     --min_con_cov '${adv_param.min_con_cov}'
     --min_rpp_cov '${adv_param.min_rpp_cov}'
+    
+    
     --outdir 'outdir' &&
     mkdir ./outdir/plasmids && (mv outdir/plasmid*.fasta ./outdir/plasmids 2> /dev/null || true)
   ]]>  
@@ -77,40 +90,50 @@
   <inputs>
     <param name="input" type="data" format="fasta" label="Input" help="FASTA file with contig(s)"/>
     <section name="adv_param" title="Advanced parameters" expanded="False">
-      <param name="unicycler_contigs" type="boolean" truevalue="true" falsevalue="" checked="true" label="Check for circularity flag generated by unicycler in contigs fasta headers?"/>
-      <param name="run_overhang" type="boolean" truevalue="true" falsevalue="" checked="true" label="Detect circular contigs (i.e. potential plasmids) with assembly overhangs?"/>  
-      <param name="debug" type="boolean" truevalue="true" falsevalue="" checked="false" label="Provide debug information?"/>
+      
+      <param name="unicycler_contigs" type="boolean" truevalue="true" falsevalue="" checked="true" label="Check for circularity flag generated by unicycler in contigs fasta headers?" help="(--unicycler_contigs)"/>
+      <param name="run_overhang" type="boolean" truevalue="true" falsevalue="" checked="true" label="Detect circular contigs (i.e. potential plasmids) with assembly overhangs?" help="(--run_overhang)"/>  
+      <param name="debug" type="boolean" truevalue="true" falsevalue="" checked="false" label="Provide debug information?" help="(--debug)"/>
+      <param name="mash_genome_neighbor_threshold" label="MASH distance for selecting valid closed genomes to filter" type="float"  min="0.00001" max="1" value="0.002" help="(--mash_genome_neighbor_threshold)"/>
+      <param name="primary_cluster_dist" label="MASH distance for assigning primary plasmid cluster id" type="float"  min="0.00001" max="1" value="0.06" help="(--primary_cluster_dist)"/>
+      <param name="secondary_cluster_dist" label="MASH distance for assigning secondary plasmid cluster id" type="float"  min="0.00001" max="1" value="0.025" help="(--secondary_cluster_dist)"/>
+      <param name="max_contig_size" label="Maximum size of a contig to be considered a plasmid (bp)" type="integer"  min="1" max="1000000" value="450000" help="(--max_contig_size)"/> 
+      <param name="max_plasmid_size" label="Maximum size of a reconstructed plasmid (bp)" type="integer"  min="1" max="1000000" value="450000" help="(--max_plasmid_size)"/> 
       
-      <param name="min_rep_evalue" label="Minimum evalue threshold for replicon blastn" type="float"  min="0.00001" max="1" value="0.00001"/>
-      <param name="min_mob_evalue" label="Minimum evalue threshold for relaxase tblastn" type="float"  min="0.00001" max="1" value="0.00001"/>
-      <param name="min_con_evalue" label="Minimum evalue threshold for contig blastn" type="float"  min="0.00001" max="1" value="0.00001"/>
-      <param name="min_rpp_evalue" label="Minimum evalue threshold for repetitve elements blastn" type="float"  min="0.00001" max="1" value="0.00001"/>
-      <param name="min_length" label="Minimum length of contigs to classify" type="integer" value="1000"/>
-      <param name="min_rep_ident" label="Minimum sequence identity for replicons" type="integer"  min="0" max="100" value="80"/>
-      <param name="min_mob_ident" label="Minimum sequence identity for relaxases" type="integer"  min="0" max="100" value="80"/>
-      <param name="min_con_ident" label="Minimum sequence identity for contigs" type="integer"  min="0" max="100" value="80"/>
-      <param name="min_rpp_ident" label="Minimum sequence identity for repetitive elements" type="integer"  min="0" max="100" value="80"/>
+      <param name="min_rep_evalue" label="Minimum evalue threshold for replicon blastn" type="float"  min="0.00001" max="1" value="0.00001" help="(--min_rep_evalue)"/>
+      <param name="min_mob_evalue" label="Minimum evalue threshold for relaxase tblastn" type="float"  min="0.00001" max="1" value="0.00001" help="(--min_mob_evalue)"/>
+      <param name="min_con_evalue" label="Minimum evalue threshold for contig blastn" type="float"  min="0.00001" max="1" value="0.00001" help="(--min_con_evalue)"/>
+      <param name="min_rpp_evalue" label="Minimum evalue threshold for repetitive elements blastn" type="float"  min="0.00001" max="1" value="0.00001" help="(--min_rpp_evalue)"/>
+      <param name="min_length" label="Minimum length of contigs to classify (bp)" type="integer" value="1000" help="(--min_length)"/>
+      <param name="min_rep_ident" label="Minimum sequence identity for replicons" type="integer"  min="0" max="100" value="80" help="(--min_rep_ident)"/>
+      <param name="min_mob_ident" label="Minimum sequence identity for relaxases" type="integer"  min="0" max="100" value="80" help="(--min_mob_ident)"/>
+      <param name="min_con_ident" label="Minimum sequence identity for contigs" type="integer"  min="0" max="100" value="80" help="(--min_con_ident)"/>
+      <param name="min_rpp_ident" label="Minimum sequence identity for repetitive elements" type="integer"  min="0" max="100" value="80" help="(--min_rpp_ident)"/>
 
-      <param name="min_rep_cov" label="Minimum percentage coverage of replicon query by input assembly" type="integer"  min="0" max="100" value="80"/>
-      <param name="min_mob_cov" label="Minimum percentage coverage of relaxase query by input assembly" type="integer"  min="0" max="100" value="80"/>
-      <param name="min_con_cov" label="Minimum percentage coverage of assembly contig by the plasmid reference database to be considered" type="integer"  min="0" max="100" value="60"/>
-      <param name="min_rpp_cov" label="Minimum percentage coverage of contigs by repetitive elements" type="integer"  min="0" max="100" value="80"/>
+      <param name="min_rep_cov" label="Minimum percentage coverage of replicon query by input assembly" type="integer"  min="0" max="100" value="80" help="(--min_rep_cov)"/>
+      <param name="min_mob_cov" label="Minimum percentage coverage of relaxase query by input assembly" type="integer"  min="0" max="100" value="80" help="(--min_mob_cov)"/>
+      <param name="min_con_cov" label="Minimum percentage coverage of assembly contig by the plasmid reference database to be considered" type="integer"  min="0" max="100" value="60" help="(--min_con_cov)"/>
+      <param name="min_rpp_cov" label="Minimum percentage coverage of contigs by repetitive elements" type="integer"  min="0" max="100" value="80" help="(--min_rpp_cov)"/>
 
-      <param name="plasmid_db" optional="true" type="data" format="fasta" label="Reference Database of complete plasmids" help=""/>
-      <param name="plasmid_mash_db" optional="true" type="data" format="binary" label="Custom MASH database of plasmids" help="MASH sketch of the reference plasmids database"/>
-      <param name="plasmid_meta" type="data" optional="true" format="text" label="Plasmid cluster metadata file" help=""/>
-      <param name="plasmid_replicons" type="data" optional="true" format="fasta" label="FASTA file with plasmid replicons" help=""/>
-      <param name="repetitive_mask" type="data" optional="true" format="fasta" label="FASTA of known repetitive elements" help=""/>
-      <param name="plasmid_mob" type="data" optional="true" format="fasta" label="FASTA of plasmid relaxases" help=""/>
-      <param name="plasmid_mpf" type="data" optional="true" format="fasta" label="FASTA of known plasmid mate-pair proteins" help=""/>
-      <param name="plasmid_orit" type="data" optional="true" format="fasta" label="FASTA of known plasmid oriT dna sequences" help=""/>
+      <param name="plasmid_db" optional="true" type="data" format="fasta" label="Reference custom database of complete plasmids" help="(--plasmid_db)"/>
+      <param name="plasmid_mash_db" optional="true" type="data" format="binary" label="Custom MASH database of plasmids" help="MASH sketch of the reference plasmids database (--plasmid_mash_db)"/>
+      <param name="plasmid_meta" type="data" optional="true" format="text" label="Plasmid cluster metadata file" help="(--plasmid_meta)"/>
+      <param name="plasmid_replicons" type="data" optional="true" format="fasta" label="FASTA file with plasmid replicons" help="(--plasmid_replicons)"/>
+      <param name="repetitive_mask" type="data" optional="true" format="fasta" label="FASTA of known repetitive elements" help="(--repetitive_mask)"/>
+      <param name="plasmid_mob" type="data" optional="true" format="fasta" label="FASTA of plasmid relaxases" help="(--plasmid_mob)"/>
+      <param name="plasmid_mpf" type="data" optional="true" format="fasta" label="FASTA of known plasmid mate-pair proteins" help="(--plasmid_mpf)"/>
+      <param name="plasmid_orit" type="data" optional="true" format="fasta" label="FASTA of known plasmid oriT dna sequences" help="(--plasmid_orit)"/>
+      <param name="filter_db" type="data" optional="true" format="fasta" label="FASTA file used to mask sequences" help="(--filter_db)"/>
+
+      
+      
     </section>  
   </inputs>
   <outputs>
     <data name="contig_report" format="tabular" from_work_dir="outdir/contig_report.txt" label="${tool.name} on ${input.element_identifier}: Overall contig MOB-recon report"/> 
     <data name="mobtyper_aggregate_report" format="tabular" from_work_dir="outdir/mobtyper_results.txt" label="${tool.name} on ${input.element_identifier}: Aggregate MOB-typer report for all contigs"/>
-    <data name="chromosome" format="fasta" from_work_dir="outdir/chromosome.fasta" label="${tool.name} on ${input.element_identifier}: Chromosomal sequences"/>
-    <collection name="plasmids" type="list" label="${tool.name} on ${input.element_identifier}: Plasmids">
+    <data name="chromosome" format="fasta" from_work_dir="outdir/chromosome.fasta" label="${tool.name} on ${input.element_identifier}: CHROMOSOMAL contigs"/>
+    <collection name="plasmids" type="list" label="${tool.name} on ${input.element_identifier}: PLASMIDS RECONSTRUCTED">
       <discover_datasets pattern="__name_and_ext__" directory="outdir/plasmids" />
     </collection>
   </outputs>
@@ -146,10 +169,6 @@
 
 For more information please visit https://github.com/phac-nml/mob-suite/. 
 
-**Workflow**
-
-This preliminary \"Mobilome and Resistome Analysis Workflow\" linking mob_recon with staramr provides reports on mobilome and resistome for a given isolate given a draft genome assembly. The workflow is located in Shared Data --> Workflows --> Mobilome and Resistome Analysis Workflow (MOB-Recon and STARAMR). The workflow file can also be manually downloaded from https://raw.githubusercontent.com/phac-nml/galaxy_tools/master/tools/mob_suite/workflows/AMRworkflow_STARAMR.ga.
-
 -----
 
 **Input:**
@@ -161,7 +180,7 @@
 
 Tab-delimited report listing information for each input contig on its cluster number, possible replicon, relaxase, and repetitive elements types, etc. Refer to https://github.com/phac-nml/mob-suite#mob-recon-contig-report-format for the description of each column.
 
-Note: Plasmid sequences will not be output if none are found. Some plasmid could be intergrated into a chromosome.
+**Note:** Plasmid sequences will not be output if none are found. Some plasmids could be integrated into a chromosome.
 
 
   </help>