diff mob_recon.xml @ 7:822575bf359f draft

"planemo upload for repository https://github.com/phac-nml/mob-suite commit 814769a7d86fa4ca552e93f8d707412e9ba23b1b"
author nml
date Fri, 05 Jun 2020 13:02:15 -0400
parents 9424de64bfa8
children 53f50adcb78e
line wrap: on
line diff
--- a/mob_recon.xml	Wed Dec 11 19:17:11 2019 -0500
+++ b/mob_recon.xml	Fri Jun 05 13:02:15 2020 -0400
@@ -1,7 +1,7 @@
-<tool id="mob_recon" name="MOB-Recon" version="2.0.5+galaxy0">
+<tool id="mob_recon" name="MOB-Recon" version="3.0.0">
   <description>Type contigs and extract plasmid sequences</description>
   <requirements>
-     <requirement type="package" version="2.0.5">mob_suite</requirement>
+     <requirement type="package" version="3.0.0">mob_suite</requirement>
   </requirements>
   <version_command>mob_recon --version</version_command>
   <command detect_errors="exit_code">
@@ -12,17 +12,51 @@
     #set $named_input = re.sub(r'(\s|\(|\)|:|!)', '_', str($input.element_identifier)+'.fasta')
     ln -s '$input' '$named_input' &&
     
-    mob_recon  --num_threads \${GALAXY_SLOTS:-4}  --infile '${named_input}'
-    #if str($adv_param.unicycler_contigs) == "True":
+
+    mob_recon  --num_threads \${GALAXY_SLOTS:-4}  --infile '${named_input}' --run_typer
+
+    #if $adv_param.unicycler_contigs:
       --unicycler_contigs 
     #end if   
-    #if str($adv_param.run_circlator) == "True":
-      --run_circlator 
+
+    #if $adv_param.run_overhang:
+      --run_overhang
     #end if 
-    #if str($adv_param.min_length_condition.min_length_param) == "True":
-      --min_length '${adv_param.min_length_condition.min_length_value}'
-    #end if 
-    --run_typer --min_rep_evalue '${adv_param.min_rep_evalue}'
+    
+    #if $adv_param.debug:
+      --debug
+    #end if
+
+    #if $adv_param.plasmid_db
+      --plasmid_db '$adv_param.plasmid_db'
+    #end if
+
+    #if $adv_param.plasmid_mash_db
+      --plasmid_mash_db '$adv_param.plasmid_mash_db'
+    #end if
+
+    #if $adv_param.plasmid_meta
+      --plasmid_meta '$adv_param.plasmid_meta'
+    #end if
+
+    #if $adv_param.repetitive_mask
+      --repetitive_mask '$adv_param.repetitive_mask'
+    #end if
+
+   #if $adv_param.plasmid_mob
+   --plasmid_mob '$adv_param.plasmid_mob'
+   #end if
+   
+   #if $adv_param.plasmid_mpf
+   --plasmid_mpf '$adv_param.plasmid_mpf'
+   #end if
+
+   #if $adv_param.plasmid_orit
+   --plasmid_orit '$adv_param.plasmid_orit'
+   #end if
+    #if $adv_param.plasmid_replicons# --plasmid_replicons '$adv_param.plasmid_replicons' #end if#
+    --min_length '${adv_param.min_length}'  
+    --min_rpp_evalue '${adv_param.min_rpp_evalue}'
     --min_rep_evalue '${adv_param.min_rep_evalue}'
     --min_mob_evalue '${adv_param.min_mob_evalue}'
     --min_con_evalue '${adv_param.min_con_evalue}'
@@ -30,6 +64,11 @@
     --min_mob_ident '${adv_param.min_mob_ident}'
     --min_con_ident  '${adv_param.min_con_ident}'
     --min_rpp_ident   '${adv_param.min_rpp_ident}'
+
+    --min_rep_cov '${adv_param.min_rep_cov}'
+    --min_mob_cov '${adv_param.min_mob_cov}'
+    --min_con_cov '${adv_param.min_con_cov}'
+    --min_rpp_cov '${adv_param.min_rpp_cov}'
     --outdir 'outdir' &&
     mkdir ./outdir/plasmids && (mv outdir/plasmid*.fasta ./outdir/plasmids 2> /dev/null || true)
   ]]>  
@@ -37,38 +76,38 @@
   <inputs>
     <param name="input" type="data" format="fasta" label="Input" help="FASTA file with contig(s)"/>
     <section name="adv_param" title="Advanced parameters" expanded="False">
-      <param name="unicycler_contigs" label="Check for circularity flag generated by unicycler in contigs fasta headers" type="select" value="True">
-        <option value="True">Yes</option>
-        <option value="False">No</option>
-      </param>
-      <param name="run_circlator" label="Run circlator minums2 pipeline to check for circular contigs" type="select" value="True">
-        <option value="True">Yes</option>
-        <option value="False">No</option>
-      </param>
-      <conditional name="min_length_condition">
-        <param name="min_length_param" label="Minimum length of contigs to process" type="select" value="False">
-          <option value="False">No</option>
-          <option value="True">Yes</option>
-       </param>   
-        <when value="True">
-          <param name="min_length_value" type="integer" value="500" min="50"/> 
-        </when>  
-        <when value="False"/>
-      </conditional> 
+      <param name="unicycler_contigs" type="boolean" truevalue="true" falsevalue="" checked="true" label="Check for circularity flag generated by unicycler in contigs fasta headers?"/>
+      <param name="run_overhang" type="boolean" truevalue="true" falsevalue="" checked="true" label="Detect circular contigs (i.e. potential plasmids) with assembly overhangs?"/>  
+      <param name="debug" type="boolean" truevalue="true" falsevalue="" checked="false" label="Provide debug information?"/>
+      
       <param name="min_rep_evalue" label="Minimum evalue threshold for replicon blastn" type="float"  min="0.00001" max="1" value="0.00001"/>
       <param name="min_mob_evalue" label="Minimum evalue threshold for relaxase tblastn" type="float"  min="0.00001" max="1" value="0.00001"/>
       <param name="min_con_evalue" label="Minimum evalue threshold for contig blastn" type="float"  min="0.00001" max="1" value="0.00001"/>
       <param name="min_rpp_evalue" label="Minimum evalue threshold for repetitve elements blastn" type="float"  min="0.00001" max="1" value="0.00001"/>
+      <param name="min_length" label="Minimum length of contigs to classify" type="integer" value="1000"/>
       <param name="min_rep_ident" label="Minimum sequence identity for replicons" type="integer"  min="0" max="100" value="80"/>
       <param name="min_mob_ident" label="Minimum sequence identity for relaxases" type="integer"  min="0" max="100" value="80"/>
       <param name="min_con_ident" label="Minimum sequence identity for contigs" type="integer"  min="0" max="100" value="80"/>
       <param name="min_rpp_ident" label="Minimum sequence identity for repetitive elements" type="integer"  min="0" max="100" value="80"/>
+
+      <param name="min_rep_cov" label="Minimum percentage coverage of replicon query by input assembly" type="integer"  min="0" max="100" value="80"/>
+      <param name="min_mob_cov" label="Minimum percentage coverage of relaxase query by input assembly" type="integer"  min="0" max="100" value="80"/>
+      <param name="min_con_cov" label="Minimum percentage coverage of assembly contig by the plasmid reference database to be considered" type="integer"  min="0" max="100" value="60"/>
+      <param name="min_rpp_cov" label="Minimum percentage coverage of contigs by repetitive elements" type="integer"  min="0" max="100" value="80"/>
+
+      <param name="plasmid_db" optional="true" type="data" format="fasta" label="Reference Database of complete plasmids" help=""/>
+      <param name="plasmid_mash_db" optional="true" type="data" format="binary" label="Custom MASH database of plasmids" help="MASH sketch of the reference plasmids database"/>
+      <param name="plasmid_meta" type="data" optional="true" format="txt" label="Plasmid cluster metadata file" help=""/>
+      <param name="plasmid_replicons" type="data" optional="true" format="fasta" label="FASTA file with plasmid replicons" help=""/>
+      <param name="repetitive_mask" type="data" optional="true" format="fasta" label="FASTA of known repetitive elements" help=""/>
+      <param name="plasmid_mob" type="data" optional="true" format="fasta" label="FASTA of plasmid relaxases" help=""/>
+      <param name="plasmid_mpf" type="data" optional="true" format="fasta" label="FASTA of known plasmid mate-pair proteins" help=""/>
+      <param name="plasmid_orit" type="data" optional="true" format="fasta" label="FASTA of known plasmid oriT dna sequences" help=""/>
     </section>  
   </inputs>
   <outputs>
     <data name="contig_report" format="tabular" from_work_dir="outdir/contig_report.txt" label="${tool.name} on ${input.element_identifier}: Overall contig MOB-recon report"/> 
-    <data name="repetitive_blast_report" format="tabular" from_work_dir="outdir/repetitive_blast_report.txt" label="${tool.name} on ${input.element_identifier}: Repetitive elements BLAST report"/>
-    <data name="mobtyper_aggregate_report" format="tabular" from_work_dir="outdir/mobtyper_aggregate_report.txt" label="${tool.name} on ${input.element_identifier}: Aggregate MOB-typer report for all contigs"/>
+    <data name="mobtyper_aggregate_report" format="tabular" from_work_dir="outdir/mobtyper_results.txt" label="${tool.name} on ${input.element_identifier}: Aggregate MOB-typer report for all contigs"/>
     <data name="chromosome" format="fasta" from_work_dir="outdir/chromosome.fasta" label="${tool.name} on ${input.element_identifier}: Chromosomal sequences"/>
     <collection name="plasmids" type="list" label="${tool.name} on ${input.element_identifier}: Plasmids">
       <discover_datasets pattern="__name_and_ext__" directory="outdir/plasmids" />
@@ -76,14 +115,24 @@
   </outputs>
   <tests>
     <test>
-      <param name="input" value="plasmid_476.fasta" ftype="fasta"/>
+      <param name="input" value="Ecoli_strain_KV7_complete_LT795502.fasta" ftype="fasta"/>
       <section name="adv_param">
         <param name="unicycler_contigs" value="True"/>
-        <param name="run_circlator"  value="True"/>
+        <param name="run_overhang" value="True"/>
       </section>
       <output name="contig_report">
         <assert_contents>
-          <has_text text="NC_019097"/>
+          <has_text text="chromosome"/>
+          <has_text text="plasmid"/>
+          <has_text text="IncHI1A"/>
+          <has_text text="IncN"/>
+        </assert_contents>
+      </output>
+      <output name="mobtyper_aggregate_report">
+        <assert_contents>
+          <has_text text="conjugative"/>
+          <has_text text="Gammaproteobacteria"/>
+          <has_text text="223020"/>
         </assert_contents>
       </output>
     </test>