diff gecco.xml @ 14:56b924f62165 draft

"Update tests files for Galaxy tool wrapper"
author althonos
date Tue, 05 Apr 2022 23:18:49 +0000
parents d64fe390f3c9
children cc91d730cc4f
line wrap: on
line diff
--- a/gecco.xml	Thu Mar 31 18:00:15 2022 +0000
+++ b/gecco.xml	Tue Apr 05 23:18:49 2022 +0000
@@ -1,8 +1,8 @@
 <?xml version='1.0' encoding='utf-8'?>
-<tool id="gecco" name="GECCO" version="0.8.10" python_template_version="3.5">
+<tool id="gecco" name="GECCO" version="0.9.1" python_template_version="3.5">
     <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description>
     <requirements>
-        <requirement type="package" version="0.8.10">gecco</requirement>
+        <requirement type="package" version="0.9.1">gecco</requirement>
     </requirements>
     <version_command>gecco --version</version_command>
     <command detect_errors="aggressive"><![CDATA[
@@ -18,8 +18,10 @@
         --format $input.ext
         --genome input_tempfile.$file_extension
         --postproc $postproc
-        --edge-distance $edge_distance
-        --force-clusters-tsv
+        --force-tsv
+        #if $edge_distance
+            --edge-distance $edge_distance
+        #end if
         #if $mask
             --mask
         #end if
@@ -33,6 +35,7 @@
             --antismash-sideload
         #end if
 
+        && mv input_tempfile.genes.tsv '$genes'
         && mv input_tempfile.features.tsv '$features'
         && mv input_tempfile.clusters.tsv '$clusters'
         #if $antismash_sideload
@@ -49,13 +52,14 @@
             <option value="antismash">antiSMASH</option>
             <option value="gecco" selected="true">GECCO</option>
         </param>
-        <param argument="--edge-distance" type="integer" min="0" value="10" label="Number of genes from the contig edges to filter out"/>
+        <param argument="--edge-distance" type="integer" min="0" optional="true" value="" label="Number of genes from the contig edges to filter out"/>
         <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/>
     </inputs>
     <outputs>
         <collection name="records" type="list" label="${tool.name} detected Biosynthetic Gene Clusters on ${on_string} (GenBank)">
             <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" ext="genbank" visible="false" />
         </collection>
+        <data name="genes" format="tabular" label="${tool.name} summary of detected genes on ${on_string} (TSV)"/>
         <data name="features" format="tabular" label="${tool.name} summary of detected features on ${on_string} (TSV)"/>
         <data name="clusters" format="tabular" label="${tool.name} summary of detected BGCs on ${on_string} (TSV)"/>
         <data name="sideload" format="json" label="antiSMASH v6 sideload file with ${tool.name} detected BGCs on ${on_string} (JSON)">
@@ -66,12 +70,14 @@
         <test>
             <param name="input" value="BGC0001866.fna"/>
             <output name="features" file="features.tsv"/>
+            <output name="genes" file="genes.tsv"/>
             <output name="clusters" file="clusters.tsv"/>
         </test>
         <test>
             <param name="input" value="BGC0001866.fna"/>
             <param name="edge_distance" value="0"/>
             <output name="features" file="features.tsv"/>
+            <output name="genes" file="genes.tsv"/>
             <output name="clusters" file="clusters.tsv"/>
             <output_collection name="records" type="list">
                 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/>
@@ -82,6 +88,7 @@
             <param name="antismash_sideload" value="True"/>
             <param name="edge_distance" value="0"/>
             <output name="features" file="features.tsv"/>
+            <output name="genes" file="genes.tsv"/>
             <output name="clusters" file="clusters.tsv"/>
             <output name="sideload" file="sideload.json"/>
             <output_collection name="records" type="list">
@@ -107,8 +114,9 @@
 
 GECCO will create the following files once done (using the same prefix as the input file):
 
-- ``features.tsv``: The features file, containing the identified proteins and domains in the input sequences.
-- ``clusters.tsv``: If any were found, a clusters file, containing the coordinates of the predicted clusters, along their putative biosynthetic type.
+- ``genes.tsv``: The genes file, containing the genes identified in the input sequences.
+- ``features.tsv``: The features file, containing the protein domains identified in the input sequences.
+- ``clusters.tsv``: A clusters file, containing the coordinates of the predicted clusters, along with their putative biosynthetic type.
 - ``{sequence}_cluster_{N}.gbk``: If any BGCs were found, a GenBank file per cluster, containing the cluster sequence annotated with its member proteins and domains.
 
 Contact