changeset 9:d64fe390f3c9 draft

"Fix test files used in Galaxy test workflow"
author althonos
date Thu, 24 Feb 2022 16:35:48 +0000
parents cebc53d02da6
children 9156eb4ee20c
files CHANGELOG.md gecco.xml test-data/BGC0001866.1_cluster_1.gbk test-data/clusters.tsv test-data/sideload.json
diffstat 5 files changed, 27 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/CHANGELOG.md	Tue Feb 22 16:04:07 2022 +0000
+++ b/CHANGELOG.md	Thu Feb 24 16:35:48 2022 +0000
@@ -5,7 +5,12 @@
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
-[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.9...master
+[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.10...master
+
+## [v0.8.10] - 2022-02-23
+[v0.8.10]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.9...v0.8.10
+### Fixed
+- `--antismash-sideload` flag of `gecco run` causing command to crash.
 
 ## [v0.8.9] - 2022-02-22
 [v0.8.9]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.8...v0.8.9
--- a/gecco.xml	Tue Feb 22 16:04:07 2022 +0000
+++ b/gecco.xml	Thu Feb 24 16:35:48 2022 +0000
@@ -1,8 +1,8 @@
 <?xml version='1.0' encoding='utf-8'?>
-<tool id="gecco" name="GECCO" version="0.8.5" python_template_version="3.5">
+<tool id="gecco" name="GECCO" version="0.8.10" python_template_version="3.5">
     <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description>
     <requirements>
-        <requirement type="package" version="0.8.5">gecco</requirement>
+        <requirement type="package" version="0.8.10">gecco</requirement>
     </requirements>
     <version_command>gecco --version</version_command>
     <command detect_errors="aggressive"><![CDATA[
@@ -18,7 +18,11 @@
         --format $input.ext
         --genome input_tempfile.$file_extension
         --postproc $postproc
+        --edge-distance $edge_distance
         --force-clusters-tsv
+        #if $mask
+            --mask
+        #end if
         #if $cds:
             --cds $cds
         #end if
@@ -38,12 +42,14 @@
     ]]></command>
     <inputs>
         <param name="input" type="data" format="genbank,fasta,embl" label="Sequence file in GenBank, EMBL or FASTA format"/>
+        <param argument="--mask" type="boolean" checked="false" label="Enable masking of regions with unknown nucleotides when finding ORFs"/>
         <param argument="--cds" type="integer" min="0" value="" optional="true" label="Minimum number of genes required for a cluster"/>
         <param argument="--threshold" type="float" min="0" max="1" value="" optional="true" label="Probability threshold for cluster detection"/>
         <param argument="--postproc" type="select" label="Post-processing method for gene cluster validation">
             <option value="antismash">antiSMASH</option>
             <option value="gecco" selected="true">GECCO</option>
         </param>
+        <param argument="--edge-distance" type="integer" min="0" value="10" label="Number of genes from the contig edges to filter out"/>
         <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/>
     </inputs>
     <outputs>
@@ -61,6 +67,12 @@
             <param name="input" value="BGC0001866.fna"/>
             <output name="features" file="features.tsv"/>
             <output name="clusters" file="clusters.tsv"/>
+        </test>
+        <test>
+            <param name="input" value="BGC0001866.fna"/>
+            <param name="edge_distance" value="0"/>
+            <output name="features" file="features.tsv"/>
+            <output name="clusters" file="clusters.tsv"/>
             <output_collection name="records" type="list">
                 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/>
             </output_collection>
@@ -68,6 +80,7 @@
         <test>
             <param name="input" value="BGC0001866.fna"/>
             <param name="antismash_sideload" value="True"/>
+            <param name="edge_distance" value="0"/>
             <output name="features" file="features.tsv"/>
             <output name="clusters" file="clusters.tsv"/>
             <output name="sideload" file="sideload.json"/>
--- a/test-data/BGC0001866.1_cluster_1.gbk	Tue Feb 22 16:04:07 2022 +0000
+++ b/test-data/BGC0001866.1_cluster_1.gbk	Thu Feb 24 16:35:48 2022 +0000
@@ -15,7 +15,7 @@
   JOURNAL   bioRxiv (2021.05.03.442509)
   REMARK    doi:10.1101/2021.05.03.442509
 COMMENT     ##GECCO-Data-START##
-            version                :: GECCO v0.8.5
+            version                :: GECCO v0.8.10
             creation_date          :: 2021-11-21T16:33:58.470847
             biosyn_class           :: Polyketide
             alkaloid_probability   :: 0.0
@@ -23,8 +23,7 @@
             ripp_probability       :: 0.0
             saccharide_probability :: 0.0
             terpene_probability    :: 0.0
-            nrp_probability        :: 0.14
-            other_probability      :: 0.0
+            nrp_probability        :: 0.09999999999999998
             ##GECCO-Data-END##
 FEATURES             Location/Qualifiers
      CDS             complement(1..1143)
--- a/test-data/clusters.tsv	Tue Feb 22 16:04:07 2022 +0000
+++ b/test-data/clusters.tsv	Thu Feb 24 16:35:48 2022 +0000
@@ -1,2 +1,2 @@
-sequence_id	bgc_id	start	end	average_p	max_p	type	alkaloid_probability	polyketide_probability	ripp_probability	saccharide_probability	terpene_probability	nrp_probability	other_probability	proteins	domains
-BGC0001866.1	BGC0001866.1_cluster_1	347	32979	0.9969495815733557	0.9999999447224028	Polyketide	0.0	0.98	0.0	0.0	0.0	0.14	0.0	BGC0001866.1_1;BGC0001866.1_2;BGC0001866.1_3;BGC0001866.1_4;BGC0001866.1_5;BGC0001866.1_6;BGC0001866.1_7;BGC0001866.1_8;BGC0001866.1_9;BGC0001866.1_10;BGC0001866.1_11;BGC0001866.1_12;BGC0001866.1_13;BGC0001866.1_14;BGC0001866.1_15;BGC0001866.1_16;BGC0001866.1_17;BGC0001866.1_18;BGC0001866.1_19;BGC0001866.1_20;BGC0001866.1_21;BGC0001866.1_22;BGC0001866.1_23	PF00106;PF00107;PF00109;PF00135;PF00394;PF00550;PF00698;PF00743;PF00891;PF00975;PF02801;PF06609;PF07690;PF07731;PF08241;PF08242;PF08493;PF08659;PF13434;PF13489;PF13649;PF13847;PF14765;PF16073;PF16197
+sequence_id	bgc_id	start	end	average_p	max_p	type	alkaloid_probability	polyketide_probability	ripp_probability	saccharide_probability	terpene_probability	nrp_probability	proteins	domains
+BGC0001866.1	BGC0001866.1_cluster_1	347	32979	0.9969495815733557	0.9999999447224028	Polyketide	0.0	0.98	0.0	0.0	0.0	0.09999999999999998	BGC0001866.1_1;BGC0001866.1_2;BGC0001866.1_3;BGC0001866.1_4;BGC0001866.1_5;BGC0001866.1_6;BGC0001866.1_7;BGC0001866.1_8;BGC0001866.1_9;BGC0001866.1_10;BGC0001866.1_11;BGC0001866.1_12;BGC0001866.1_13;BGC0001866.1_14;BGC0001866.1_15;BGC0001866.1_16;BGC0001866.1_17;BGC0001866.1_18;BGC0001866.1_19;BGC0001866.1_20;BGC0001866.1_21;BGC0001866.1_22;BGC0001866.1_23	PF00106;PF00107;PF00109;PF00135;PF00394;PF00550;PF00698;PF00743;PF00891;PF00975;PF02801;PF06609;PF07690;PF07731;PF08241;PF08242;PF08493;PF08659;PF13434;PF13489;PF13649;PF13847;PF14765;PF16073;PF16197
--- a/test-data/sideload.json	Tue Feb 22 16:04:07 2022 +0000
+++ b/test-data/sideload.json	Thu Feb 24 16:35:48 2022 +0000
@@ -8,8 +8,7 @@
                         "alkaloid_probability": "0.000",
                         "average_p": "0.997",
                         "max_p": "1.000",
-                        "nrp_probability": "0.140",
-                        "other_probability": "0.000",
+                        "nrp_probability": "0.100",
                         "polyketide_probability": "0.980",
                         "ripp_probability": "0.000",
                         "saccharide_probability": "0.000",
@@ -31,6 +30,6 @@
         },
         "description": "Biosynthetic Gene Cluster prediction with Conditional Random Fields.",
         "name": "GECCO",
-        "version": "0.8.5"
+        "version": "0.8.10"
     }
 }
\ No newline at end of file