Mercurial > repos > althonos > gecco
changeset 9:d64fe390f3c9 draft
"Fix test files used in Galaxy test workflow"
author | althonos |
---|---|
date | Thu, 24 Feb 2022 16:35:48 +0000 |
parents | cebc53d02da6 |
children | 9156eb4ee20c |
files | CHANGELOG.md gecco.xml test-data/BGC0001866.1_cluster_1.gbk test-data/clusters.tsv test-data/sideload.json |
diffstat | 5 files changed, 27 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/CHANGELOG.md Tue Feb 22 16:04:07 2022 +0000 +++ b/CHANGELOG.md Thu Feb 24 16:35:48 2022 +0000 @@ -5,7 +5,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] -[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.9...master +[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.10...master + +## [v0.8.10] - 2022-02-23 +[v0.8.10]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.9...v0.8.10 +### Fixed +- `--antismash-sideload` flag of `gecco run` causing command to crash. ## [v0.8.9] - 2022-02-22 [v0.8.9]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.8...v0.8.9
--- a/gecco.xml Tue Feb 22 16:04:07 2022 +0000 +++ b/gecco.xml Thu Feb 24 16:35:48 2022 +0000 @@ -1,8 +1,8 @@ <?xml version='1.0' encoding='utf-8'?> -<tool id="gecco" name="GECCO" version="0.8.5" python_template_version="3.5"> +<tool id="gecco" name="GECCO" version="0.8.10" python_template_version="3.5"> <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description> <requirements> - <requirement type="package" version="0.8.5">gecco</requirement> + <requirement type="package" version="0.8.10">gecco</requirement> </requirements> <version_command>gecco --version</version_command> <command detect_errors="aggressive"><![CDATA[ @@ -18,7 +18,11 @@ --format $input.ext --genome input_tempfile.$file_extension --postproc $postproc + --edge-distance $edge_distance --force-clusters-tsv + #if $mask + --mask + #end if #if $cds: --cds $cds #end if @@ -38,12 +42,14 @@ ]]></command> <inputs> <param name="input" type="data" format="genbank,fasta,embl" label="Sequence file in GenBank, EMBL or FASTA format"/> + <param argument="--mask" type="boolean" checked="false" label="Enable masking of regions with unknown nucleotides when finding ORFs"/> <param argument="--cds" type="integer" min="0" value="" optional="true" label="Minimum number of genes required for a cluster"/> <param argument="--threshold" type="float" min="0" max="1" value="" optional="true" label="Probability threshold for cluster detection"/> <param argument="--postproc" type="select" label="Post-processing method for gene cluster validation"> <option value="antismash">antiSMASH</option> <option value="gecco" selected="true">GECCO</option> </param> + <param argument="--edge-distance" type="integer" min="0" value="10" label="Number of genes from the contig edges to filter out"/> <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/> </inputs> <outputs> @@ -61,6 +67,12 @@ <param name="input" value="BGC0001866.fna"/> <output name="features" file="features.tsv"/> <output name="clusters" file="clusters.tsv"/> + </test> + <test> + <param name="input" value="BGC0001866.fna"/> + <param name="edge_distance" value="0"/> + <output name="features" file="features.tsv"/> + <output name="clusters" file="clusters.tsv"/> <output_collection name="records" type="list"> <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> </output_collection> @@ -68,6 +80,7 @@ <test> <param name="input" value="BGC0001866.fna"/> <param name="antismash_sideload" value="True"/> + <param name="edge_distance" value="0"/> <output name="features" file="features.tsv"/> <output name="clusters" file="clusters.tsv"/> <output name="sideload" file="sideload.json"/>
--- a/test-data/BGC0001866.1_cluster_1.gbk Tue Feb 22 16:04:07 2022 +0000 +++ b/test-data/BGC0001866.1_cluster_1.gbk Thu Feb 24 16:35:48 2022 +0000 @@ -15,7 +15,7 @@ JOURNAL bioRxiv (2021.05.03.442509) REMARK doi:10.1101/2021.05.03.442509 COMMENT ##GECCO-Data-START## - version :: GECCO v0.8.5 + version :: GECCO v0.8.10 creation_date :: 2021-11-21T16:33:58.470847 biosyn_class :: Polyketide alkaloid_probability :: 0.0 @@ -23,8 +23,7 @@ ripp_probability :: 0.0 saccharide_probability :: 0.0 terpene_probability :: 0.0 - nrp_probability :: 0.14 - other_probability :: 0.0 + nrp_probability :: 0.09999999999999998 ##GECCO-Data-END## FEATURES Location/Qualifiers CDS complement(1..1143)
--- a/test-data/clusters.tsv Tue Feb 22 16:04:07 2022 +0000 +++ b/test-data/clusters.tsv Thu Feb 24 16:35:48 2022 +0000 @@ -1,2 +1,2 @@ -sequence_id bgc_id start end average_p max_p type alkaloid_probability polyketide_probability ripp_probability saccharide_probability terpene_probability nrp_probability other_probability proteins domains -BGC0001866.1 BGC0001866.1_cluster_1 347 32979 0.9969495815733557 0.9999999447224028 Polyketide 0.0 0.98 0.0 0.0 0.0 0.14 0.0 BGC0001866.1_1;BGC0001866.1_2;BGC0001866.1_3;BGC0001866.1_4;BGC0001866.1_5;BGC0001866.1_6;BGC0001866.1_7;BGC0001866.1_8;BGC0001866.1_9;BGC0001866.1_10;BGC0001866.1_11;BGC0001866.1_12;BGC0001866.1_13;BGC0001866.1_14;BGC0001866.1_15;BGC0001866.1_16;BGC0001866.1_17;BGC0001866.1_18;BGC0001866.1_19;BGC0001866.1_20;BGC0001866.1_21;BGC0001866.1_22;BGC0001866.1_23 PF00106;PF00107;PF00109;PF00135;PF00394;PF00550;PF00698;PF00743;PF00891;PF00975;PF02801;PF06609;PF07690;PF07731;PF08241;PF08242;PF08493;PF08659;PF13434;PF13489;PF13649;PF13847;PF14765;PF16073;PF16197 +sequence_id bgc_id start end average_p max_p type alkaloid_probability polyketide_probability ripp_probability saccharide_probability terpene_probability nrp_probability proteins domains +BGC0001866.1 BGC0001866.1_cluster_1 347 32979 0.9969495815733557 0.9999999447224028 Polyketide 0.0 0.98 0.0 0.0 0.0 0.09999999999999998 BGC0001866.1_1;BGC0001866.1_2;BGC0001866.1_3;BGC0001866.1_4;BGC0001866.1_5;BGC0001866.1_6;BGC0001866.1_7;BGC0001866.1_8;BGC0001866.1_9;BGC0001866.1_10;BGC0001866.1_11;BGC0001866.1_12;BGC0001866.1_13;BGC0001866.1_14;BGC0001866.1_15;BGC0001866.1_16;BGC0001866.1_17;BGC0001866.1_18;BGC0001866.1_19;BGC0001866.1_20;BGC0001866.1_21;BGC0001866.1_22;BGC0001866.1_23 PF00106;PF00107;PF00109;PF00135;PF00394;PF00550;PF00698;PF00743;PF00891;PF00975;PF02801;PF06609;PF07690;PF07731;PF08241;PF08242;PF08493;PF08659;PF13434;PF13489;PF13649;PF13847;PF14765;PF16073;PF16197
--- a/test-data/sideload.json Tue Feb 22 16:04:07 2022 +0000 +++ b/test-data/sideload.json Thu Feb 24 16:35:48 2022 +0000 @@ -8,8 +8,7 @@ "alkaloid_probability": "0.000", "average_p": "0.997", "max_p": "1.000", - "nrp_probability": "0.140", - "other_probability": "0.000", + "nrp_probability": "0.100", "polyketide_probability": "0.980", "ripp_probability": "0.000", "saccharide_probability": "0.000", @@ -31,6 +30,6 @@ }, "description": "Biosynthetic Gene Cluster prediction with Conditional Random Fields.", "name": "GECCO", - "version": "0.8.5" + "version": "0.8.10" } } \ No newline at end of file