annotate gecco.xml @ 21:6ba37b7dea42 draft

Release v0.9.7
author althonos
date Fri, 26 May 2023 12:10:43 +0000
parents 64b724dd8d04
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
1 <?xml version='1.0' encoding='utf-8'?>
19
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
2 <tool id="gecco" name="GECCO" version="0.9.6" python_template_version="3.5">
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
3 <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description>
19
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
4 <creator>
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
5 <organization name="Zeller Team" url="https://www.embl.org/groups/zeller/"/>
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
6 </creator>
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
7 <edam_topics>
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
8 <edam_topic>topic_0080</edam_topic>
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
9 </edam_topics>
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
10 <edam_operations>
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
11 <edam_operation>operation_0415</edam_operation>
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
12 </edam_operations>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
13 <requirements>
19
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
14 <requirement type="package" version="0.9.6">gecco</requirement>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
15 </requirements>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
16 <version_command>gecco --version</version_command>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
17 <command detect_errors="aggressive"><![CDATA[
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
18
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
19 #if str($input.ext) == 'genbank':
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
20 #set $file_extension = 'gbk'
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
21 #else:
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
22 #set $file_extension = $input.ext
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
23 #end if
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
24 ln -s '$input' input_tempfile.$file_extension &&
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
25
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
26 gecco -vv run
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
27 --format $input.ext
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
28 --genome input_tempfile.$file_extension
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
29 --postproc $postproc
14
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
30 --force-tsv
20
64b724dd8d04 Regenerate Galaxy test files with Pyrodigal `v2.1.0`
althonos
parents: 19
diff changeset
31 --jobs "\${GALAXY_SLOTS:-4}"
14
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
32 #if $edge_distance
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
33 --edge-distance $edge_distance
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
34 #end if
9
d64fe390f3c9 "Fix test files used in Galaxy test workflow"
althonos
parents: 4
diff changeset
35 #if $mask
d64fe390f3c9 "Fix test files used in Galaxy test workflow"
althonos
parents: 4
diff changeset
36 --mask
d64fe390f3c9 "Fix test files used in Galaxy test workflow"
althonos
parents: 4
diff changeset
37 #end if
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
38 #if $cds:
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
39 --cds $cds
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
40 #end if
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
41 #if $threshold:
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
42 --threshold $threshold
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
43 #end if
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
44 #if $antismash_sideload:
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
45 --antismash-sideload
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
46 #end if
19
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
47 #unless $pad:
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
48 --no-pad
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
49 #end unless
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
50
14
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
51 && mv input_tempfile.genes.tsv '$genes'
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
52 && mv input_tempfile.features.tsv '$features'
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
53 && mv input_tempfile.clusters.tsv '$clusters'
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
54 #if $antismash_sideload
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
55 && mv input_tempfile.sideload.json '$sideload'
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
56 #end if
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
57
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
58 ]]></command>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
59 <inputs>
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
60 <param name="input" type="data" format="genbank,fasta,embl" label="Sequence file in GenBank, EMBL or FASTA format"/>
9
d64fe390f3c9 "Fix test files used in Galaxy test workflow"
althonos
parents: 4
diff changeset
61 <param argument="--mask" type="boolean" checked="false" label="Enable masking of regions with unknown nucleotides when finding ORFs"/>
19
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
62 <param argument="--pad" type="boolean" checked="true" label="Enable padding of gene sequences smaller than the CRF window length"/>
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
63 <param argument="--cds" type="integer" min="0" value="" optional="true" label="Minimum number of genes required for a cluster"/>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
64 <param argument="--threshold" type="float" min="0" max="1" value="" optional="true" label="Probability threshold for cluster detection"/>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
65 <param argument="--postproc" type="select" label="Post-processing method for gene cluster validation">
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
66 <option value="antismash">antiSMASH</option>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
67 <option value="gecco" selected="true">GECCO</option>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
68 </param>
14
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
69 <param argument="--edge-distance" type="integer" min="0" optional="true" value="" label="Number of genes from the contig edges to filter out"/>
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
70 <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
71 </inputs>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
72 <outputs>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
73 <collection name="records" type="list" label="${tool.name} detected Biosynthetic Gene Clusters on ${on_string} (GenBank)">
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
74 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" ext="genbank" visible="false" />
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
75 </collection>
14
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
76 <data name="genes" format="tabular" label="${tool.name} summary of detected genes on ${on_string} (TSV)"/>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
77 <data name="features" format="tabular" label="${tool.name} summary of detected features on ${on_string} (TSV)"/>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
78 <data name="clusters" format="tabular" label="${tool.name} summary of detected BGCs on ${on_string} (TSV)"/>
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
79 <data name="sideload" format="json" label="antiSMASH v6 sideload file with ${tool.name} detected BGCs on ${on_string} (JSON)">
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
80 <filter>antismash_sideload</filter>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
81 </data>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
82 </outputs>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
83 <tests>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
84 <test>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
85 <param name="input" value="BGC0001866.fna"/>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
86 <output name="features" file="features.tsv"/>
14
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
87 <output name="genes" file="genes.tsv"/>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
88 <output name="clusters" file="clusters.tsv"/>
19
cc91d730cc4f Fix syntax of Galaxy script for GECCO
althonos
parents: 14
diff changeset
89 <param name="edge_distance" value="10"/>
9
d64fe390f3c9 "Fix test files used in Galaxy test workflow"
althonos
parents: 4
diff changeset
90 </test>
d64fe390f3c9 "Fix test files used in Galaxy test workflow"
althonos
parents: 4
diff changeset
91 <test>
d64fe390f3c9 "Fix test files used in Galaxy test workflow"
althonos
parents: 4
diff changeset
92 <param name="input" value="BGC0001866.fna"/>
d64fe390f3c9 "Fix test files used in Galaxy test workflow"
althonos
parents: 4
diff changeset
93 <output name="features" file="features.tsv"/>
14
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
94 <output name="genes" file="genes.tsv"/>
9
d64fe390f3c9 "Fix test files used in Galaxy test workflow"
althonos
parents: 4
diff changeset
95 <output name="clusters" file="clusters.tsv"/>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
96 <output_collection name="records" type="list">
4
88dc16b4f583 "Fix number of allowed different lines in `galaxy/gecco.xml` tests"
althonos
parents: 3
diff changeset
97 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
98 </output_collection>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
99 </test>
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
100 <test>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
101 <param name="input" value="BGC0001866.fna"/>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
102 <param name="antismash_sideload" value="True"/>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
103 <output name="features" file="features.tsv"/>
14
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
104 <output name="genes" file="genes.tsv"/>
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
105 <output name="clusters" file="clusters.tsv"/>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
106 <output name="sideload" file="sideload.json"/>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
107 <output_collection name="records" type="list">
4
88dc16b4f583 "Fix number of allowed different lines in `galaxy/gecco.xml` tests"
althonos
parents: 3
diff changeset
108 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/>
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
109 </output_collection>
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
110 </test>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
111 </tests>
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
112 <help><![CDATA[
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
113
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
114 Overview
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
115 --------
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
116
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
117 GECCO (Gene Cluster prediction with Conditional Random Fields) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
118 It is developed in the Zeller group and is part of the suite of computational microbiome analysis tools hosted at EMBL.
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
119
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
120 Input
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
121 -----
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
122
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
123 GECCO works with DNA sequences, and loads them using Biopython, allowing it to support a large variety of formats, including the common FASTA, GenBank and EMBL files.
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
124
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
125 Output
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
126 ------
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
127
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
128 GECCO will create the following files once done (using the same prefix as the input file):
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
129
14
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
130 - ``genes.tsv``: The genes file, containing the genes identified in the input sequences.
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
131 - ``features.tsv``: The features file, containing the protein domains identified in the input sequences.
56b924f62165 "Update tests files for Galaxy tool wrapper"
althonos
parents: 9
diff changeset
132 - ``clusters.tsv``: The clusters file, containing the coordinates of the predicted clusters, along with their putative biosynthetic type.
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
133 - ``{sequence}_cluster_{N}.gbk``: If any BGCs were found, a GenBank file per cluster, containing the cluster sequence annotated with its member proteins and domains.
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
134
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
135 Contact
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
136 -------
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
137
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
138 If you have any questions about GECCO, if you run into any issue, or if you would like to make a feature request, please create an issue in the
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
139 `GitHub repository <https://github.com/zellerlab/gecco>`_. You can also directly contact `Martin Larralde via email <mailto:martin.larralde@embl.de>`_.
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
140 If you want to contribute to GECCO, please have a look at the contribution guide first, and feel free to open a pull request on the GitHub repository.
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
141
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
142 ]]></help>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
143 <citations>
3
359232b58f6a "Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents: 0
diff changeset
144 <citation type="doi">10.1101/2021.05.03.442509</citation>
0
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
145 </citations>
1625927fc16f "Release v0.8.4"
althonos
parents:
diff changeset
146 </tool>