Mercurial > repos > althonos > gecco
annotate gecco.xml @ 21:6ba37b7dea42 draft
Release v0.9.7
author | althonos |
---|---|
date | Fri, 26 May 2023 12:10:43 +0000 |
parents | 64b724dd8d04 |
children |
rev | line source |
---|---|
0 | 1 <?xml version='1.0' encoding='utf-8'?> |
19 | 2 <tool id="gecco" name="GECCO" version="0.9.6" python_template_version="3.5"> |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
3 <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description> |
19 | 4 <creator> |
5 <organization name="Zeller Team" url="https://www.embl.org/groups/zeller/"/> | |
6 </creator> | |
7 <edam_topics> | |
8 <edam_topic>topic_0080</edam_topic> | |
9 </edam_topics> | |
10 <edam_operations> | |
11 <edam_operation>operation_0415</edam_operation> | |
12 </edam_operations> | |
0 | 13 <requirements> |
19 | 14 <requirement type="package" version="0.9.6">gecco</requirement> |
0 | 15 </requirements> |
16 <version_command>gecco --version</version_command> | |
17 <command detect_errors="aggressive"><![CDATA[ | |
18 | |
19 #if str($input.ext) == 'genbank': | |
20 #set $file_extension = 'gbk' | |
21 #else: | |
22 #set $file_extension = $input.ext | |
23 #end if | |
24 ln -s '$input' input_tempfile.$file_extension && | |
25 | |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
26 gecco -vv run |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
27 --format $input.ext |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
28 --genome input_tempfile.$file_extension |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
29 --postproc $postproc |
14 | 30 --force-tsv |
20
64b724dd8d04
Regenerate Galaxy test files with Pyrodigal `v2.1.0`
althonos
parents:
19
diff
changeset
|
31 --jobs "\${GALAXY_SLOTS:-4}" |
14 | 32 #if $edge_distance |
33 --edge-distance $edge_distance | |
34 #end if | |
9 | 35 #if $mask |
36 --mask | |
37 #end if | |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
38 #if $cds: |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
39 --cds $cds |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
40 #end if |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
41 #if $threshold: |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
42 --threshold $threshold |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
43 #end if |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
44 #if $antismash_sideload: |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
45 --antismash-sideload |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
46 #end if |
19 | 47 #unless $pad: |
48 --no-pad | |
49 #end unless | |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
50 |
14 | 51 && mv input_tempfile.genes.tsv '$genes' |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
52 && mv input_tempfile.features.tsv '$features' |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
53 && mv input_tempfile.clusters.tsv '$clusters' |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
54 #if $antismash_sideload |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
55 && mv input_tempfile.sideload.json '$sideload' |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
56 #end if |
0 | 57 |
58 ]]></command> | |
59 <inputs> | |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
60 <param name="input" type="data" format="genbank,fasta,embl" label="Sequence file in GenBank, EMBL or FASTA format"/> |
9 | 61 <param argument="--mask" type="boolean" checked="false" label="Enable masking of regions with unknown nucleotides when finding ORFs"/> |
19 | 62 <param argument="--pad" type="boolean" checked="true" label="Enable padding of gene sequences smaller than the CRF window length"/> |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
63 <param argument="--cds" type="integer" min="0" value="" optional="true" label="Minimum number of genes required for a cluster"/> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
64 <param argument="--threshold" type="float" min="0" max="1" value="" optional="true" label="Probability threshold for cluster detection"/> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
65 <param argument="--postproc" type="select" label="Post-processing method for gene cluster validation"> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
66 <option value="antismash">antiSMASH</option> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
67 <option value="gecco" selected="true">GECCO</option> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
68 </param> |
14 | 69 <param argument="--edge-distance" type="integer" min="0" optional="true" value="" label="Number of genes from the contig edges to filter out"/> |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
70 <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/> |
0 | 71 </inputs> |
72 <outputs> | |
73 <collection name="records" type="list" label="${tool.name} detected Biosynthetic Gene Clusters on ${on_string} (GenBank)"> | |
74 <discover_datasets pattern="(?P<designation>.*)\.gbk" ext="genbank" visible="false" /> | |
75 </collection> | |
14 | 76 <data name="genes" format="tabular" label="${tool.name} summary of detected genes on ${on_string} (TSV)"/> |
0 | 77 <data name="features" format="tabular" label="${tool.name} summary of detected features on ${on_string} (TSV)"/> |
78 <data name="clusters" format="tabular" label="${tool.name} summary of detected BGCs on ${on_string} (TSV)"/> | |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
79 <data name="sideload" format="json" label="antiSMASH v6 sideload file with ${tool.name} detected BGCs on ${on_string} (JSON)"> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
80 <filter>antismash_sideload</filter> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
81 </data> |
0 | 82 </outputs> |
83 <tests> | |
84 <test> | |
85 <param name="input" value="BGC0001866.fna"/> | |
86 <output name="features" file="features.tsv"/> | |
14 | 87 <output name="genes" file="genes.tsv"/> |
0 | 88 <output name="clusters" file="clusters.tsv"/> |
19 | 89 <param name="edge_distance" value="10"/> |
9 | 90 </test> |
91 <test> | |
92 <param name="input" value="BGC0001866.fna"/> | |
93 <output name="features" file="features.tsv"/> | |
14 | 94 <output name="genes" file="genes.tsv"/> |
9 | 95 <output name="clusters" file="clusters.tsv"/> |
0 | 96 <output_collection name="records" type="list"> |
4
88dc16b4f583
"Fix number of allowed different lines in `galaxy/gecco.xml` tests"
althonos
parents:
3
diff
changeset
|
97 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> |
0 | 98 </output_collection> |
99 </test> | |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
100 <test> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
101 <param name="input" value="BGC0001866.fna"/> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
102 <param name="antismash_sideload" value="True"/> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
103 <output name="features" file="features.tsv"/> |
14 | 104 <output name="genes" file="genes.tsv"/> |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
105 <output name="clusters" file="clusters.tsv"/> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
106 <output name="sideload" file="sideload.json"/> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
107 <output_collection name="records" type="list"> |
4
88dc16b4f583
"Fix number of allowed different lines in `galaxy/gecco.xml` tests"
althonos
parents:
3
diff
changeset
|
108 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
109 </output_collection> |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
110 </test> |
0 | 111 </tests> |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
112 <help><![CDATA[ |
0 | 113 |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
114 Overview |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
115 -------- |
0 | 116 |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
117 GECCO (Gene Cluster prediction with Conditional Random Fields) is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs). |
0 | 118 It is developed in the Zeller group and is part of the suite of computational microbiome analysis tools hosted at EMBL. |
119 | |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
120 Input |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
121 ----- |
0 | 122 |
123 GECCO works with DNA sequences, and loads them using Biopython, allowing it to support a large variety of formats, including the common FASTA and GenBank files. | |
124 | |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
125 Output |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
126 ------ |
0 | 127 |
128 GECCO will create the following files once done (using the same prefix as the input file): | |
129 | |
14 | 130 - ``genes.tsv``: The genes file, containing the genes identified in the input sequences. |
131 - ``features.tsv``: The features file, containing the protein domains identified in the input sequences. | |
132 - ``clusters.tsv``: The clusters file, containing the coordinates of the predicted clusters, along with their putative biosynthetic type. | |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
133 - ``{sequence}_cluster_{N}.gbk``: If any BGCs were found, a GenBank file per cluster, containing the cluster sequence annotated with its member proteins and domains. |
0 | 134 |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
135 Contact |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
136 ------- |
0 | 137 |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
138 If you have any questions about GECCO, if you run into any issue, or if you would like to make a feature request, please create an issue in the |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
139 `GitHub repository <https://github.com/zellerlab/gecco>`_. You can also directly contact `Martin Larralde via email <mailto:martin.larralde@embl.de>`_. |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
140 If you want to contribute to GECCO, please have a look at the contribution guide first, and feel free to open a pull request on the GitHub repository. |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
141 |
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
142 ]]></help> |
0 | 143 <citations> |
3
359232b58f6a
"Update Galaxy tool wrapper to follow the IUC best practices"
althonos
parents:
0
diff
changeset
|
144 <citation type="doi">10.1101/2021.05.03.442509</citation> |
0 | 145 </citations> |
146 </tool> |