comparison gecco.xml @ 14:56b924f62165 draft

"Update tests files for Galaxy tool wrapper"
author althonos
date Tue, 05 Apr 2022 23:18:49 +0000
parents d64fe390f3c9
children cc91d730cc4f
comparison
equal deleted inserted replaced
13:fde43648cba0 14:56b924f62165
1 <?xml version='1.0' encoding='utf-8'?> 1 <?xml version='1.0' encoding='utf-8'?>
2 <tool id="gecco" name="GECCO" version="0.8.10" python_template_version="3.5"> 2 <tool id="gecco" name="GECCO" version="0.9.1" python_template_version="3.5">
3 <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description> 3 <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description>
4 <requirements> 4 <requirements>
5 <requirement type="package" version="0.8.10">gecco</requirement> 5 <requirement type="package" version="0.9.1">gecco</requirement>
6 </requirements> 6 </requirements>
7 <version_command>gecco --version</version_command> 7 <version_command>gecco --version</version_command>
8 <command detect_errors="aggressive"><![CDATA[ 8 <command detect_errors="aggressive"><![CDATA[
9 9
10 #if str($input.ext) == 'genbank': 10 #if str($input.ext) == 'genbank':
16 16
17 gecco -vv run 17 gecco -vv run
18 --format $input.ext 18 --format $input.ext
19 --genome input_tempfile.$file_extension 19 --genome input_tempfile.$file_extension
20 --postproc $postproc 20 --postproc $postproc
21 --edge-distance $edge_distance 21 --force-tsv
22 --force-clusters-tsv 22 #if $edge_distance
23 --edge-distance $edge_distance
24 #end if
23 #if $mask 25 #if $mask
24 --mask 26 --mask
25 #end if 27 #end if
26 #if $cds: 28 #if $cds:
27 --cds $cds 29 --cds $cds
31 #end if 33 #end if
32 #if $antismash_sideload: 34 #if $antismash_sideload:
33 --antismash-sideload 35 --antismash-sideload
34 #end if 36 #end if
35 37
38 && mv input_tempfile.genes.tsv '$genes'
36 && mv input_tempfile.features.tsv '$features' 39 && mv input_tempfile.features.tsv '$features'
37 && mv input_tempfile.clusters.tsv '$clusters' 40 && mv input_tempfile.clusters.tsv '$clusters'
38 #if $antismash_sideload 41 #if $antismash_sideload
39 && mv input_tempfile.sideload.json '$sideload' 42 && mv input_tempfile.sideload.json '$sideload'
40 #end if 43 #end if
47 <param argument="--threshold" type="float" min="0" max="1" value="" optional="true" label="Probability threshold for cluster detection"/> 50 <param argument="--threshold" type="float" min="0" max="1" value="" optional="true" label="Probability threshold for cluster detection"/>
48 <param argument="--postproc" type="select" label="Post-processing method for gene cluster validation"> 51 <param argument="--postproc" type="select" label="Post-processing method for gene cluster validation">
49 <option value="antismash">antiSMASH</option> 52 <option value="antismash">antiSMASH</option>
50 <option value="gecco" selected="true">GECCO</option> 53 <option value="gecco" selected="true">GECCO</option>
51 </param> 54 </param>
52 <param argument="--edge-distance" type="integer" min="0" value="10" label="Number of genes from the contig edges to filter out"/> 55 <param argument="--edge-distance" type="integer" min="0" optional="true" value="" label="Number of genes from the contig edges to filter out"/>
53 <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/> 56 <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/>
54 </inputs> 57 </inputs>
55 <outputs> 58 <outputs>
56 <collection name="records" type="list" label="${tool.name} detected Biosynthetic Gene Clusters on ${on_string} (GenBank)"> 59 <collection name="records" type="list" label="${tool.name} detected Biosynthetic Gene Clusters on ${on_string} (GenBank)">
57 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" ext="genbank" visible="false" /> 60 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" ext="genbank" visible="false" />
58 </collection> 61 </collection>
62 <data name="genes" format="tabular" label="${tool.name} summary of detected genes on ${on_string} (TSV)"/>
59 <data name="features" format="tabular" label="${tool.name} summary of detected features on ${on_string} (TSV)"/> 63 <data name="features" format="tabular" label="${tool.name} summary of detected features on ${on_string} (TSV)"/>
60 <data name="clusters" format="tabular" label="${tool.name} summary of detected BGCs on ${on_string} (TSV)"/> 64 <data name="clusters" format="tabular" label="${tool.name} summary of detected BGCs on ${on_string} (TSV)"/>
61 <data name="sideload" format="json" label="antiSMASH v6 sideload file with ${tool.name} detected BGCs on ${on_string} (JSON)"> 65 <data name="sideload" format="json" label="antiSMASH v6 sideload file with ${tool.name} detected BGCs on ${on_string} (JSON)">
62 <filter>antismash_sideload</filter> 66 <filter>antismash_sideload</filter>
63 </data> 67 </data>
64 </outputs> 68 </outputs>
65 <tests> 69 <tests>
66 <test> 70 <test>
67 <param name="input" value="BGC0001866.fna"/> 71 <param name="input" value="BGC0001866.fna"/>
68 <output name="features" file="features.tsv"/> 72 <output name="features" file="features.tsv"/>
73 <output name="genes" file="genes.tsv"/>
69 <output name="clusters" file="clusters.tsv"/> 74 <output name="clusters" file="clusters.tsv"/>
70 </test> 75 </test>
71 <test> 76 <test>
72 <param name="input" value="BGC0001866.fna"/> 77 <param name="input" value="BGC0001866.fna"/>
73 <param name="edge_distance" value="0"/> 78 <param name="edge_distance" value="0"/>
74 <output name="features" file="features.tsv"/> 79 <output name="features" file="features.tsv"/>
80 <output name="genes" file="genes.tsv"/>
75 <output name="clusters" file="clusters.tsv"/> 81 <output name="clusters" file="clusters.tsv"/>
76 <output_collection name="records" type="list"> 82 <output_collection name="records" type="list">
77 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> 83 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/>
78 </output_collection> 84 </output_collection>
79 </test> 85 </test>
80 <test> 86 <test>
81 <param name="input" value="BGC0001866.fna"/> 87 <param name="input" value="BGC0001866.fna"/>
82 <param name="antismash_sideload" value="True"/> 88 <param name="antismash_sideload" value="True"/>
83 <param name="edge_distance" value="0"/> 89 <param name="edge_distance" value="0"/>
84 <output name="features" file="features.tsv"/> 90 <output name="features" file="features.tsv"/>
91 <output name="genes" file="genes.tsv"/>
85 <output name="clusters" file="clusters.tsv"/> 92 <output name="clusters" file="clusters.tsv"/>
86 <output name="sideload" file="sideload.json"/> 93 <output name="sideload" file="sideload.json"/>
87 <output_collection name="records" type="list"> 94 <output_collection name="records" type="list">
88 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> 95 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/>
89 </output_collection> 96 </output_collection>
105 Output 112 Output
106 ------ 113 ------
107 114
108 GECCO will create the following files once done (using the same prefix as the input file): 115 GECCO will create the following files once done (using the same prefix as the input file):
109 116
110 - ``features.tsv``: The features file, containing the identified proteins and domains in the input sequences. 117 - ``genes.tsv``: The genes file, containing the genes identified in the input sequences.
111 - ``clusters.tsv``: If any were found, a clusters file, containing the coordinates of the predicted clusters, along their putative biosynthetic type. 118 - ``features.tsv``: The features file, containing the protein domains identified in the input sequences.
119 - ``clusters.tsv``: A clusters file, containing the coordinates of the predicted clusters, along with their putative biosynthetic type.
112 - ``{sequence}_cluster_{N}.gbk``: If any BGCs were found, a GenBank file per cluster, containing the cluster sequence annotated with its member proteins and domains. 120 - ``{sequence}_cluster_{N}.gbk``: If any BGCs were found, a GenBank file per cluster, containing the cluster sequence annotated with its member proteins and domains.
113 121
114 Contact 122 Contact
115 ------- 123 -------
116 124