Mercurial > repos > althonos > gecco
comparison gecco.xml @ 14:56b924f62165 draft
"Update tests files for Galaxy tool wrapper"
author | althonos |
---|---|
date | Tue, 05 Apr 2022 23:18:49 +0000 |
parents | d64fe390f3c9 |
children | cc91d730cc4f |
comparison
equal
deleted
inserted
replaced
13:fde43648cba0 | 14:56b924f62165 |
---|---|
1 <?xml version='1.0' encoding='utf-8'?> | 1 <?xml version='1.0' encoding='utf-8'?> |
2 <tool id="gecco" name="GECCO" version="0.8.10" python_template_version="3.5"> | 2 <tool id="gecco" name="GECCO" version="0.9.1" python_template_version="3.5"> |
3 <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description> | 3 <description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description> |
4 <requirements> | 4 <requirements> |
5 <requirement type="package" version="0.8.10">gecco</requirement> | 5 <requirement type="package" version="0.9.1">gecco</requirement> |
6 </requirements> | 6 </requirements> |
7 <version_command>gecco --version</version_command> | 7 <version_command>gecco --version</version_command> |
8 <command detect_errors="aggressive"><![CDATA[ | 8 <command detect_errors="aggressive"><![CDATA[ |
9 | 9 |
10 #if str($input.ext) == 'genbank': | 10 #if str($input.ext) == 'genbank': |
16 | 16 |
17 gecco -vv run | 17 gecco -vv run |
18 --format $input.ext | 18 --format $input.ext |
19 --genome input_tempfile.$file_extension | 19 --genome input_tempfile.$file_extension |
20 --postproc $postproc | 20 --postproc $postproc |
21 --edge-distance $edge_distance | 21 --force-tsv |
22 --force-clusters-tsv | 22 #if $edge_distance |
23 --edge-distance $edge_distance | |
24 #end if | |
23 #if $mask | 25 #if $mask |
24 --mask | 26 --mask |
25 #end if | 27 #end if |
26 #if $cds: | 28 #if $cds: |
27 --cds $cds | 29 --cds $cds |
31 #end if | 33 #end if |
32 #if $antismash_sideload: | 34 #if $antismash_sideload: |
33 --antismash-sideload | 35 --antismash-sideload |
34 #end if | 36 #end if |
35 | 37 |
38 && mv input_tempfile.genes.tsv '$genes' | |
36 && mv input_tempfile.features.tsv '$features' | 39 && mv input_tempfile.features.tsv '$features' |
37 && mv input_tempfile.clusters.tsv '$clusters' | 40 && mv input_tempfile.clusters.tsv '$clusters' |
38 #if $antismash_sideload | 41 #if $antismash_sideload |
39 && mv input_tempfile.sideload.json '$sideload' | 42 && mv input_tempfile.sideload.json '$sideload' |
40 #end if | 43 #end if |
47 <param argument="--threshold" type="float" min="0" max="1" value="" optional="true" label="Probability threshold for cluster detection"/> | 50 <param argument="--threshold" type="float" min="0" max="1" value="" optional="true" label="Probability threshold for cluster detection"/> |
48 <param argument="--postproc" type="select" label="Post-processing method for gene cluster validation"> | 51 <param argument="--postproc" type="select" label="Post-processing method for gene cluster validation"> |
49 <option value="antismash">antiSMASH</option> | 52 <option value="antismash">antiSMASH</option> |
50 <option value="gecco" selected="true">GECCO</option> | 53 <option value="gecco" selected="true">GECCO</option> |
51 </param> | 54 </param> |
52 <param argument="--edge-distance" type="integer" min="0" value="10" label="Number of genes from the contig edges to filter out"/> | 55 <param argument="--edge-distance" type="integer" min="0" optional="true" value="" label="Number of genes from the contig edges to filter out"/> |
53 <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/> | 56 <param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/> |
54 </inputs> | 57 </inputs> |
55 <outputs> | 58 <outputs> |
56 <collection name="records" type="list" label="${tool.name} detected Biosynthetic Gene Clusters on ${on_string} (GenBank)"> | 59 <collection name="records" type="list" label="${tool.name} detected Biosynthetic Gene Clusters on ${on_string} (GenBank)"> |
57 <discover_datasets pattern="(?P<designation>.*)\.gbk" ext="genbank" visible="false" /> | 60 <discover_datasets pattern="(?P<designation>.*)\.gbk" ext="genbank" visible="false" /> |
58 </collection> | 61 </collection> |
62 <data name="genes" format="tabular" label="${tool.name} summary of detected genes on ${on_string} (TSV)"/> | |
59 <data name="features" format="tabular" label="${tool.name} summary of detected features on ${on_string} (TSV)"/> | 63 <data name="features" format="tabular" label="${tool.name} summary of detected features on ${on_string} (TSV)"/> |
60 <data name="clusters" format="tabular" label="${tool.name} summary of detected BGCs on ${on_string} (TSV)"/> | 64 <data name="clusters" format="tabular" label="${tool.name} summary of detected BGCs on ${on_string} (TSV)"/> |
61 <data name="sideload" format="json" label="antiSMASH v6 sideload file with ${tool.name} detected BGCs on ${on_string} (JSON)"> | 65 <data name="sideload" format="json" label="antiSMASH v6 sideload file with ${tool.name} detected BGCs on ${on_string} (JSON)"> |
62 <filter>antismash_sideload</filter> | 66 <filter>antismash_sideload</filter> |
63 </data> | 67 </data> |
64 </outputs> | 68 </outputs> |
65 <tests> | 69 <tests> |
66 <test> | 70 <test> |
67 <param name="input" value="BGC0001866.fna"/> | 71 <param name="input" value="BGC0001866.fna"/> |
68 <output name="features" file="features.tsv"/> | 72 <output name="features" file="features.tsv"/> |
73 <output name="genes" file="genes.tsv"/> | |
69 <output name="clusters" file="clusters.tsv"/> | 74 <output name="clusters" file="clusters.tsv"/> |
70 </test> | 75 </test> |
71 <test> | 76 <test> |
72 <param name="input" value="BGC0001866.fna"/> | 77 <param name="input" value="BGC0001866.fna"/> |
73 <param name="edge_distance" value="0"/> | 78 <param name="edge_distance" value="0"/> |
74 <output name="features" file="features.tsv"/> | 79 <output name="features" file="features.tsv"/> |
80 <output name="genes" file="genes.tsv"/> | |
75 <output name="clusters" file="clusters.tsv"/> | 81 <output name="clusters" file="clusters.tsv"/> |
76 <output_collection name="records" type="list"> | 82 <output_collection name="records" type="list"> |
77 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> | 83 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> |
78 </output_collection> | 84 </output_collection> |
79 </test> | 85 </test> |
80 <test> | 86 <test> |
81 <param name="input" value="BGC0001866.fna"/> | 87 <param name="input" value="BGC0001866.fna"/> |
82 <param name="antismash_sideload" value="True"/> | 88 <param name="antismash_sideload" value="True"/> |
83 <param name="edge_distance" value="0"/> | 89 <param name="edge_distance" value="0"/> |
84 <output name="features" file="features.tsv"/> | 90 <output name="features" file="features.tsv"/> |
91 <output name="genes" file="genes.tsv"/> | |
85 <output name="clusters" file="clusters.tsv"/> | 92 <output name="clusters" file="clusters.tsv"/> |
86 <output name="sideload" file="sideload.json"/> | 93 <output name="sideload" file="sideload.json"/> |
87 <output_collection name="records" type="list"> | 94 <output_collection name="records" type="list"> |
88 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> | 95 <element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/> |
89 </output_collection> | 96 </output_collection> |
105 Output | 112 Output |
106 ------ | 113 ------ |
107 | 114 |
108 GECCO will create the following files once done (using the same prefix as the input file): | 115 GECCO will create the following files once done (using the same prefix as the input file): |
109 | 116 |
110 - ``features.tsv``: The features file, containing the identified proteins and domains in the input sequences. | 117 - ``genes.tsv``: The genes file, containing the genes identified in the input sequences. |
111 - ``clusters.tsv``: If any were found, a clusters file, containing the coordinates of the predicted clusters, along their putative biosynthetic type. | 118 - ``features.tsv``: The features file, containing the protein domains identified in the input sequences. |
119 - ``clusters.tsv``: A clusters file, containing the coordinates of the predicted clusters, along with their putative biosynthetic type. | |
112 - ``{sequence}_cluster_{N}.gbk``: If any BGCs were found, a GenBank file per cluster, containing the cluster sequence annotated with its member proteins and domains. | 120 - ``{sequence}_cluster_{N}.gbk``: If any BGCs were found, a GenBank file per cluster, containing the cluster sequence annotated with its member proteins and domains. |
113 | 121 |
114 Contact | 122 Contact |
115 ------- | 123 ------- |
116 | 124 |