comparison gstf_preparation.xml @ 10:e8e75a79de59 draft

"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit 9c8611fee927883f50bc6955771aa69df1ce8457"
author earlhaminst
date Thu, 31 Oct 2019 08:16:51 -0400
parents 92f3966d5bc3
children dbe37a658cd2
comparison
equal deleted inserted replaced
9:f4acbfe8d6fe 10:e8e75a79de59
1 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.1"> 1 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.1">
2 <description>converts data for the workflow</description> 2 <description>converts data for the workflow</description>
3 <command detect_errors="exit_code"> 3 <command detect_errors="exit_code"><![CDATA[
4 <![CDATA[
5 python '$__tool_directory__/gstf_preparation.py' 4 python '$__tool_directory__/gstf_preparation.py'
6 #for $q in $queries 5 #for $q in $queries
7 --gff3 '${q.genome}:${q.gff3_input}' 6 --gff3 '${q.genome}:${q.gff3_input}'
8 #end for 7 #end for
9 #if str($json) != 'None' 8 #if str($json) != 'None'
20 #if $longestCDS 19 #if $longestCDS
21 -l 20 -l
22 #end if 21 #end if
23 #if $regions 22 #if $regions
24 --regions '$regions' 23 --regions '$regions'
24 --ff '$filtered_fasta'
25 #end if 25 #end if
26 -o '$output_db' 26 -o '$output_db'
27 --of '$output_fasta' 27 --of '$output_fasta'
28 --ff '$filtered_fasta' 28 ]]></command>
29 ]]>
30 </command>
31 29
32 <inputs> 30 <inputs>
33 <repeat name="queries" title="GFF3 dataset"> 31 <repeat name="queries" title="GFF3 dataset">
34 <param name="gff3_input" type="data" format="gff3" label="GFF3 dataset" /> 32 <param name="gff3_input" type="data" format="gff3" label="GFF3 dataset" />
35 <param name="genome" type="text" label="Genome name" help="Genome name without whitespaces or special characters"> 33 <param name="genome" type="text" label="Genome name" help="Genome name without whitespaces or special characters">
38 </repeat> 36 </repeat>
39 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" /> 37 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" />
40 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding CDS datasets in FASTA format" help="Each FASTA header line should start with a transcript id" /> 38 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding CDS datasets in FASTA format" help="Each FASTA header line should start with a transcript id" />
41 <param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" /> 39 <param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" />
42 <param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the &gt;TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" /> 40 <param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the &gt;TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" />
43 <param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. This is typically used to filter chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" /> 41 <param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered out" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. This is typically used to filter out chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" />
44 </inputs> 42 </inputs>
45 43
46 <outputs> 44 <outputs>
47 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" /> 45 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" />
48 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" /> 46 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" />
49 <data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences" /> 47 <data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences">
48 <filter>regions</filter>
49 </data>
50 </outputs> 50 </outputs>
51 51
52 <tests> 52 <tests>
53 <test> 53 <test expect_num_outputs="2">
54 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> 54 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
55 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> 55 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
56 <param name="genome" value="caenorhabditis_elegans" /> 56 <param name="genome" value="caenorhabditis_elegans" />
57 <param name="longestCDS" value="false" /> 57 <param name="longestCDS" value="false" />
58 <param name="headers" value="true" /> 58 <param name="headers" value="true" />
59 59
60 <output name="output_db" file="test1.sqlite" compare="sim_size" /> 60 <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" />
61 <output name="output_fasta" file="test1.fasta" /> 61 <output name="output_fasta" file="test1.fasta" />
62 <output name="filtered_fasta" file="test1.ns.fasta" />
63 </test> 62 </test>
64 <test> 63 <test expect_num_outputs="2">
65 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> 64 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
66 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> 65 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
67 <param name="genome" value="caenorhabditis_elegans" /> 66 <param name="genome" value="caenorhabditis_elegans" />
68 <param name="longestCDS" value="true" /> 67 <param name="longestCDS" value="true" />
69 <param name="headers" value="true" /> 68 <param name="headers" value="true" />
70 69
71 <output name="output_db" file="test1.sqlite" compare="sim_size" /> 70 <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" />
72 <output name="output_fasta" file="test1_longest.fasta" /> 71 <output name="output_fasta" file="test1_longest.fasta" />
73 <output name="filtered_fasta" file="test1.ns.fasta" />
74 </test> 72 </test>
75 <test> 73 <test expect_num_outputs="2">
76 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> 74 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
77 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> 75 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
78 <param name="genome" value="caenorhabditis_elegans" /> 76 <param name="genome" value="caenorhabditis_elegans" />
79 <param name="longestCDS" value="false" /> 77 <param name="longestCDS" value="false" />
80 <param name="headers" value="false" /> 78 <param name="headers" value="false" />
81 79
82 <output name="output_db" file="test1.sqlite" compare="sim_size" /> 80 <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" />
83 <output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> 81 <output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
84 <output name="filtered_fasta" file="test1.ns.fasta" />
85 </test> 82 </test>
86 <test> 83 <test expect_num_outputs="2">
87 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> 84 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" />
88 <param name="json" ftype="json" value="gene.json" /> 85 <param name="json" ftype="json" value="gene.json" />
89 <param name="longestCDS" value="false" /> 86 <param name="longestCDS" value="false" />
90 <param name="headers" value="true" /> 87 <param name="headers" value="true" />
91 88
92 <output name="output_db" file="test4.sqlite" compare="sim_size" /> 89 <output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" />
93 <output name="output_fasta" file="test4.fasta" /> 90 <output name="output_fasta" file="test4.fasta" />
94 <output name="filtered_fasta" file="test4.ns.fasta" />
95 </test> 91 </test>
96 <test> 92 <test>
97 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> 93 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" />
98 <param name="json" ftype="json" value="gene.json" /> 94 <param name="json" ftype="json" value="gene.json" />
99 <param name="longestCDS" value="false" /> 95 <param name="longestCDS" value="false" />
100 <param name="headers" value="true" /> 96 <param name="headers" value="true" />
101 <param name="regions" value="X" /> 97 <param name="regions" value="X" />
102 98
103 <output name="output_db" file="test5.sqlite" compare="sim_size" /> 99 <output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" />
104 <output name="output_fasta" file="test5_filtered.fasta" /> 100 <output name="output_fasta" file="test5_filtered.fasta" />
105 <output name="filtered_fasta" file="test5.ns.fasta" /> 101 <output name="filtered_fasta" file="test5.ns.fasta" />
106 </test> 102 </test>
103 <test expect_num_outputs="2">
104 <param name="fasta_inputs" ftype="fasta" value="Mus_pahari.PAHARI_EIJ_v1.1.cds.all.shortened.fa" />
105 <param name="gff3_input" ftype="gff3" value="MGP_PahariEiJ_G0008413.1.gff3" />
106 <param name="genome" value="mus_pahari" />
107 <param name="longestCDS" value="true" />
108 <param name="headers" value="true" />
109
110 <output name="output_db" file="test6.sqlite" compare="sim_size" delta="30000" />
111 <output name="output_fasta" file="test6.fasta" />
112 </test>
107 </tests> 113 </tests>
108 <help> 114 <help><![CDATA[
109 <![CDATA[
110 **What it does** 115 **What it does**
111 116
112 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format. 117 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format.
113 118
114 It also filters the CDS FASTA datasets to: 119 It also filters the CDS FASTA datasets to:
138 The following features are parsed: **gene**, **mRNA**, **transcript**, **exon**, **five_prime_utr**, **three_prime_utr** and **CDS**, all other are ignored. Also, **ID** and **Parent** attributes in the 9th column are needed to create relations among features. 143 The following features are parsed: **gene**, **mRNA**, **transcript**, **exon**, **five_prime_utr**, **three_prime_utr** and **CDS**, all other are ignored. Also, **ID** and **Parent** attributes in the 9th column are needed to create relations among features.
139 144
140 .. class:: warningmark 145 .. class:: warningmark
141 146
142 If a value in the **ID** and **Parent** attribute contains a colon, everything up to the first colon will be discarded. 147 If a value in the **ID** and **Parent** attribute contains a colon, everything up to the first colon will be discarded.
143 ]]> 148 ]]></help>
144 </help>
145 <citations> 149 <citations>
146 </citations> 150 </citations>
147 </tool> 151 </tool>