comparison gstf_preparation.xml @ 4:284f64ad9d43 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit cda3ecab1a34376cc7d4d392a34dc810847cbf0b-dirty
author earlhaminst
date Fri, 08 Dec 2017 05:32:12 -0500
parents 19644996bc2a
children 56bbdbfe3eaa
comparison
equal deleted inserted replaced
3:7e11a7f4bdba 4:284f64ad9d43
1 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.3.0"> 1 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.0">
2 <description>converts data for the workflow</description> 2 <description>converts data for the workflow</description>
3 <command detect_errors="exit_code"> 3 <command detect_errors="exit_code">
4 <![CDATA[ 4 <![CDATA[
5 python '$__tool_directory__/gstf_preparation.py' 5 python '$__tool_directory__/gstf_preparation.py'
6 #for $q in $queries 6 #for $q in $queries
12 #end for 12 #end for
13 #end if 13 #end if
14 #for $fasta_input in $fasta_inputs 14 #for $fasta_input in $fasta_inputs
15 --fasta '${fasta_input}' 15 --fasta '${fasta_input}'
16 #end for 16 #end for
17 #if $headers
18 --headers
19 #end if
20 #if $longestCDS
21 -l
22 #end if
17 -o '$output_db' 23 -o '$output_db'
18 --of '$output_fasta' 24 --of '$output_fasta'
19 ]]> 25 ]]>
20 </command> 26 </command>
21 27
26 <validator type="empty_field" /> 32 <validator type="empty_field" />
27 </param> 33 </param>
28 </repeat> 34 </repeat>
29 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" /> 35 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" />
30 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding FASTA datasets" help="Each FASTA header line should start with a transcript id" /> 36 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding FASTA datasets" help="Each FASTA header line should start with a transcript id" />
37 <param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" />
38 <param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the &gt;TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" />
31 </inputs> 39 </inputs>
32 40
33 <outputs> 41 <outputs>
34 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" /> 42 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" />
35 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" /> 43 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" />
38 <tests> 46 <tests>
39 <test> 47 <test>
40 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> 48 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
41 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> 49 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
42 <param name="genome" value="caenorhabditis_elegans" /> 50 <param name="genome" value="caenorhabditis_elegans" />
51 <param name="longestCDS" value="false" />
52 <param name="headers" value="true" />
53
43 <output name="output_db" file="test1.sqlite" compare="sim_size" /> 54 <output name="output_db" file="test1.sqlite" compare="sim_size" />
44 <output name="output_fasta" file="test1.fasta" /> 55 <output name="output_fasta" file="test1.fasta" />
45 </test> 56 </test>
46 <test> 57 <test>
58 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
59 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
60 <param name="genome" value="caenorhabditis_elegans" />
61 <param name="longestCDS" value="true" />
62 <param name="headers" value="true" />
63
64 <output name="output_db" file="test1.sqlite" compare="sim_size" />
65 <output name="output_fasta" file="test1_longest.fasta" />
66 </test>
67 <test>
68 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
69 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
70 <param name="genome" value="caenorhabditis_elegans" />
71 <param name="longestCDS" value="false" />
72 <param name="headers" value="false" />
73
74 <output name="output_db" file="test1.sqlite" compare="sim_size" />
75 <output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
76 </test>
77 <test>
47 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> 78 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" />
48 <param name="json" ftype="json" value="gene.json" /> 79 <param name="json" ftype="json" value="gene.json" />
80 <param name="longestCDS" value="false" />
81 <param name="headers" value="true" />
49 82
50 <output name="output_db" file="test2.sqlite" compare="sim_size" /> 83 <output name="output_db" file="test2.sqlite" compare="sim_size" />
51 <output name="output_fasta" file="test2.fasta" /> 84 <output name="output_fasta" file="test2.fasta" />
52 </test> 85 </test>
53 </tests> 86 </tests>
54 <help> 87 <help>
55 <![CDATA[ 88 <![CDATA[
56 **What it does** 89 **What it does**
57 90
58 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format and modify the header lines of a corresponding CDS FASTA to be used with the GeneSeqToFamily workflow. 91 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format.
92
93 It also filters a CDS FASTA dataset to keep only the transcripts present in the gene feature information. Optionally it can also keep only the longest CDS per gene and/or change the header line of the FASTA sequences to the >TranscriptId_species format (as required by TreeBest, part of the GeneSeqToFamily workflow).
59 94
60 Example GFF3 file:: 95 Example GFF3 file::
61 96
62 scaffold_0 MYZPE13164_Clone_G006_v1.0 gene 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030;biotype=protein_coding 97 scaffold_0 MYZPE13164_Clone_G006_v1.0 gene 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030;biotype=protein_coding
63 scaffold_0 MYZPE13164_Clone_G006_v1.0 mRNA 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030.1;Parent=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030.1;biotype=protein_coding;_AED=0.31 98 scaffold_0 MYZPE13164_Clone_G006_v1.0 mRNA 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030.1;Parent=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030.1;biotype=protein_coding;_AED=0.31