Mercurial > repos > earlhaminst > gstf_preparation
comparison gstf_preparation.xml @ 4:284f64ad9d43 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit cda3ecab1a34376cc7d4d392a34dc810847cbf0b-dirty
author | earlhaminst |
---|---|
date | Fri, 08 Dec 2017 05:32:12 -0500 |
parents | 19644996bc2a |
children | 56bbdbfe3eaa |
comparison
equal
deleted
inserted
replaced
3:7e11a7f4bdba | 4:284f64ad9d43 |
---|---|
1 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.3.0"> | 1 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.0"> |
2 <description>converts data for the workflow</description> | 2 <description>converts data for the workflow</description> |
3 <command detect_errors="exit_code"> | 3 <command detect_errors="exit_code"> |
4 <![CDATA[ | 4 <![CDATA[ |
5 python '$__tool_directory__/gstf_preparation.py' | 5 python '$__tool_directory__/gstf_preparation.py' |
6 #for $q in $queries | 6 #for $q in $queries |
12 #end for | 12 #end for |
13 #end if | 13 #end if |
14 #for $fasta_input in $fasta_inputs | 14 #for $fasta_input in $fasta_inputs |
15 --fasta '${fasta_input}' | 15 --fasta '${fasta_input}' |
16 #end for | 16 #end for |
| 17 #if $headers |
| 18 --headers |
| 19 #end if |
| 20 #if $longestCDS |
| 21 -l |
| 22 #end if |
17 -o '$output_db' | 23 -o '$output_db' |
18 --of '$output_fasta' | 24 --of '$output_fasta' |
19 ]]> | 25 ]]> |
20 </command> | 26 </command> |
21 | 27 |
26 <validator type="empty_field" /> | 32 <validator type="empty_field" /> |
27 </param> | 33 </param> |
28 </repeat> | 34 </repeat> |
29 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" /> | 35 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" /> |
30 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding FASTA datasets" help="Each FASTA header line should start with a transcript id" /> | 36 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding FASTA datasets" help="Each FASTA header line should start with a transcript id" /> |
| 37 <param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" /> |
| 38 <param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the >TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" /> |
31 </inputs> | 39 </inputs> |
32 | 40 |
33 <outputs> | 41 <outputs> |
34 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" /> | 42 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" /> |
35 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" /> | 43 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" /> |
38 <tests> | 46 <tests> |
39 <test> | 47 <test> |
40 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> | 48 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> |
41 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> | 49 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> |
42 <param name="genome" value="caenorhabditis_elegans" /> | 50 <param name="genome" value="caenorhabditis_elegans" /> |
| 51 <param name="longestCDS" value="false" /> |
| 52 <param name="headers" value="true" /> |
| 53 |
43 <output name="output_db" file="test1.sqlite" compare="sim_size" /> | 54 <output name="output_db" file="test1.sqlite" compare="sim_size" /> |
44 <output name="output_fasta" file="test1.fasta" /> | 55 <output name="output_fasta" file="test1.fasta" /> |
45 </test> | 56 </test> |
46 <test> | 57 <test> |
| 58 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> |
| 59 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> |
| 60 <param name="genome" value="caenorhabditis_elegans" /> |
| 61 <param name="longestCDS" value="true" /> |
| 62 <param name="headers" value="true" /> |
| 63 |
| 64 <output name="output_db" file="test1.sqlite" compare="sim_size" /> |
| 65 <output name="output_fasta" file="test1_longest.fasta" /> |
| 66 </test> |
| 67 <test> |
| 68 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> |
| 69 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> |
| 70 <param name="genome" value="caenorhabditis_elegans" /> |
| 71 <param name="longestCDS" value="false" /> |
| 72 <param name="headers" value="false" /> |
| 73 |
| 74 <output name="output_db" file="test1.sqlite" compare="sim_size" /> |
| 75 <output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> |
| 76 </test> |
| 77 <test> |
47 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> | 78 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> |
48 <param name="json" ftype="json" value="gene.json" /> | 79 <param name="json" ftype="json" value="gene.json" /> |
| 80 <param name="longestCDS" value="false" /> |
| 81 <param name="headers" value="true" /> |
49 | 82 |
50 <output name="output_db" file="test2.sqlite" compare="sim_size" /> | 83 <output name="output_db" file="test2.sqlite" compare="sim_size" /> |
51 <output name="output_fasta" file="test2.fasta" /> | 84 <output name="output_fasta" file="test2.fasta" /> |
52 </test> | 85 </test> |
53 </tests> | 86 </tests> |
54 <help> | 87 <help> |
55 <![CDATA[ | 88 <![CDATA[ |
56 **What it does** | 89 **What it does** |
57 | 90 |
58 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format and modify the header lines of a corresponding CDS FASTA to be used with the GeneSeqToFamily workflow. | 91 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format. |
| 92 |
| 93 It also filters a CDS FASTA dataset to keep only the transcripts present in the gene feature information. Optionally it can also keep only the longest CDS per gene and/or change the header line of the FASTA sequences to the >TranscriptId_species format (as required by TreeBest, part of the GeneSeqToFamily workflow). |
59 | 94 |
60 Example GFF3 file:: | 95 Example GFF3 file:: |
61 | 96 |
62 scaffold_0 MYZPE13164_Clone_G006_v1.0 gene 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030;biotype=protein_coding | 97 scaffold_0 MYZPE13164_Clone_G006_v1.0 gene 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030;biotype=protein_coding |
63 scaffold_0 MYZPE13164_Clone_G006_v1.0 mRNA 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030.1;Parent=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030.1;biotype=protein_coding;_AED=0.31 | 98 scaffold_0 MYZPE13164_Clone_G006_v1.0 mRNA 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030.1;Parent=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030.1;biotype=protein_coding;_AED=0.31 |