Mercurial > repos > earlhaminst > gstf_preparation
comparison gstf_preparation.xml @ 10:e8e75a79de59 draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit 9c8611fee927883f50bc6955771aa69df1ce8457"
author | earlhaminst |
---|---|
date | Thu, 31 Oct 2019 08:16:51 -0400 |
parents | 92f3966d5bc3 |
children | dbe37a658cd2 |
comparison
equal
deleted
inserted
replaced
9:f4acbfe8d6fe | 10:e8e75a79de59 |
---|---|
1 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.1"> | 1 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.1"> |
2 <description>converts data for the workflow</description> | 2 <description>converts data for the workflow</description> |
3 <command detect_errors="exit_code"> | 3 <command detect_errors="exit_code"><![CDATA[ |
4 <![CDATA[ | |
5 python '$__tool_directory__/gstf_preparation.py' | 4 python '$__tool_directory__/gstf_preparation.py' |
6 #for $q in $queries | 5 #for $q in $queries |
7 --gff3 '${q.genome}:${q.gff3_input}' | 6 --gff3 '${q.genome}:${q.gff3_input}' |
8 #end for | 7 #end for |
9 #if str($json) != 'None' | 8 #if str($json) != 'None' |
20 #if $longestCDS | 19 #if $longestCDS |
21 -l | 20 -l |
22 #end if | 21 #end if |
23 #if $regions | 22 #if $regions |
24 --regions '$regions' | 23 --regions '$regions' |
| | 24 --ff '$filtered_fasta' |
25 #end if | 25 #end if |
26 -o '$output_db' | 26 -o '$output_db' |
27 --of '$output_fasta' | 27 --of '$output_fasta' |
28 --ff '$filtered_fasta' | 28 ]]></command> |
29 ]]> | |
30 </command> | |
31 | 29 |
32 <inputs> | 30 <inputs> |
33 <repeat name="queries" title="GFF3 dataset"> | 31 <repeat name="queries" title="GFF3 dataset"> |
34 <param name="gff3_input" type="data" format="gff3" label="GFF3 dataset" /> | 32 <param name="gff3_input" type="data" format="gff3" label="GFF3 dataset" /> |
35 <param name="genome" type="text" label="Genome name" help="Genome name without whitespaces or special characters"> | 33 <param name="genome" type="text" label="Genome name" help="Genome name without whitespaces or special characters"> |
38 </repeat> | 36 </repeat> |
39 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" /> | 37 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" /> |
40 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding CDS datasets in FASTA format" help="Each FASTA header line should start with a transcript id" /> | 38 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding CDS datasets in FASTA format" help="Each FASTA header line should start with a transcript id" /> |
41 <param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" /> | 39 <param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" /> |
42 <param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the >TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" /> | 40 <param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the >TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" /> |
43 <param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. This is typically used to filter chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" /> | 41 <param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered out" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. This is typically used to filter out chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" /> |
44 </inputs> | 42 </inputs> |
45 | 43 |
46 <outputs> | 44 <outputs> |
47 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" /> | 45 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" /> |
48 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" /> | 46 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" /> |
49 <data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences" /> | 47 <data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences"> |
| | 48 <filter>regions</filter> |
| | 49 </data> |
50 </outputs> | 50 </outputs> |
51 | 51 |
52 <tests> | 52 <tests> |
53 <test> | 53 <test expect_num_outputs="2"> |
54 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> | 54 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> |
55 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> | 55 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> |
56 <param name="genome" value="caenorhabditis_elegans" /> | 56 <param name="genome" value="caenorhabditis_elegans" /> |
57 <param name="longestCDS" value="false" /> | 57 <param name="longestCDS" value="false" /> |
58 <param name="headers" value="true" /> | 58 <param name="headers" value="true" /> |
59 | 59 |
60 <output name="output_db" file="test1.sqlite" compare="sim_size" /> | 60 <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" /> |
61 <output name="output_fasta" file="test1.fasta" /> | 61 <output name="output_fasta" file="test1.fasta" /> |
62 <output name="filtered_fasta" file="test1.ns.fasta" /> | |
63 </test> | 62 </test> |
64 <test> | 63 <test expect_num_outputs="2"> |
65 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> | 64 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> |
66 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> | 65 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> |
67 <param name="genome" value="caenorhabditis_elegans" /> | 66 <param name="genome" value="caenorhabditis_elegans" /> |
68 <param name="longestCDS" value="true" /> | 67 <param name="longestCDS" value="true" /> |
69 <param name="headers" value="true" /> | 68 <param name="headers" value="true" /> |
70 | 69 |
71 <output name="output_db" file="test1.sqlite" compare="sim_size" /> | 70 <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" /> |
72 <output name="output_fasta" file="test1_longest.fasta" /> | 71 <output name="output_fasta" file="test1_longest.fasta" /> |
73 <output name="filtered_fasta" file="test1.ns.fasta" /> | |
74 </test> | 72 </test> |
75 <test> | 73 <test expect_num_outputs="2"> |
76 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> | 74 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> |
77 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> | 75 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> |
78 <param name="genome" value="caenorhabditis_elegans" /> | 76 <param name="genome" value="caenorhabditis_elegans" /> |
79 <param name="longestCDS" value="false" /> | 77 <param name="longestCDS" value="false" /> |
80 <param name="headers" value="false" /> | 78 <param name="headers" value="false" /> |
81 | 79 |
82 <output name="output_db" file="test1.sqlite" compare="sim_size" /> | 80 <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" /> |
83 <output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> | 81 <output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> |
84 <output name="filtered_fasta" file="test1.ns.fasta" /> | |
85 </test> | 82 </test> |
86 <test> | 83 <test expect_num_outputs="2"> |
87 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> | 84 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> |
88 <param name="json" ftype="json" value="gene.json" /> | 85 <param name="json" ftype="json" value="gene.json" /> |
89 <param name="longestCDS" value="false" /> | 86 <param name="longestCDS" value="false" /> |
90 <param name="headers" value="true" /> | 87 <param name="headers" value="true" /> |
91 | 88 |
92 <output name="output_db" file="test4.sqlite" compare="sim_size" /> | 89 <output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" /> |
93 <output name="output_fasta" file="test4.fasta" /> | 90 <output name="output_fasta" file="test4.fasta" /> |
94 <output name="filtered_fasta" file="test4.ns.fasta" /> | |
95 </test> | 91 </test> |
96 <test> | 92 <test> |
97 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> | 93 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> |
98 <param name="json" ftype="json" value="gene.json" /> | 94 <param name="json" ftype="json" value="gene.json" /> |
99 <param name="longestCDS" value="false" /> | 95 <param name="longestCDS" value="false" /> |
100 <param name="headers" value="true" /> | 96 <param name="headers" value="true" /> |
101 <param name="regions" value="X" /> | 97 <param name="regions" value="X" /> |
102 | 98 |
103 <output name="output_db" file="test5.sqlite" compare="sim_size" /> | 99 <output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" /> |
104 <output name="output_fasta" file="test5_filtered.fasta" /> | 100 <output name="output_fasta" file="test5_filtered.fasta" /> |
105 <output name="filtered_fasta" file="test5.ns.fasta" /> | 101 <output name="filtered_fasta" file="test5.ns.fasta" /> |
106 </test> | 102 </test> |
| | 103 <test expect_num_outputs="2"> |
| | 104 <param name="fasta_inputs" ftype="fasta" value="Mus_pahari.PAHARI_EIJ_v1.1.cds.all.shortened.fa" /> |
| | 105 <param name="gff3_input" ftype="gff3" value="MGP_PahariEiJ_G0008413.1.gff3" /> |
| | 106 <param name="genome" value="mus_pahari" /> |
| | 107 <param name="longestCDS" value="true" /> |
| | 108 <param name="headers" value="true" /> |
| | 109 |
| | 110 <output name="output_db" file="test6.sqlite" compare="sim_size" delta="30000" /> |
| | 111 <output name="output_fasta" file="test6.fasta" /> |
| | 112 </test> |
107 </tests> | 113 </tests> |
108 <help> | 114 <help><![CDATA[ |
109 <![CDATA[ | |
110 **What it does** | 115 **What it does** |
111 | 116 |
112 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format. | 117 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format. |
113 | 118 |
114 It also filters the CDS FASTA datasets to: | 119 It also filters the CDS FASTA datasets to: |
138 The following features are parsed: **gene**, **mRNA**, **transcript**, **exon**, **five_prime_utr**, **three_prime_utr** and **CDS**, all other are ignored. Also, **ID** and **Parent** attributes in the 9th column are needed to create relations among features. | 143 The following features are parsed: **gene**, **mRNA**, **transcript**, **exon**, **five_prime_utr**, **three_prime_utr** and **CDS**, all other are ignored. Also, **ID** and **Parent** attributes in the 9th column are needed to create relations among features. |
139 | 144 |
140 .. class:: warningmark | 145 .. class:: warningmark |
141 | 146 |
142 If a value in the **ID** and **Parent** attribute contains a colon, everything up to the first colon will be discarded. | 147 If a value in the **ID** and **Parent** attribute contains a colon, everything up to the first colon will be discarded. |
143 ]]> | 148 ]]></help> |
144 </help> | |
145 <citations> | 149 <citations> |
146 </citations> | 150 </citations> |
147 </tool> | 151 </tool> |