diff gstf_preparation.xml @ 0:28879ca33b5f draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit 651fae48371f845578753052c6fe173e3bb35670
author earlhaminst
date Wed, 15 Mar 2017 20:18:57 -0400
parents
children 19644996bc2a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gstf_preparation.xml	Wed Mar 15 20:18:57 2017 -0400
@@ -0,0 +1,82 @@
+<tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.3.0">
+    <description>converts data for the workflow</description>
+    <command detect_errors="exit_code">
+<![CDATA[
+python '$__tool_directory__/gstf_preparation.py'
+#for $q in $queries
+    --gff3 '${q.genome}:${q.gff3_input}'
+#end for
+#if str($json) != 'None'
+    #for $v in $json
+        --json '$v'
+    #end for
+#end if
+#for $fasta_input in $fasta_inputs
+    --fasta '${fasta_input}'
+#end for
+-o '$output_db'
+--of '$output_fasta'
+]]>
+    </command><!-- The template above runs gstf_preparation.py with one gff3 option ("genome:dataset") per "queries" repeat, one json option per selected JSON dataset (skipped when none is selected), one fasta option per FASTA dataset, and finally the two output paths (SQLite, then FASTA). -->
+
+    <inputs><!-- Repeatable pairs of GFF3 dataset and genome name, optional JSON gene-feature datasets, and one or more FASTA datasets. -->
+        <repeat name="queries" title="GFF3 dataset">
+            <param name="gff3_input" type="data" format="gff3" label="GFF3 dataset" />
+            <param name="genome" type="text" label="Genome name" help="Genome name without whitespaces or special characters">
+                <validator type="empty_field" /><!-- NOTE(review): only emptiness is validated; the character restrictions stated in the help are not enforced here, and the command joins this value to the dataset path with ":". Confirm whether a stricter validator is wanted. -->
+            </param>
+        </repeat>
+        <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" />
+        <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding FASTA datasets" help="Each FASTA header line should start with a transcript id" />
+    </inputs>
+
+    <outputs><!-- Two datasets: the SQLite database passed to the command via -o and the FASTA file passed via its "of" option. -->
+        <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" />
+        <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" />
+    </outputs>
+
+    <tests>
+        <test><!-- GFF3 route: one GFF3 dataset with its genome name plus the matching FASTA; the SQLite output is checked with compare="sim_size". NOTE(review): gff3_input and genome belong to the "queries" repeat but are given by bare name; consider nesting them in a repeat element to make the binding explicit. -->
+            <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
+            <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
+            <param name="genome" value="caenorhabditis_elegans" />
+            <output name="output_db" file="test1.sqlite" compare="sim_size" />
+            <output name="output_fasta" file="test1.fasta" />
+        </test>
+        <test><!-- JSON route: gene features come from a JSON dataset and no GFF3/genome pair is supplied. -->
+            <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" />
+            <param name="json" ftype="json" value="gene.json" />
+
+            <output name="output_db" file="test2.sqlite" compare="sim_size" />
+            <output name="output_fasta" file="test2.fasta" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format and modifies the header lines of a corresponding CDS FASTA dataset to be used with the GeneSeqToFamily workflow.
+
+Example GFF3 file::
+
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  gene            44968   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030;biotype=protein_coding
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  mRNA            44968   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1;Parent=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030.1;biotype=protein_coding;_AED=0.31
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  three_prime_utr 44968   46637   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.3utr1;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            44968   47432   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon1;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  CDS             46638   47432   .   -   0   ID=MYZPE13164_G006_v1.0_000000030.1.cds1;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            53325   53539   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon2;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  CDS             53325   53539   .   -   2   ID=MYZPE13164_G006_v1.0_000000030.1.cds2;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            54614   54719   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon3;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  CDS             54614   54719   .   -   0   ID=MYZPE13164_G006_v1.0_000000030.1.cds3;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  CDS             54852   55106   .   -   0   ID=MYZPE13164_G006_v1.0_000000030.1.cds4;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            54852   55117   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon4;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  five_prime_utr  55107   55117   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.5utr1;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  five_prime_utr  68851   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.5utr2;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            68851   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon5;Parent=MYZPE13164_G006_v1.0_000000030.1
+
+The following features are parsed: **gene**, **mRNA**, **transcript**, **exon**, **five_prime_utr**, **three_prime_utr** and **CDS**; all others are ignored. Also, **ID** and **Parent** tags are needed to create relations.
+]]>
+    </help>
+    <citations><!-- NOTE(review): no citation entries are declared for this tool yet. -->
+    </citations>
+</tool>