gstf_preparation: gstf_preparation.xml comparison

comparison gstf_preparation.xml @ 4:284f64ad9d43 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit cda3ecab1a34376cc7d4d392a34dc810847cbf0b-dirty

author	earlhaminst
date	Fri, 08 Dec 2017 05:32:12 -0500
parents	19644996bc2a
children	56bbdbfe3eaa

comparison

equal deleted inserted replaced

-:7e11a7f4bdba
+:284f64ad9d43
-<tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.3.0">
+<tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.0">
 <description>converts data for the workflow</description>
 <command detect_errors="exit_code">
 <![CDATA[
 python '$__tool_directory__/gstf_preparation.py'
 #for $q in $queries
 #end for
 #end if
 #for $fasta_input in $fasta_inputs
 --fasta '${fasta_input}'
 #end for
+#if $headers
+--headers
+#end if
+#if $longestCDS
+-l
+#end if
 -o '$output_db'
 --of '$output_fasta'
 ]]>
 </command>
 <validator type="empty_field" />
 </param>
 </repeat>
 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" />
 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding FASTA datasets" help="Each FASTA header line should start with a transcript id" />
+<param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" />
+<param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the &gt;TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" />
 </inputs>
 <outputs>
 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" />
 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" />
 <tests>
 <test>
 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
 <param name="genome" value="caenorhabditis_elegans" />
+<param name="longestCDS" value="false" />
+<param name="headers" value="true" />
 <output name="output_db" file="test1.sqlite" compare="sim_size" />
 <output name="output_fasta" file="test1.fasta" />
 </test>
 <test>
+<param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
+<param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
+<param name="genome" value="caenorhabditis_elegans" />
+<param name="longestCDS" value="true" />
+<param name="headers" value="true" />
+<output name="output_db" file="test1.sqlite" compare="sim_size" />
+<output name="output_fasta" file="test1_longest.fasta" />
+</test>
+<test>
+<param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
+<param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
+<param name="genome" value="caenorhabditis_elegans" />
+<param name="longestCDS" value="false" />
+<param name="headers" value="false" />
+<output name="output_db" file="test1.sqlite" compare="sim_size" />
+<output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
+</test>
+<test>
 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" />
 <param name="json" ftype="json" value="gene.json" />
+<param name="longestCDS" value="false" />
+<param name="headers" value="true" />
 <output name="output_db" file="test2.sqlite" compare="sim_size" />
 <output name="output_fasta" file="test2.fasta" />
 </test>
 </tests>
 <help>
 <![CDATA[
 **What it does**
-This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format and modify the header lines of a corresponding CDS FASTA to be used with the GeneSeqToFamily workflow.
+This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format.
+It also filters a CDS FASTA dataset to keep only the transcripts present in the gene feature information. Optionally, it can also keep only the longest CDS per gene and/or change the header line of each FASTA sequence to the >TranscriptId_species format (as required by TreeBest, which is part of the GeneSeqToFamily workflow).
 Example GFF3 file::
 scaffold_0  MYZPE13164_Clone_G006_v1.0  gene            44968   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030;biotype=protein_coding
 scaffold_0  MYZPE13164_Clone_G006_v1.0  mRNA            44968   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1;Parent=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030.1;biotype=protein_coding;_AED=0.31

Mercurial > repos > earlhaminst > gstf_preparation

comparison gstf_preparation.xml @ 4:284f64ad9d43 draft