gstf_preparation: gstf_preparation.xml comparison

comparison gstf_preparation.xml @ 10:e8e75a79de59 draft

"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit 9c8611fee927883f50bc6955771aa69df1ce8457"

author	earlhaminst
date	Thu, 31 Oct 2019 08:16:51 -0400
parents	92f3966d5bc3
children	dbe37a658cd2

comparison

equal deleted inserted replaced

-:f4acbfe8d6fe
+:e8e75a79de59
 <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.1">
 <description>converts data for the workflow</description>
-<command detect_errors="exit_code">
+<command detect_errors="exit_code"><![CDATA[
-<![CDATA[
 python '$__tool_directory__/gstf_preparation.py'
 #for $q in $queries
 --gff3 '${q.genome}:${q.gff3_input}'
 #end for
 #if str($json) != 'None'
 #if $longestCDS
 -l
 #end if
 #if $regions
 --regions '$regions'
+--ff '$filtered_fasta'
 #end if
 -o '$output_db'
 --of '$output_fasta'
---ff '$filtered_fasta'
+]]></command>
-]]>
-</command>
 <inputs>
 <repeat name="queries" title="GFF3 dataset">
 <param name="gff3_input" type="data" format="gff3" label="GFF3 dataset" />
 <param name="genome" type="text" label="Genome name" help="Genome name without whitespaces or special characters">
 </repeat>
 <param name="json" type="data" format="json" multiple="true" optional="true" label="Gene features in JSON format generated by 'Get features by Ensembl ID' tool" />
 <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding CDS datasets in FASTA format" help="Each FASTA header line should start with a transcript id" />
 <param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" />
 <param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the &gt;TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" />
-<param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. This is typically used to filter chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" />
+<param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered out" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. This is typically used to filter out chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" />
 </inputs>
 <outputs>
 <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" />
 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" />
-<data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences" />
+<data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences">
+<filter>regions</filter>
+</data>
 </outputs>
 <tests>
-<test>
+<test expect_num_outputs="2">
 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
 <param name="genome" value="caenorhabditis_elegans" />
 <param name="longestCDS" value="false" />
 <param name="headers" value="true" />
-<output name="output_db" file="test1.sqlite" compare="sim_size" />
+<output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" />
 <output name="output_fasta" file="test1.fasta" />
-<output name="filtered_fasta" file="test1.ns.fasta" />
 </test>
-<test>
+<test expect_num_outputs="2">
 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
 <param name="genome" value="caenorhabditis_elegans" />
 <param name="longestCDS" value="true" />
 <param name="headers" value="true" />
-<output name="output_db" file="test1.sqlite" compare="sim_size" />
+<output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" />
 <output name="output_fasta" file="test1_longest.fasta" />
-<output name="filtered_fasta" file="test1.ns.fasta" />
 </test>
-<test>
+<test expect_num_outputs="2">
 <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
 <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" />
 <param name="genome" value="caenorhabditis_elegans" />
 <param name="longestCDS" value="false" />
 <param name="headers" value="false" />
-<output name="output_db" file="test1.sqlite" compare="sim_size" />
+<output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" />
 <output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" />
-<output name="filtered_fasta" file="test1.ns.fasta" />
 </test>
-<test>
+<test expect_num_outputs="2">
 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" />
 <param name="json" ftype="json" value="gene.json" />
 <param name="longestCDS" value="false" />
 <param name="headers" value="true" />
-<output name="output_db" file="test4.sqlite" compare="sim_size" />
+<output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" />
 <output name="output_fasta" file="test4.fasta" />
-<output name="filtered_fasta" file="test4.ns.fasta" />
 </test>
 <test>
 <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" />
 <param name="json" ftype="json" value="gene.json" />
 <param name="longestCDS" value="false" />
 <param name="headers" value="true" />
 <param name="regions" value="X" />
-<output name="output_db" file="test5.sqlite" compare="sim_size" />
+<output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" />
 <output name="output_fasta" file="test5_filtered.fasta" />
 <output name="filtered_fasta" file="test5.ns.fasta" />
 </test>
+<test expect_num_outputs="2">
+<param name="fasta_inputs" ftype="fasta" value="Mus_pahari.PAHARI_EIJ_v1.1.cds.all.shortened.fa" />
+<param name="gff3_input" ftype="gff3" value="MGP_PahariEiJ_G0008413.1.gff3" />
+<param name="genome" value="mus_pahari" />
+<param name="longestCDS" value="true" />
+<param name="headers" value="true" />
+<output name="output_db" file="test6.sqlite" compare="sim_size" delta="30000" />
+<output name="output_fasta" file="test6.fasta" />
+</test>
 </tests>
-<help>
+<help><![CDATA[
-<![CDATA[
 **What it does**
 This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format.
 It also filters the CDS FASTA datasets to:
 The following features are parsed: **gene**, **mRNA**, **transcript**, **exon**, **five_prime_utr**, **three_prime_utr** and **CDS**, all others are ignored. Also, **ID** and **Parent** attributes in the 9th column are needed to create relations among features.
 .. class:: warningmark
 If a value in the **ID** or **Parent** attribute contains a colon, everything up to the first colon will be discarded.
-]]>
+]]></help>
-</help>
 <citations>
 </citations>
 </tool>

Mercurial > repos > earlhaminst > gstf_preparation

comparison gstf_preparation.xml @ 10:e8e75a79de59 draft