Mercurial > repos > earlhaminst > gstf_preparation
diff gstf_preparation.xml @ 10:e8e75a79de59 draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit 9c8611fee927883f50bc6955771aa69df1ce8457"
| field | value |
|---|---|
| author | earlhaminst |
| date | Thu, 31 Oct 2019 08:16:51 -0400 |
| parents | 92f3966d5bc3 |
| children | dbe37a658cd2 |
line wrap: on
line diff
--- a/gstf_preparation.xml Wed Oct 17 07:31:29 2018 -0400 +++ b/gstf_preparation.xml Thu Oct 31 08:16:51 2019 -0400 @@ -1,7 +1,6 @@ <tool id="gstf_preparation" name="GeneSeqToFamily preparation" version="0.4.1"> <description>converts data for the workflow</description> - <command detect_errors="exit_code"> -<![CDATA[ + <command detect_errors="exit_code"><![CDATA[ python '$__tool_directory__/gstf_preparation.py' #for $q in $queries --gff3 '${q.genome}:${q.gff3_input}' @@ -22,12 +21,11 @@ #end if #if $regions --regions '$regions' + --ff '$filtered_fasta' #end if -o '$output_db' --of '$output_fasta' ---ff '$filtered_fasta' -]]> - </command> + ]]></command> <inputs> <repeat name="queries" title="GFF3 dataset"> @@ -40,58 +38,56 @@ <param name="fasta_inputs" type="data" format="fasta" multiple="true" label="Corresponding CDS datasets in FASTA format" help="Each FASTA header line should start with a transcript id" /> <param name="longestCDS" type="boolean" checked="false" label="Keep only the longest CDS per gene" /> <param name="headers" type="boolean" checked="true" label="Change the header line of the FASTA sequences to the >TranscriptId_species format" help="As required by TreeBest, part of the GeneSeqToFamily workflow" /> - <param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. This is typically used to filter chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" /> + <param name="regions" type="text" optional="true" label="Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered out" help="Region IDs are in the `seqid` column for GFF3 and in the `seq_region_name` field in JSON. 
This is typically used to filter out chromosomes with a non-standard genetic code, like mitochondria, to be analysed separately" /> </inputs> <outputs> - <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" /> - <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" /> - <data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences" /> + <data name="output_db" format="sqlite" label="${tool.name} on ${on_string}: SQLite" /> + <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string}: FASTA" /> + <data name="filtered_fasta" format="fasta" label="${tool.name} on ${on_string}: filtered sequences"> + <filter>regions</filter> + </data> </outputs> <tests> - <test> + <test expect_num_outputs="2"> <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> <param name="genome" value="caenorhabditis_elegans" /> <param name="longestCDS" value="false" /> <param name="headers" value="true" /> - <output name="output_db" file="test1.sqlite" compare="sim_size" /> + <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" /> <output name="output_fasta" file="test1.fasta" /> - <output name="filtered_fasta" file="test1.ns.fasta" /> </test> - <test> + <test expect_num_outputs="2"> <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> <param name="genome" value="caenorhabditis_elegans" /> <param name="longestCDS" value="true" /> <param name="headers" value="true" /> - <output name="output_db" file="test1.sqlite" compare="sim_size" /> + <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" /> <output 
name="output_fasta" file="test1_longest.fasta" /> - <output name="filtered_fasta" file="test1.ns.fasta" /> </test> - <test> + <test expect_num_outputs="2"> <param name="fasta_inputs" ftype="fasta" value="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.87.chromosome.I.shortened.gff3" /> <param name="genome" value="caenorhabditis_elegans" /> <param name="longestCDS" value="false" /> <param name="headers" value="false" /> - <output name="output_db" file="test1.sqlite" compare="sim_size" /> + <output name="output_db" file="test1.sqlite" compare="sim_size" delta="30000" /> <output name="output_fasta" file="Caenorhabditis_elegans.WBcel235.cds.all.shortened.fa" /> - <output name="filtered_fasta" file="test1.ns.fasta" /> </test> - <test> + <test expect_num_outputs="2"> <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> <param name="json" ftype="json" value="gene.json" /> <param name="longestCDS" value="false" /> <param name="headers" value="true" /> - <output name="output_db" file="test4.sqlite" compare="sim_size" /> + <output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" /> <output name="output_fasta" file="test4.fasta" /> - <output name="filtered_fasta" file="test4.ns.fasta" /> </test> <test> <param name="fasta_inputs" ftype="fasta" value="CDS.fasta" /> @@ -100,13 +96,22 @@ <param name="headers" value="true" /> <param name="regions" value="X" /> - <output name="output_db" file="test5.sqlite" compare="sim_size" /> + <output name="output_db" file="test4.sqlite" compare="sim_size" delta="30000" /> <output name="output_fasta" file="test5_filtered.fasta" /> <output name="filtered_fasta" file="test5.ns.fasta" /> </test> + <test expect_num_outputs="2"> + <param name="fasta_inputs" ftype="fasta" value="Mus_pahari.PAHARI_EIJ_v1.1.cds.all.shortened.fa" /> + <param name="gff3_input" ftype="gff3" value="MGP_PahariEiJ_G0008413.1.gff3" /> + <param name="genome" 
value="mus_pahari" /> + <param name="longestCDS" value="true" /> + <param name="headers" value="true" /> + + <output name="output_db" file="test6.sqlite" compare="sim_size" delta="30000" /> + <output name="output_fasta" file="test6.fasta" /> + </test> </tests> - <help> -<![CDATA[ + <help><![CDATA[ **What it does** This tool converts a set of GFF3 and/or JSON gene feature information datasets into SQLite format. @@ -140,8 +145,7 @@ .. class:: warningmark If a value in the **ID** and **Parent** attribute contains a colon, everything up to the first colon will be discarded. -]]> - </help> + ]]></help> <citations> </citations> </tool>