changeset 0:ef712a5e9834 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/miniprot commit 931e98e27ac60b189e2dfbb1c99767bd17860c5e
author iuc
date Mon, 19 Sep 2022 12:30:10 +0000
parents
children ce04c239454b
files macros.xml miniprot.xml test-data/input_genome.fasta.gz test-data/input_query.fasta.gz
diffstat 4 files changed, 160 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Sep 19 12:30:10 2022 +0000
@@ -0,0 +1,3 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.2</token> <!-- substituted into the tool version and the miniprot package requirement in miniprot.xml -->
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/miniprot.xml	Mon Sep 19 12:30:10 2022 +0000
@@ -0,0 +1,157 @@
+<?xml version="1.0"?>
+<tool id="miniprot" name="Miniprot align" version="@TOOL_VERSION@+galaxy0" profile="21.05">
+    <description>align a protein sequence against a genome with affine gap penalty, splicing and frameshift</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">miniprot</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Align the protein queries to the genome (FASTA or pre-built index); stdout becomes the output dataset.
+        miniprot
+        -t \${GALAXY_SLOTS:-1}
+        #if str($adv.options) == "yes"
+            ## NOTE(review): -S is emitted ahead of the explicit -G, -J and -e values; confirm those do not override it.
+            $adv.mapping.no_splicing
+            -c $adv.mapping.max_kmer
+            -G $adv.mapping.max_intron
+            -n $adv.mapping.min_syncmers
+            -m $adv.mapping.min_chain_score
+            -l $adv.mapping.second_round_kmer_size
+            -e $adv.mapping.max_extension
+            -p $adv.mapping.score_ratio
+            -N $adv.mapping.max_secondary_alignments
+            -O $adv.alignment.gap_open
+            -E $adv.alignment.gap_extension
+            -J $adv.alignment.intron_open
+            -C $adv.alignment.non_canonical_splice
+            -F $adv.alignment.frameshift
+            -B $adv.alignment.end_bonus
+            -K $adv.query_batch_size
+        #end if
+        #if str($db.dbtype) == 'fasta'
+            '$db.genomic_fasta'
+            -k $db.kmer_size -s $db.submer_size -b $db.bits_per_block
+        #else
+            '$db.genomic_db'
+        #end if
+        #if str($output_format) == "gff"
+            --gff
+        #end if
+        '$protein_fasta' > '$output_alignment'
+    ]]></command>
+    <inputs>
+        <conditional name="db"> <!-- genome given either as FASTA (with the -k/-s/-b indexing options) or as a pre-built miniprot index -->
+            <param name="dbtype" type="select" label="Database type" help="Build an index from FASTA or use a pre-indexed database">
+                <option value="fasta" selected="true">FASTA</option>
+                <option value="preindexed">Pre-indexed</option>
+            </param>
+            <when value="fasta">
+                <param name="genomic_fasta" type="data" format="fasta,fasta.gz" label="Genomic sequence (FASTA)" help="Genomic contigs / scaffolds to be aligned against in FASTA format" />
+                <param argument="-k" name="kmer_size" type="integer" min="1" value="6" label="K-mer size" />
+                <param argument="-s" name="submer_size" type="integer" min="1" value="4" label="Submer size" help="Submer size (density: 1/(2*(kmer_size-submer_size)+1))" />
+                <param argument="-b" name="bits_per_block" type="integer" min="1" value="8" label="Bits per block" />
+            </when>
+            <when value="preindexed">
+                <!-- refine the datatype here once Miniprot index data type is in Galaxy -->
+                <param name="genomic_db" type="data" format="binary" label="Pre-indexed genomic database" help="A pre-indexed database built by miniprot" />
+            </when>
+        </conditional>
+        <param name="protein_fasta" type="data" format="fasta,fasta.gz" label="Protein sequence (FASTA)" help="Protein sequences to be aligned in FASTA format" />
+        <param name="output_format" type="select" label="Output format">
+            <option value="gff" selected="true">GFF3</option>
+            <option value="paf">PAF</option>
+        </param>
+        <conditional name="adv"> <!-- mapping, alignment and batching options, only exposed when "Show" is selected -->
+            <param name="options" type="select" label="Advanced options">
+                <option value="yes">Show</option>
+                <option value="no" selected="true">Hide</option>
+            </param>
+            <when value="yes">
+                <section name="mapping" title="Mapping">
+                    <param argument="-S" name="no_splicing" type="boolean" truevalue="-S" falsevalue="" checked="false" label="No splicing" help="No splicing (apply -G1000 -J1000 -e1000)" />
+                    <param argument="-c" name="max_kmer" type="integer" min="1" value="50000" label="Max k-mer occurrences" />
+                    <param argument="-G" name="max_intron" type="integer" min="0" value="200000" label="Max intron size" />
+                    <param argument="-n" name="min_syncmers" type="integer" min="1" value="5" label="Minimum number of syncmers in a chain" />
+                    <param argument="-m" name="min_chain_score" type="integer" min="0" value="0" label="Minimum chaining score" />
+                    <param argument="-l" name="second_round_kmer_size" type="integer" min="1" value="5" label="K-mer size for second round of chaining" />
+                    <param argument="-e" name="max_extension" type="integer" min="0" value="10000" label="Max extension for second round of chaining" />
+                    <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.5" label="Minimum secondary-to-primary score ratio" />
+                    <param argument="-N" name="max_secondary_alignments" type="integer" min="0" value="100" label="Max secondary alignments to consider" />
+                </section>
+                <section name="alignment" title="Alignment">
+                    <param argument="-O" name="gap_open" type="integer" min="0" value="11" label="Gap open penalty" />
+                    <param argument="-E" name="gap_extension" type="integer" min="0" value="1" label="Gap extension penalty" help="A k-long gap costs open_penalty+k*extension_penalty" />
+                    <param argument="-J" name="intron_open" type="integer" min="0" value="31" label="Intron open penalty" />
+                    <param argument="-C" name="non_canonical_splice" type="integer" min="0" value="11" label="Penalty for non-canonical splicing" />
+                    <param argument="-F" name="frameshift" type="integer" min="0" value="15" label="Frameshift penalty" />
+                    <param argument="-B" name="end_bonus" type="integer" min="0" value="5" label="End bonus" />
+                </section>
+                <param argument="-K" name="query_batch_size" type="integer" min="1" value="2000000" label="Query batch size" />
+            </when>
+            <!-- "Hide" exposes no parameters; the command template then emits none of the options from the "yes" branch -->
+            <when value="no" />
+        </conditional>
+    </inputs>
+    <outputs> <!-- one dataset: gff3 by default, switched to paf when the "output_format" input is "paf" -->
+        <data name="output_alignment" format="gff3" label="Miniprot on ${on_string}">
+            <change_format>
+                <when input="output_format" value="paf" format="paf" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1"> <!-- defaults: FASTA genome, GFF3 output, advanced options hidden -->
+            <conditional name="db">
+                <param name="dbtype" value="fasta" />
+                <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" />
+            </conditional>
+            <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
+            <output name="output_alignment" ftype="gff3">
+                <assert_contents>
+                    <has_text text="ID=MP000001;Identity=1.0000;Positive=1.0000;Target=tr|O06302|O06302_MYCTU 1 126" />
+                    <has_text text="Parent=MP000372;Target=tr|V5QPR5|V5QPR5_MYCTU 1 53" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1"> <!-- PAF output: checks the datatype switch and the cs tag content -->
+            <conditional name="db">
+                <param name="dbtype" value="fasta" />
+                <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" />
+            </conditional>
+            <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
+            <param name="output_format" value="paf" />
+            <output name="output_alignment" ftype="paf">
+                <assert_contents>
+                    <has_text text="tr|O06302|O06302_MYCTU" />
+                    <has_text text="cs:Z::29*agcG:3*gtgA:5*ccgA:9*accS:1*gccV:4*cagL:1*gtcS:3*gtcA*gtcI*accA*gccG:8*gccS:2*ggtA:5*gccI*agcG:1*ctgA:4*gccV:5*gggL:1*gtgS:2" />
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1"> <!-- advanced options shown: every mapping/alignment flag is passed explicitly -->
+            <conditional name="db">
+                <param name="dbtype" value="fasta" />
+                <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" />
+            </conditional>
+            <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
+            <param name="output_format" value="gff" />
+            <conditional name="adv">
+                <param name="options" value="yes" />
+                <section name="mapping"> <!-- mirrors the input nesting (conditional adv, section mapping) so the value binds to the -l parameter -->
+                    <param name="second_round_kmer_size" value="4" /> <!-- NOTE(review): was 32 but sat outside the section; 4 is a modest non-default value, confirm -->
+                </section>
+            </conditional>
+            <output name="output_alignment" ftype="gff3">
+                <assert_contents><has_text text="##gff-version 3" /></assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        `miniprot <https://github.com/lh3/miniprot>`_ rapidly aligns a protein sequence against a genome
+        with affine gap penalty, splicing and frameshift.
+        It is primarily intended for annotating protein-coding genes in a new species using known genes from other species.
+
+        **NOTE:** miniprot is in the early stages of development and should be considered experimental.
+    ]]></help>
+</tool>
Binary file test-data/input_genome.fasta.gz has changed
Binary file test-data/input_query.fasta.gz has changed