Mercurial > repos > iuc > miniprot
view miniprot.xml @ 0:ef712a5e9834 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/miniprot commit 931e98e27ac60b189e2dfbb1c99767bd17860c5e
author | iuc |
---|---|
date | Mon, 19 Sep 2022 12:30:10 +0000 |
parents | |
children | ce04c239454b |
line wrap: on
line source
<?xml version="1.0"?>
<!--
    Galaxy tool wrapper for miniprot: aligns protein sequences against a genome,
    either building the index on the fly from a FASTA file or using a
    pre-built index, and writes the alignments as GFF3 or PAF.

    The @TOOL_VERSION@ token is not defined in this file; it is presumably
    provided by the imported macros.xml (confirm there).
-->
<tool id="miniprot" name="Miniprot align" version="@TOOL_VERSION@+galaxy0" profile="21.05">
    <description>align a protein sequence against a genome with affine gap penalty, splicing and frameshift</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">miniprot</requirement>
    </requirements>
    <!--
        Command layout:
          1. thread count taken from GALAXY_SLOTS (falls back to 1)
          2. advanced mapping / alignment / batching flags, only emitted when
             the "adv" conditional is set to "yes"
          3. the target: a genomic FASTA plus indexing flags, or a pre-built index
          4. the GFF3 output switch when GFF3 is selected (otherwise no format flag is passed)
          5. the protein query, with stdout redirected to the output dataset
    -->
    <command detect_errors="exit_code"><![CDATA[
        miniprot
            -t \${GALAXY_SLOTS:-1}
            #if str($adv.options) == "yes"
                $adv.mapping.no_splicing
                -c $adv.mapping.max_kmer
                -G $adv.mapping.max_intron
                -n $adv.mapping.min_syncmers
                -m $adv.mapping.min_chain_score
                -l $adv.mapping.second_round_kmer_size
                -e $adv.mapping.max_extension
                -p $adv.mapping.score_ratio
                -N $adv.mapping.max_secondary_alignments
                -O $adv.alignment.gap_open
                -E $adv.alignment.gap_extension
                -J $adv.alignment.intron_open
                -C $adv.alignment.non_canonical_splice
                -F $adv.alignment.frameshift
                -B $adv.alignment.end_bonus
                -K $adv.query_batch_size
            #end if
            #if str($db.dbtype) == 'fasta'
                '$db.genomic_fasta'
                -k $db.kmer_size
                -s $db.submer_size
                -b $db.bits_per_block
            #else
                '$db.genomic_db'
            #end if
            #if str($output_format) == "gff"
                --gff
            #end if
            '$protein_fasta'
            >'$output_alignment'
    ]]></command>
    <inputs>
        <!-- Target genome: raw FASTA (indexed on the fly) or an existing miniprot index. -->
        <conditional name="db">
            <param name="dbtype" type="select" label="Database type" help="Build an index from FASTA or use a pre-indexed database">
                <option value="fasta" selected="true">FASTA</option>
                <option value="preindexed">Pre-indexed</option>
            </param>
            <when value="fasta">
                <param name="genomic_fasta" type="data" format="fasta,fasta.gz" label="Genomic sequence (FASTA)" help="Genomic contigs / scaffolds to be aligned against in FASTA format" />
                <param argument="-k" name="kmer_size" type="integer" min="1" value="6" label="K-mer size" />
                <param argument="-s" name="submer_size" type="integer" min="1" value="4" label="Submer size" help="Submer size (density: 1/(2*(kmer_size-submer_size)+1))" />
                <param argument="-b" name="bits_per_block" type="integer" min="1" value="8" label="Bits per block" />
            </when>
            <when value="preindexed">
                <!-- refine the datatype here once Miniprot index data type is in Galaxy -->
                <param name="genomic_db" type="data" format="binary" label="Pre-indexed genomic database" help="A pre-indexed database built by miniprot" />
            </when>
        </conditional>
        <param name="protein_fasta" type="data" format="fasta,fasta.gz" label="Protein sequence (FASTA)" help="Protein sequences to be aligned in FASTA format" />
        <param name="output_format" type="select" label="Output format" >
            <option value="gff" selected="true">GFF3</option>
            <option value="paf">PAF</option>
        </param>
        <!-- Advanced flags are only put on the command line when "Show" is selected. -->
        <conditional name="adv">
            <param name="options" type="select" label="Advanced options">
                <option value="yes">Show</option>
                <option value="no" selected="true">Hide</option>
            </param>
            <when value="yes">
                <section name="mapping" title="Mapping">
                    <param argument="-S" name="no_splicing" type="boolean" truevalue="-S" falsevalue="" checked="false" label="No splicing" help="No splicing (apply -G1000 -J1000 -e1000)" />
                    <param argument="-c" name="max_kmer" type="integer" min="1" value="50000" label="Max k-mer occurrences" />
                    <param argument="-G" name="max_intron" type="integer" min="0" value="200000" label="Max intron size" />
                    <param argument="-n" name="min_syncmers" type="integer" min="1" value="5" label="Minimum number of syncmers in a chain" />
                    <param argument="-m" name="min_chain_score" type="integer" min="0" value="0" label="Minimum chaining score" />
                    <param argument="-l" name="second_round_kmer_size" type="integer" min="1" value="5" label="K-mer size for second round of chaining" />
                    <param argument="-e" name="max_extension" type="integer" min="0" value="10000" label="Max extension for second round of chaining" />
                    <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.5" label="Minimum secondary-to-primary score ratio" />
                    <param argument="-N" name="max_secondary_alignments" type="integer" min="0" value="100" label="Max secondary alignments to consider" />
                </section>
                <section name="alignment" title="Alignment">
                    <param argument="-O" name="gap_open" type="integer" min="0" value="11" label="Gap open penalty" />
                    <param argument="-E" name="gap_extension" type="integer" min="0" value="1" label="Gap extension penalty" help="A k-long gap costs open_penalty+k*extension_penalty" />
                    <param argument="-J" name="intron_open" type="integer" min="0" value="31" label="Intron open penalty" />
                    <param argument="-C" name="non_canonical_splice" type="integer" min="0" value="11" label="Penalty for non-canonical splicing" />
                    <param argument="-F" name="frameshift" type="integer" min="0" value="15" label="Frameshift penalty" />
                    <param argument="-B" name="end_bonus" type="integer" min="0" value="5" label="End bonus" />
                </section>
                <!-- Sits directly under the conditional (not in a section), hence $adv.query_batch_size in the command. -->
                <param argument="-K" name="query_batch_size" type="integer" min="1" value="2000000" label="Query batch size" />
            </when>
            <when value="no">
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Declared as GFF3 by default; switched to PAF when that output format is selected. -->
        <data name="output_alignment" format="gff3" label="Miniprot on ${on_string}">
            <change_format>
                <when input="output_format" value="paf" format="paf" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Default settings: FASTA target, GFF3 output. -->
        <test expect_num_outputs="1">
            <conditional name="db">
                <param name="dbtype" value="fasta" />
                <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" />
            </conditional>
            <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
            <output name="output_alignment" ftype="gff3">
                <assert_contents>
                    <has_text text="ID=MP000001;Identity=1.0000;Positive=1.0000;Target=tr|O06302|O06302_MYCTU 1 126" />
                    <has_text text="Parent=MP000372;Target=tr|V5QPR5|V5QPR5_MYCTU 1 53" />
                </assert_contents>
            </output>
        </test>
        <!-- PAF output: also checks that the output datatype follows the selection. -->
        <test expect_num_outputs="1">
            <conditional name="db">
                <param name="dbtype" value="fasta" />
                <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" />
            </conditional>
            <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
            <param name="output_format" value="paf" />
            <output name="output_alignment" ftype="paf">
                <assert_contents>
                    <has_text text="tr|O06302|O06302_MYCTU" />
                    <has_text text="cs:Z::29*agcG:3*gtgA:5*ccgA:9*accS:1*gccV:4*cagL:1*gtcS:3*gtcA*gtcI*accA*gccG:8*gccS:2*ggtA:5*gccI*agcG:1*ctgA:4*gccV:5*gggL:1*gtgS:2" />
                </assert_contents>
            </output>
        </test>
        <!--
            Advanced options enabled: every advanced flag (including the query
            batch size) is placed on the command line with its default value,
            except the overridden second-round k-mer size.
        -->
        <test expect_num_outputs="1">
            <conditional name="db">
                <param name="dbtype" value="fasta" />
                <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" />
            </conditional>
            <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
            <param name="output_format" value="gff" />
            <conditional name="adv">
                <param name="options" value="yes" />
                <section name="mapping">
                    <param name="second_round_kmer_size" value="32" />
                </section>
            </conditional>
            <output name="output_alignment" ftype="gff3">
                <assert_contents>
                    <has_text text="##gff-version 3" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
miniprot_ rapidly aligns a protein sequence against a genome with affine gap penalty, splicing and frameshift.
It is primarily intended for annotating protein-coding genes in a new species using known genes from other species.

**NOTE:** miniprot is in the early stages of development and should be considered experimental at this stage.

.. _miniprot: https://github.com/lh3/miniprot
    ]]></help>
</tool>