Mercurial > repos > iuc > miniprot
comparison miniprot.xml @ 0:ef712a5e9834 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/miniprot commit 931e98e27ac60b189e2dfbb1c99767bd17860c5e
author | iuc |
---|---|
date | Mon, 19 Sep 2022 12:30:10 +0000 |
parents | |
children | ce04c239454b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ef712a5e9834 |
---|---|
1 <?xml version="1.0"?> | |
2 <tool id="miniprot" name="Miniprot align" version="@TOOL_VERSION@+galaxy0" profile="21.05"> | |
3 <description>align a protein sequence against a genome with affine gap penalty, splicing and frameshift</description> | |
4 <macros> | |
5 <import>macros.xml</import> | |
6 </macros> | |
7 <requirements> | |
8 <requirement type="package" version="@TOOL_VERSION@">miniprot</requirement> | |
9 </requirements> | |
10 <command detect_errors="exit_code"><![CDATA[ | |
11 miniprot -t \${GALAXY_SLOTS:-1} | |
12 #if str($adv.options) == "yes" | |
13 $adv.mapping.no_splicing | |
14 -c $adv.mapping.max_kmer | |
15 -G $adv.mapping.max_intron | |
16 -n $adv.mapping.min_syncmers | |
17 -m $adv.mapping.min_chain_score | |
18 -l $adv.mapping.second_round_kmer_size | |
19 -e $adv.mapping.max_extension | |
20 -p $adv.mapping.score_ratio | |
21 -N $adv.mapping.max_secondary_alignments | |
22 -O $adv.alignment.gap_open | |
23 -E $adv.alignment.gap_extension | |
24 -J $adv.alignment.intron_open | |
25 -C $adv.alignment.non_canonical_splice | |
26 -F $adv.alignment.frameshift | |
27 -B $adv.alignment.end_bonus | |
28 ## query_batch_size is declared directly under the "adv" conditional (outside the mapping/alignment sections) | |
29 -K $adv.query_batch_size | |
30 #end if | |
31 #if str($db.dbtype) == 'fasta' | |
32 '$db.genomic_fasta' | |
33 -k $db.kmer_size | |
34 -s $db.submer_size | |
35 -b $db.bits_per_block | |
36 #else | |
37 '$db.genomic_db' | |
38 #end if | |
39 #if str($output_format) == "gff" | |
40 --gff | |
41 #end if | |
42 '$protein_fasta' > '$output_alignment' | |
43 ]]></command> | |
44 <inputs> | |
45 <conditional name="db"> <!-- reference genome: FASTA (takes the -k/-s/-b indexing options) or a pre-built miniprot index --> | |
46 <param name="dbtype" type="select" label="Database type" help="Build an index from FASTA or use a pre-indexed database"> | |
47 <option value="fasta" selected="true">FASTA</option> | |
48 <option value="preindexed">Pre-indexed</option> | |
49 </param> | |
50 <when value="fasta"> | |
51 <param name="genomic_fasta" type="data" format="fasta,fasta.gz" label="Genomic sequence (FASTA)" help="Genomic contigs / scaffolds to be aligned against in FASTA format" /> | |
52 <param argument="-k" name="kmer_size" type="integer" min="1" value="6" label="K-mer size" /> | |
53 <param argument="-s" name="submer_size" type="integer" min="1" value="4" label="Submer size" help="Submer size (density: 1/(2*(kmer_size-submer_size)+1))" /> | |
54 <param argument="-b" name="bits_per_block" type="integer" min="1" value="8" label="Bits per block" /> | |
55 </when> | |
56 <when value="preindexed"> | |
57 <!-- refine the datatype here once Miniprot index data type is in Galaxy --> | |
58 <param name="genomic_db" type="data" format="binary" label="Pre-indexed genomic database" help="A pre-indexed database built by miniprot" /> | |
59 </when> | |
60 </conditional> | |
61 <param name="protein_fasta" type="data" format="fasta,fasta.gz" label="Protein sequence (FASTA)" help="Protein sequences to be aligned in FASTA format" /> | |
62 <param name="output_format" type="select" label="Output format" > <!-- "gff" adds the gff flag to the command line; "paf" adds nothing --> | |
63 <option value="gff" selected="true">GFF3</option> | |
64 <option value="paf">PAF</option> | |
65 </param> | |
66 <conditional name="adv"> <!-- advanced tuning; hidden by default, in which case no tuning flags are emitted --> | |
67 <param name="options" type="select" label="Advanced options"> | |
68 <option value="yes">Show</option> | |
69 <option value="no" selected="true">Hide</option> | |
70 </param> | |
71 <when value="yes"> | |
72 <section name="mapping" title="Mapping"> | |
73 <param argument="-S" name="no_splicing" type="boolean" truevalue="-S" falsevalue="" checked="false" label="No splicing" help="No splicing (apply -G1000 -J1000 -e1000)" /> | |
74 <param argument="-c" name="max_kmer" type="integer" min="1" value="50000" label="Max k-mer occurrences" /> | |
75 <param argument="-G" name="max_intron" type="integer" min="0" value="200000" label="Max intron size" /> | |
76 <param argument="-n" name="min_syncmers" type="integer" min="1" value="5" label="Minimum number of syncmers in a chain" /> | |
77 <param argument="-m" name="min_chain_score" type="integer" min="0" value="0" label="Minimum chaining score" /> | |
78 <param argument="-l" name="second_round_kmer_size" type="integer" min="1" value="5" label="K-mer size for second round of chaining" /> | |
79 <param argument="-e" name="max_extension" type="integer" min="0" value="10000" label="Max extension for second round of chaining" /> | |
80 <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.5" label="Minimum secondary-to-primary score ratio" /> | |
81 <param argument="-N" name="max_secondary_alignments" type="integer" min="0" value="100" label="Max secondary alignments to consider" /> | |
82 </section> | |
83 <section name="alignment" title="Alignment"> | |
84 <param argument="-O" name="gap_open" type="integer" min="0" value="11" label="Gap open penalty" /> | |
85 <param argument="-E" name="gap_extension" type="integer" min="0" value="1" label="Gap extension penalty" help="A k-long gap costs open_penalty+k*extension_penalty" /> | |
86 <param argument="-J" name="intron_open" type="integer" min="0" value="31" label="Intron open penalty" /> | |
87 <param argument="-C" name="non_canonical_splice" type="integer" min="0" value="11" label="Penalty for non-canonical splicing" /> | |
88 <param argument="-F" name="frameshift" type="integer" min="0" value="15" label="Frameshift penalty" /> | |
89 <param argument="-B" name="end_bonus" type="integer" min="0" value="5" label="End bonus" /> | |
90 </section> | |
91 <param argument="-K" name="query_batch_size" type="integer" min="1" value="2000000" label="Query batch size" /> <!-- outside both sections: addressed as $adv.query_batch_size --> | |
92 </when> | |
93 <when value="no"> | |
94 </when> | |
95 </conditional> | |
96 </inputs> | |
97 <outputs> | |
98 <data name="output_alignment" label="Miniprot on ${on_string}" format="gff3"> <!-- GFF3 by default; switched to PAF below when output_format is "paf" --> | |
99 <change_format> | |
100 <when format="paf" input="output_format" value="paf" /> | |
101 </change_format> | |
102 </data> | |
103 </outputs> | |
104 <tests> | |
105 <test expect_num_outputs="1"> <!-- FASTA reference, default output format (GFF3) --> | |
106 <conditional name="db"> | |
107 <param name="dbtype" value="fasta" /> | |
108 <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" /> | |
109 </conditional> | |
110 <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" /> | |
111 <output name="output_alignment" ftype="gff3"> | |
112 <assert_contents> | |
113 <has_text text="ID=MP000001;Identity=1.0000;Positive=1.0000;Target=tr|O06302|O06302_MYCTU 1 126" /> | |
114 <has_text text="Parent=MP000372;Target=tr|V5QPR5|V5QPR5_MYCTU 1 53" /> | |
115 </assert_contents> | |
116 </output> | |
117 </test> | |
118 <test expect_num_outputs="1"> <!-- FASTA reference, PAF output (exercises change_format) --> | |
119 <conditional name="db"> | |
120 <param name="dbtype" value="fasta" /> | |
121 <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" /> | |
122 </conditional> | |
123 <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" /> | |
124 <param name="output_format" value="paf" /> | |
125 <output name="output_alignment" ftype="paf"> | |
126 <assert_contents> | |
127 <has_text text="tr|O06302|O06302_MYCTU" /> | |
128 <has_text text="cs:Z::29*agcG:3*gtgA:5*ccgA:9*accS:1*gccV:4*cagL:1*gtcS:3*gtcA*gtcI*accA*gccG:8*gccS:2*ggtA:5*gccI*agcG:1*ctgA:4*gccV:5*gggL:1*gtgS:2" /> | |
129 </assert_contents> | |
130 </output> | |
131 </test> | |
132 <test expect_num_outputs="1"> <!-- advanced options enabled, overriding one mapping parameter --> | |
133 <conditional name="db"> | |
134 <param name="dbtype" value="fasta" /> | |
135 <param name="genomic_fasta" value="input_genome.fasta.gz" ftype="fasta" /> | |
136 </conditional> | |
137 <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" /> | |
138 <param name="output_format" value="gff" /> | |
139 <conditional name="adv"> | |
140 <param name="options" value="yes" /> | |
141 <section name="mapping"><param name="second_round_kmer_size" value="32" /></section> <!-- nested to mirror the input tree: adv > mapping --> | |
142 </conditional> | |
143 <output name="output_alignment" ftype="gff3"> | |
144 <assert_contents> | |
145 <has_text text="##gff-version 3" /> | |
146 </assert_contents> | |
147 </output> | |
148 </test> | |
149 </tests> | |
150 <help><![CDATA[ | |
151 miniprot_ rapidly aligns a protein sequence against a genome with affine gap penalty, splicing and frameshift. It is primarily intended for annotating protein-coding genes in a new species using known genes from other species. | |
152 | |
153 **NOTE:** miniprot is in the early stages of development and should be considered experimental at this stage. | |
154 | |
155 .. _miniprot: https://github.com/lh3/miniprot | |
156 ]]></help> | |
157 </tool> |