comparison miniprot.xml @ 1:ce04c239454b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/miniprot commit fbe24c1f66f23448d48a61c84a93cb73e0dbc779
author iuc
date Fri, 23 Sep 2022 22:35:23 +0000
parents ef712a5e9834
children d518cf04b55c
comparison
equal deleted inserted replaced
0:ef712a5e9834 1:ce04c239454b
24 -E $adv.alignment.gap_extension 24 -E $adv.alignment.gap_extension
25 -J $adv.alignment.intron_open 25 -J $adv.alignment.intron_open
26 -C $adv.alignment.non_canonical_splice 26 -C $adv.alignment.non_canonical_splice
27 -F $adv.alignment.frameshift 27 -F $adv.alignment.frameshift
28 -B $adv.alignment.end_bonus 28 -B $adv.alignment.end_bonus
29 #if str($adv.output.prefix) != 'MP'
30 -P '$adv.output.prefix'
31 #end if
32 $adv.output.print_unmapped_proteins
33 --outn=$adv.output.outputs_per_query
29 #end if 34 #end if
30 #if str($db.dbtype) == 'fasta' 35 #if str($db.dbtype) == 'fasta'
31 '$db.genomic_fasta' 36 '$db.genomic_fasta'
32 -k $db.kmer_size 37 -k $db.kmer_size
33 -s $db.submer_size 38 -s $db.submer_size
71 <when value="yes"> 76 <when value="yes">
72 <section name="mapping" title="Mapping"> 77 <section name="mapping" title="Mapping">
73 <param argument="-S" name="no_splicing" type="boolean" truevalue="-S" falsevalue="" checked="false" label="No splicing" help="No splicing (apply -G1000 -J1000 -e1000)" /> 78 <param argument="-S" name="no_splicing" type="boolean" truevalue="-S" falsevalue="" checked="false" label="No splicing" help="No splicing (apply -G1000 -J1000 -e1000)" />
74 <param argument="-c" name="max_kmer" type="integer" min="1" value="50000" label="Max k-mer occurrences" /> 79 <param argument="-c" name="max_kmer" type="integer" min="1" value="50000" label="Max k-mer occurrences" />
75 <param argument="-G" name="max_intron" type="integer" min="0" value="200000" label="Max intron size" /> 80 <param argument="-G" name="max_intron" type="integer" min="0" value="200000" label="Max intron size" />
81 <!-- the -w option is mentioned in the help text but apparently not implemented: https://github.com/lh3/miniprot/issues/12 -->
82 <!-- <param argument="-w" name="log_gap_penalty_weight" type="float" value="0.75" label="Log gap penalty weight" /> -->
76 <param argument="-n" name="min_syncmers" type="integer" min="1" value="5" label="Minimum number of syncmers in a chain" /> 83 <param argument="-n" name="min_syncmers" type="integer" min="1" value="5" label="Minimum number of syncmers in a chain" />
77 <param argument="-m" name="min_chain_score" type="integer" min="0" value="0" label="Minimum chaining score" /> 84 <param argument="-m" name="min_chain_score" type="integer" min="0" value="0" label="Minimum chaining score" />
78 <param argument="-l" name="second_round_kmer_size" type="integer" min="1" value="5" label="K-mer size for second round of chaining" /> 85 <param argument="-l" name="second_round_kmer_size" type="integer" min="1" value="5" label="K-mer size for second round of chaining" />
79 <param argument="-e" name="max_extension" type="integer" min="0" value="10000" label="Max extension for second round of chaining" /> 86 <param argument="-e" name="max_extension" type="integer" min="0" value="10000" label="Max extension for second round of chaining" />
80 <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.5" label="Minimum secondary-to-primary score ratio" /> 87 <param argument="-p" name="score_ratio" type="float" min="0" max="1" value="0.7" label="Minimum secondary-to-primary score ratio" />
81 <param argument="-N" name="max_secondary_alignments" type="integer" min="0" value="100" label="Max secondary alignments to consider" /> 88 <param argument="-N" name="max_secondary_alignments" type="integer" min="0" value="50" label="Max secondary alignments to consider" />
82 </section> 89 </section>
83 <section name="alignment" title="Alignment"> 90 <section name="alignment" title="Alignment">
84 <param argument="-O" name="gap_open" type="integer" min="0" value="11" label="Gap open penalty" /> 91 <param argument="-O" name="gap_open" type="integer" min="0" value="11" label="Gap open penalty" />
85 <param argument="-E" name="gap_extension" type="integer" min="0" value="1" label="Gap extension penalty" help="A k-long gap costs open_penalty+k*extension_penalty" /> 92 <param argument="-E" name="gap_extension" type="integer" min="0" value="1" label="Gap extension penalty" help="A k-long gap costs open_penalty+k*extension_penalty" />
86 <param argument="-J" name="intron_open" type="integer" min="0" value="31" label="Intron open penalty" /> 93 <param argument="-J" name="intron_open" type="integer" min="0" value="31" label="Intron open penalty" />
87 <param argument="-C" name="non_canonical_splice" type="integer" min="0" value="11" label="Penalty for non-canonical splicing" /> 94 <param argument="-C" name="non_canonical_splice" type="integer" min="0" value="11" label="Penalty for non-canonical splicing" />
88 <param argument="-F" name="frameshift" type="integer" min="0" value="15" label="Frameshift penalty" /> 95 <param argument="-F" name="frameshift" type="integer" min="0" value="17" label="Frameshift penalty" />
89 <param argument="-B" name="end_bonus" type="integer" min="0" value="5" label="End bonus" /> 96 <param argument="-B" name="end_bonus" type="integer" min="0" value="5" label="End bonus" />
97 </section>
98 <section name="output" title="Output">
99 <param argument="-P" name="prefix" type="text" label="Prefix for IDs in GFF3 output" value="MP">
100 <sanitizer invalid_char="">
101 <valid initial="string.ascii_letters,string.digits">
102 <add value="_" />
103 <add value="-" />
104 </valid>
105 </sanitizer>
106 </param>
107 <param argument="-u" name="print_unmapped_proteins" type="boolean" truevalue="-u" falsevalue="" label="Print unmapped proteins" checked="false" />
108 <param argument="--outn" name="outputs_per_query" type="integer" min="0" value="100" label="Outputs per query" help="The number of outputs will be the minimum of this and the max secondary alignments option" />
90 </section> 109 </section>
91 <param argument="-K" name="query_batch_size" type="integer" min="1" value="2000000" label="Query batch size" /> 110 <param argument="-K" name="query_batch_size" type="integer" min="1" value="2000000" label="Query batch size" />
92 </when> 111 </when>
93 <when value="no"> 112 <when value="no">
94 </when> 113 </when>
109 </conditional> 128 </conditional>
110 <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" /> 129 <param name="protein_fasta" value="input_query.fasta.gz" ftype="fasta" />
111 <output name="output_alignment" ftype="gff3"> 130 <output name="output_alignment" ftype="gff3">
112 <assert_contents> 131 <assert_contents>
113 <has_text text="ID=MP000001;Identity=1.0000;Positive=1.0000;Target=tr|O06302|O06302_MYCTU 1 126" /> 132 <has_text text="ID=MP000001;Identity=1.0000;Positive=1.0000;Target=tr|O06302|O06302_MYCTU 1 126" />
114 <has_text text="Parent=MP000372;Target=tr|V5QPR5|V5QPR5_MYCTU 1 53" /> 133 <has_text text="ID=MP000359;Identity=0.9811;Positive=1.0000;Target=tr|V5QPR5|V5QPR5_MYCTU 1 53" />
115 </assert_contents> 134 </assert_contents>
116 </output> 135 </output>
117 </test> 136 </test>
118 <test expect_num_outputs="1"> 137 <test expect_num_outputs="1">
119 <conditional name="db"> 138 <conditional name="db">
149 </tests> 168 </tests>
150 <help><![CDATA[ 169 <help><![CDATA[
151 miniprot_ rapidly aligns a protein sequence against a genome with affine gap penalty, splicing and frameshift. 170 miniprot_ rapidly aligns a protein sequence against a genome with affine gap penalty, splicing and frameshift.
152 It is primarily intended for annotating protein-coding genes in a new species using known genes from other species. 171 It is primarily intended for annotating protein-coding genes in a new species using known genes from other species.
153 172
154 **NOTE:** miniprot is in the early stages of development and should be considered experimental at this stage. 173 While an index of the genome to be mapped to can be built "on the fly", the Miniprot index tool can pre-index a genome
174 and will result in faster performance if the genome index is reused multiple times.
175
176 For details of the algorithm and some insight into how parameters can be tuned see this overview_.
177
155 .. _miniprot: https://github.com/lh3/miniprot 178 .. _miniprot: https://github.com/lh3/miniprot
179 .. _overview: https://github.com/lh3/miniprot#algorithm-overview
156 ]]></help> 180 ]]></help>
181 <citations>
182 <citation type="bibtex"><![CDATA[
183 @misc{Li2022,
184 author = {Li, Heng},
185 title = {miniprot},
186 year = {2022},
187 publisher = {GitHub},
188 journal = {GitHub repository},
189 howpublished = {\url{https://github.com/lh3/miniprot}},
190 commit = {b442b7a6b60dbd15f460ea9af75fa0b7293d4a8c}
191 }
192 ]]></citation>
193 </citations>
157 </tool> 194 </tool>