comparison proteinortho.xml @ 7:c5dd4f86d981 draft

planemo upload for repository https://gitlab.com/paulklemm_PHD/proteinortho commit 5eba1fb52a5ec1e63ca126be42062323e1a76687
author iuc
date Tue, 23 Jan 2024 12:21:52 +0000
parents 10112d9127af
children
comparison
equal deleted inserted replaced
6:10112d9127af 7:c5dd4f86d981
1 <tool id="proteinortho" name="Proteinortho" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="@PROFILE@"> 1 <tool id="proteinortho" name="Proteinortho" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="@PROFILE@">
2 <description>detects orthologous proteins/genes within different species</description> 2 <description>detects orthologous proteins/genes within different species</description>
3 <macros> 3 <macros>
4 <import>proteinortho_macros.xml</import> 4 <import>proteinortho_macros.xml</import>
5 <xml name="test_output_proteinortho" tokens="nlines"> 5 <xml name="test_output_proteinortho" tokens="nlines" token_nlines_delta="0">
6 <output name="proteinortho"> 6 <output name="proteinortho">
7 <metadata name="column_names" value="species,genes,alg.-conn.,L.fasta,C.fasta,E.fasta,M.fasta"/> 7 <metadata name="column_names" value="species,genes,alg.-conn.,L.fasta,C.fasta,E.fasta,M.fasta"/>
8 <assert_contents> 8 <assert_contents>
9 <has_n_columns n="7"/> 9 <has_n_columns n="7"/>
10 <has_n_lines n="@NLINES@"/> 10 <has_n_lines n="@NLINES@" delta="@NLINES_DELTA@"/>
11 <has_line_matching expression="# Species\tGenes\tAlg\.-Conn\.\t.*"/> 11 <has_line_matching expression="# Species\tGenes\tAlg\.-Conn\.\t.*"/>
12 <has_line_matching expression="[0-9]+\t[0-9]+\t.*"/> 12 <has_line_matching expression="[0-9]+\t[0-9]+\t.*"/>
13 <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+.*"/> 13 <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+.*"/>
14 </assert_contents> 14 </assert_contents>
15 </output> 15 </output>
16 </xml> 16 </xml>
17 <xml name="test_output_blastgraph" tokens="nlines"> 17 <xml name="test_output_blastgraph" tokens="nlines" token_nlines_delta="0">
18 <output name="blastgraph"> 18 <output name="blastgraph">
19 <metadata name="column_names" value="seqidA,seqidB,evalue_ab,bitscore_ab,evalue_ba,bitscore_ba"/> 19 <metadata name="column_names" value="seqidA,seqidB,evalue_ab,bitscore_ab,evalue_ba,bitscore_ba"/>
20 <assert_contents> 20 <assert_contents>
21 <has_n_columns n="6" comment="#"/> 21 <has_n_columns n="6" comment="#"/>
22 <has_n_lines n="@NLINES@"/> 22 <has_n_lines n="@NLINES@" delta="@NLINES_DELTA@"/>
23 <has_line_matching expression="# file_a\tfile_b"/> 23 <has_line_matching expression="# file_a\tfile_b"/>
24 <has_line_matching expression="# a\tb\tevalue_ab\tbitscore_ab\tevalue_ba\tbitscore_ba"/> 24 <has_line_matching expression="# a\tb\tevalue_ab\tbitscore_ab\tevalue_ba\tbitscore_ba"/>
25 <has_line_matching expression="# (C|C2|E|L|M)\.fasta\t(C|C2|E|L|M)\.fasta"/> 25 <has_line_matching expression="# (C|C2|E|L|M)\.fasta\t(C|C2|E|L|M)\.fasta"/>
26 <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+\t(C|C2|E|L|M)_[0-9]+.*"/> 26 <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+\t(C|C2|E|L|M)_[0-9]+.*"/>
27 </assert_contents> 27 </assert_contents>
95 #for $f in $synteny.input_files_syn# 95 #for $f in $synteny.input_files_syn#
96 ${re.sub('[^\w\-_.]', '_', f.element_identifier)} 96 ${re.sub('[^\w\-_.]', '_', f.element_identifier)}
97 #end for# 97 #end for#
98 #end if 98 #end if
99 2> >(sed -E "s/.\[([0-9]{1,2}(;[0-9]{1,2})?)?[mGK]//g" 1>&2) 99 2> >(sed -E "s/.\[([0-9]{1,2}(;[0-9]{1,2})?)?[mGK]//g" 1>&2)
100 #if $more_options.selfblast:
101 &&
102 mv result.blast-graph_clean result.blast-graph;
103 #end if
100 #if $synteny.synteny_options == "specified": 104 #if $synteny.synteny_options == "specified":
101 && 105 &&
102 mv result.poff-graph result.proteinortho-graph && 106 mv result.poff-graph result.proteinortho-graph &&
103 mv result.poff.tsv result.proteinortho.tsv && 107 mv result.poff.tsv result.proteinortho.tsv &&
104 mv result.poff.html result.proteinortho.html ; 108 mv result.poff.html result.proteinortho.html ;
174 </outputs> 178 </outputs>
175 <tests> 179 <tests>
176 <test expect_num_outputs="3"> <!-- test normal --> 180 <test expect_num_outputs="3"> <!-- test normal -->
177 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> 181 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
178 <param name="p" value="diamond"/> 182 <param name="p" value="diamond"/>
179 <expand macro="test_output_proteinortho" nlines="34"/> 183 <expand macro="test_output_proteinortho" nlines="33" nlines_delta="5"/>
180 <expand macro="test_output_blastgraph" nlines="157"/> 184 <expand macro="test_output_blastgraph" nlines="156" nlines_delta="20"/>
181 <expand macro="test_output_proteinorthograph" nlines="134"/> 185 <expand macro="test_output_proteinorthograph" nlines="139" nlines_delta="20"/>
182 <assert_command> 186 <assert_command>
183 <has_text text="--p=diamond"/> 187 <has_text text="--p=diamond"/>
184 </assert_command> 188 </assert_command>
185 </test> 189 </test>
186 <test expect_num_outputs="3"> <!-- various parameter --> 190 <test expect_num_outputs="3"> <!-- various parameter -->
189 <param name="conn" value="1"/> 193 <param name="conn" value="1"/>
190 <param name="sim" value="42"/> 194 <param name="sim" value="42"/>
191 <section name="more_options"> 195 <section name="more_options">
192 <param name="cov" value="42"/> 196 <param name="cov" value="42"/>
193 <param name="identity" value="42"/> 197 <param name="identity" value="42"/>
194 <param name="selfblast" value="true"/>
195 <param name="singles" value="true"/> 198 <param name="singles" value="true"/>
196 <param name="core" value="true"/> 199 <param name="core" value="true"/>
197 </section> 200 </section>
198 <expand macro="test_output_proteinortho" nlines="177"/> 201 <expand macro="test_output_proteinortho" nlines="151" nlines_delta="50"/>
199 <expand macro="test_output_blastgraph" nlines="2720"/> 202 <expand macro="test_output_blastgraph" nlines="1403" nlines_delta="300"/>
200 <expand macro="test_output_proteinorthograph" nlines="384"/> 203 <expand macro="test_output_proteinorthograph" nlines="239" nlines_delta="150"/>
201 <assert_command> 204 <assert_command>
202 <has_text text="--p=diamond"/> 205 <has_text text="--p=diamond"/>
203 </assert_command> 206 </assert_command>
204 </test> 207 </test>
205 <test expect_num_outputs="3"> <!-- synteny --> 208 <test expect_num_outputs="3"> <!-- synteny -->
207 <param name="input_files_syn" value="L.gff,C.gff,E.gff,M.gff"/> 210 <param name="input_files_syn" value="L.gff,C.gff,E.gff,M.gff"/>
208 <param name="p" value="diamond"/> 211 <param name="p" value="diamond"/>
209 <conditional name="synteny"> 212 <conditional name="synteny">
210 <param name="synteny_options" value="specified"/> 213 <param name="synteny_options" value="specified"/>
211 </conditional> 214 </conditional>
212 <expand macro="test_output_proteinortho" nlines="38"/> 215 <expand macro="test_output_proteinortho" nlines="38" nlines_delta="20"/>
213 <expand macro="test_output_blastgraph" nlines="157"/> 216 <expand macro="test_output_blastgraph" nlines="300" nlines_delta="150"/>
214 <expand macro="test_output_proteinorthograph" nlines="119" nlines_delta="10" ncolumns="8" add_columns=",same_strand,simscore"/> 217 <expand macro="test_output_proteinorthograph" nlines="119" nlines_delta="10" ncolumns="8" add_columns=",same_strand,simscore"/>
215 <assert_command> 218 <assert_command>
216 <has_text text="--p=diamond"/> 219 <has_text text="--p=diamond"/>
217 </assert_command> 220 </assert_command>
218 </test> 221 </test>
219 <test expect_num_outputs="3"> <!-- blast --> 222 <test expect_num_outputs="3"> <!-- blast -->
220 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> 223 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
221 <param name="p" value="blastp"/> 224 <param name="p" value="blastp"/>
222 <expand macro="test_output_proteinortho" nlines="32"/> 225 <expand macro="test_output_proteinortho" nlines="33" nlines_delta="20"/>
223 <expand macro="test_output_blastgraph" nlines="158"/> 226 <expand macro="test_output_blastgraph" nlines="155" nlines_delta="50"/>
224 <expand macro="test_output_proteinorthograph" nlines="142"/> 227 <expand macro="test_output_proteinorthograph" nlines="139" nlines_delta="50"/>
225 <assert_command> 228 <assert_command>
226 <has_text text="--p=blastp"/> 229 <has_text text="--p=blastp"/>
227 </assert_command> 230 </assert_command>
228 </test> 231 </test>
229 <test expect_num_outputs="3"> <!-- auto blast --> 232 <test expect_num_outputs="3"> <!-- auto blast -->
230 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> 233 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
231 <param name="p" value="autoblast"/> 234 <param name="p" value="autoblast"/>
232 <expand macro="test_output_proteinortho" nlines="32"/> 235 <expand macro="test_output_proteinortho" nlines="33" nlines_delta="20"/>
233 <expand macro="test_output_blastgraph" nlines="158"/> 236 <expand macro="test_output_blastgraph" nlines="157" nlines_delta="50"/>
234 <expand macro="test_output_proteinorthograph" nlines="142"/> 237 <expand macro="test_output_proteinorthograph" nlines="136" nlines_delta="50"/>
235 <assert_command> 238 <assert_command>
236 <has_text text="--p=autoblast"/> 239 <has_text text="--p=autoblast"/>
237 </assert_command> 240 </assert_command>
238 </test> 241 </test>
239 <test expect_num_outputs="3"> <!-- last --> 242 <test expect_num_outputs="3"> <!-- last -->
240 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> 243 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
241 <param name="p" value="lastp"/> 244 <param name="p" value="lastp"/>
242 <expand macro="test_output_proteinortho" nlines="34"/> 245 <expand macro="test_output_proteinortho" nlines="34" nlines_delta="20"/>
243 <expand macro="test_output_blastgraph" nlines="148"/> 246 <expand macro="test_output_blastgraph" nlines="148" nlines_delta="50"/>
244 <expand macro="test_output_proteinorthograph" nlines="133"/> 247 <expand macro="test_output_proteinorthograph" nlines="134" nlines_delta="50"/>
245 <assert_command> 248 <assert_command>
246 <has_text text="--p=lastp"/> 249 <has_text text="--p=lastp"/>
247 </assert_command> 250 </assert_command>
248 </test> 251 </test>
249 <test expect_num_outputs="3"> <!-- blat --> 252 <test expect_num_outputs="3"> <!-- blat -->
250 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> 253 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/>
251 <param name="p" value="blastp"/> 254 <param name="p" value="blastp"/>
252 <expand macro="test_output_proteinortho" nlines="32"/> 255 <expand macro="test_output_proteinortho" nlines="33" nlines_delta="20"/>
253 <expand macro="test_output_blastgraph" nlines="158"/> 256 <expand macro="test_output_blastgraph" nlines="156" nlines_delta="50"/>
254 <expand macro="test_output_proteinorthograph" nlines="142"/> 257 <expand macro="test_output_proteinorthograph" nlines="136" nlines_delta="50"/>
255 <assert_command> 258 <assert_command>
256 <has_text text="--p=blastp"/> 259 <has_text text="--p=blastp"/>
257 </assert_command> 260 </assert_command>
258 </test> 261 </test>
259 </tests> 262 </tests>
263 266
264 Proteinortho is a tool to detect orthologous proteins/genes within different species (at least 2). 267 Proteinortho is a tool to detect orthologous proteins/genes within different species (at least 2).
265 268
266 | It compares similarities of given gene/protein sequences and clusters them to find significant groups. 269 | It compares similarities of given gene/protein sequences and clusters them to find significant groups.
267 | The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once. 270 | The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once.
268 | Details can be found in (doi:10.1186/1471-2105-12-124). 271 | Details can be found in (doi:10.1186/1471-2105-12-124 and doi:10.3389/fbinf.2023.1322477).
269 | To enhance the prediction accuracy, the relative order of genes (synteny) can be used as an additional feature for the discrimination of orthologs. The corresponding extension, namely PoFF (details see doi:10.1371/journal.pone.0105015), is already built in Proteinortho. 272 | To enhance the prediction accuracy, the relative order of genes (synteny) can be used as an additional feature for the discrimination of orthologs. The corresponding extension, namely PoFF (details see doi:10.1371/journal.pone.0105015), is already built in Proteinortho.
270 273
271 ---- 274 ----
272 275
273 **Proteinortho in a nutshell** 276 **Proteinortho in a nutshell**
351 354
352 More information can be found on GitLab https://gitlab.com/paulklemm_PHD/proteinortho 355 More information can be found on GitLab https://gitlab.com/paulklemm_PHD/proteinortho
353 356
354 **Citations:** 357 **Citations:**
355 358
356 - Lechner, Marcus, et al. "Proteinortho: detection of (co-) orthologs in large-scale analysis." BMC bioinformatics 12.1 (2011): 1-9. (10.1186/1471-2105-12-124)
357 - Lechner, Marcus, et al. "Orthology detection combining clustering and synteny for very large datasets." PLoS one 9.8 (2014): e105015. (10.1371/journal.pone.0105015)
358
359 ]]> 359 ]]>
360 </help> 360 </help>
361 <expand macro="citations" /> <!--- TODO: citations are not working in usegalaxy, therefore they are added manually above. --> 361 <expand macro="citations" /> <!--- TODO: citations are not working in usegalaxy, therefore they are added manually above. -->
362 </tool> 362 </tool>