Mercurial > repos > iuc > proteinortho
comparison proteinortho.xml @ 7:c5dd4f86d981 draft
planemo upload for repository https://gitlab.com/paulklemm_PHD/proteinortho commit 5eba1fb52a5ec1e63ca126be42062323e1a76687
author | iuc |
---|---|
date | Tue, 23 Jan 2024 12:21:52 +0000 |
parents | 10112d9127af |
children |
comparison
equal
deleted
inserted
replaced
6:10112d9127af | 7:c5dd4f86d981 |
---|---|
1 <tool id="proteinortho" name="Proteinortho" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="@PROFILE@"> | 1 <tool id="proteinortho" name="Proteinortho" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="@PROFILE@"> |
2 <description>detects orthologous proteins/genes within different species</description> | 2 <description>detects orthologous proteins/genes within different species</description> |
3 <macros> | 3 <macros> |
4 <import>proteinortho_macros.xml</import> | 4 <import>proteinortho_macros.xml</import> |
5 <xml name="test_output_proteinortho" tokens="nlines"> | 5 <xml name="test_output_proteinortho" tokens="nlines" token_nlines_delta="0"> |
6 <output name="proteinortho"> | 6 <output name="proteinortho"> |
7 <metadata name="column_names" value="species,genes,alg.-conn.,L.fasta,C.fasta,E.fasta,M.fasta"/> | 7 <metadata name="column_names" value="species,genes,alg.-conn.,L.fasta,C.fasta,E.fasta,M.fasta"/> |
8 <assert_contents> | 8 <assert_contents> |
9 <has_n_columns n="7"/> | 9 <has_n_columns n="7"/> |
10 <has_n_lines n="@NLINES@"/> | 10 <has_n_lines n="@NLINES@" delta="@NLINES_DELTA@"/> |
11 <has_line_matching expression="# Species\tGenes\tAlg\.-Conn\.\t.*"/> | 11 <has_line_matching expression="# Species\tGenes\tAlg\.-Conn\.\t.*"/> |
12 <has_line_matching expression="[0-9]+\t[0-9]+\t.*"/> | 12 <has_line_matching expression="[0-9]+\t[0-9]+\t.*"/> |
13 <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+.*"/> | 13 <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+.*"/> |
14 </assert_contents> | 14 </assert_contents> |
15 </output> | 15 </output> |
16 </xml> | 16 </xml> |
17 <xml name="test_output_blastgraph" tokens="nlines"> | 17 <xml name="test_output_blastgraph" tokens="nlines" token_nlines_delta="0"> |
18 <output name="blastgraph"> | 18 <output name="blastgraph"> |
19 <metadata name="column_names" value="seqidA,seqidB,evalue_ab,bitscore_ab,evalue_ba,bitscore_ba"/> | 19 <metadata name="column_names" value="seqidA,seqidB,evalue_ab,bitscore_ab,evalue_ba,bitscore_ba"/> |
20 <assert_contents> | 20 <assert_contents> |
21 <has_n_columns n="6" comment="#"/> | 21 <has_n_columns n="6" comment="#"/> |
22 <has_n_lines n="@NLINES@"/> | 22 <has_n_lines n="@NLINES@" delta="@NLINES_DELTA@"/> |
23 <has_line_matching expression="# file_a\tfile_b"/> | 23 <has_line_matching expression="# file_a\tfile_b"/> |
24 <has_line_matching expression="# a\tb\tevalue_ab\tbitscore_ab\tevalue_ba\tbitscore_ba"/> | 24 <has_line_matching expression="# a\tb\tevalue_ab\tbitscore_ab\tevalue_ba\tbitscore_ba"/> |
25 <has_line_matching expression="# (C|C2|E|L|M)\.fasta\t(C|C2|E|L|M)\.fasta"/> | 25 <has_line_matching expression="# (C|C2|E|L|M)\.fasta\t(C|C2|E|L|M)\.fasta"/> |
26 <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+\t(C|C2|E|L|M)_[0-9]+.*"/> | 26 <has_line_matching expression=".*(C|C2|E|L|M)_[0-9]+\t(C|C2|E|L|M)_[0-9]+.*"/> |
27 </assert_contents> | 27 </assert_contents> |
95 #for $f in $synteny.input_files_syn# | 95 #for $f in $synteny.input_files_syn# |
96 ${re.sub('[^\w\-_.]', '_', f.element_identifier)} | 96 ${re.sub('[^\w\-_.]', '_', f.element_identifier)} |
97 #end for# | 97 #end for# |
98 #end if | 98 #end if |
99 2> >(sed -E "s/.\[([0-9]{1,2}(;[0-9]{1,2})?)?[mGK]//g" 1>&2) | 99 2> >(sed -E "s/.\[([0-9]{1,2}(;[0-9]{1,2})?)?[mGK]//g" 1>&2) |
100 #if $more_options.selfblast: | |
101 && | |
102 mv result.blast-graph_clean result.blast-graph; | |
103 #end if | |
100 #if $synteny.synteny_options == "specified": | 104 #if $synteny.synteny_options == "specified": |
101 && | 105 && |
102 mv result.poff-graph result.proteinortho-graph && | 106 mv result.poff-graph result.proteinortho-graph && |
103 mv result.poff.tsv result.proteinortho.tsv && | 107 mv result.poff.tsv result.proteinortho.tsv && |
104 mv result.poff.html result.proteinortho.html ; | 108 mv result.poff.html result.proteinortho.html ; |
174 </outputs> | 178 </outputs> |
175 <tests> | 179 <tests> |
176 <test expect_num_outputs="3"> <!-- test normal --> | 180 <test expect_num_outputs="3"> <!-- test normal --> |
177 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> | 181 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> |
178 <param name="p" value="diamond"/> | 182 <param name="p" value="diamond"/> |
179 <expand macro="test_output_proteinortho" nlines="34"/> | 183 <expand macro="test_output_proteinortho" nlines="33" nlines_delta="5"/> |
180 <expand macro="test_output_blastgraph" nlines="157"/> | 184 <expand macro="test_output_blastgraph" nlines="156" nlines_delta="20"/> |
181 <expand macro="test_output_proteinorthograph" nlines="134"/> | 185 <expand macro="test_output_proteinorthograph" nlines="139" nlines_delta="20"/> |
182 <assert_command> | 186 <assert_command> |
183 <has_text text="--p=diamond"/> | 187 <has_text text="--p=diamond"/> |
184 </assert_command> | 188 </assert_command> |
185 </test> | 189 </test> |
186 <test expect_num_outputs="3"> <!-- various parameter --> | 190 <test expect_num_outputs="3"> <!-- various parameter --> |
189 <param name="conn" value="1"/> | 193 <param name="conn" value="1"/> |
190 <param name="sim" value="42"/> | 194 <param name="sim" value="42"/> |
191 <section name="more_options"> | 195 <section name="more_options"> |
192 <param name="cov" value="42"/> | 196 <param name="cov" value="42"/> |
193 <param name="identity" value="42"/> | 197 <param name="identity" value="42"/> |
194 <param name="selfblast" value="true"/> | |
195 <param name="singles" value="true"/> | 198 <param name="singles" value="true"/> |
196 <param name="core" value="true"/> | 199 <param name="core" value="true"/> |
197 </section> | 200 </section> |
198 <expand macro="test_output_proteinortho" nlines="177"/> | 201 <expand macro="test_output_proteinortho" nlines="151" nlines_delta="50"/> |
199 <expand macro="test_output_blastgraph" nlines="2720"/> | 202 <expand macro="test_output_blastgraph" nlines="1403" nlines_delta="300"/> |
200 <expand macro="test_output_proteinorthograph" nlines="384"/> | 203 <expand macro="test_output_proteinorthograph" nlines="239" nlines_delta="150"/> |
201 <assert_command> | 204 <assert_command> |
202 <has_text text="--p=diamond"/> | 205 <has_text text="--p=diamond"/> |
203 </assert_command> | 206 </assert_command> |
204 </test> | 207 </test> |
205 <test expect_num_outputs="3"> <!-- synteny --> | 208 <test expect_num_outputs="3"> <!-- synteny --> |
207 <param name="input_files_syn" value="L.gff,C.gff,E.gff,M.gff"/> | 210 <param name="input_files_syn" value="L.gff,C.gff,E.gff,M.gff"/> |
208 <param name="p" value="diamond"/> | 211 <param name="p" value="diamond"/> |
209 <conditional name="synteny"> | 212 <conditional name="synteny"> |
210 <param name="synteny_options" value="specified"/> | 213 <param name="synteny_options" value="specified"/> |
211 </conditional> | 214 </conditional> |
212 <expand macro="test_output_proteinortho" nlines="38"/> | 215 <expand macro="test_output_proteinortho" nlines="38" nlines_delta="20"/> |
213 <expand macro="test_output_blastgraph" nlines="157"/> | 216 <expand macro="test_output_blastgraph" nlines="300" nlines_delta="150"/> |
214 <expand macro="test_output_proteinorthograph" nlines="119" nlines_delta="10" ncolumns="8" add_columns=",same_strand,simscore"/> | 217 <expand macro="test_output_proteinorthograph" nlines="119" nlines_delta="10" ncolumns="8" add_columns=",same_strand,simscore"/> |
215 <assert_command> | 218 <assert_command> |
216 <has_text text="--p=diamond"/> | 219 <has_text text="--p=diamond"/> |
217 </assert_command> | 220 </assert_command> |
218 </test> | 221 </test> |
219 <test expect_num_outputs="3"> <!-- blast --> | 222 <test expect_num_outputs="3"> <!-- blast --> |
220 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> | 223 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> |
221 <param name="p" value="blastp"/> | 224 <param name="p" value="blastp"/> |
222 <expand macro="test_output_proteinortho" nlines="32"/> | 225 <expand macro="test_output_proteinortho" nlines="33" nlines_delta="20"/> |
223 <expand macro="test_output_blastgraph" nlines="158"/> | 226 <expand macro="test_output_blastgraph" nlines="155" nlines_delta="50"/> |
224 <expand macro="test_output_proteinorthograph" nlines="142"/> | 227 <expand macro="test_output_proteinorthograph" nlines="139" nlines_delta="50"/> |
225 <assert_command> | 228 <assert_command> |
226 <has_text text="--p=blastp"/> | 229 <has_text text="--p=blastp"/> |
227 </assert_command> | 230 </assert_command> |
228 </test> | 231 </test> |
229 <test expect_num_outputs="3"> <!-- auto blast --> | 232 <test expect_num_outputs="3"> <!-- auto blast --> |
230 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> | 233 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> |
231 <param name="p" value="autoblast"/> | 234 <param name="p" value="autoblast"/> |
232 <expand macro="test_output_proteinortho" nlines="32"/> | 235 <expand macro="test_output_proteinortho" nlines="33" nlines_delta="20"/> |
233 <expand macro="test_output_blastgraph" nlines="158"/> | 236 <expand macro="test_output_blastgraph" nlines="157" nlines_delta="50"/> |
234 <expand macro="test_output_proteinorthograph" nlines="142"/> | 237 <expand macro="test_output_proteinorthograph" nlines="136" nlines_delta="50"/> |
235 <assert_command> | 238 <assert_command> |
236 <has_text text="--p=autoblast"/> | 239 <has_text text="--p=autoblast"/> |
237 </assert_command> | 240 </assert_command> |
238 </test> | 241 </test> |
239 <test expect_num_outputs="3"> <!-- last --> | 242 <test expect_num_outputs="3"> <!-- last --> |
240 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> | 243 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> |
241 <param name="p" value="lastp"/> | 244 <param name="p" value="lastp"/> |
242 <expand macro="test_output_proteinortho" nlines="34"/> | 245 <expand macro="test_output_proteinortho" nlines="34" nlines_delta="20"/> |
243 <expand macro="test_output_blastgraph" nlines="148"/> | 246 <expand macro="test_output_blastgraph" nlines="148" nlines_delta="50"/> |
244 <expand macro="test_output_proteinorthograph" nlines="133"/> | 247 <expand macro="test_output_proteinorthograph" nlines="134" nlines_delta="50"/> |
245 <assert_command> | 248 <assert_command> |
246 <has_text text="--p=lastp"/> | 249 <has_text text="--p=lastp"/> |
247 </assert_command> | 250 </assert_command> |
248 </test> | 251 </test> |
249 <test expect_num_outputs="3"> <!-- blat --> | 252 <test expect_num_outputs="3"> <!-- blat --> |
250 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> | 253 <param name="input_files" value="L.fasta,C.fasta,E.fasta,M.fasta"/> |
251 <param name="p" value="blastp"/> | 254 <param name="p" value="blastp"/> |
252 <expand macro="test_output_proteinortho" nlines="32"/> | 255 <expand macro="test_output_proteinortho" nlines="33" nlines_delta="20"/> |
253 <expand macro="test_output_blastgraph" nlines="158"/> | 256 <expand macro="test_output_blastgraph" nlines="156" nlines_delta="50"/> |
254 <expand macro="test_output_proteinorthograph" nlines="142"/> | 257 <expand macro="test_output_proteinorthograph" nlines="136" nlines_delta="50"/> |
255 <assert_command> | 258 <assert_command> |
256 <has_text text="--p=blastp"/> | 259 <has_text text="--p=blastp"/> |
257 </assert_command> | 260 </assert_command> |
258 </test> | 261 </test> |
259 </tests> | 262 </tests> |
263 | 266 |
264 Proteinortho is a tool to detect orthologous proteins/genes within different species (at least 2). | 267 Proteinortho is a tool to detect orthologous proteins/genes within different species (at least 2). |
265 | 268 |
266 | It compares similarities of given gene/protein sequences and clusters them to find significant groups. | 269 | It compares similarities of given gene/protein sequences and clusters them to find significant groups. |
267 | The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once. | 270 | The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once. |
268 | Details can be found in (doi:10.1186/1471-2105-12-124). | 271 | Details can be found in (doi:10.1186/1471-2105-12-124 and doi:10.3389/fbinf.2023.1322477). |
269 | To enhance the prediction accuracy, the relative order of genes (synteny) can be used as an additional feature for the discrimination of orthologs. The corresponding extension, namely PoFF (for details see doi:10.1371/journal.pone.0105015), is already built into Proteinortho. | 272 | To enhance the prediction accuracy, the relative order of genes (synteny) can be used as an additional feature for the discrimination of orthologs. The corresponding extension, namely PoFF (for details see doi:10.1371/journal.pone.0105015), is already built into Proteinortho. |
270 | 273 |
271 ---- | 274 ---- |
272 | 275 |
273 **Proteinortho in a nutshell** | 276 **Proteinortho in a nutshell** |
351 | 354 |
352 More information can be found on GitLab: https://gitlab.com/paulklemm_PHD/proteinortho | 355 More information can be found on GitLab: https://gitlab.com/paulklemm_PHD/proteinortho |
353 | 356 |
354 **Citations:** | 357 **Citations:** |
355 | 358 |
356 - Lechner, Marcus, et al. "Proteinortho: detection of (co-) orthologs in large-scale analysis." BMC bioinformatics 12.1 (2011): 1-9. (10.1186/1471-2105-12-124) | |
357 - Lechner, Marcus, et al. "Orthology detection combining clustering and synteny for very large datasets." PLoS one 9.8 (2014): e105015. (10.1371/journal.pone.0105015) | |
358 | |
359 ]]> | 359 ]]> |
360 </help> | 360 </help> |
361 <expand macro="citations" /> <!--- TODO: citations are not working in usegalaxy, therefore they are added manually above. --> | 361 <expand macro="citations" /> <!--- TODO: citations are not working in usegalaxy, therefore they are added manually above. --> |
362 </tool> | 362 </tool> |