comparison repeatmodeler.xml @ 1:dda44fd49bcd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmodeler commit a4bb321c4a8bd6e8d331df6ed840e00d1c4599f2"
author iuc
date Thu, 26 Aug 2021 13:25:32 +0000
parents 4f0c878b36d4
children 41bfbaf3c959
comparison
equal deleted inserted replaced
0:4f0c878b36d4 1:dda44fd49bcd
1 <tool id="repeatmodeler" name="RepeatModeler - Model repetitive DNA" version="0.1.0" python_template_version="3.5"> 1 <tool id="repeatmodeler" name="RepeatModeler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description>Model repetitive DNA</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
2 <requirements> 6 <requirements>
3 <requirement type="package" version="2.0.1">repeatmodeler</requirement> 7 <expand macro="requirements" />
4 </requirements> 8 </requirements>
5 <command detect_errors="exit_code"><![CDATA[ 9 <command detect_errors="exit_code"><![CDATA[
6 BuildDatabase -name '$name' '$input_file' && RepeatModeler -database '$name' -pa '$pa' && cp '$name'-families.fa '$output' 10 BuildDatabase -name 'rmdb' '$input_file'
11
12 &&
13
14 ## "RMBlast jobs will use 4 cores each"
15 pa=\$(( (\${GALAXY_SLOTS:-1}+3)/4 ))
16
17 &&
18
19 RepeatModeler -database 'rmdb' -pa \$pa
7 ]]></command> 20 ]]></command>
8 <inputs> 21 <inputs>
9 <param type="data" name="input_file" format="fasta" label="Input genome fasta"/> 22 <param type="data" name="input_file" format="fasta" label="Input genome fasta"/>
10 <param argument="-name" type="text" value="" label="Title for building database" />
11 <param argument="-pa" type="text" value="" label="Numer of paralleled job: # of nodes" />
12 </inputs> 23 </inputs>
13 <outputs> 24 <outputs>
14 <!-- <data format="fasta" name="RepeatModels" from_work_dir="*-families.fa" label="${tool.name} on ${on_string}: RepeatModels::FASTA" /> --> 25 <data format="fasta" name="sequences" from_work_dir="rmdb-families.fa" label="${tool.name} on ${on_string}: consensus sequences" />
15 <!-- <data format="txt" name="StockholmFormat" from_work_dir="*-families.stk" label="${tool.name} on ${on_string}: RepeatModels::StockholmFormat" /> --> 26 <data format="stockholm" name="seeds" from_work_dir="rmdb-families.stk" label="${tool.name} on ${on_string}: seed alignments" />
16 <data format="fasta" name="output" label="${tool.name} on ${on_string}: RepeatModels::FASTA" />
17 </outputs> 27 </outputs>
18 <tests> 28 <tests>
19 <test> 29 <test>
20 <param name="input_file" value="eco.fasta" ftype="fasta"/> 30 <param name="input_file" value="eco.fasta.gz" ftype="fasta.gz"/>
21 <param name="name" value="eco" /> 31 <param name="name" value="eco" />
22 <param name="pa" value="4" /> 32 <param name="pa" value="4" />
23 <output name="output" file="consensi.fa.classified" compare="sim_size" delta_frac="0.1" /> 33 <output name="sequences" ftype="fasta">
34 <assert_contents>
35 <has_text text="( RepeatScout Family Size ="/>
36 <has_text text="rnd-1_family-0"/>
37 </assert_contents>
38 </output>
39 <output name="seeds" ftype="stockholm">
40 <assert_contents>
41 <has_text text="#=GF DE RepeatModeler Generated"/>
42 </assert_contents>
43 </output>
24 </test> 44 </test>
25 </tests> 45 </tests>
26 <help><![CDATA[ 46 <help><![CDATA[
27 RepeatModeler - 2.0.1 47 RepeatModeler is a de novo transposable element (TE) family identification and modeling package. At the heart of RepeatModeler are three de-novo repeat finding programs ( RECON, RepeatScout and LtrHarvest/Ltr_retriever ) which employ complementary computational methods for identifying repeat element boundaries and family relationships from sequence data.
28 48
29 NAME 49 RepeatModeler assists in automating the runs of the various algorithms given a genomic database, clustering redundant results, refining and classifying the families and producing a high quality library of TE families suitable for use with RepeatMasker and ultimately for submission to the Dfam database (http://dfam.org).
30 RepeatModeler - Model repetitive DNA
31
32 SYNOPSIS
33 RepeatModeler [-options] -database <XDF Database>
34
35 DESCRIPTION
36 The options are:
37
38 -h(elp)
39 Detailed help
40
41 -database <DBNAME>
42 The name of the sequence database to run an analysis on. This is the
43 name that was provided to the BuildDatabase script using the "-name"
44 option.
45
46 -pa #
47 Specify the number of parallel search jobs to run. RMBlast jobs will
48 use 4 cores each and ABBlast jobs will use a single core each. i.e.
49 on a machine with 12 cores and running with RMBlast you would use
50 -pa 3 to fully utilize the machine.
51
52 -recoverDir <Previous Output Directory>
53 If a run fails in the middle of processing, it may be possible
54 to recover some results and continue where the previous run left off.
55 Simply supply the output directory where the results of the failed
56 run were saved and the program will attempt to recover and continue
57 the run.
58
59 -srand #
60 Optionally set the seed of the random number generator to a known
61 value before the batches are randomly selected ( using Fisher Yates
62 Shuffling ). This is only useful if you need to reproduce the sample
63 choice between runs. This should be an integer number.
64
65 -LTRStruct [optional]
66 Run the LTR structural discovery pipeline ( LTR_Harvest and
67 LTR_retriever ) and combine results with the RepeatScout/RECON
68 pipeline. [optional]
69
70 -genomeSampleSizeMax #
71 Optionally change the maximum bp of the genome to sample in all
72 rounds of RECON (default=243000000).
73
74 CONFIGURATION OVERRIDES
75 -ltr_retriever_dir <string>
76 The path to the installation of the LTR_Retriever structural LTR
77 analysis package.
78
79 -rmblast_dir <string>
80 The path to the installation of the RMBLAST sequence alignment
81 program.
82
83 -repeatmasker_dir <string>
84 The path to the installation of RepeatMasker.
85
86 -trf_prgm <string>
87 The full path including the name for the TRF program ( 4.0.9 or
88 higher )
89
90 -ninja_dir <string>
91 The path to the installation of the Ninja phylogenetic analysis
92 package.
93
94 -recon_dir <string>
95 The path to the installation of the RECON de-novo repeatfinding
96 program.
97
98 -genometools_dir <string>
99 The path to the installation of the GenomeTools package.
100
101 -abblast_dir <string>
102 The path to the installation of the ABBLAST sequence alignment
103 program.
104
105 -rscout_dir <string>
106 The path to the installation of the RepeatScout ( 1.0.6 or higher )
107 de-novo repeatfinding program.
108
109 -mafft_dir <string>
110 The path to the installation of the MAFFT multiple alignment
111 program.
112
113 -cdhit_dir <string>
114 The path to the installation of the CD-Hit sequence clustering
115 package.
116
117 SEE ALSO
118 RepeatMasker, RMBlast
119
120 COPYRIGHT
121 Copyright 2005-2019 Institute for Systems Biology
122
123 AUTHOR
124 RepeatModeler:
125 Robert Hubley <rhubley@systemsbiology.org>
126 Arian Smit <asmit@systemsbiology.org>
127
128 LTR Pipeline Extensions:
129 Jullien Michelle Flynn <jmf422@cornell.edu>
130 ]]></help> 50 ]]></help>
131 <citations> 51 <expand macro="citations" />
132 <citation type="doi">10.1073/pnas.1921046117</citation>
133 <citation type="doi">10.1186/s13059-018-1577-z</citation>
134 </citations>
135 </tool> 52 </tool>