repeatmodeler: repeatmodeler.xml comparison

comparison repeatmodeler.xml @ 1:dda44fd49bcd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmodeler commit a4bb321c4a8bd6e8d331df6ed840e00d1c4599f2"

author	iuc
date	Thu, 26 Aug 2021 13:25:32 +0000
parents	4f0c878b36d4
children	41bfbaf3c959

comparison

equal deleted inserted replaced

-:4f0c878b36d4
+:dda44fd49bcd
-<tool id="repeatmodeler" name="RepeatModeler - Model repetitive DNA" version="0.1.0" python_template_version="3.5">
+<tool id="repeatmodeler" name="RepeatModeler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
+<description>Model repetitive DNA</description>
+<macros>
+<import>macros.xml</import>
+</macros>
 <requirements>
-<requirement type="package" version="2.0.1">repeatmodeler</requirement>
+<expand macro="requirements" />
 </requirements>
 <command detect_errors="exit_code"><![CDATA[
-BuildDatabase -name '$name' '$input_file' && RepeatModeler -database '$name' -pa '$pa' && cp '$name'-families.fa '$output'
+BuildDatabase -name 'rmdb' '$input_file'
+&&
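+## -pa is the number of parallel search jobs and RMBlast uses 4 cores per job,
+## so request GALAXY_SLOTS / 4 jobs, rounded up (always at least 1).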
+pa=\$(( (\${GALAXY_SLOTS:-1}+3)/4 ))
+&&
+RepeatModeler -database 'rmdb' -pa \$pa
 ]]></command>
 <inputs>
 <param type="data" name="input_file" format="fasta" label="Input genome fasta"/>
-<param argument="-name" type="text" value="" label="Title for building database" />
-<param argument="-pa" type="text" value="" label="Numer of paralleled job: # of nodes" />
 </inputs>
 <outputs>
-<!-- <data format="fasta" name="RepeatModels" from_work_dir="*-families.fa" label="${tool.name} on ${on_string}: RepeatModels::FASTA" /> -->
+<data format="fasta" name="sequences" from_work_dir="rmdb-families.fa" label="${tool.name} on ${on_string}: consensus sequences" />
-<!-- <data format="txt" name="StockholmFormat" from_work_dir="*-families.stk" label="${tool.name} on ${on_string}: RepeatModels::StockholmFormat" /> -->
+<data format="stockholm" name="seeds" from_work_dir="rmdb-families.stk" label="${tool.name} on ${on_string}: seed alignments" />
-<data format="fasta" name="output" label="${tool.name} on ${on_string}: RepeatModels::FASTA" />
 </outputs>
 <tests>
 <test>
+<!-- input_file is the only declared input; the database name and -pa value are fixed inside <command>. -->
-<param name="input_file" value="eco.fasta" ftype="fasta"/>
+<param name="input_file" value="eco.fasta.gz" ftype="fasta.gz"/>
-<param name="name" value="eco" />
-<param name="pa" value="4" />
-<output name="output" file="consensi.fa.classified" compare="sim_size" delta_frac="0.1" />
+<output name="sequences" ftype="fasta">
+<assert_contents>
+<has_text text="( RepeatScout Family Size ="/>
+<has_text text="rnd-1_family-0"/>
+</assert_contents>
+</output>
+<output name="seeds" ftype="stockholm">
+<assert_contents>
+<has_text text="#=GF DE    RepeatModeler Generated"/>
+</assert_contents>
+</output>
 </test>
 </tests>
 <help><![CDATA[
-RepeatModeler - 2.0.1
+RepeatModeler is a de novo transposable element (TE) family identification and modeling package. At the heart of RepeatModeler are three de novo repeat-finding programs (RECON, RepeatScout and LTRharvest/LTR_retriever) which employ complementary computational methods for identifying repeat element boundaries and family relationships from sequence data.
-NAME
+RepeatModeler assists in automating the runs of the various algorithms given a genomic database, clustering redundant results, refining and classifying the families and producing a high quality library of TE families suitable for use with RepeatMasker and ultimately for submission to the Dfam database (http://dfam.org).
-RepeatModeler - Model repetitive DNA
-SYNOPSIS
-RepeatModeler [-options] -database <XDF Database>
-DESCRIPTION
-The options are:
--h(elp)
-Detailed help
--database <DBNAME>
-The name of the sequence database to run an analysis on. This is the
-name that was provided to the BuildDatabase script using the "-name"
-option.
--pa #
-Specify the number of parallel search jobs to run. RMBlast jobs will
-use 4 cores each and ABBlast jobs will use a single core each. i.e.
-on a machine with 12 cores and running with RMBlast you would use
--pa 3 to fully utilize the machine.
--recoverDir <Previous Output Directory>
-If a run fails in the middle of processing, it may be possible
-recover some results and continue where the previous run left off.
-Simply supply the output directory where the results of the failed
-run were saved and the program will attempt to recover and continue
-the run.
--srand #
-Optionally set the seed of the random number generator to a known
-value before the batches are randomly selected ( using Fisher Yates
-Shuffling ). This is only useful if you need to reproduce the sample
-choice between runs. This should be an integer number.
--LTRStruct [optional]
-Run the LTR structural discovery pipeline ( LTR_Harvest and
-LTR_retreiver ) and combine results with the RepeatScout/RECON
-pipeline. [optional]
--genomeSampleSizeMax #
-Optionally change the maximum bp of the genome to sample in all
-rounds of RECON (default=243000000).
-CONFIGURATION OVERRIDES
--ltr_retriever_dir <string>
-The path to the installation of the LTR_Retriever structural LTR
-analysis package.
--rmblast_dir <string>
-The path to the installation of the RMBLAST sequence alignment
-program.
--repeatmasker_dir <string>
-The path to the installation of RepeatMasker.
--trf_prgm <string>
-The full path including the name for the TRF program ( 4.0.9 or
-higher )
--ninja_dir <string>
-The path to the installation of the Ninja phylogenetic analysis
-package.
--recon_dir <string>
-The path to the installation of the RECON de-novo repeatfinding
-program.
--genometools_dir <string>
-The path to the installation of the GenomeTools package.
--abblast_dir <string>
-The path to the installation of the ABBLAST sequence alignment
-program.
--rscout_dir <string>
-The path to the installation of the RepeatScout ( 1.0.6 or higher )
-de-novo repeatfinding program.
--mafft_dir <string>
-The path to the installation of the MAFFT multiple alignment
-program.
--cdhit_dir <string>
-The path to the installation of the CD-Hit sequence clustering
-package.
-SEE ALSO
-RepeatMasker, RMBlast
-COPYRIGHT
-Copyright 2005-2019 Institute for Systems Biology
-AUTHOR
-RepeatModeler:
-Robert Hubley <rhubley@systemsbiology.org>
-Arian Smit <asmit@systemsbiology.org>
-LTR Pipeline Extensions:
-Jullien Michelle Flynn <jmf422@cornell.edu>
 ]]></help>
-<citations>
+<expand macro="citations" />
-<citation type="doi">10.1073/pnas.1921046117</citation>
-<citation type="doi">10.1186/s13059-018-1577-z</citation>
-</citations>
 </tool>

Mercurial > repos > csbl > repeatmodeler

comparison repeatmodeler.xml @ 1:dda44fd49bcd draft