view repeatmodeler.xml @ 0:4f0c878b36d4 draft

"planemo upload commit 5c6a5c0f9aacbc7def652b33cc35ee37aa543d05-dirty"
author csbl
date Tue, 24 Nov 2020 04:14:46 +0000
parents
children dda44fd49bcd
line wrap: on
line source

<tool id="repeatmodeler" name="RepeatModeler - Model repetitive DNA" version="0.1.0" python_template_version="3.5">
    <!-- Runtime dependency: the repeatmodeler package at version 2.0.1. -->
    <requirements>
        <requirement version="2.0.1" type="package">repeatmodeler</requirement>
    </requirements>
    <!-- Three chained steps: build the sequence database from the input FASTA,
         run RepeatModeler on that database, then copy the resulting
         "<name>-families.fa" file to the Galaxy output dataset.
         Any non-zero exit code marks the job as failed. -->
    <command detect_errors="exit_code"><![CDATA[
        BuildDatabase -name '${name}' '${input_file}'
        && RepeatModeler -database '${name}' -pa '${pa}'
        && cp '${name}'-families.fa '${output}'
    ]]></command>
    <!-- User-supplied inputs:
         input_file : genome sequences in FASTA format.
         name       : database name; it is also used as the prefix of the files
                      written to the job directory, so it must be non-empty and
                      must not contain path separators or whitespace.
         pa         : number of parallel search jobs, a positive integer. -->
    <inputs>
        <param type="data" name="input_file" format="fasta" label="Input genome fasta"/>
        <param argument="-name" type="text" value="" label="Title for building database"
               help="Used as the database name and as the prefix of the result files. Allowed characters: letters, digits, '_', '.' and '-'; must start with a letter, digit or '_'.">
            <validator type="regex" message="Enter a non-empty name made of letters, digits, '_', '.' or '-' that does not start with '.' or '-'.">^[A-Za-z0-9_][A-Za-z0-9_.-]*$</validator>
        </param>
        <param argument="-pa" type="integer" value="1" min="1" label="Number of parallel search jobs"
               help="RMBlast jobs use 4 cores each, so on a machine with 12 cores use 3 to fully utilize it."/>
    </inputs>
    <outputs>
      <!-- Disabled alternatives that picked up results via from_work_dir; kept for reference. -->
      <!-- <data format="fasta" name="RepeatModels" from_work_dir="*-families.fa" label="${tool.name} on ${on_string}: RepeatModels::FASTA" /> -->
      <!-- <data format="txt" name="StockholmFormat" from_work_dir="*-families.stk" label="${tool.name} on ${on_string}: RepeatModels::StockholmFormat" /> -->
      <!-- Active output: the FASTA dataset that the command copies the families file into. -->
      <data name="output" format="fasta" label="${tool.name} on ${on_string}: RepeatModels::FASTA"/>
    </outputs>
    <tests>
        <!-- Runs the tool on eco.fasta with database name "eco" and 4 parallel jobs.
             The result is compared to the reference file by size only (within 10%). -->
        <test>
            <param name="input_file" ftype="fasta" value="eco.fasta"/>
            <param name="name" value="eco"/>
            <param name="pa" value="4"/>
            <output name="output" file="consensi.fa.classified" compare="sim_size" delta_frac="0.1"/>
        </test>
    </tests>
    <help><![CDATA[
      RepeatModeler - 2.0.1

      NAME
          RepeatModeler - Model repetitive DNA

      SYNOPSIS
            RepeatModeler [-options] -database <XDF Database>

      DESCRIPTION
          The options are:

          -h(elp)
              Detailed help

          -database <DBNAME>
              The name of the sequence database to run an analysis on. This is the
              name that was provided to the BuildDatabase script using the "-name"
              option.

          -pa #
              Specify the number of parallel search jobs to run. RMBlast jobs will
              use 4 cores each and ABBlast jobs will use a single core each. i.e.
              on a machine with 12 cores and running with RMBlast you would use
              -pa 3 to fully utilize the machine.

          -recoverDir <Previous Output Directory>
              If a run fails in the middle of processing, it may be possible
              to recover some results and continue where the previous run left off.
              Simply supply the output directory where the results of the failed
              run were saved and the program will attempt to recover and continue
              the run.

          -srand #
              Optionally set the seed of the random number generator to a known
              value before the batches are randomly selected ( using Fisher Yates
              Shuffling ). This is only useful if you need to reproduce the sample
              choice between runs. This should be an integer number.

          -LTRStruct [optional]
              Run the LTR structural discovery pipeline ( LTR_Harvest and
              LTR_retriever ) and combine results with the RepeatScout/RECON
              pipeline. [optional]

          -genomeSampleSizeMax #
              Optionally change the maximum bp of the genome to sample in all
              rounds of RECON (default=243000000).

      CONFIGURATION OVERRIDES
          -ltr_retriever_dir <string>
              The path to the installation of the LTR_Retriever structural LTR
              analysis package.

          -rmblast_dir <string>
              The path to the installation of the RMBLAST sequence alignment
              program.

          -repeatmasker_dir <string>
              The path to the installation of RepeatMasker.

          -trf_prgm <string>
              The full path including the name for the TRF program ( 4.0.9 or
              higher )

          -ninja_dir <string>
              The path to the installation of the Ninja phylogenetic analysis
              package.

          -recon_dir <string>
              The path to the installation of the RECON de-novo repeatfinding
              program.

          -genometools_dir <string>
              The path to the installation of the GenomeTools package.

          -abblast_dir <string>
              The path to the installation of the ABBLAST sequence alignment
              program.

          -rscout_dir <string>
              The path to the installation of the RepeatScout ( 1.0.6 or higher )
              de-novo repeatfinding program.

          -mafft_dir <string>
              The path to the installation of the MAFFT multiple alignment
              program.

          -cdhit_dir <string>
              The path to the installation of the CD-Hit sequence clustering
              package.

      SEE ALSO
              RepeatMasker, RMBlast

      COPYRIGHT
           Copyright 2005-2019 Institute for Systems Biology

      AUTHOR
           RepeatModeler:
             Robert Hubley <rhubley@systemsbiology.org>
             Arian Smit <asmit@systemsbiology.org>

           LTR Pipeline Extensions:
             Jullien Michelle Flynn <jmf422@cornell.edu>
    ]]></help>
    <!-- Publications to cite for this tool, each identified by DOI. -->
    <citations>
      <citation type="doi">10.1073/pnas.1921046117</citation>
      <citation type="doi">10.1186/s13059-018-1577-z</citation>
    </citations>
</tool>