view CLIFinder.xml @ 17:3e3370387441 draft

"planemo upload for repository commit 94d5cab008ee4422a8ba63468532d3bb552abcd5"
author clifinder
date Fri, 14 Feb 2020 17:03:24 -0500
parents feecd33c8390
children f25d12179c6c
line wrap: on
line source

<tool name="CLIFinder" id="CLIFinder" version="0.5.0" profile="16.01">
    <description>Find chimerics transcripts containing LINEs sequences</description>
    <!--
        Reusable parameter groups. Each macro renders a "source" conditional and
        is parameterised by three tokens:
          @ARG@   : command-line argument shown next to the parameter,
          @BUILD@ : companion "build" argument mentioned in the help text,
          @REF@   : human-readable description used in the labels.
    -->
    <macros>
        <!-- Reference selectable from the bwa_mem_indexes data table or from a FASTA in the history. -->
        <xml name="source_bwa" token_arg="Argument" token_build="Build argument" token_ref="">
            <conditional name="source">
                <param name="source" type="select" label="Will you select the reference database from your history or use a built-in index?">
                    <option value="indexed">Use a built-in index</option>
                    <option value="history">Use one from the history</option>
                </param>
                <when value="indexed">
                    <param name="indices" argument="@ARG@" type="select" label="Select @REF@">
                        <options from_data_table="bwa_mem_indexes">
                            <filter type="sort_by" column="2" />
                            <validator type="no_options" message="No indexes are available" />
                        </options>
                    </param>
                </when>
                <when value="history">
                    <param name="file" argument="@ARG@" type="data" format="fasta" label="Select @REF@ from history" help="We will also use @BUILD@"/>
                </when>
            </conditional>
        </xml>
        <!-- Blast database selectable from the blastdb data table, a FASTA in the history, or a URL. -->
        <xml name="source_blast" token_arg="Argument" token_build="Build argument" token_ref="">
            <conditional name="source">
                <param name="source" type="select" label="Will you select the reference database from your history or use a built-in index?">
                    <option value="indexed">Use a built-in index</option>
                    <option value="history">Generate one from the history</option>
                    <option value="url">Download one from some URL</option>
                </param>
                <when value="indexed">
                    <param name="indices" argument="@ARG@" type="select" label="Select @REF@">
                        <options from_data_table="blastdb">
                            <filter type="sort_by" column="2" />
                            <validator type="no_options" message="No indexes are available" />
                        </options>
                    </param>
                </when>
                <when value="history">
                    <param name="file" argument="@ARG@" type="data" format="fasta" label="Select @REF@ from history" help="We will also use @BUILD@"/>
                </when>
                <when value="url">
                    <param name="file" argument="@ARG@" type="text" label="Download @REF@ from URL" help="We will not use @BUILD@: please provide link to tar.gz"/>
                </when>
            </conditional>
        </xml>
    </macros>
    <!-- Pinned package versions resolved by the Galaxy dependency manager. -->
    <requirements>
        <requirement type="package" version="1.9">samtools</requirement>
        <requirement type="package" version="2.26.0gx">bedtools</requirement>
        <requirement type="package" version="4.0.9_p2">repeatmasker</requirement>
        <requirement type="package" version="0.7.17">bwa</requirement>
        <requirement type="package" version="0.0.14">fastx_toolkit</requirement>
        <requirement type="package" version="1.20.1">wget</requirement>
        <requirement type="package" version="5.26.2">perl</requirement>
        <requirement type="package" version="2.50">perl-getopt-long</requirement>
        <requirement type="package" version="0.45">perl-file-copy-recursive</requirement>
        <requirement type="package" version="2.02">perl-parallel-forkmanager</requirement>
        <requirement type="package" version="0.34">perl-statistics-r</requirement>
        <requirement type="package" version="3.5.1">r-base</requirement>
        <requirement type="package" version="1.8.5">r-plyr</requirement>
        <requirement type="package" version="1.34.0">bioconductor-genomicranges</requirement>
    </requirements>
    <!-- Takes the third space-separated field of the first line of the version output that contains 'version'. -->
    <version_command>perl '$__tool_directory__/script/CLIFinder.pl' --version | head -n 1 | grep 'version' | cut -d ' ' -f 3</version_command>
    <command detect_errors="aggressive"><![CDATA[
perl '$__tool_directory__/script/CLIFinder.pl'

    ## One first/name/second triplet is emitted per entry of the "fastq" repeat.
    #if str($inputs.custom) == 'true'
        #for $x in $inputs.fastq
            --first '$x.first'
            --name '$x.name'
            --second '$x.second'
        #end for
    #else
        ## No label parameter exists in this branch, so the name of the first
        ## dataset is used. NOTE(review): confirm this is the intended source
        ## of the automatically extracted name.
        #for $x in $inputs.fastq
            --first '$x.first'
            --name '$x.first.name'
            --second '$x.second'
        #end for
    #end if

    ## A FASTA taken from the history is passed together with its build flag;
    ## a built-in index is passed by the path stored in the data table.
    #if $genome.source.source == "history"
    --ref '$genome.source.file'
    --build_ref
    #else
    --ref '$genome.source.indices.fields.path'
    #end if

    #if $te.source.source == "history"
    --TE '$te.source.file'
    --build_TE
    #else
    --TE '$te.source.indices.fields.path'
    #end if

    ## Blast databases are optional. The "url" source passes the text value
    ## as-is, without a build flag.
    #if str($rnadb.blast.run) == 'true'
        #if $rnadb.blast.source.source == "indexed"
        --rnadb '$rnadb.blast.source.indices.fields.path'
        #else
        --rnadb '$rnadb.blast.source.file'
        #end if
        #if $rnadb.blast.source.source == "history"
        --build_rnadb
        #end if
    #end if

    #if str($estdb.blast.run) == 'true'
        #if $estdb.blast.source.source == "indexed"
        --estdb '$estdb.blast.source.indices.fields.path'
        #else
        --estdb '$estdb.blast.source.file'
        #end if
        #if $estdb.blast.source.source == "history"
        --build_estdb
        #end if
    #end if

    --rmsk '$rmsk'
    --refseq '$refseq'
    --html '$chimerae'
    --html_path '${chimerae.files_path}'
    --size_insert '$size_insert'
    --size_read '$size'
    --min_unique '$min_unique'
    --BDir '$BDir'
    --min_L1 '$min_L1'
    --mis_L1 '$mis_L1'
    --threads "\${GALAXY_SLOTS:-4}"
    ]]></command>
    <inputs>
        <!-- Paired-end inputs; a per-pair label is only requested when "custom" is true. -->
        <conditional name="inputs">
            <param name="custom" type="select" label="Use custom name for the input sequence files?">
                <option value="true">Yes</option>
                <option value="false" selected="true">No: the names will be extracted automatically</option>
            </param>
            <when value="true">
                <repeat name="fastq" title="Input sequences" min="1">
                    <param argument="--first" type="data" format="fastqsanger" label="First set of paired-end reads"/>
                    <param argument="--name" type="text" value="" label="Label for the input sequences"/>
                    <param argument="--second" type="data" format="fastqsanger" label="Second set of paired-end reads"/>
                </repeat>
            </when>
            <when value="false">
                <repeat name="fastq" title="Input sequences" min="1">
                    <param argument="--first" type="data" format="fastqsanger" label="First set of paired-end reads"/>
                    <param argument="--second" type="data" format="fastqsanger" label="Second set of paired-end reads"/>
                </repeat>
            </when>
        </conditional>
        <section name="genome" title="Reference genome" expanded="true">
            <expand macro="source_bwa" arg="--ref" build="--build_ref" ref="a reference genome"/>
        </section>
        <section name="te" title="Transposable Elements" expanded="true">
            <expand macro="source_bwa" arg="--TE" build="--build_TE" ref="reference TE sequences"/>
        </section>
        <section name="rnadb" title="RNA Blast database" expanded="true">
            <conditional name="blast">
                <param name="run" type="select" label="Should blast be run?">
                    <option value="true">Yes</option>
                    <option value="false">No</option>
                </param>
                <when value="true">
                    <expand macro="source_blast" arg="--rnadb" build="--build_rnadb" ref="reference RNA sequences"/>
                </when>
                <when value="false" />
            </conditional>
        </section>
        <section name="estdb" title="EST Blast database" expanded="true">
            <conditional name="blast">
                <param name="run" type="select" label="Should blast be run?">
                    <option value="true">Yes</option>
                    <option value="false">No</option>
                </param>
                <when value="true">
                    <expand macro="source_blast" arg="--estdb" build="--build_estdb" ref="reference EST sequences"/>
                </when>
                <when value="false" />
            </conditional>
        </section>
        <param argument="--rmsk" name="rmsk" type="data" format="tabular" label="Tab-delimited text file (with headers) containing reference repeat sequences (e.g. rmsk track from UCSC)"/>
        <param argument="--refseq" name="refseq" type="data" format="tabular" label="Tab-delimited file (with headers) containing reference genes (e.g. RefGene.txt from UCSC)"/>
        <param argument="--BDir" name="BDir" type="select" label="Orientation of reads">
            <option value="0">Undirectional libraries</option>
            <option value="1">TEs sequences in first read in pair</option>
            <option value="2">TEs sequences in second read in pair</option>
        </param>
        <param argument="--size_read" name="size" type="integer" value="100" label="Reads size"/>
        <param argument="--size_insert" name="size_insert" type="integer" value="250" label="Maximum insert size (bp)"/>
        <param argument="--min_L1" name="min_L1" type="integer" value="50" label="Minimum bp mapping on selected TEs database"/>
        <param argument="--mis_L1" name="mis_L1" type="integer" value="2" label="Number of mismatches tolerated in TEs mapping sequences"/>
        <param argument="--min_unique" name="min_unique" type="integer" value="33" label="minimum consecutive bp corresponding to a unique sequence"/>
    </inputs>
    <outputs>
        <!-- Main HTML report; the command also receives this dataset's files_path as the results folder. -->
        <data format="html" name="chimerae" label="${tool.name}_on_${on_string}"/>
    </outputs>
    <tests>
        <!-- Single run with every reference supplied from the history and both Blast steps enabled. -->
        <test>
            <conditional name="inputs">
                <param name="custom" value="true"/>
                <repeat name="fastq">
                    <param name="first" value="one.fastq" ftype="fastqsanger" />
                    <param name="name" value="test"/>
                    <param name="second" value="two.fastq" ftype="fastqsanger" />
                </repeat>
            </conditional>
            <section name="genome">
                <conditional name="source">
                    <param name="source" value="history" />
                    <param name="file" value="genome.fa" />
                </conditional>
            </section>
            <section name="te">
                <conditional name="source">
                    <param name="source" value="history" />
                    <param name="file" value="TE.fa" />
                </conditional>
            </section>
            <section name="rnadb">
                <conditional name="blast">
                    <param name="run" value="true" />
                    <conditional name="source">
                        <param name="source" value="history" />
                        <param name="file" value="rna-small.fa.gz" />
                    </conditional>
                </conditional>
            </section>
            <section name="estdb">
                <conditional name="blast">
                    <param name="run" value="true" />
                    <conditional name="source">
                        <param name="source" value="history" />
                        <param name="file" value="est-small.fa.gz" />
                    </conditional>
                </conditional>
            </section>
            <param name="rmsk" value="rmsk-small.txt" />
            <param name="refseq" value="refseq-small.txt" />
            <param name="BDir" value="0" />
            <param name="size" value="100" />
            <param name="size_insert" value="500" />
            <param name="min_L1" value="30" />
            <param name="mis_L1" value="6" />
            <param name="min_unique" value="30" />
            <output name="chimerae" file="res.html" compare="diff" lines_diff="0">
                <extra_files type="file" name="results.txt" value="res_files/results.txt" compare="diff" />
                <extra_files type="file" name="first_results.txt" value="res_files/first_results.txt" compare="diff" />
                <extra_files type="file" name="final_result_chimerae.txt" value="res_files/final_result_chimerae.txt" compare="diff" />
            </output>
        </test>
    </tests>
    <help><![CDATA[
**CLIFinder version 0.5.0**

**Usage:** --first [first fastq of paired-end set 1] --name [name 1] --second [second fastq of paired-end set 1] [--first [first fastq of paired-end set 2] --name [name 2] --second [second fastq of paired-end set 2] ...] --ref [reference genome] [--build_ref] --TE [transposable elements] [--build_TE] --html [results.html] --html_path [results directory] [options]

  --first [fastq]     First fastq file to process from paired-end set

  --name [name]       Name of the content to process

  --second [fastq]    Second fastq file to process from paired-end set

  --ref [reference]   Fasta file containing the reference genome

  --TE [TE]           Fasta file containing the transposable elements

  --rmsk [txt file]   Tab-delimited text file (with headers) containing reference repeat sequences (e.g. rmsk track from UCSC)

  --refseq [txt file] Tab-delimited file (with headers) containing reference genes (e.g. RefGene.txt from UCSC)

  --html [file]       Main HTML file where results will be displayed

  --html_path [path]  Folder where results will be stored

For any fasta file, if a bwa index is not provided, you should build it through the corresponding *--build_[element]* argument

    --rnadb [RNA db]    Blast database with RNA sequences (optional)

    --estdb [EST db]    Blast database with EST sequences (optional)

    --size_read [INT]   Size of reads

    --BDir [0|1|2]      Orientation of reads (0: undirectional libraries, 1: TEs sequences in first read in pair, 2: TEs sequences in second read in pair)

    --size_insert [INT] Maximum size of insert tolerated between R1 and R2 for alignment on the reference genome

    --min_L1 [INT]      Minimum number of bp matching for L1 mapping

    --mis_L1 [INT]      Maximum number of mismatches tolerated for L1 mapping

    --min_unique [INT]  Number of consecutive bp not annotated by RepeatMasker

    --threads [INT]     Number of threads (default: 1)

For Blast database files, if a fasta is provided, the database can be built with '--build_[db]'. Otherwise, provide a path or URL. "tar(.gz)" files are acceptable, as well as wild card (rna*) URLs.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btx671</citation>
    </citations>
</tool>