<!-- Mercurial > repos > mvdbeek > bam_readtagger -->

<tool id="findcluster" name="Find clusters of reads" version="0.4.9">
    <!-- Short phrase that continues the tool name: "Find clusters of reads" + "in bam files". -->
    <description>in bam files</description>
    <!-- Shared macro definitions. The reference_source_conditional macro expanded in the
         inputs section is presumably defined in macros.xml (not visible here; confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- The readtagger package version equals the version attribute of the tool element
         (both 0.4.9); keep the two in step when updating. -->
    <requirements>
        <requirement type="package" version="0.4.9">readtagger</requirement>
    </requirements>
    <!-- Command whose output reports the version of the wrapped findcluster executable. -->
    <version_command>findcluster --version</version_command>
    <command detect_errors="aggressive"><![CDATA[
        #import re
        ## Derive a shell-safe sample name from the dataset identifier: every character that is
        ## not a word character, hyphen, underscore, dot or comma is replaced by an underscore.
        #set sample_name = re.sub('[^\w\-_\.\,]', '_', str( $input.element_identifier))
        ## Link the BAM and its index under fixed names in the current working directory.
        ## Both paths are single-quoted, like every other path below, so the shell cannot
        ## split or expand them.
        ln -f -s '$input' input.bam &&
        ln -f -s '$input.metadata.bam_index' input.bam.bai &&
        findcluster
        --input_path input.bam
        ## Transposon reference: either a fasta dataset from the history or the path field of
        ## the selected table entry, passed as a BWA index. Nothing is passed when ref_file is empty.
        #if $transposon_source.ref_file:
            #if str($transposon_source.reference_source_selector) == "history":
                --transposon_reference_fasta '$transposon_source.ref_file'
            #else
                --transposon_bwa_index '$transposon_source.ref_file.fields.path'
            #end if
        #end if
        ## Genome reference: same selection logic as for the transposon reference.
        #if $genome_source.ref_file:
            #if str($genome_source.reference_source_selector) == "history":
                --genome_reference_fasta '$genome_source.ref_file'
            #else
                --genome_bwa_index '$genome_source.ref_file.fields.path'
            #end if
        #end if
        --output_bam '$output_bam'
        --output_gff '$output_gff'
        --output_fasta '$output_fasta'
        --sample_name '$sample_name'
        ## Thread count comes from the GALAXY_SLOTS environment variable, falling back to 2 when unset.
        --threads "\${GALAXY_SLOTS:-2}"
    ]]></command>
    <inputs>
        <!-- BAM dataset to analyse. The command links it (and its bam_index metadata file)
             into the working directory as input.bam / input.bam.bai. -->
        <param name="input"
               argument="--input_path"
               type="data"
               format="bam"
               label="BAM file"
               help="Find clusters of reads that support a TE insertion in this BAM file."/>

        <!-- Reference selection for transposon and genome sequences. The command reads the
             results as transposon_source and genome_source (reference_source_selector and
             ref_file); the macro body presumably lives in macros.xml (not visible here; confirm). -->
        <expand macro="reference_source_conditional" reference_type="transposon"/>
        <expand macro="reference_source_conditional" reference_type="genome"/>
    </inputs>
    <outputs>
        <!-- One dataset per findcluster output option used in the command:
             output_bam, output_fasta and output_gff. -->
        <data name="output_bam" format="bam" label="findcluster BAM on $on_string"/>
        <data name="output_fasta" format="fasta" label="findcluster contigs on $on_string"/>
        <data name="output_gff" format="gff3" label="findcluster GFF on $on_string"/>
    </outputs>
    <tests>
        <!-- Only the input BAM is set; the reference parameters keep their defaults.
             The BAM may differ from the expected file in up to 2 lines; the GFF must match
             the expected file exactly after sorting. output_fasta is not checked. -->
        <test>
            <param name="input" value="extended_and_annotated_roi.bam" ftype="bam"/>
            <output name="output_bam" file="three_cluster_out.bam" ftype="bam" lines_diff="2"/>
            <output name="output_gff" file="three_cluster_out.gff" ftype="gff3" lines_diff="0" sort="True"/>
        </test>
        <!-- Transposon reference supplied as a fasta dataset from the history. The GFF is only
             checked for the presence of the text FBti0019066_rover_Gypsy. Neither test
             exercises the genome reference or checks output_fasta. -->
        <test>
            <param name="input" value="extended_and_annotated_roi.bam" ftype="bam"/>
            <param name="transposon_source|reference_source_selector" value="history"/>
            <param name="transposon_source|ref_file" value="reference.fasta" ftype="fasta"/>
            <output name="output_bam" file="three_cluster_out.bam" ftype="bam" lines_diff="2"/>
            <output name="output_gff">
                <assert_contents>
                    <has_text text="FBti0019066_rover_Gypsy" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <!-- Help text: the findcluster command-line usage shown as a reStructuredText code block
         (appears to be copied from the tool's own help output; confirm when updating).
         NOTE(review): the region, max_proper_pair_size, include_duplicates and shm_dir
         options listed here are not passed by the command section of this wrapper. -->
    <help><![CDATA[
.. code-block::

    Usage: findcluster [OPTIONS]

      Find clusters of reads that support a TE insertion.

    Options:
      --input_path PATH               Find cluster in this BAM file.
      --region TEXT                   Find clusters in this Region (Format is
                                      chrX:2000-1000).
      --max_proper_pair_size INTEGER  Maximum proper pairs size. If not given will
                                      be inferred from the data.
      --output_bam PATH               Write out BAM file with cluster information
                                      to this path. Reads will have an additional
                                      "CD" tag to indicate the cluster number
      --output_gff PATH               Write out GFF file with cluster information
                                      to this path.
      --output_fasta PATH             Write out supporting evidence for clusters
                                      to this path.
      --sample_name TEXT              Sample name to use when writing out clusters
                                      in GFF file. Default is to infer the name
                                      from the input filename.
      --include_duplicates / --no-include_duplicates
                                      Include reads marked as duplicates when
                                      finding clusters.
      --transposon_reference_fasta TEXT
                                      Transposon fasta to align clipped reads to.
                                      Not necessary if BWA index is provided.
      --transposon_bwa_index TEXT     Transposon BWA index to align clipped reads
                                      to
      --genome_reference_fasta TEXT   Genome fasta to align clipped reads to. Not
                                      necessary if BWA index is provided.
      --genome_bwa_index TEXT         Genome BWA index to align clipped reads to
      --threads INTEGER RANGE         Threads to use for cap3 assembly step
      --shm_dir PATH                  Path to shared memory folder
      --version                       Show the version and exit.
      --help                          Show this message and exit.


]]></help>
</tool>
<!--
author	mvdbeek
date	Thu, 15 Mar 2018 07:20:58 -0400
parents	f6311a04a6c9
children	4b1ebfcd0f46
-->