view minced.xml @ 0:20f12e7c279b draft

Imported from capsule None
author bgruening
date Mon, 05 May 2014 04:23:26 -0400
parents
children 6117712107bf
line wrap: on
line source

<tool id="crispr_recognition_tool" name="detect CRISPR sequences" version="0.1.5">
    <description>(minced)</description>
    <!-- Job splitting: the dataset named in split_inputs is split
         (split_mode="to_size", split_size="50") and the outputs named in
         merge_outputs are merged. Each merge_outputs entry must be spelled
         exactly like a <data name="..."> in <outputs>; the summary GFF output
         is declared there as "output_gff". -->
    <parallelism method="multi" split_inputs="input" split_mode="to_size" split_size="50" merge_outputs="output,output_region,output_ggf_full,output_gff,output_fasta" />
    <!-- External dependencies declared for this tool: the minced package
         (version 0.1.5) and a set_environment entry, MINCED_SCRIPT_PATH,
         which the command uses to locate reformat.py. -->
    <requirements>
        <requirement type="package" version="0.1.5">minced</requirement>
        <requirement type="set_environment">MINCED_SCRIPT_PATH</requirement>
    </requirements>
    <!-- Exit-code handling: both ranges are flagged as fatal, and together
         they cover every non-zero exit code (positive values through "1:",
         negative values through ":-1"). -->
    <stdio>
        <exit_code level="fatal" range="1:"/>
        <exit_code level="fatal" range=":-1"/>
    </stdio>
    <!-- Shell command, rendered as a Cheetah template.
         1. minced is run on the input and writes its report to a temporary
            file created with mktemp.
         2. reformat.py (located via MINCED_SCRIPT_PATH) is called with that
            report and the file names results.bed and results.tab.
         3. If spacers were requested, the "<report>_spacers.fa" file is moved
            to the FASTA output dataset.
         4. The temporary report is removed.
         The steps are chained with the shell AND operator so that the first
         failing step stops the chain and its exit code reaches <stdio>.
         A multiple select renders as a comma-joined string, so the selection
         is split on "," once and reused for every format test.
         NOTE(review): nothing in this command writes results.gff or
         results_full.gff, which the GFF outputs collect via from_work_dir.
         Confirm how the -gff / -gffFull results are meant to reach them. -->
    <command><![CDATA[
        #set $requested_formats = str($outputs).split(',')
        temp_output=\$(mktemp) &&
        minced
            -minNR $minNR
            -minRL $minRL
            -maxRL $maxRL
            -minSL $minSL
            -maxSL $maxSL
            #if str($searchWL) != 'none':
                -searchWL $searchWL
            #end if
            $input
            \$temp_output
            #if '-spacers' in $requested_formats:
                -spacers
            #end if
            #if '-gffFull' in $requested_formats:
                -gffFull
            #end if
            #if '-gff' in $requested_formats:
                -gff
            #end if
        &&
        python \$MINCED_SCRIPT_PATH/reformat.py \$temp_output results.bed results.tab &&
        #if '-spacers' in $requested_formats:
            mv \$temp_output'_spacers.fa' $output_fasta &&
        #end if
        rm -f \$temp_output
    ]]></command>
    <!-- User-facing parameters. Parameter names are referenced by the command
         template, the output filters and the tests, so only labels and help
         texts are adjusted here. -->
    <inputs>
        <!-- Sequences to scan; passed to minced as the input file. -->
        <param format="fasta" name="input" type="data" label="Input sequence as FASTA"/>

        <!-- Numeric thresholds, each forwarded to the minced option of the same name. -->
        <param name="minNR" type="integer" value="3" label="Minimum number of repeats a CRISPR must have"/>
        <param name="minRL" type="integer" value="19" label="Minimum length of a CRISPR repeated region"/>
        <param name="maxRL" type="integer" value="38" label="Maximum length of a CRISPR repeated region"/>
        <param name="minSL" type="integer" value="19" label="Minimum length of CRISPR non-repeated region" help="or spacer region"/>
        <param name="maxSL" type="integer" value="48" label="Maximum length of CRISPR non-repeated region" help="or spacer region"/>
        <!-- "none" makes the command omit -searchWL altogether. -->
        <param name="searchWL" type="select" label="Length of search window used to discover CRISPRs">
            <option value="none" selected="true">default</option>
            <option value="6">6</option>
            <option value="7">7</option>
            <option value="8">8</option>
            <option value="9">9</option>
        </param>

        <!-- Requested result files. The values -gff, -gffFull and -spacers are
             also passed to minced as flags by the command; BED and TABULAR are
             only consulted by the output filters. -->
        <param name="outputs" type="select" multiple="true" label="Additional output format" help="Please select an output format.">
            <option value="-gff">summary results in gff format</option>
            <option value="BED" selected="true">summary results in BED format</option>
            <option value="-gffFull">detailed results in gff format</option>
            <option value="TABULAR" selected="true">detailed results in tabular format</option>
            <option value="-spacers">fasta formatted file containing the spacers</option>
            <validator type="no_options" message="Please select at least one output file." />
        </param>

    </inputs>
    <!-- Result datasets. Each one carries a filter on the "outputs" select
         parameter. Dataset names are kept as they are (including the
         "output_ggf_full" spelling) because they are referenced by name
         elsewhere, e.g. in merge_outputs and in the tests.
         NOTE(review): the two GFF datasets are collected from results.gff and
         results_full.gff, but the command shown in this file does not write
         files with those names. Confirm where they are expected to come from. -->
    <outputs>
        <data format="tabular" name="output" from_work_dir="results.tab" label="${tool.name} on ${on_string}">
            <filter>'TABULAR' in outputs</filter>
        </data>
        <data format="bed" name="output_region" from_work_dir="results.bed" label="${tool.name} on ${on_string} - BED file">
            <filter>'BED' in outputs</filter>
        </data>
        <data format="gff3" name="output_ggf_full" from_work_dir="results_full.gff" label="${tool.name} on ${on_string} - GFF file extended">
            <filter>'-gffFull' in outputs</filter>
        </data>
        <data format="gff3" name="output_gff" from_work_dir="results.gff" label="${tool.name} on ${on_string} - GFF file">
            <filter>'-gff' in outputs</filter>
        </data>
        <!-- No from_work_dir here: the command moves the spacers file directly
             onto this dataset's path. -->
        <data format="fasta" name="output_fasta" label="${tool.name} on ${on_string} - spacers FASTA file">
            <filter>'-spacers' in outputs</filter>
        </data>
    </outputs>
    <!-- Functional test: runs on sequence.fasta with minRL raised to 29 (the
         other numeric values equal the declared defaults) and compares the
         tabular and BED results with reference files. The "outputs" parameter
         is not set in this test. -->
    <tests>
        <test>
            <param name="input" value="sequence.fasta" ftype="fasta"/>
            <param name="minNR" value="3"/>
            <param name="minRL" value="29"/>
            <param name="maxRL" value="38"/>
            <param name="minSL" value="19"/>
            <param name="maxSL" value="48"/>
            <param name="searchWL" value="none"/>
            <output name="output" file="minced.tabular" ftype="tabular"/>
            <output name="output_region" file="minced.bed" ftype="bed"/>
        </test>
    </tests>
    <help>
**What it does**

MinCED - Mining CRISPRs in Environmental Datasets


MinCED is a program to find Clustered Regularly Interspaced Short Palindromic
Repeats (CRISPRs) in full genomes or environmental datasets such as metagenomes,
in which sequence size can be anywhere from 100 to 800 bp. MinCED runs from the
command-line and was derived from CRT (http://www.room220.com/crt/).

https://github.com/ctSkennerton/minced

**Citation**

For the underlying tool, please cite `Bland C, Ramsey TL, Sabree F, Lowe M, Brown K, Kyrpides NC, Hugenholtz P:
CRISPR Recognition Tool (CRT): a tool for automatic detection of clustered regularly interspaced palindromic repeats. BMC Bioinformatics. 2007 Jun 18;8(1):209`

If you use this tool in Galaxy, please cite Gruening, BA et al. https://github.com/bgruening/galaxytools

    </help>
</tool>