view kallisto_quant.xml @ 9:2568a3b975cb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kallisto/ commit 168993a4e148506b1d3998c536caa2e501c36ccf
author iuc
date Wed, 31 May 2023 20:09:49 +0000
parents c971db6f0fe5
children 4f9c4e6566e5
line wrap: on
line source

<tool id="kallisto_quant" name="Kallisto quant" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
    <!-- Short summary of what the tool does. -->
    <description>quantify abundances of RNA-Seq transcripts</description>
    <!-- NOTE(review): the macros expanded in this file (requirements, reference_input,
         citations) and the @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens are presumably
         defined in macros.xml; confirm against that file. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Cross-reference to the bio.tools entry named "kallisto". -->
    <xrefs>
        <xref type="bio.tools">kallisto</xref>
    </xrefs>
    <expand macro="requirements" />
    <command detect_errors="exit_code">
        <![CDATA[
        ## Reference: build an index on the fly when the transcriptome is a history
        ## dataset, otherwise use the path field of the selected index entry.
        #if $reference_transcriptome.reference_transcriptome_source == "history":
            ln -s '$reference_transcriptome.reference' reference.fa &&
            kallisto index reference.fa -i reference.kallisto &&
            #set index_path = 'reference.kallisto'
        #else:
            #set index_path = $reference_transcriptome.index.fields.path
        #end if
        kallisto quant -i '$index_path'
            $bias --bootstrap-samples $bootstrap_samples --seed $seed $fusion  $pseudobam
            ## All kallisto outputs are written to the job working directory.
            -o .
            ## Extra threads are only requested when --pseudobam is off.
            #if not $pseudobam:
                --threads \${GALAXY_SLOTS:-1}
            #end if
            #if str($single_paired.single_paired_selector) == 'single':
                ## Single-end mode needs the fragment length distribution on the command line.
                --single
                #set $single_reads = $single_paired.reads
                --fragment-length $single_paired.fragment_length
                --sd $single_paired.sd
                '$single_reads'
            #else:
                ## Empty for unstranded libraries, otherwise --fr-stranded / --rf-stranded.
                $single_paired.lib_type
                #if str($single_paired.collection.collection_selector) == 'datasets':
                    #set $forward_reads = $single_paired.collection.forward
                    #set $reverse_reads = $single_paired.collection.reverse
                #else:
                    #set $forward_reads = $single_paired.collection.reads.forward
                    #set $reverse_reads = $single_paired.collection.reads.reverse
                #end if
                #set $reads = "'%s' '%s'" % ($forward_reads, $reverse_reads)
                $reads
            #end if
            $single_overhang
            ## The selector value is either the empty string (disabled) or the literal
            ## --genomebam flag; dataset paths are quoted like every other path above.
            #if $genomebam_option.selector:
                $genomebam_option.selector
                --gtf '$genomebam_option.gtf'
                --chromosomes '$genomebam_option.chromosomes'
            #end if
            ## Sort pseudoalignments.bam straight into the Galaxy output dataset.
            #if $pseudobam:
                && samtools sort --no-PG -O bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o '$pseudobam_output' pseudoalignments.bam
            #end if
            ## Echo the run summary to stdout so it ends up in the job log.
            && cat run_info.json
        ]]>
    </command>
    <inputs>
        <!-- Reference transcriptome selection comes from the reference_input macro. -->
        <expand macro="reference_input" />
        <!-- Read layout: single-end needs fragment length statistics, paired-end takes
             either two datasets or a paired collection plus a strandedness flag. -->
        <conditional name="single_paired">
            <param name="single_paired_selector" type="select" label="Single-end or paired reads">
                <option value="single" selected="true">Single-end</option>
                <option value="paired">Paired</option>
            </param>
            <when value="single">
                <param name="reads" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
                <param argument="--fragment-length" type="integer" value="200" label="Average fragment length" help="Illumina typically produces reads of 180-200bp" />
                <param argument="--sd" type="integer" value="20" label="Estimated standard deviation of fragment length" />
            </when>
            <when value="paired">
                <conditional name="collection">
                    <param name="collection_selector" type="select" label="Collection or individual datasets">
                        <option value="datasets" selected="true">Individual files</option>
                        <option value="collection">Pair or list of pairs</option>
                    </param>
                    <when value="datasets">
                        <param name="forward" type="data" format="fastq,fastq.gz" label="Forward reads" />
                        <param name="reverse" type="data" format="fastq,fastq.gz" label="Reverse reads" />
                    </when>
                    <when value="collection">
                        <param name="reads" type="data_collection" format="fastq,fastq.gz" collection_type="paired" label="Collection of reads" />
                    </when>
                </conditional>
                <!-- Option values are inserted verbatim into the command line. -->
                <param name="lib_type" type="select" label="Library strandness information">
                    <option value="">Unstranded</option>
                    <option value="--fr-stranded">Strand specific reads, first read forward</option>
                    <option value="--rf-stranded">Strand specific reads, first read reverse</option>
                </param>
            </when>
        </conditional>
        <!-- Boolean flags: truevalue is the literal command-line switch, falsevalue is empty. -->
        <param argument="--bias" type="boolean" truevalue="--bias" falsevalue="" label="Perform sequence based bias correction" help="Learns parameters for a model of sequence-specific bias and corrects the abundances accordingly" />
        <param argument="--bootstrap-samples" type="integer" value="0" label="Number of bootstrap samples" help="Running with bootstraps is mandatory if you want to perform differential expression analysis of isoforms with Sleuth. Default: 0" />
        <param argument="--fusion" type="boolean" truevalue="--fusion" falsevalue="" label="Search for fusions" help="It generates the required files for Pizzly. This option does normal quantification, but additionally looks for reads that do not pseudoalign because they are potentially from fusion genes." />
        <param argument="--single-overhang" type="boolean" truevalue="--single-overhang" falsevalue="" checked="false" label="Single overhang" help="Include reads where unobserved rest of fragment is predicted to lie outside a transcript" />
        <param argument="--pseudobam" type="boolean" truevalue="--pseudobam" falsevalue="" label="Output pseudoalignments in BAM format" />
        <!-- The selector value doubles as the command-line flag; the empty value disables it. -->
        <conditional name="genomebam_option">
            <param name="selector" type="select" label="Project pseudoalignments to genome">
                <option value="--genomebam">Enabled</option>
                <option value="" selected="true">Disabled</option>
            </param>
            <when value="--genomebam">
                <param argument="--gtf" type="data" format="gtf" label="GTF file" help="GTF file for transcriptome information" />
                <param argument="--chromosomes" type="data" format="tabular" label="Chromosome names and lengths"/>
            </when>
            <when value=""/>
        </conditional>
        <param argument="--seed" type="integer" value="42" label="Seed for the bootstrap sampling" help="Default: 42" />
    </inputs>
    <outputs>
        <!-- Abundance tables are collected from the job working directory. -->
        <data format="h5" name="abundance_h5" from_work_dir="abundance.h5" label="${tool.name} on ${on_string}: Abundances (HDF5)" />
        <data format="tabular" name="abundance_tab" from_work_dir="abundance.tsv" label="${tool.name} on ${on_string}: Abundances (tabular)" />
        <!-- No from_work_dir here: the command sorts pseudoalignments.bam directly into
             this dataset with "samtools sort -o '$pseudobam_output'". -->
        <data format="bam" name="pseudobam_output" label="${tool.name} on ${on_string}: Pseudoalignments">
            <filter>pseudobam</filter>
        </data>
        <!-- Only created when the fusion search is enabled. -->
        <data format="tabular" name="fusion_output" from_work_dir="fusion.txt" label="${tool.name} on ${on_string}: Pizzly input (tabular)">
            <filter>fusion</filter>
        </data>
    </outputs>
    <tests>
        <!-- Paired-end, individual datasets, index built from a history FASTA. -->
        <test>
            <param name="reference_transcriptome_source" value="history" />
            <param name="reference" ftype="fasta" value="mm10_chrM.fa" />
            <param name="single_paired_selector" value="paired" />
            <param name="collection_selector" value="datasets" />
            <param name="forward" ftype="fastq" value="mm10_chrM-1.f.fq" />
            <param name="reverse" ftype="fastq" value="mm10_chrM-1.r.fq" />
            <output name="abundance_tab" file="kallisto_quant_out1.tab" ftype="tabular" />
        </test>
        <!-- Same data with the fr-stranded library type; checks the abundance row by regex. -->
        <test>
            <param name="reference_transcriptome_source" value="history" />
            <param name="reference" ftype="fasta" value="mm10_chrM.fa" />
            <param name="single_paired_selector" value="paired" />
            <param name="collection_selector" value="datasets" />
            <param name="forward" ftype="fastq" value="mm10_chrM-1.f.fq" />
            <param name="reverse" ftype="fastq" value="mm10_chrM-1.r.fq" />
            <param name="lib_type" value="--fr-stranded"/>
            <output name="abundance_tab" >
                <assert_contents>
                    <has_text_matching expression="chrM\t16299\t15804.2\t37\t1e\+06" />
                </assert_contents>
            </output>
        </test>
        <!-- Paired-end reads supplied as a paired collection. -->
        <test>
            <param name="reference_transcriptome_source" value="history" />
            <param name="reference" ftype="fasta" value="mm10_chrM.fa" />
            <param name="single_paired_selector" value="paired" />
            <param name="collection_selector" value="collection" />
            <param name="reads">
                <collection type="paired">
                    <element name="forward" value="mm10_chrM-1.f.fq" />
                    <element name="reverse" value="mm10_chrM-1.r.fq" />
                </collection>
            </param>
            <output name="abundance_tab" file="kallisto_quant_out2.tab" ftype="tabular" />
        </test>
        <!-- Single-end reads; collection_selector is not set because it only exists
             in the paired branch of the single_paired conditional. -->
        <test>
            <param name="reference_transcriptome_source" value="history" />
            <param name="reference" ftype="fasta" value="mm10_chrM.fa" />
            <param name="single_paired_selector" value="single" />
            <param name="reads" ftype="fastq" value="mm10_chrM-1.f.fq" />
            <output name="abundance_tab" file="kallisto_quant_out3.tab" ftype="tabular" />
        </test>
        <!-- Paired-end with pseudobam enabled; also compares the sorted BAM output. -->
        <test>
            <param name="reference_transcriptome_source" value="history" />
            <param name="reference" ftype="fasta" value="felCat8_chrM.fa" />
            <param name="single_paired_selector" value="paired" />
            <param name="collection_selector" value="datasets" />
            <param name="pseudobam" value="true" />
            <param name="forward" ftype="fastq" value="felCat8_chrM_F.fq" />
            <param name="reverse" ftype="fastq" value="felCat8_chrM_R.fq" />
            <output name="abundance_tab" file="kallisto_quant_out4.tab" ftype="tabular" />
            <output name="pseudobam_output" file="kallisto_quant_out4.bam" ftype="bam" lines_diff="2"/>
        </test>
        <!-- Cached reference source (inputs carry dbkey sacCer2) with pseudobam enabled. -->
        <test>
            <param name="reference_transcriptome_source" value="cached" />
            <param name="single_paired_selector" value="paired" />
            <param name="collection_selector" value="datasets" />
            <param name="pseudobam" value="true" />
            <param name="forward" ftype="fastq" dbkey="sacCer2" value="sacCer2_chrM_F.fq.gz" />
            <param name="reverse" ftype="fastq" dbkey="sacCer2" value="sacCer2_chrM_R.fq" />
            <output name="abundance_tab" file="kallisto_quant_out5.tab" ftype="tabular" />
            <output name="pseudobam_output" file="kallisto_quant_out5.bam" ftype="bam" lines_diff="2"/>
        </test>
        <!-- Fusion search enabled; compares the fusion.txt output. -->
        <test>
            <param name="reference_transcriptome_source" value="history" />
            <param name="reference" ftype="fasta" value="hg38_transcripts.fa" />
            <param name="single_paired_selector" value="paired" />
            <param name="collection_selector" value="datasets" />
            <param name="fusion" value="true" />
            <param name="forward" ftype="fastq" dbkey="hg38" value="hg38_F.fq.gz" />
            <param name="reverse" ftype="fastq" dbkey="hg38" value="hg38_R.fq.gz" />
            <output name="fusion_output" file="fusion.txt" ftype="tabular" />
        </test>
        <!-- Genome projection enabled; asserts the related flags reach the command line. -->
        <test>
            <param name="reference_transcriptome_source" value="history" />
            <param name="reference" ftype="fasta" value="transcripts.fasta" />
            <param name="single_paired_selector" value="paired" />
            <param name="collection_selector" value="datasets" />
            <param name="forward" ftype="fastq" value="reads_forward.fastq.gz" />
            <param name="reverse" ftype="fastq" value="reads_reverse.fastq.gz" />
            <conditional name="genomebam_option">
                <param name="selector" value="--genomebam"/>
                <param name="gtf" value="annotation.gtf.gz"/>
                <param name="chromosomes" value="chromosome_size.tabular"/>
            </conditional>
            <output name="abundance_tab" file="kallisto_quant_out7.tab" ftype="tabular" />
            <assert_command>
                <has_text text="--genomebam" />
                <has_text text="--chromosomes" />
                <has_text text="--gtf" />
            </assert_command>
        </test>
    </tests>
    <!-- User-facing help: a general description of kallisto and pseudoalignment.
         The text is kept inside CDATA so it needs no XML escaping. -->
    <help>
 <![CDATA[
 kallisto is a program for quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. It is based on the novel idea of pseudoalignment for rapidly determining the compatibility of reads with targets, without the need for alignment. On benchmarks with standard RNA-Seq data, kallisto can quantify 30 million human reads in less than 3 minutes on a Mac desktop computer using only the read sequences and a transcriptome index that itself takes less than 10 minutes to build. Pseudoalignment of reads preserves the key information needed for quantification, and kallisto is therefore not only fast, but also as accurate as existing quantification tools. In fact, because the pseudoalignment procedure is robust to errors in the reads, in many benchmarks kallisto significantly outperforms existing tools.
 ]]>
     </help>
    <!-- NOTE(review): the "citations" macro is presumably defined in macros.xml; confirm. -->
    <expand macro="citations" />
</tool>