comparison kallisto_quant.xml @ 9:2568a3b975cb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kallisto/ commit 168993a4e148506b1d3998c536caa2e501c36ccf
author iuc
date Wed, 31 May 2023 20:09:49 +0000
parents c971db6f0fe5
children 4f9c4e6566e5
comparison
equal deleted inserted replaced
8:c971db6f0fe5 9:2568a3b975cb
1 <?xml version="1.0"?>
2 <tool id="kallisto_quant" name="Kallisto quant" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05"> 1 <tool id="kallisto_quant" name="Kallisto quant" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
3 <description>- quantify abundances of RNA-Seq transcripts</description> 2 <description>quantify abundances of RNA-Seq transcripts</description>
4 <macros> 3 <macros>
5 <import>macros.xml</import> 4 <import>macros.xml</import>
6 </macros> 5 </macros>
6 <xrefs>
7 <xref type="bio.tools">kallisto</xref>
8 </xrefs>
7 <expand macro="requirements" /> 9 <expand macro="requirements" />
8 <command detect_errors="exit_code"> 10 <command detect_errors="exit_code">
9 <![CDATA[ 11 <![CDATA[
10 #if $reference_transcriptome.reference_transcriptome_source == "history": 12 #if $reference_transcriptome.reference_transcriptome_source == "history":
11 ln -s '$reference_transcriptome.reference' reference.fa && 13 ln -s '$reference_transcriptome.reference' reference.fa &&
13 #set index_path = 'reference.kallisto' 15 #set index_path = 'reference.kallisto'
14 #else: 16 #else:
15 #set index_path = $reference_transcriptome.index.fields.path 17 #set index_path = $reference_transcriptome.index.fields.path
16 #end if 18 #end if
17 kallisto quant -i '$index_path' 19 kallisto quant -i '$index_path'
18 $bias --bootstrap-samples $bootstrap_samples --seed $seed $fusion $lib_type $pseudobam 20 $bias --bootstrap-samples $bootstrap_samples --seed $seed $fusion $pseudobam
19 #if $pseudobam: 21 #if $pseudobam:
20 -o . 22 -o .
21 #else: 23 #else:
22 --threads \${GALAXY_SLOTS:-1} 24 --threads \${GALAXY_SLOTS:-1}
23 -o . 25 -o .
27 #set $single_reads = $single_paired.reads 29 #set $single_reads = $single_paired.reads
28 --fragment-length $single_paired.fragment_length 30 --fragment-length $single_paired.fragment_length
29 --sd $single_paired.sd 31 --sd $single_paired.sd
30 '$single_reads' 32 '$single_reads'
31 #else: 33 #else:
34 $single_paired.lib_type
32 #if str($single_paired.collection.collection_selector) == 'datasets': 35 #if str($single_paired.collection.collection_selector) == 'datasets':
33 #set $forward_reads = $single_paired.collection.forward 36 #set $forward_reads = $single_paired.collection.forward
34 #set $reverse_reads = $single_paired.collection.reverse 37 #set $reverse_reads = $single_paired.collection.reverse
35 #else: 38 #else:
36 #set $forward_reads = $single_paired.collection.reads.forward 39 #set $forward_reads = $single_paired.collection.reads.forward
37 #set $reverse_reads = $single_paired.collection.reads.reverse 40 #set $reverse_reads = $single_paired.collection.reads.reverse
38 #end if 41 #end if
39 #set $reads = "'%s' '%s'" % ($forward_reads, $reverse_reads) 42 #set $reads = "'%s' '%s'" % ($forward_reads, $reverse_reads)
40 $reads 43 $reads
44 #end if
45 $single_overhang
46 #if $genomebam_option.selector
47 $genomebam_option.selector
48 --gtf $genomebam_option.gtf
49 --chromosomes $genomebam_option.chromosomes
41 #end if 50 #end if
42 #if $pseudobam: 51 #if $pseudobam:
43 && samtools sort --no-PG -O bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o '$pseudobam_output' pseudoalignments.bam 52 && samtools sort --no-PG -O bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o '$pseudobam_output' pseudoalignments.bam
44 #end if 53 #end if
45 && cat run_info.json 54 && cat run_info.json
52 <option value="single" selected="true">Single-end</option> 61 <option value="single" selected="true">Single-end</option>
53 <option value="paired">Paired</option> 62 <option value="paired">Paired</option>
54 </param> 63 </param>
55 <when value="single"> 64 <when value="single">
56 <param name="reads" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> 65 <param name="reads" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
57 <param name="fragment_length" argument="--fragment-length" type="integer" value="200" label="Average fragment length" help="Illumina typically produces reads of 180-200bp" /> 66 <param argument="--fragment-length" type="integer" value="200" label="Average fragment length" help="Illumina typically produces reads of 180-200bp" />
58 <param argument="--sd" type="integer" value="20" label="Estimated standard deviation of fragment length" /> 67 <param argument="--sd" type="integer" value="20" label="Estimated standard deviation of fragment length" />
59 </when> 68 </when>
60 <when value="paired"> 69 <when value="paired">
61 <conditional name="collection"> 70 <conditional name="collection">
62 <param name="collection_selector" type="select" label="Collection or individual datasets"> 71 <param name="collection_selector" type="select" label="Collection or individual datasets">
69 </when> 78 </when>
70 <when value="collection"> 79 <when value="collection">
71 <param name="reads" type="data_collection" format="fastq,fastq.gz" collection_type="paired" label="Collection of reads" /> 80 <param name="reads" type="data_collection" format="fastq,fastq.gz" collection_type="paired" label="Collection of reads" />
72 </when> 81 </when>
73 </conditional> 82 </conditional>
83 <param name="lib_type" type="select" label="Library strandness information">
84 <option value="">Unstranded</option>
85 <option value="--fr-stranded">Strand specific reads, first read forward</option>
86 <option value="--rf-stranded">Strand specific reads, first read reverse</option>
87 </param>
74 </when> 88 </when>
75 </conditional> 89 </conditional>
76 <param argument="--bias" type="boolean" truevalue="--bias" falsevalue="" label="Perform sequence based bias correction" /> 90 <param argument="--bias" type="boolean" truevalue="--bias" falsevalue="" label="Perform sequence based bias correction" help="It allows to learn
77 <param name="bootstrap_samples" argument="--bootstrap-samples" type="integer" value="0" label="Number of bootstrap samples" help="default: 0" /> 91 parameters for a model of sequence specific bias and corrects the abundances accordingly"/>
78 <param argument="--seed" type="integer" value="42" label="Seed for the bootstrap sampling" help="default: 42" /> 92 <param argument="--bootstrap-samples" type="integer" value="0" label="Number of bootstrap samples" help="Running with bootstraps
79 <param argument="--fusion" type="boolean" truevalue="--fusion" falsevalue="" label="Search for fusions" help="for Pizzly" /> 93 is mandatory if you want to perform differential expression analysis of isoforms with Sleuth. Default: 0" />
80 <param name="lib_type" type="select" label="Library strandness information"> 94 <param argument="--fusion" type="boolean" truevalue="--fusion" falsevalue="" label="Search for fusions" help="It generates the required files for Pizzly. This option does normal quantification, but
81 <option value="">Unstranded</option> 95 additionally looks for reads that do not pseudoalign because they are potentially from fusion genes." />
82 <option value="--fr-stranded">Strand specific reads, first read forward</option> 96 <param argument="--single-overhang" type="boolean" truevalue="--single-overhang" falsevalue="" checked="false" label="Single overhang" help="Include reads where
83 <option value="--rf-stranded">Strand specific reads, first read reverse</option> 97 unobserved rest of fragment is predicted to lie outside a transcript" />
84 </param>
85 <param argument="--pseudobam" type="boolean" truevalue="--pseudobam" falsevalue="" label="Output pseudoalignments in BAM format" /> 98 <param argument="--pseudobam" type="boolean" truevalue="--pseudobam" falsevalue="" label="Output pseudoalignments in BAM format" />
99 <conditional name="genomebam_option">
100 <param name="selector" type="select" label="Project pseudoalignments to genome">
101 <option value="--genomebam">Enabled</option>
102 <option value="" selected="true">Disabled</option>
103 </param>
104 <when value="--genomebam">
105 <param argument="--gtf" type="data" format="gtf" label="GTF file" help="GTF file for transcriptome information" />
106 <param argument="--chromosomes" type="data" format="tabular" label="Chromosome names and lengths"/>
107 </when>
108 <when value=""/>
109 </conditional>
110 <param argument="--seed" type="integer" value="42" label="Seed for the bootstrap sampling" help="Default: 42" />
86 </inputs> 111 </inputs>
87 <outputs> 112 <outputs>
88 <data format="h5" name="abundance_h5" from_work_dir="abundance.h5" label="${tool.name} on ${on_string}: Abundances (HDF5)" /> 113 <data format="h5" name="abundance_h5" from_work_dir="abundance.h5" label="${tool.name} on ${on_string}: Abundances (HDF5)" />
89 <data format="tabular" name="abundance_tab" from_work_dir="abundance.tsv" label="${tool.name} on ${on_string}: Abundances (tabular)" /> 114 <data format="tabular" name="abundance_tab" from_work_dir="abundance.tsv" label="${tool.name} on ${on_string}: Abundances (tabular)" />
90 <data format="bam" name="pseudobam_output" from_work_dir="abundance.txt" label="${tool.name} on ${on_string}: Pseudoalignments"> 115 <data format="bam" name="pseudobam_output" from_work_dir="abundance.txt" label="${tool.name} on ${on_string}: Pseudoalignments">
168 <param name="fusion" value="true" /> 193 <param name="fusion" value="true" />
169 <param name="forward" ftype="fastq" dbkey="hg38" value="hg38_F.fq.gz" /> 194 <param name="forward" ftype="fastq" dbkey="hg38" value="hg38_F.fq.gz" />
170 <param name="reverse" ftype="fastq" dbkey="hg38" value="hg38_R.fq.gz" /> 195 <param name="reverse" ftype="fastq" dbkey="hg38" value="hg38_R.fq.gz" />
171 <output name="fusion_output" file="fusion.txt" ftype="tabular" /> 196 <output name="fusion_output" file="fusion.txt" ftype="tabular" />
172 </test> 197 </test>
173 198 <test>
199 <param name="reference_transcriptome_source" value="history" />
200 <param name="reference" ftype="fasta" value="transcripts.fasta" />
201 <param name="single_paired_selector" value="paired" />
202 <param name="collection_selector" value="datasets" />
203 <param name="forward" ftype="fastq" value="reads_forward.fastq.gz" />
204 <param name="reverse" ftype="fastq" value="reads_reverse.fastq.gz" />
205 <conditional name="genomebam_option">
206 <param name="selector" value="--genomebam"/>
207 <param name="gtf" value="annotation.gtf.gz"/>
208 <param name="chromosomes" value="chromosome_size.tabular"/>
209 </conditional>
210 <output name="abundance_tab" file="kallisto_quant_out7.tab" ftype="tabular" />
211 <assert_command>
212 <has_text text="--genomebam" />
213 <has_text text="--chromosomes" />
214 <has_text text="--gtf" />
215 </assert_command>
216 </test>
174 </tests> 217 </tests>
175 <help> 218 <help>
176 <![CDATA[ 219 <![CDATA[
177 kallisto is a program for quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. It is based on the novel idea of pseudoalignment for rapidly determining the compatibility of reads with targets, without the need for alignment. On benchmarks with standard RNA-Seq data, kallisto can quantify 30 million human reads in less than 3 minutes on a Mac desktop computer using only the read sequences and a transcriptome index that itself takes less than 10 minutes to build. Pseudoalignment of reads preserves the key information needed for quantification, and kallisto is therefore not only fast, but also as accurate as existing quantification tools. In fact, because the pseudoalignment procedure is robust to errors in the reads, in many benchmarks kallisto significantly outperforms existing tools. 220 kallisto is a program for quantifying abundances of transcripts from RNA-Seq data, or more generally of target sequences using high-throughput sequencing reads. It is based on the novel idea of pseudoalignment for rapidly determining the compatibility of reads with targets, without the need for alignment. On benchmarks with standard RNA-Seq data, kallisto can quantify 30 million human reads in less than 3 minutes on a Mac desktop computer using only the read sequences and a transcriptome index that itself takes less than 10 minutes to build. Pseudoalignment of reads preserves the key information needed for quantification, and kallisto is therefore not only fast, but also as accurate as existing quantification tools. In fact, because the pseudoalignment procedure is robust to errors in the reads, in many benchmarks kallisto significantly outperforms existing tools.
178 ]]> 221 ]]>