Mercurial > repos > bgruening > salmon
changeset 17:c8903f357804 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 10ccc47885ce71e602d66e157bd475f1facbd042
| author | bgruening | 
|---|---|
| date | Mon, 05 Dec 2022 15:47:23 +0000 | 
| parents | 49121db48873 | 
| children | ebab418107f5 | 
| files | macros.xml salmonquant.xml test-data/alevin_mat.mtx test-data/alevin_mat_01.mtx test-data/alevin_mat_02.mtx test-data/alevin_mat_indropV2.mtx test-data/full_data_structure.txt test-data/genome.fasta test-data/length_distribution.txt | 
| diffstat | 9 files changed, 1250 insertions(+), 109 deletions(-) [+] | 
line wrap: on
 line diff
--- a/macros.xml Thu Jul 22 14:05:28 2021 +0000 +++ b/macros.xml Mon Dec 05 15:47:23 2022 +0000 @@ -1,16 +1,17 @@ <macros> - <token name="@VERSION@">1.5.1</token> - <token name="@GALAXY_VERSION@">galaxy0</token> + <token name="@TOOL_VERSION@">1.9.0</token> + <token name="@VERSION_SUFFIX@">1</token> <token name="@IDX_VERSION@">q7</token> <token name="@PROFILE_VERSION@">20.01</token> <xml name="requirements"> <requirements> - <requirement type="package" version="@VERSION@">salmon</requirement> + <requirement type="package" version="@TOOL_VERSION@">salmon</requirement> <requirement type="package" version="1.3">seqtk</requirement> - <requirement type="package" version="1.12">samtools</requirement> + <requirement type="package" version="1.16.1">samtools</requirement> <requirement type="package" version="0.2.0">vpolo</requirement> - <requirement type="package" version="1.3.0">pandas</requirement> - <requirement type="package" version="1.7.0">scipy</requirement> + <requirement type="package" version="1.5.2">pandas</requirement> + <requirement type="package" version="3.0.0">graphviz</requirement> + <requirement type="package" version="1.9.3">scipy</requirement> </requirements> </xml> <xml name="orient"> @@ -72,10 +73,14 @@ </when> <!-- build-in --> <when value="history"> <section name="s_index" title="Salmon index" expanded="true"> - <param name="fasta" type="data" format="fasta" label="Transcripts fasta file"/> + <param name="fasta" type="data" format="fasta" label="Transcripts FASTA file"/> + <param name="genome" type="data" format="fasta" optional="true" label="Reference genome" + help="The reference genome is required for generating a decoy-away index. The decoy sequences are regions of the target genome + that are sequence similar to annotated transcripts. These are the regions of the genome most likely to cause mismapping." /> <param name="kmer" type="integer" value="31" label="Kmer length"/> <param name="phash" type="boolean" label="Perfect Hash" truevalue="--perfectHash" falsevalue="" checked="false" - help="Build the index using a perfect hash rather than a dense hash. This will require less memory (especially during quantification), but will take longer to construct"/> + help="Build the index using a perfect hash rather than a dense hash. This will require less memory (especially during quantification), + but will take longer to construct"/> </section> </when> </conditional> @@ -155,9 +160,20 @@ <param name="dovetail" label="Allow Dovetail" type="boolean" truevalue="--allowDovetail" falsevalue="" checked="False" help="Allow dovetail mappings"/> <param name="recoverOrphans" label="Recover Orphans" type="boolean" truevalue="--recoverOrphans" falsevalue="" checked="False" help="Attempt to recover the mates of orphaned reads. This uses edlib for orphan recovery, and so introduces some computational overhead, but it can improve sensitivity."/> - <param name="writeMappings" type="boolean" truevalue="--writeMappings=./output/samout.sam" falsevalue="" checked="False" - label="Write Mappings to Bam File" - help="If this option is provided, then the quasi-mapping results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead"/> + <conditional name="bam_options"> + <param argument="--writeMappings" type="select" label="Write Mappings to BAM File" help="If this option is provided, then the quasi-mapping results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead"> + <option value="--writeMappings=./output/samout.sam">Enabled</option> + <option value="" selected="true">Disabled</option> + </param> + <when value="--writeMappings=./output/samout.sam"> + <param argument="--writeQualities" type="boolean" truevalue="--writeQualities" falsevalue="" checked="false" label="Write qualities" help="If this flag is + provided, then the output SAM file will contain quality strings as well as read sequences." /> + </when> + <when value=""/> + </conditional> + <param name="writeMappings" type="boolean" truevalue="" falsevalue="" checked="False" + label="Write Mappings to Bam File" + help="If this option is provided, then the quasi-mapping results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead"/> <param name="consistentHits" type="boolean" truevalue="--consistentHits" falsevalue="" checked="False" label="Consistent Hits" help="Force hits gathered during quasi-mapping to be 'consistent' (i.e. co-linear and approximately the right distance apart)."/> @@ -315,10 +331,21 @@ mkdir ./index && mkdir ./output && #if $quant_type.refTranscriptSource.TranscriptSource != "indexed": + #if $quant_type.refTranscriptSource.s_index.genome + cat '${quant_type.refTranscriptSource.s_index.genome}' | grep "^>" | cut -d " " -f 1 > 'decoys.txt' && + sed -i.bak -e 's/>//g' 'decoys.txt' && + cat '${quant_type.refTranscriptSource.s_index.fasta}' '${quant_type.refTranscriptSource.s_index.genome}' > 'input_index.fasta' && + #else + ln -s '${quant_type.refTranscriptSource.s_index.fasta}' 'input_index.fasta' && + #end if salmon index -i ./index --kmerLen '${quant_type.refTranscriptSource.s_index.kmer}' --gencode - --transcripts '${quant_type.refTranscriptSource.s_index.fasta}' + --threads "\${GALAXY_SLOTS:-4}" + --transcripts 'input_index.fasta' + #if $quant_type.refTranscriptSource.s_index.genome + --decoy 'decoys.txt' + #end if && #set $index_path = './index' #else @@ -453,7 +480,10 @@ #end if ${quant_type.dovetail} ${quant_type.recoverOrphans} - ${quant_type.writeMappings} + ${quant_type.bam_options.writeMappings} + #if $quant_type.bam_options.writeMappings: + ${quant_type.bam_options.writeQualities} + #end if ${quant_type.consistentHits} #if $quant_type.quasiCoverage: --quasiCoverage '${quant_type.quasiCoverage}'
--- a/salmonquant.xml Thu Jul 22 14:05:28 2021 +0000 +++ b/salmonquant.xml Mon Dec 05 15:47:23 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="salmon" name="Salmon quant" version="@VERSION@+@GALAXY_VERSION@" profile="@PROFILE_VERSION@"> +<tool id="salmon" name="Salmon quant" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE_VERSION@"> <description>Perform dual-phase, reads or mapping-based estimation of transcript abundance from RNA-seq reads</description> <macros> <import>macros.xml</import> @@ -12,7 +12,7 @@ @salalign@ #end if @salquant@ - #if $quant_type.qtype == "reads" and $quant_type.writeMappings: + #if $quant_type.qtype == "reads" and $quant_type.bam_options.writeMappings: && @bam_sort@ ./output/samout.sam #end if #if $quant_type.qtype == "alignment" and $quant_type.sampleOut: @@ -37,14 +37,14 @@ <expand macro="quantboth"/> </inputs> <outputs> - <data name="output_quant" format="tabular" from_work_dir="output/quant.sf" label="${tool.name} on ${on_string} (Quantification)" /> - <data name="output_gene_quant" format="tabular" from_work_dir="output/quant.genes.sf" label="${tool.name} on ${on_string} (Gene Quantification)"> + <data name="output_quant" format="tabular" from_work_dir="output/quant.sf" label="${tool.name} on ${on_string}: transcript quantification" /> + <data name="output_gene_quant" format="tabular" from_work_dir="output/quant.genes.sf" label="${tool.name} on ${on_string}: gene quantification"> <filter>geneMap</filter> </data> - <data name="output_bam" format="bam" from_work_dir="output/bamout.bam" label="${tool.name} on ${on_string} (BAM format)"> - <filter>quant_type['qtype'] == "reads" and quant_type['writeMappings']</filter> + <data name="output_bam" format="bam" from_work_dir="output/bamout.bam" label="${tool.name} on ${on_string}: BAM"> + <filter>quant_type['qtype'] == "reads" and quant_type['bam_options']['writeMappings']</filter> </data> - <data name="postSample" format="bam" from_work_dir="output/bamout.bam" label="Sampled input alignments on ${on_string}(BAM format)"> + <data name="postSample" format="bam" from_work_dir="output/bamout.bam" label="${tool.name} on ${on_string}: sampled input alignments"> <filter>quant_type['qtype'] == "alignment" and quant_type['sampleOut']</filter> </data> </outputs> @@ -68,7 +68,9 @@ </conditional> </conditional> </section> - <param name="writeMappings" value="true"/> + <conditional name="bam_options"> + <param name="writeMappings" value="--writeMappings=./output/samout.sam"/> + </conditional> </conditional> <output name="output_quant" ftype="tabular"> <assert_contents> @@ -102,7 +104,9 @@ <param name="single_or_paired.input_mate1" value="fastqs/reads_1.fastq" /> <param name="single_or_paired.input_mate2" value="fastqs/reads_2.fastq" /> </section> - <param name="writeMappings" value="true"/> + <conditional name="bam_options"> + <param name="writeMappings" value="--writeMappings=./output/samout.sam"/> + </conditional> </conditional> <output name="output_quant" ftype="tabular"> <assert_contents> @@ -183,6 +187,7 @@ </assert_contents> </output> </test> + <!-- Test 05 --> <test expect_num_outputs="2"> <conditional name="quant_type"> <param name="qtype" value="alignment"/> @@ -315,6 +320,89 @@ </assert_contents> </output> </test> + <!-- Test 10 --> + <!-- Test writeQualities option--> + <test expect_num_outputs="2"> + <conditional name="quant_type"> + <param name="qtype" value="reads"/> + <conditional name="refTranscriptSource"> + <param name="TranscriptSource" value="history"/> + <section name="s_index"> + <param name="fasta" value="transcripts.fasta"/> + </section> + </conditional> + <section name="input"> + <param name="single_or_paired.single_or_paired_opts" value="paired" /> + <param name="single_or_paired.input_mate1" value="fastqs/reads_1.fastq" /> + <param name="single_or_paired.input_mate2" value="fastqs/reads_2.fastq" /> + </section> + <conditional name="bam_options"> + <param name="writeMappings" value="--writeMappings=./output/samout.sam"/> + <param name="writeQualities" value="true"/> + </conditional> + </conditional> + <output name="output_quant" ftype="tabular"> + <assert_contents> + <has_text text="EffectiveLength" /> + <has_text text="TPM" /> + <has_text text="NM_001168316" /> + <has_text text="NM_174914" /> + <has_text text="NM_018953" /> + <has_text text="NR_003084" /> + <has_text text="NM_017410" /> + <has_text text="NM_153693" /> + <has_text text="NR_031764" /> + <has_n_columns n="5" /> + </assert_contents> + </output> + <assert_command> + <has_text text="--libType A"/> + </assert_command> + </test> + <!-- Test genome input for decoy index --> + <test expect_num_outputs="2"> + <conditional name="quant_type"> + <param name="qtype" value="reads"/> + <conditional name="refTranscriptSource"> + <param name="TranscriptSource" value="history"/> + <section name="s_index"> + <param name="fasta" value="transcripts.fasta"/> + <param name="genome" value="genome.fasta"/> + </section> + </conditional> + <section name="input"> + <conditional name="single_or_paired"> + <param name="single_or_paired_opts" value="paired" /> + <param name="input_mate1" value="fastqs/reads_1.fastq" /> + <param name="input_mate2" value="fastqs/reads_2.fastq" /> + <conditional name="libtype"> + <param name="strandedness" value="U"/> + </conditional> + </conditional> + </section> + <conditional name="bam_options"> + <param name="writeMappings" value="--writeMappings=./output/samout.sam"/> + </conditional> + </conditional> + <output name="output_quant" ftype="tabular"> + <assert_contents> + <has_text text="EffectiveLength" /> + <has_text text="TPM" /> + <has_text text="NM_001168316" /> + <has_text text="NM_174914" /> + <has_text text="NM_018953" /> + <has_text text="NR_003084" /> + <has_text text="NM_017410" /> + <has_text text="NM_153693" /> + <has_text text="NR_031764" /> + <has_n_columns n="5" /> + </assert_contents> + </output> + <assert_command> + <has_text text="--libType IU"/> + <has_text text="--decoy"/> + </assert_command> + </test> </tests> <help><