Mercurial > repos > bgruening > salmonquantmerge
view macros.xml @ 7:7e50e8d920db draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 10ccc47885ce71e602d66e157bd475f1facbd042
author | bgruening |
---|---|
date | Mon, 05 Dec 2022 15:48:07 +0000 |
parents | 88c07fc4024c |
children | ee72e302b6a3 |
line wrap: on
line source
<macros>
    <!-- Version tokens. @IDX_VERSION@ is matched against column 4 of the
         salmon_indexes_versioned data table by the "index" macro below. -->
    <token name="@TOOL_VERSION@">1.9.0</token>
    <token name="@VERSION_SUFFIX@">1</token>
    <token name="@IDX_VERSION@">q7</token>
    <token name="@PROFILE_VERSION@">20.01</token>

    <!-- Packages needed by the wrappers; salmon itself is pinned to @TOOL_VERSION@. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">salmon</requirement>
            <requirement type="package" version="1.3">seqtk</requirement>
            <requirement type="package" version="1.16.1">samtools</requirement>
            <requirement type="package" version="0.2.0">vpolo</requirement>
            <requirement type="package" version="1.5.2">pandas</requirement>
            <requirement type="package" version="3.0.0">graphviz</requirement>
            <requirement type="package" version="1.9.3">scipy</requirement>
        </requirements>
    </xml>

    <!-- Relative mate orientation; the value is the first letter of the
         paired-end library type string built in the @salreads@ token. -->
    <xml name="orient">
        <param name="orientation" type="select" label="Relative orientation of reads within a pair">
            <option value="M">Mates are oriented in the same direction (M = matching)</option>
            <option value="O">Mates are oriented away from each other (O = outward)</option>
            <option value="I" selected="True">Mates are oriented toward each other (I = inward)</option>
        </param>
    </xml>

    <!-- Strandedness conditional. Callers supply the <when> branches through <yield/>. -->
    <xml name="stranded">
        <conditional name="libtype" >
            <param name="strandedness" type="select" label="Specify the strandedness of the reads" help="--libType">
                <option value="A" selected="true">Infer automatically (A)</option>
                <option value="U">Not stranded (U)</option>
                <option value="SF">read comes from the forward strand (SF)</option>
                <option value="SR">read comes from the reverse strand (SR)</option>
            </param>
            <yield/>
        </conditional>
    </xml>

    <!-- Single-end variant: no branch needs any extra parameter. -->
    <xml name="stranded_se">
        <expand macro="stranded">
            <when value="A"/>
            <when value="U"/>
            <when value="SF"/>
            <when value="SR"/>
        </expand>
    </xml>

    <!-- Paired-end variant: every explicit strandedness also asks for the mate orientation. -->
    <xml name="stranded_pe">
        <expand macro="stranded">
            <when value="A">
            </when>
            <when value="U">
                <expand macro="orient"/>
            </when>
            <when value="SF">
                <expand macro="orient"/>
            </when>
            <when value="SR">
                <expand macro="orient"/>
            </when>
        </expand>
    </xml>

    <!-- Reference selection: a built-in index from the data table, or a
         transcript FASTA (plus optional genome for decoys) from the history. -->
    <xml name="index">
        <conditional name="refTranscriptSource">
            <param name="TranscriptSource" type="select" label="Select a reference transcriptome from your history or use a built-in index?" help="Built-ins were indexed using default options">
                <option value="indexed">Use a built-in index</option>
                <option value="history" selected="True">Use one from the history</option>
            </param>
            <when value="indexed">
                <param name="index" type="select" label="Select a reference transcriptome" help="If your transcriptome of interest is not listed, contact your Galaxy admin">
                    <options from_data_table="salmon_indexes_versioned">
                        <filter type="sort_by" column="2"/>
                        <filter type="static_value" column="4" value="@IDX_VERSION@" />
                        <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                    </options>
                </param>
            </when> <!-- built-in -->
            <when value="history">
                <section name="s_index" title="Salmon index" expanded="true">
                    <param name="fasta" type="data" format="fasta" label="Transcripts FASTA file"/>
                    <param name="genome" type="data" format="fasta" optional="true" label="Reference genome" help="The reference genome is required for generating a decoy-away index. The decoy sequences are regions of the target genome that are sequence similar to annotated transcripts. These are the regions of the genome most likely to cause mismapping." />
                    <param name="kmer" type="integer" value="31" label="Kmer length"/>
                    <!-- NOTE(review): the @indexing@ token visible in this file does not
                         reference this parameter; confirm whether it is still consumed. -->
                    <param name="phash" type="boolean" label="Perfect Hash" truevalue="--perfectHash" falsevalue="" checked="false" help="Build the index using a perfect hash rather than a dense hash. This will require less memory (especially during quantification), but will take longer to construct"/>
                </section>
            </when>
        </conditional>
    </xml>

    <!-- Read-based quantification inputs and mapping options. -->
    <xml name="reads">
        <section name="input" title="Data input" expanded="True">
            <conditional name="single_or_paired">
                <param name="single_or_paired_opts" type="select" label="Is this library mate-paired?">
                    <option value="single">Single-end</option>
                    <option value="paired">Paired-end</option>
                    <option value="paired_collection">Paired-end Dataset Collection</option>
                    <option value="paired_interleaved">Paired-end data from single interleaved dataset</option>
                </param>
                <when value="single">
                    <param name="input_singles" type="data" format="fastq,fasta,fastq.gz,fastq.bz2" label="FASTQ/FASTA file" help="FASTQ file." />
                    <expand macro="stranded_se"/>
                </when>
                <when value="paired">
                    <param name="input_mate1" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 1" help="FASTQ file." />
                    <param name="input_mate2" type="data" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Mate pair 2" help="FASTQ file." />
                    <expand macro="stranded_pe"/>
                </when>
                <when value="paired_collection">
                    <!-- Literal double quotes inside an attribute value must be written as &quot;. -->
                    <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data_collection" collection_type="paired" label="FASTQ Paired Dataset" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                    <expand macro="stranded_pe"/>
                </when>
                <when value="paired_interleaved">
                    <param name="input_1" format="fastq,fasta,fastq.gz,fastq.bz2,fastqsanger,fastqsanger.gz,fastqsanger.bz2" type="data" label="Interleaved FASTQ file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;.
--interleaved"/>
                    <expand macro="stranded_pe"/>
                </when>
            </conditional>
        </section>
        <!-- Only one index type is offered. -->
        <param argument="--type" type="select" label="Type of index" help="When using quasi, orphaned reads will be considered when performing lightweight-alignment.">
            <option value="quasi" selected="True">quasi</option>
        </param>
        <!--Salmon quant type select: reads-based or alignment-based-->
        <param name="discardOrphansQuasi" type="boolean" truevalue="--discardOrphansQuasi" falsevalue="" checked="False" label="Discard orphan quasi" help="Discard orphan mappings in quasi-mapping mode. If this flag is passed then only paired mappings will be considered toward quantification estimates. The default behavior is to consider orphan mappings if no valid paired mappings exist."/>
        <!-- Selective-alignment options; the nested parameters are only shown
             when mapping validation is switched on. -->
        <conditional name="validmap">
            <param name="validateMappings" type="boolean" truevalue="--validateMappings" falsevalue="" checked="False" label="Validate mappings" help="Validate mappings using alignment-based verification. If this flag is passed, quasi-mappings will be validated to ensure that they could give rise to a reasonable alignment before they are further used for quantification."/>
            <when value="--validateMappings">
                <param name="minScoreFraction" type="float" value="0.65" min="0.0" max="0.99" label="Min Score Fraction" help="The fraction of the optimal possible alignment score that a mapping must achieve in order to be considered valid. Should be in (0,1]."/>
                <!-- Short label, long description in help (previously the whole text sat in the label). -->
                <param name="maxMMPExtension" type="integer" optional="True" label="Maximum MMP extension" help="Sets the maximum allowable MMP extension when collecting suffix array intervals to be used in chaining. This prevents MMPs from becoming too long, and potentially masking intervals that would uncover other good quasi-mappings for the read. This heuristic mimics the idea of the maximum mappable safe prefix (MMSP) in selective alignment. Setting a smaller value will potentially allow for more sensitive, but slower, mapping."/>
                <param name="matchScore" argument="--ma" type="integer" value="2" label="Match Score" help="The value given to a match between read and reference nucleotides in an alignment."/>
                <param name="mismatchPenalty" argument="--mp" type="integer" value="4" label="Mismatch Penalty" help="The value given to a mis-match between read and reference nucleotides in an alignment. This will be cast to a negative value."/>
                <param name="gapOpenPenalty" argument="--go" type="integer" value="5" label="Gap Open Penalty" help="The value given to a gap opening in an alignment."/>
                <param name="gapExtensionPenalty" argument="--ge" type="integer" value="3" label="Gap Extension Penalty" help="The value given to a gap extension in an alignment."/>
                <param name="mimicBT2" type="boolean" truevalue="--mimicBT2" falsevalue="" checked="False" label="Mimic Bowtie 2" help="Set flags to mimic parameters similar to Bowtie2 with --no-discordant and --no-mixed flags. This disallows dovetailing reads, and discards orphans. Note, this does not impose the very strict parameters assumed by RSEM+Bowtie2, like gapless alignments. For that behavior, use the --mimicStrictBT2 flag."/>
                <param name="mimicStrictBT2" type="boolean" truevalue="--mimicStrictBT2" falsevalue="" checked="False" label="Mimic Strict Bowtie 2" help="Set flags to mimic the very strict parameters used by RSEM+Bowtie2. This increases --minScoreFraction to 0.8, disallows dovetailing reads, discards orphans, and disallows gaps in alignments."/>
                <param name="hardFilter" type="boolean" truevalue="--hardFilter" falsevalue="" checked="False" label="Hard Filter" help="Instead of weighting mappings by their alignment score, this flag will discard any mappings with sub-optimal alignment score. The default option of soft-filtering (i.e. weighting mappings by their alignment score) usually yields slightly more accurate abundance estimates but this flag may be desirable if you want more accurate 'naive' equivalence classes, rather than range factorized equivalence classes."/>
            </when>
            <when value=""/>
        </conditional>
        <param name="consensusSlack" type="integer" optional="True" value="0" label="Consensus Slack" help="The amount of slack allowed in the quasi-mapping consensus mechanism. Normally, a transcript must cover all hits to be considered for mapping. If this is set to a value, X, greater than 0, then a transcript can fail to cover up to X hits before it is discounted as a mapping candidate. The default value of this option is 1 if --validateMappings is given and 0 otherwise."/>
        <param name="dovetail" label="Allow Dovetail" type="boolean" truevalue="--allowDovetail" falsevalue="" checked="False" help="Allow dovetail mappings"/>
        <param name="recoverOrphans" label="Recover Orphans" type="boolean" truevalue="--recoverOrphans" falsevalue="" checked="False" help="Attempt to recover the mates of orphaned reads. This uses edlib for orphan recovery, and so introduces some computational overhead, but it can improve sensitivity."/>
        <!-- The select value is the complete command-line fragment, so the
             command template can emit it verbatim. -->
        <conditional name="bam_options">
            <param argument="--writeMappings" type="select" label="Write Mappings to BAM File" help="If this option is provided, then the quasi-mapping results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead">
                <option value="--writeMappings=./output/samout.sam">Enabled</option>
                <option value="" selected="true">Disabled</option>
            </param>
            <when value="--writeMappings=./output/samout.sam">
                <param argument="--writeQualities" type="boolean" truevalue="--writeQualities" falsevalue="" checked="false" label="Write qualities" help="If this flag is provided, then the output SAM file will contain quality strings as well as read sequences." />
            </when>
            <when value=""/>
        </conditional>
        <!-- NOTE(review): both values of this boolean are empty and no command
             token visible in this file reads it; it looks like a leftover kept
             for interface compatibility. Confirm before removing. -->
        <param name="writeMappings" type="boolean" truevalue="" falsevalue="" checked="False" label="Write Mappings to Bam File" help="If this option is provided, then the quasi-mapping results will be written out in SAM-compatible format. By default, output will be directed to stdout, but an alternative file name can be provided instead"/>
        <param name="consistentHits" type="boolean" truevalue="--consistentHits" falsevalue="" checked="False" label="Consistent Hits" help="Force hits gathered during quasi-mapping to be 'consistent' (i.e. co-linear and approximately the right distance apart)."/>
        <param name="quasiCoverage" type="integer" optional="True" label="Quasi Coverage" help="[Experimental]: The fraction of the read that must be covered by MMPs (of length >= 31) if this read is to be considered as 'mapped'. This may help to avoid 'spurious' mappings. A value of 0 (the default) denotes no coverage threshold (a single 31-mer can yield a mapping). Since coverage by exact matching, large, MMPs is a rather strict condition, this value should likely be set to something low, if used."/>
    </xml>

    <!-- Alignment-based quantification inputs and options. -->
    <xml name="align">
        <param name="afile" type="data" format="qname_input_sorted.bam,qname_sorted.bam" label="Alignment file"/>
        <param argument="--ont" type="boolean" truevalue="--ont" falsevalue="" label="Is this Alignment file a Oxford Nanopore Technologies (ONT) dataset ?" help="Select this for both cDNA and direct RNA ONT datasets. Enables an alignment error model designed to work with long-read alignments and disables the length effect in the generative model when computing the conditional probability of observing a fragment given that it arises from a specific transcript. This is because in long-read sequencing, we do not expect to observe (i.e.
sequence) multiple fragments from the same molecule, and thus we do not expect the transcript length to affect the observed fragment count directly"/>
        <param name="transcript" type="data" format="fasta,fa" label="Transcript file"/>
        <expand macro="stranded_se"/>
        <param name="discardOrphans" type="boolean" truevalue="--discardOrphans" falsevalue="" checked="False" label="Discard orphans" help="Discard orphan alignments in the input. If this flag is passed, then only paired alignments will be considered toward quantification estimates. The default behavior is to consider orphan alignments if no valid paired mappings exist."/>
        <param name="noErrorModel" type="boolean" truevalue="--noErrorModel" falsevalue="" checked="False" label="No Error Model" help="Turn off the alignment error model, which takes into account the observed frequency of different types of mismatches / indels when computing the likelihood of a given alignment. Turning this off can speed up alignment-based salmon, but can harm quantification accuracy."/>
        <param name="numErrorBins" type="integer" optional="True" value="6" label="Number of Error Bins" help="The number of bins into which to divide each read when learning and applying the error model. For example, a value of 10 would mean that effectively, a separate error model is learned and applied to each 10th of the read, while a value of 3 would mean that a separate error model is applied to the read beginning (first third), middle (second third) and end (final third)."/>
        <param name="sampleOut" type="boolean" truevalue="--sampleOut" falsevalue="" checked="False" label="Sample out" help="Write a 'postSample.bam' file in the output directory that will sample the input alignments according to the estimated transcript abundances. If you're going to perform downstream analysis of the alignments with tools which don't, themselves, take fragment assignment ambiguity into account, you should use this output"/>
        <param name="sampleUnaligned" type="boolean" truevalue="--sampleUnaligned" falsevalue="" checked="False" label="Sample unaligned" help="In addition to sampling the aligned reads, also write the un-aligned reads to 'postSample.bam'."/>
        <param name="gencode" type="boolean" truevalue="--gencode" falsevalue="" checked="False" label="Gencode" help="This flag will expect the input transcript fasta to be in GENCODE format, and will split the transcript name at the first '|' character. These reduced names will be used in the output and when looking for these transcripts in a gene to transcript GTF."/>
    </xml>

    <!-- Options shared by read-based and alignment-based quantification. -->
    <xml name="quantboth">
        <param name="geneMap" type="data" format="tabular,gff,gtf" optional="True" label="File containing a mapping of transcripts to genes" help="If this file is provided Salmon will output both quant.sf and quant.genes.sf files, where the latter contains aggregated gene-level abundance estimates. The transcript to gene mapping should be provided as either a GTF file, or in a simple tab-delimited format where each line contains the name of a transcript and the gene to which it belongs separated by a tab."/>
        <param name="seqBias" type="boolean" truevalue="--seqBias" falsevalue="" checked="False" label="Perform sequence-specific bias correction"/>
        <param name="gcBias" type="boolean" truevalue="--gcBias" falsevalue="" checked="False" label="Perform fragment GC bias correction"/>
        <!-- A label is supplied so the form does not fall back to the raw parameter name. -->
        <param name="incompatPrior" type="float" optional="True" min="0.0" max="1" value="0" label="Incompatible prior" help="Sets the prior probability that an alignment that disagrees with the specified library type (orientation and strandedness) results from the true fragment origin.
Setting this to 0 specifies that alignments that disagree with the library type should be 'impossible', while setting it to 1 says that alignments that disagree with the library type are no less likely than those that do"/>
        <param name="meta" type="boolean" truevalue="--meta" falsevalue="" checked="False" label="Meta" help="If you're using Salmon on a metagenomic dataset, consider setting this flag to disable parts of the abundance estimation model that make less sense for metagenomic data."/>
        <!--Salmon quant advanced options: available to both kinds of quant-->
        <section name="adv" title="Additional Options">
            <!-- Output and early-exit switches. -->
            <param name="skipQuant" type="boolean" truevalue="--skipQuant" falsevalue="" checked="False" label="skipQuant" help="Skip performing the actual transcript quantification (including any Gibbs sampling or bootstrapping)."/>
            <param name="dumpEq" type="boolean" truevalue="--dumpEq" falsevalue="" checked="False" label="Dump equivalence class counts" help="Dump the equivalence class counts that were computed during quasi-mapping."/>
            <param name="dumpEqWeights" type="boolean" truevalue="--dumpEqWeights" falsevalue="" checked="False" label="Dump equivalence class counts including rich weights" help="Includes 'rich' equivalence class weights in the output when equivalence class information is being dumped to file."/>
            <param name="minAssignedFrags" type="integer" optional="True" label="Minimum assigned fragments" help="The minimum number of fragments that must be assigned to the transcriptome for quantification to proceed."/>

            <!-- Fragment length distribution and bias model tuning. -->
            <param name="biasSpeedSamp" type="integer" value="5" optional="True" label="The value at which the fragment length PMF is down-sampled when evaluating GC fragment bias." help="Larger values speed up effective length correction, but may decrease the fidelity of bias modeling results."/>
            <param name="fldMax" type="integer" value="1000" optional="True" label="The maximum fragment length to consider when building the empirical distribution."/>
            <param name="fldMean" type="integer" value="250" optional="True" label="The mean used in the fragment length distribution prior" help="If single end reads are being used for quantification, or there are an insufficient number of uniquely mapping reads when performing paired-end quantification to estimate the empirical fragment length distribution, then use this value to calculate effective lengths."/>
            <param name="fldSD" type="integer" value="25" optional="True" label="Standard deviation" help="The standard deviation used in the fragment length distribution prior."/>
            <param name="forgettingFactor" type="float" value="0.65000000000000002" optional="True" label="The forgetting factor used in the online learning schedule." help="A smaller value results in quicker learning, but higher variance and may be unstable. A larger value results in slower learning but may be more stable. Value should be in the interval (0.5, 1.0]." />
            <param name="initUniform" type="boolean" truevalue="--initUniform" falsevalue="" checked="False" label="Initialization with uniform parameters" help="initialize the offline inference with uniform parameters, rather than seeding with online parameters." />
            <param name="maxReadOcc" type="integer" value="100" optional="True" label="Maximal read mapping occurrence" help="Reads mapping to more than this many places won't be considered."/>

            <!-- Length-correction switches. -->
            <param name="noLengthCorrection" type="boolean" truevalue="--noLengthCorrection" falsevalue="" label="No length correction" help="[experimental] : Entirely disables length correction when estimating the abundance of transcripts. This option can be used with protocols where one expects that fragments derive from their underlying targets without regard to that target's length (e.g. QuantSeq)"/>
            <param name="noEffectiveLengthCorrection" type="boolean" truevalue="--noEffectiveLengthCorrection" falsevalue="" checked="False" label="Disable effective length correction" help="Disables effective length correction when computing the probability that a fragment was generated from a transcript. If this flag is passed in, the fragment length distribution is not taken into account when computing this probability."/>
            <param name="noFragLengthDist" type="boolean" truevalue="--noFragLengthDist" falsevalue="" checked="False" label="Ignore fragment length distribution" help="[experimental] : Don't consider concordance with the learned fragment length distribution when trying to determine the probability that a fragment has originated from a specified location. Normally, Fragments with unlikely lengths will be assigned a smaller relative probability than those with more likely lengths. When this flag is passed in, the observed fragment length has no effect on that fragment's a priori probability." />
            <param name="noBiasLengthThreshold" type="boolean" truevalue="--noBiasLengthThreshold" falsevalue="" checked="False" label="[experimental] : If this option is enabled, then no (lower) threshold will be set on how short bias correction can make effective lengths." help="This can increase the precision of bias correction, but harm robustness. The default correction applies a threshold." />

            <!-- Sample counts used while training the bias and auxiliary models. -->
            <param name="numBiasSamples" type="integer" value="2000000" optional="True" label="Number of fragment mappings to use when learning the sequence-specific bias model."/>
            <param name="numAuxModelSamples" type="integer" value="5000000" optional="True" label="The first numAuxModelSamples are used to train the auxiliary model parameters." help="(e.g. fragment length distribution, bias, etc.). After the first numAuxModelSamples observations the auxiliary model parameters will be assumed to have converged and will be fixed." />
            <param name="numPreAuxModelSamples" type="integer" value="5000" optional="True" label="The first numPreAuxModelSamples will have their assignment likelihoods and contributions to the transcript abundances computed without applying any auxiliary models." help="The purpose of ignoring the auxiliary models for the first numPreAuxModelSamples observations is to avoid applying these models before their parameters have been learned sufficiently well." />

            <!-- Inference algorithm, Gibbs sampling and bootstrapping. -->
            <param name="useEM" type="boolean" truevalue="--useEM" falsevalue="" checked="False" label="Use the traditional EM algorithm for optimization in the batch passes." help=""/>
            <param name="rangeFactorizationBins" type="integer" value="0" label="Range of factorization bins" help="Factorizes the likelihood used in quantification by adopting a new notion of equivalence classes based on the conditional probabilities with which fragments are generated from different transcripts. This is a more fine-grained factorization than the normal rich equivalence classes. The default value (0) corresponds to the standard rich equivalence classes, and larger values imply a more fine-grained factorization. If range factorization is enabled, a common value to select for this parameter is 4."/>
            <param name="numGibbsSamples" type="integer" value="0" optional="True" label="Number of Gibbs sampling rounds to perform." help="" />
            <param name="noGammaDraw" type="boolean" truevalue="--noGammaDraw" falsevalue="" checked="False" label="No gamma draw" help="This switch will disable drawing transcript fractions from a Gamma distribution during Gibbs sampling. In this case the sampler does not account for shot-noise, but only assignment ambiguity"/>
            <param name="numBootstraps" type="integer" value="0" optional="True" label="Number of bootstrap samples to generate. Note: This is mutually exclusive with Gibbs sampling." help="" />
            <param name="bootstrapReproject" type="boolean" truevalue="--bootstrapReproject" falsevalue="" checked="False" label="Bootstrap reproject" help="This switch will learn the parameter distribution from the bootstrapped counts for each sample, but will reproject those parameters onto the original equivalence class counts."/>
            <param name="thinningFactor" type="integer" value="16" optional="true" label="Thinning factor" help="Number of steps to discard for every sample kept from the Gibbs chain. The larger this number, the less chance that subsequent samples are auto-correlated, but the slower sampling becomes."/>

            <!-- Priors, output precision and auxiliary outputs. -->
            <param name="perTranscriptPrior" type="boolean" truevalue="--perTranscriptPrior" falsevalue="" checked="False" label="The prior will be interpreted as a transcript-level prior." help="either the default or the argument provided via --vbPrior" />
            <param name="sigDigits" type="integer" value="3" label="Significant Digits" help="The number of significant digits to write when outputting the EffectiveLength and NumReads columns."/>
            <param name="vbPrior" type="float" value="1.0000000000000001e-05" optional="True" label="The prior that will be used in the VBEM algorithm." help="This is interpreted as a per-nucleotide prior, unless the --perTranscriptPrior flag is also given, in which case this is used as a transcript-level prior." />
            <param name="writeOrphanLinks" type="boolean" truevalue="--writeOrphanLinks" falsevalue="" checked="False" label="Write orphan links" help="Write the transcripts that are linked by orphaned reads."/>
            <param name="writeUnmappedNames" type="boolean" truevalue="--writeUnmappedNames" falsevalue="" checked="False" label="Write the names of un-mapped reads to the file unmapped_names.txt."
help=""/> </section> </xml> <xml name="quantmerge"> <repeat name="quantfiles" title="Quant file and names"> <param name="quants" type="data" format="tsv,tabular" label="Salmon quant tabular output files"/> <param name="names" type="text" label="One-word sample names" optional="True"/> </repeat> <param name="column" type="select" label="Columns" help="Name of columns that will be merged in the output file"> <option value="len" selected="True">Length</option> <option value="elen">Effective Length</option> <option value="tpm">Transcripts per Million</option> <option value="numreads">NumReads</option> </param> <param name="genes" type="boolean" truevalue="--genes" falsevalue="" checked="False" label="Genes" help="Use gene quantifications instead of transcript"/> <param name="missing" type="integer" label="Missing" optional="True" help="The value of missing values"/> </xml> <xml name="citations"> <citations> <citation type="doi">10.1186/s13059-019-1670-y</citation> <citation type="doi">10.1038/nmeth.4197</citation> </citations> </xml> <token name="@bam_sort@"><![CDATA[samtools sort -@ \${GALAXY_SLOTS} --output-fmt=BAM -o ./output/bamout.bam]]></token> <token name="@indexing@"><![CDATA[ mkdir ./index && mkdir ./output && #if $quant_type.refTranscriptSource.TranscriptSource != "indexed": #if $quant_type.refTranscriptSource.s_index.genome cat '${quant_type.refTranscriptSource.s_index.genome}' | grep "^>" | cut -d " " -f 1 > 'decoys.txt' && sed -i.bak -e 's/>//g' 'decoys.txt' && cat '${quant_type.refTranscriptSource.s_index.fasta}' '${quant_type.refTranscriptSource.s_index.genome}' > 'input_index.fasta' && #else ln -s '${quant_type.refTranscriptSource.s_index.fasta}' 'input_index.fasta' && #end if salmon index -i ./index --kmerLen '${quant_type.refTranscriptSource.s_index.kmer}' --gencode --threads "\${GALAXY_SLOTS:-4}" --transcripts 'input_index.fasta' #if $quant_type.refTranscriptSource.s_index.genome --decoy 'decoys.txt' #end if && #set $index_path = './index' #else #set 
$index_path = $quant_type.refTranscriptSource.index.fields.path #end if ]]> </token> <token name="@salreads@"><![CDATA[ #set compressed = 'no' #if $quant_type.input.single_or_paired.single_or_paired_opts == 'single': #if $quant_type.input.single_or_paired.input_singles.ext == 'fasta': #set $ext = 'fasta' #else: #if $quant_type.input.single_or_paired.input_singles.is_of_type("fastq.gz", "fastqsanger.gz"): #set compressed = 'GZ' #else if $quant_type.input.single_or_paired.input_singles.is_of_type("fastq.bz2", "fastqsanger.bz2"): #set compressed = 'BZ2' #end if #set $ext = 'fastq' #end if ln -s $quant_type.input.single_or_paired.input_singles ./single.$ext && #else if $quant_type.input.single_or_paired.single_or_paired_opts == 'paired': #if $quant_type.input.single_or_paired.input_mate1.ext == 'fasta': #set $ext = 'fasta' #else: #if $quant_type.input.single_or_paired.input_mate1.is_of_type("fastq.gz", "fastqsanger.gz"): #set compressed = 'GZ' #else if $quant_type.input.single_or_paired.input_mate1.is_of_type("fastq.bz2", "fastqsanger.bz2"): #set compressed = 'BZ2' #end if #set $ext = 'fastq' #end if ln -s $quant_type.input.single_or_paired.input_mate1 ./mate1.$ext && ln -s $quant_type.input.single_or_paired.input_mate2 ./mate2.$ext && #else if $quant_type.input.single_or_paired.single_or_paired_opts == 'paired_collection': #if $quant_type.input.single_or_paired.input_1.forward.ext == 'fasta': #set $ext = 'fasta' #else: #if $quant_type.input.single_or_paired.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"): #set compressed = 'GZ' #else if $quant_type.input.single_or_paired.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"): #set compressed = 'BZ2' #end if #set $ext = 'fastq' #end if ln -s '${quant_type.input.single_or_paired.input_1.forward}' ./mate1.$ext && ln -s '${quant_type.input.single_or_paired.input_1.reverse}' ./mate2.$ext && #else if '$quant_type.input.single_or_paired.single_or_paired_opts' == 'paired_interleaved': #if 
$quant_type.input.single_or_paired.input_1.ext == 'fasta': #set $ext = 'fasta' #else: #if $quant_type.input.single_or_paired.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): #set compressed = 'GZ' #else if $quant_type.input.single_or_paired.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): #set compressed = 'BZ2' #end if #set $ext = 'fastq' #end if ln -s '$quant_type.input.single_or_paired.input_1' ./mate1.$ext && #end if #if $geneMap: ln -s '$geneMap' ./geneMap.${geneMap.ext} && #end if salmon quant --index '$index_path' #if $quant_type.input.single_or_paired.single_or_paired_opts == 'single': --libType ${quant_type.input.single_or_paired.libtype.strandedness} #if $compressed == 'GZ': --unmatedReads <(zcat < ./single.$ext) #else if $compressed == 'BZ2': --unmatedReads <(bzcat < ./single.$ext) #else: --unmatedReads ./single.$ext #end if #else: --libType #if $quant_type.input.single_or_paired.libtype.strandedness == 'A' A #else ${quant_type.input.single_or_paired.libtype.orientation}${quant_type.input.single_or_paired.libtype.strandedness} #end if #if $quant_type.input.single_or_paired.single_or_paired_opts == 'paired_interleaved': #if $compressed == 'BZ2': --mates1 <(bzcat < ./mate1.$ext | seqtk seq -1) --mates2 <(bzcat < ./mate1.$ext | seqtk seq -2) #else: --mates1 <(seqtk seq -1 ./mate1.$ext) --mates2 <(seqtk seq -2 ./mate1.$ext) #end if #else: #if $compressed == 'GZ': --mates1 <(zcat < ./mate1.$ext) --mates2 <(zcat < ./mate2.$ext) #else if $compressed == 'BZ2': --mates1 <(bzcat < ./mate1.$ext) --mates2 <(bzcat < ./mate2.$ext) #else: --mates1 ./mate1.$ext --mates2 ./mate2.$ext #end if #end if #end if --threads "\${GALAXY_SLOTS:-4}" ${quant_type.discardOrphansQuasi} ${quant_type.validmap.validateMappings} #if $quant_type.validmap.validateMappings: #if $quant_type.validmap.minScoreFraction: --minScoreFraction '${quant_type.validmap.minScoreFraction}' #end if #if $quant_type.validmap.maxMMPExtension: --maxMMPExtension '${quant_type.validmap.maxMMPExtension}' #end 
if #if $quant_type.validmap.matchScore: --ma '${$quant_type.validmap.matchScore}' #end if #if $quant_type.validmap.mismatchPenalty: --mp '${$quant_type.validmap.mismatchPenalty}' #end if #if $quant_type.validmap.gapOpenPenalty: --go '${$quant_type.validmap.gapOpenPenalty}' #end if #if $quant_type.validmap.gapExtensionPenalty: --ge '${$quant_type.validmap.gapExtensionPenalty}' #end if ${$quant_type.validmap.mimicBT2} ${$quant_type.validmap.mimicStrictBT2} ${$quant_type.validmap.hardFilter} #end if #if $quant_type.consensusSlack: --consensusSlack '${quant_type.consensusSlack}' #end if ${quant_type.dovetail} ${quant_type.recoverOrphans} ${quant_type.bam_options.writeMappings} #if $quant_type.bam_options.writeMappings: ${quant_type.bam_options.writeQualities} #end if ${quant_type.consistentHits} #if $quant_type.quasiCoverage: --quasiCoverage '${quant_type.quasiCoverage}' #end if ]]> </token>
<!--
    @salalign@: Cheetah fragment that starts a `salmon quant` call reading a
    transcript file (-t), a library type (-l) and an alignment file (-a), all
    taken from the `quant_type` section. The optional gene map is symlinked
    first so that it is available under ./geneMap.<ext>.
-->
<token name="@salalign@"><![CDATA[
    ## Link the optional gene map under a fixed name that keeps its datatype extension.
    #if $geneMap:
        ln -s '$geneMap' ./geneMap.${geneMap.ext} &&
    #end if
    salmon quant
        -t '${quant_type.transcript}'
        -l '${quant_type.libtype.strandedness}'
        -a '${quant_type.afile}'
        ${quant_type.ont}
        --threads "\${GALAXY_SLOTS:-4}"
        ${quant_type.discardOrphans}
        ${quant_type.noErrorModel}
        #if $quant_type.numErrorBins:
            --numErrorBins '${quant_type.numErrorBins}'
        #end if
        ${quant_type.sampleOut}
        ${quant_type.sampleUnaligned}
        ${quant_type.gencode}
]]>
</token>
<!--
    @salquant@: Cheetah fragment holding further `salmon quant` options: the
    gene map written by the linking step, the basic bias switches and the
    `adv` section. Each directive must sit on its own line because the
    double-hash markers below are Cheetah comments that run to the end of
    the line.
-->
<token name="@salquant@"><![CDATA[
    #if $geneMap:
        --geneMap ./geneMap.${geneMap.ext}
    #end if
    ##Further basic options##
    ${seqBias}
    ${gcBias}
    --incompatPrior '${incompatPrior}'
    ${meta}
    ##Advanced options##
    ${adv.skipQuant}
    ${adv.dumpEq}
    ${adv.dumpEqWeights}
    #if $adv.minAssignedFrags:
        --minAssignedFrags '${adv.minAssignedFrags}'
    #end if
    #if $adv.biasSpeedSamp:
        --biasSpeedSamp '${adv.biasSpeedSamp}'
    #end if
    #if $adv.fldMax:
        --fldMax '${adv.fldMax}'
    #end if
    #if $adv.fldMean:
        --fldMean '${adv.fldMean}'
    #end if
    #if $adv.fldSD:
        --fldSD '${adv.fldSD}'
    #end if
    #if $adv.forgettingFactor:
        --forgettingFactor
'${adv.forgettingFactor}'
    #end if
    ${adv.initUniform}
    ## NOTE(review): the str() tests below stay true for a value of 0, presumably so
    ## that an explicit 0 is still passed on; confirm against the param definitions.
    #if str($adv.maxReadOcc):
        --maxReadOcc '${adv.maxReadOcc}'
    #end if
    ${adv.noLengthCorrection}
    ${adv.noEffectiveLengthCorrection}
    ${adv.noFragLengthDist}
    ${adv.noBiasLengthThreshold}
    #if str($adv.numBiasSamples):
        --numBiasSamples '${adv.numBiasSamples}'
    #end if
    #if str($adv.numAuxModelSamples):
        --numAuxModelSamples '${adv.numAuxModelSamples}'
    #end if
    #if str($adv.numPreAuxModelSamples):
        --numPreAuxModelSamples '${adv.numPreAuxModelSamples}'
    #end if
    ${adv.useEM}
    #if $adv.rangeFactorizationBins:
        --rangeFactorizationBins '${adv.rangeFactorizationBins}'
    #end if
    #if str($adv.numGibbsSamples):
        --numGibbsSamples '${adv.numGibbsSamples}'
    #end if
    ${adv.noGammaDraw}
    #if str($adv.numBootstraps):
        --numBootstraps '${adv.numBootstraps}'
    #end if
    ${adv.bootstrapReproject}
    #if $adv.thinningFactor:
        --thinningFactor '${adv.thinningFactor}'
    #end if
    ${adv.perTranscriptPrior}
    --sigDigits '${adv.sigDigits}'
    #if $adv.vbPrior:
        --vbPrior '${adv.vbPrior}'
    #end if
    ${adv.writeOrphanLinks}
    ${adv.writeUnmappedNames}
    -o ./output
]]>
</token>
<!--
    @qmerge@: Cheetah fragment that builds the `salmon quantmerge` call.
    Every entry of `quantfiles` is linked as <index>.quantmerge/quant.sf, then
    the directories and the sample names are both emitted in input order so
    that each name lines up with its own quantification. Result: qmergeout.tab.
-->
<token name="@qmerge@"><![CDATA[
    #import re
    ## Stage each quantification in its own directory, numbered by input position.
    #for $counter, $input_data in enumerate($quantfiles):
        rank_of_series=$counter mkdir ${counter}.quantmerge &&
        ln -s '${input_data.quants}' ${counter}.quantmerge/quant.sf &&
    #end for
    salmon quantmerge
        ## List the directories explicitly: a *.quantmerge glob expands in lexicographic
        ## order (0 1 10 11 2 ...), which stops matching the order of --names once
        ## there are more than ten inputs.
        --quants
        #for $counter, $input_data in enumerate($quantfiles):
            ${counter}.quantmerge
        #end for
        --names
        #for $counter, $input_data in enumerate($quantfiles):
            ## Use the user-supplied name unless it is empty or only spaces;
            ## otherwise fall back to the sanitised element identifier.
            #if $input_data.names and re.sub(' ', '', '$input_data.names') != '':
                '${input_data.names}'
            #else:
                #set $identifier=re.sub('[^\w\-\s]', '_', str($input_data.quants.element_identifier))
                "$identifier"
            #end if
        #end for
        --column '${column}'
        ${genes}
        #if $missing:
            --missing '${missing}'
        #end if
        -o qmergeout.tab
]]>
</token>
<token name="@salmonhelp@"><![CDATA[
Salmon is a lightweight method for quantifying transcript abundance from RNA–seq reads, combining a dual-phase parallel inference algorithm and feature-rich bias models with an ultra-fast read mapping procedure.

The salmon package contains 4 tools:

* Index: creates a salmon index
* Quant: quantifies a sample (reads or mapping-based)
* Alevin: Single-cell analysis
* Quantmerge: Merges multiple quantifications into a single file

Galaxy divides these four into three separate tools in the IUC toolshed:

* Salmon quant
* Salmon quantmerge
* Alevin
]]>
</token>
<token name="@alevinhelp@"><![CDATA[
Alevin is a tool — integrated with the salmon software — that introduces a family of algorithms for quantification and analysis of 3’ tagged-end single-cell sequencing data. Currently alevin supports the following two major droplet-based single-cell protocols:

* Drop-seq
* 10x-Chromium v1/2/3

Alevin works under the same indexing scheme (as salmon) for the reference, and consumes the set of FASTA/Q file(s) containing the Cellular Barcode (CB) + Unique Molecular Identifier (UMI) in one read file and the read sequence in the other. Given just the transcriptome and the raw read files, alevin generates a cell-by-gene count matrix (in a fraction of the time compared to other tools).

Alevin works in two phases. In the first phase it quickly parses the read file containing the CB and UMI information to generate the frequency distribution of all the observed CBs, and creates a lightweight data structure for fast look-up and correction of the CB. In the second round, alevin utilizes the read-sequences contained in the files to map the reads to the transcriptome, identify potential PCR/sequencing errors in the UMIs, and performs hybrid de-duplication while accounting for UMI collisions. Finally, a post-abundance estimation CB whitelisting procedure is done and a cell-by-gene count matrix is generated.

For further information regarding the tool and its optional parameters, visit the `Alevin <https://salmon.readthedocs.io/en/latest/alevin.html?highlight=alevin>`__ and `Salmon <https://salmon.readthedocs.io/en/latest/index.html>`__ wikis.
]]>
</token>
</macros>