Mercurial > repos > bgruening > alevin
diff alevin.xml @ 0:908a8d400fa2 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/salmon commit 8f432498890670fd03a197bd3d1aa2638d1ff2b3"
author | bgruening |
---|---|
date | Mon, 09 Sep 2019 11:12:51 -0400 |
parents | |
children | e53f19161c59 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alevin.xml Mon Sep 09 11:12:51 2019 -0400 @@ -0,0 +1,209 @@ +<tool id="alevin" name="Alevin" version="@VERSION@"> + <description>Quantification and analysis of 3’ tagged-end single-cell sequencing data</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="exit_code"><![CDATA[ + mkdir ./index + && mkdir ./output + #if '${refTranscriptSource.TranscriptSource}' != "indexed": + && salmon index -i ./index + --kmerLen '${refTranscriptSource.s_index.kmer}' + --gencode + --transcripts '${refTranscriptSource.s_index.fasta}' + #set $index_path = './index' + #else + #set $index_path = $refTranscriptSource.index.fields.path + #end if + #if $pairstraight.readselect == 'paired': + #if $pairstraight.file1.is_of_type("fastq.gz"): + && cp '${pairstraight.file1}' ./mate1.fastq.gz + && gunzip ./mate1.fastq.gz + && cp '${pairstraight.file2}' ./mate2.fastq.gz + && gunzip ./mate2.fastq.gz + #else if $pairstraight.file1.is_of_type("fastq.bz2"): + && cp '${pairstraight.file1}' ./mate1.fastq.bz2 + && bzip2 -d ./mate1.fastq.bz2 + && cp '${pairstraight.file2}' ./mate2.fastq.bz2 + && bzip2 -d ./mate2.fastq.bz2 + #else: + && ln -s '${pairstraight.file1}' ./mate1.fastq + && ln -s '${pairstraight.file2}' ./mate2.fastq + #end if + #else: + #if $pairstraight.unmatedreads.is_of_type("fastq.gz"): + && cp '${pairstraight.unmatedreads}' ./unmate.fastq.gz + && gunzip ./unmate.fastq.gz + #else if $pairstraight.unmatedreads.is_of_type("fastq.bz2"): + && cp '${pairstraight.unmatedreads}' ./unmate.fastq.bz2 + && bzip2 -d unmate.fastq.bz2 + #else: + && ln -s '${pairstraight.unmatedreads}' ./unmate.fastq + #end if + #end if + + && ln -s '${tgmap}' ./alevinmap.tsv + && salmon alevin -l + #if $pairstraight.readselect == 'paired': + ${pairstraight.orientation}${pairstraight.strandedness} + -i $index_path + -1 ./mate1.fastq + -2 ./mate2.fastq + #else: + '${pairstraight.strandedness}' + -i $index_path + -r zcat ./unmate.fastq + #end if + -o ./output + -p "\${GALAXY_SLOTS:-4}" + ${protocol} + --tgMap ./alevinmap.tsv + #if $whitelist: + --whitelist '${optional.whitelist}' + #end if + #if $optional.numCellBootstraps: + --numCellBootstraps '${optional.numCellBootstraps}' + #end if + #if $optional.forceCells: + --forceCells '${optional.forceCells}' + #end if + #if $optional.expectCells: + --expectCells '${optional.expectCells}' + #end if + #if $optional.mrna: + --mrna '${optional.mrna}' + #end if + #if $optional.rrna: + --rrna '${optional.rrna}' + #end if + #if $optional.keepCBFraction: + --keepCBFraction '${optional.keepCBFraction}' + #end if + ${optional.dumpBfh} + ${optional.dumpFeatures} + ${optional.dumpUmiGraph} + ${optional.dumpMtx} + #if $optional.lowRegionMinNumBarcodes: + --lowregionMinNumBarcodes '${optional.lowRegionMinNumBarcodes}' + #end if + #if $optional.maxNumBarcodes: + --maxNumBarcodes '${optional.maxNumBarcodes}' + #end if + ]]> + </command> + <inputs> + <expand macro="index"/> + <conditional name="pairstraight"> + <param name="readselect" label="Single or paired-end reads?" type="select"> + <option value="paired">Paired-end</option> + <option value="unmated">Single-end</option> + </param> + <when value="paired"> + <param name="file1" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" help="CB+UMI raw sequence file(s)"/> + <param name="file2" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" help="Read-sequence file(s)"/> + <expand macro="orient"/> + <expand macro="stranded"/> + </when> + <when value="unmated"> + <param name="unmatedreads" type="data" format="fastq,fastq.gz,fastqsanger.gz,fastq.bz2" label="Unmated reads files"/> + <expand macro="stranded"/> + </when> + </conditional> + <param name="protocol" type="select"> + <option value="--dropseq">DropSeq Single Cell protocol</option> + <option value="--chromium">10x chromium v2 Single Cell protocol</option> + <option value="--chromiumV3">10x chromium v3 Single Cell protocol</option> + <option value="--gemcode">Gemcode v1 Single Cell protocol</option> + <option value="--celseq">CEL-Seq Single Cell protocol</option> + <option value="--celseq2">CEL-Seq2 Single Cell protocol</option> + </param> + <param name="tgmap" type="data" format="tsv,tabular" label="Transcript to gene map file" help="Tsv with no header, containing two columns mapping each transcript present in the reference to the corresponding gene (the first column is a transcript and the second is the corresponding gene)."/> + <param name="allout" type="boolean" label="Retrieve all output files" truevalue="Yes" falsevalue="No" checked="false" help="If not selected, all log, info.txt, and json files output by Alevin will not be retrieved"/> + <section name="optional" title="Optional commands" expanded="false"> + <param name="whitelist" type="data" format="tsv,tabular" optional="true" label="Whitelist file" help="Explicitly specify whitelist CP for cell detection and CB sequence correction. If not specified, putative CBs generated."/> + <param name="noDedup" type="boolean" truevalue="Yes" falsevalue="No" checked="false" help="Causes pipeline to only perform CB correction, then maps the read-sequences to the transcriptome generating the interim data-structure of CB-EqClass-UMI-count. Used in parallel with --dumpBarcodeEq or --dumpBfh for the purposes of obtaining raw information or debugging."/> + <param name="mrna" type="data" format="tsv" optional="true" help="Single column tsv of mitochondrial genes which are to be used as a feature for CB whitelising naive Bayes classification."/> + <param name="rrna" type="data" format="tsv" optional="true" help="Single column tsv of ribosomal genes which are to be used as a feature for CB whitelising naive Bayes classification."/> + <param name="dumpBfh" type="boolean" truevalue="--dumpBfh" falsevalue="" checked="false" help="Dumps the full CB-EqClass-UMI-count data-structure for the purposed of allowing raw data analysis and debugging."/> + <param name="dumpFeatures" type="boolean" truevalue="--dumpFeatures" falsevalue="" checked="false" help="Dumps all features used by the CB classification and their counts at each cell level. Generally, this is used for the purposes of debugging."/> + <param name="dumpUmiGraph" type="boolean" truevalue="--dumpUmiGraph" falsevalue="" checked="false" help="Dump the per-cell level umi graph"/> + <param name="dumpMtx" type="boolean" truevalue="--dumpMtx" falsevalue="" checked="false" help="Converts the default binary format of alevin for gene-count matrix into a human readable mtx (matrix market exchange) sparse format."/> + <param name="forceCells" type="integer" optional="true" help="Explicitly specify the number of cells."/> + <param name="expectCells" type="integer" optional="true" help="define a close upper bound on expected number of cells."/> + <param name="numCellBootstraps" type="integer" optional="true" help="Performs certain number of bootstrap and generate the mean and variance of the count matrix"/> + <param name="minScoreFraction" type="float" optional="true" help="This value controls the minimum allowed score for a mapping to be considered valid. It matters only when --validateMappings has been passed to Salmon. The maximum possible score for a fragment is ms = read_len * ma (or ms = (left_read_len + right_read_len) * ma for paired-end reads). The argument to --minScoreFraction determines what fraction of the maximum score s a mapping must achieve to be potentially retained. For a minimum score fraction of f, only mappings with a score less than (f * s) will be kept. Mappings with lower scores will be considered as low-quality, and will be discarded."/> + <param name="keepCBFraction" type="float" optional="true" help="Fraction of cellular barcodes to keep (Between 0 and 1)."/> + <param name="lowRegionMinNumBarcodes" type="integer" optional="true" help="Minimum number of cell barcodes to use fo learning low confidence region (defaults to 200)"/> + <param name="maxNumBarcodes" type="integer" optional="true" help="Maximum allowable limit to process the cell barcodes. Defaults to 100000"/> + </section> + </inputs> + <outputs> + <data name="quants_mat.gz" label="quants_mat.gz" format="txt" from_work_dir="output/alevin/quants_mat.gz"> + <filter>optional["dumpMtx"] != "Yes"</filter> + </data> + <data name="quants_mat.mtx.gz" label="quants_mat.mtx.gz" format="mtx" from_work_dir="output/alevin/quants_mat.mtx.gz"/> + <data name="quants_mat_cols.txt" label="quants_mat_cols.txt" format="txt" from_work_dir="output/alevin/quants_mat_cols.txt"/> + <data name="quants_mat_rows.txt" label="quants_mat_rows.txt" format="txt" from_work_dir="output/alevin/quants_mat_rows.txt"/> + <data name="quants_tier_mat.gz" label="quants_tier_mat.gz" format="mtx" from_work_dir="output/alevin/quants_tier_mat.gz"/> + <data name="alevin.log" label="alevin.log" format="txt" from_work_dir="output/alevin/alevin.log"> + <filter>allout</filter> + </data> + <data name="featureDump.txt" label="featureDump.txt" format="txt" from_work_dir="output/alevin/featureDump.txt"/> + <data name="whitelist.txt" label="whitelist.txt" format="txt" from_work_dir="output/alevin/whitelist.txt"/> + <data name="bfh.txt" label="bfh.txt" format="txt" from_work_dir="output/alevin/bfh.txt"> + <filter>optional["dumpBfh"] == "Yes"</filter> + </data> + <data name="quants_mean_mat.gz" label="quants_mean_mat.gz" format="mtx" from_work_dir="output/alevin/quants_mean_mat.gz"> + <filter>optional["numCellBootstraps"]</filter> + </data> + <data name="quants_var_mat.gz" label="quants_var_mat.gz" format="mtx" from_work_dir="output/alevin/quants_var_mat.gz"> + <filter>optional["numCellBootstraps"]</filter> + </data> + <data name="quants_boot_rows.txt" label="quants_boot_rows.txt" format="txt" from_work_dir="output/alevin/quants_boot_rows.txt"> + <filter>optional["numCellBootstraps"]</filter> + </data> + <data name="alevinmeta_info.json" format="json" label="meta_info.json" from_work_dir="output/aux_info/alevin_meta_info.json"> + <filter>allout</filter> + </data> + <data name="ambig_info.tsv" format="tsv" label="ambig_info.tsv" from_work_dir="output/aux_info/ambig_info.tsv"> + <filter>allout</filter> + </data> + <data name="meta_info.json" format="json" label="meta_info.json" from_work_dir="output/aux_info/meta_info.json"> + <filter>allout</filter> + </data> + <data name="expected_bias.gz" format="txt" label="expected_bias.gz" from_work_dir="output/aux_info/fld.gz"/> + <data name="observed_bias.gz" format="txt" label="observed_bias.gz" from_work_dir="output/aux_info/observed_bias.gz"/> + <data name="observed_bias_3p.gz" format="txt" label="observed_bias_3p.gz" from_work_dir="output/aux_info/observed_bias_3p.gz"/> + <data name="flenDist.txt" format="txt" label="flenDist.txt" from_work_dir="output/libParams/flenDist.txt"/> + <data name="salmon_quant.log" format="txt" label="salmon_quant.log" from_work_dir="output/logs/salmon_quant.log"> + <filter>allout</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="11"> + <conditional name="refTranscriptSource"> + <param name="TranscriptSource" value="history"/> + <section name="s_index"> + <param name="fasta" value="minitranscript.fa"/> + </section> + </conditional> + <conditional name="pairstraight"> + <param name="readselect" value="paired"/> + <param name="file1" value="fastqs/moreminifastq1.fastq.gz"/> + <param name="file2" value="fastqs/moreminifastq2.fastq.gz"/> + <param name="orientation" value="I"/> + <param name="strandedness" value="SR"/> + </conditional> + <param name="protocol" value="--chromium"/> + <param name="tgmap" value="minitxp.tsv"/> + <param name="dumpMtx" value="Yes"/> + <output name="quants_mat.mtx.gz" file="alevin_mat.mtx.gz" ftype="mtx" compare="sim_size"/> + </test> + </tests> + <help><![CDATA[ + @salmonhelp@ + @alevinhelp@ + ]]></help> + <expand macro="citations"/> +</tool>