Mercurial > repos > iuc > cite_seq_count
view cite_seq_count.xml @ 0:3df3d1b51110 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/cite_seq_count commit 70fe293f480c4d0e9f14d4373ecac4cbcf4fe17f
author | iuc |
---|---|
date | Wed, 18 Jan 2023 16:04:20 +0000 |
parents | |
children |
line wrap: on
line source
<tool name="CITE-seq-Count" id="cite_seq_count" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>Count CMO/HTO</description>
    <!--
        Galaxy wrapper around CITE-seq-Count.
        Inputs:  barcode reads (R1) and CMO/HTO reads (R2), given either as two
                 multi-select dataset parameters or as one paired collection,
                 plus a CSV of tag sequences and names.
        Outputs: the run report, the read_count and umi_count sparse matrices
                 (features / barcodes / matrix) and, optionally, a dense matrix.
    -->
    <macros>
        <token name="@TOOL_VERSION@">1.4.4</token>
        <!-- Wrapper revision; bump it whenever the wrapper changes without a new upstream release. -->
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <edam_topics>
        <edam_topic>topic_3170</edam_topic>
        <edam_topic>topic_4028</edam_topic>
        <edam_topic>topic_3308</edam_topic>
    </edam_topics>
    <xrefs>
        <xref type="bio.tools">CITE-seq-Count</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">cite-seq-count</requirement>
        <requirement type="package" version="3.7.12">python</requirement>
        <requirement type="package" version="1.0.0">umi_tools</requirement>
        <requirement type="package" version="0.20.7">python-levenshtein</requirement>
        <requirement type="package" version="0.20.7">levenshtein</requirement>
        <requirement type="package" version="0.25.3">pandas</requirement>
        <!-- The next dependencies are here to help conda to solve the environment -->
        <requirement type="package" version="1.0.8">bzip2</requirement>
        <requirement type="package" version="2.5.0">expat</requirement>
        <requirement type="package" version="0.70.14">multiprocess</requirement>
        <requirement type="package" version="1.21.6">numpy</requirement>
        <requirement type="package" version="0.16">pysam</requirement>
        <requirement type="package" version="1.7.3">scipy</requirement>
    </requirements>
    <stdio>
        <!-- Any non-zero exit code fails the job. -->
        <exit_code range="1:" level="fatal"/>
    </stdio>
    <version_command>CITE-seq-Count --version</version_command>
    <command><![CDATA[
## This is taken from the star solo wrapper:
## Check that the input pairs are of the same type.
## We consume either repeats of two inputs R1 + R2
## or a collection of paired reads.
#if str($input_types.use) == "repeat":
    ## Both multi-select parameters render as comma-separated lists;
    ## they must contain the same number of files.
    #assert len(str($input_types.input1).split(",")) == len(str($input_types.input2).split(","))
    #set $reads1 = $input_types.input1
    #set $reads2 = $input_types.input2
#elif str($input_types.use) == "list_paired":
    #set $reads1 = $input_types.input_collection.forward
    #set $reads2 = $input_types.input_collection.reverse
#end if

CITE-seq-Count
    --threads \${GALAXY_SLOTS:-4}
    --read1 '$reads1'
    --read2 '$reads2'
    --tags '$tags'

    ## Chemistry:
    #if str($params.chemistry) == "v2":
        --cell_barcode_first_base 1
        --cell_barcode_last_base 16
        --umi_first_base 17
        --umi_last_base 26
    #elif str($params.chemistry) == "v3":
        --cell_barcode_first_base 1
        --cell_barcode_last_base 16
        --umi_first_base 17
        --umi_last_base 28
    #elif str($params.chemistry) == "custom":
        --cell_barcode_first_base $params.cell_barcode_first_base
        --cell_barcode_last_base $params.cell_barcode_last_base
        --umi_first_base $params.umi_first_base
        --umi_last_base $params.umi_last_base
    #end if

    --bc_collapsing_dist $bc_collapsing_dist
    --umi_collapsing_dist $umi_collapsing_dist
    $no_umi_correction
    --expected_cells $expected_cells
    #if $whitelist:
        --whitelist '$whitelist'
    #end if
    --max-error $max_error
    ## A value of 0 means "use the tool default", so the option is omitted.
    #if str($start_trim) != "0":
        --start-trim $start_trim
    #end if
    $sliding_window
    $dense
    ## A value of 0 means "process all reads", so the option is omitted.
    #if str($first_n) != "0":
        --first_n $first_n
    #end if
    #if str($unmapped.output) == "true":
        ## The parameter lives inside the "unmapped" conditional and must be
        ## addressed through it.
        ## NOTE(review): the resulting table (presumably Results/unmapped.csv)
        ## is not collected as a Galaxy output - confirm and add one if wanted.
        --unknown-top-tags $unmapped.unknown_top_tags
    #end if

## Outputs are gzip
&& gunzip Results/read_count/barcodes.tsv.gz
&& gunzip Results/read_count/features.tsv.gz
&& gunzip Results/read_count/matrix.mtx.gz
&& gunzip Results/umi_count/barcodes.tsv.gz
&& gunzip Results/umi_count/features.tsv.gz
&& gunzip Results/umi_count/matrix.mtx.gz
    ]]></command>
    <inputs>
        <conditional name="input_types">
            <param name="use" type="select" label="Input Type">
                <option value="repeat">Separate barcode and CMO/HTO reads</option>
                <option value="list_paired">Paired collection of barcode and CMO/HTO reads</option>
            </param>
            <when value="repeat">
                <param format="fastq.gz,fastqsanger.gz" name="input1" type="data" multiple="true" label="RNA-Seq FASTQ file, Barcode reads"/>
                <param format="fastq.gz,fastqsanger.gz" name="input2" type="data" multiple="true" label="RNA-Seq FASTQ file, HTO/CMO reads"/>
            </when>
            <when value="list_paired">
                <param name="input_collection" collection_type="paired" type="data_collection" format="fastq.gz,fastqsanger.gz" label="Collection of Pairs"/>
            </when>
        </conditional>
        <param argument="--tags" type="data" format="csv" label="the CMO/HTO barcodes as well as their respective names" help="first column is the sequence and second column is the name"/>
        <conditional name="params">
            <param name="chemistry" type="select" label="Configure Chemistry Options">
                <option value="v2" selected="true">Chromium Chemistry v2</option>
                <option value="v3">Chromium Chemistry v3</option>
                <option value="custom">Custom</option>
            </param>
            <!-- The v2 and v3 presets are hard-coded in the command section. -->
            <when value="v2"/>
            <when value="v3"/>
            <when value="custom">
                <param argument="--cell_barcode_first_base" type="integer" min="1" value="1" label="Cell Barcode First Base"/>
                <param argument="--cell_barcode_last_base" type="integer" min="1" value="16" label="Cell Barcode Last Base"/>
                <param argument="--umi_first_base" type="integer" min="1" value="17" label="UMI First Base"/>
                <param argument="--umi_last_base" type="integer" min="1" value="26" label="UMI Last Base"/>
            </when>
        </conditional>
        <param argument="--bc_collapsing_dist" type="integer" min="0" value="1" label="How many errors are allowed between two cell barcodes to collapse them onto one cell."/>
        <param argument="--umi_collapsing_dist" type="integer" min="0" value="2" label="How many errors are allowed between two umi within the same cell and TAG to collapse."/>
        <param argument="--no_umi_correction" type="boolean" truevalue="--no_umi_correction" falsevalue="" checked="false" label="Deactivate UMI correction"/>
        <param argument="--expected_cells" type="integer" min="1" value="3000" label="How many cells you expect in your run"/>
        <param argument="--whitelist" type="data" format="txt,csv,tsv" label="Whitelist of cell barcodes" optional="true"/>
        <param name="max_error" type="integer" min="0" value="2" label="Maximum Levenshtein distance allowed for CMO/HTO."/>
        <param name="start_trim" type="integer" min="0" value="0" label="How many bases should be trimmed on CMO/HTO read before starting to map"/>
        <param name="sliding_window" type="boolean" truevalue="--sliding-window" falsevalue="" checked="false" label="Activate sliding window alignment."/>
        <param argument="--dense" type="boolean" truevalue="--dense" falsevalue="" checked="false" label="Output a dense tsv matrix (in addition to the sparse format)"/>
        <!-- 0 keeps the default of processing every read; negative values are rejected. -->
        <param argument="--first_n" type="integer" min="0" value="0" label="Select N reads to run on instead of all."/>
        <conditional name="unmapped">
            <param name="output" type="select" label="Write table of unknown TAGs to file.">
                <option value="false">No</option>
                <option value="true">Yes</option>
            </param>
            <when value="false"/>
            <when value="true">
                <param name="unknown_top_tags" type="integer" min="1" value="100" label="Top n unmapped TAGs to output."/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="report" format="yaml" label="${tool.name} on ${on_string}: report" from_work_dir="Results/run_report.yaml"/>
        <data format="tsv" name="output_features" label="${tool.name} on ${on_string}: Features raw" from_work_dir="Results/read_count/features.tsv"/>
        <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes raw" from_work_dir="Results/read_count/barcodes.tsv"/>
        <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Features Counts raw" from_work_dir="Results/read_count/matrix.mtx"/>
        <data format="tsv" name="output_features_filtered" label="${tool.name} on ${on_string}: Features filtered" from_work_dir="Results/umi_count/features.tsv"/>
        <data format="tsv" name="output_barcodes_filtered" label="${tool.name} on ${on_string}: Barcodes filtered" from_work_dir="Results/umi_count/barcodes.tsv"/>
        <data format="mtx" name="output_matrix_filtered" label="${tool.name} on ${on_string}: Matrix Features Counts filtered (UMI)" from_work_dir="Results/umi_count/matrix.mtx"/>
        <!-- Only created when the dense option is switched on. -->
        <data format="tsv" name="dense_output_matrix" label="${tool.name} on ${on_string}: Dense Matrix With UMI" from_work_dir="Results/dense_umis.tsv">
            <filter>dense</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="7">
            <!-- test 1 -->
            <param name="tags" value="tags.csv"/>
            <param name="expected_cells" value="2"/>
            <param name="whitelist" value="whitelist.txt"/>
            <conditional name="input_types">
                <param name="use" value="repeat"/>
                <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
                <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
            </conditional>
            <conditional name="params">
                <param name="chemistry" value="v2"/>
            </conditional>
            <output name="report">
                <assert_contents>
                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
                    <has_line line="Reads processed: 200"/>
                    <has_line line="Uncorrected cells: 0"/>
                </assert_contents>
            </output>
            <output name="output_barcodes">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="output_features">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-4]\s+43"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="output_barcodes_filtered">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="output_features_filtered">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix_filtered">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-4]\s+36"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="8">
            <!-- test 2 -->
            <!-- change input style use dense no whitelist-->
            <param name="tags" value="tags.csv"/>
            <param name="expected_cells" value="2"/>
            <conditional name="input_types">
                <param name="use" value="list_paired"/>
                <param name="input_collection">
                    <collection type="paired">
                        <element name="forward" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
                        <element name="reverse" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="params">
                <param name="chemistry" value="v2"/>
            </conditional>
            <param name="dense" value="true"/>
            <output name="report">
                <assert_contents>
                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
                    <has_line line="Reads processed: 200"/>
                    <has_line line="Uncorrected cells: 0"/>
                </assert_contents>
            </output>
            <output name="output_barcodes">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output name="output_features">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-2]\s+43"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="output_barcodes_filtered">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output name="output_features_filtered">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix_filtered">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-2]\s+36"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="dense_output_matrix">
                <assert_contents>
                    <has_line_matching expression="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC\s+2[79]\s+2[79]"/>
                    <has_line_matching expression="test1-CGTAGCTCG-CGTAGCTCG\s+[23][56]\s+[23][56]"/>
                    <has_line_matching expression="unmapped\s+2[17]\s+2[17]"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="7">
            <!-- test 3 -->
            <!-- custom chemistry max_error 5 -->
            <param name="tags" value="tags.csv"/>
            <param name="expected_cells" value="2"/>
            <param name="whitelist" value="whitelist.txt"/>
            <conditional name="input_types">
                <param name="use" value="repeat"/>
                <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
                <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
            </conditional>
            <conditional name="params">
                <param name="chemistry" value="custom"/>
                <param name="cell_barcode_first_base" value="1"/>
                <param name="cell_barcode_last_base" value="16"/>
                <param name="umi_first_base" value="17"/>
                <param name="umi_last_base" value="26"/>
            </conditional>
            <param name="max_error" value="5"/>
            <output name="report">
                <assert_contents>
                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
                    <has_line line="Reads processed: 200"/>
                    <has_line line="Uncorrected cells: 0"/>
                </assert_contents>
            </output>
            <output name="output_barcodes">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="output_features">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-4]\s+68"/>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
            <output name="output_barcodes_filtered">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="output_features_filtered">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix_filtered">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-4]\s+52"/>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="7">
            <!-- test 4 -->
            <!-- incompatible whitelist -->
            <param name="tags" value="tags.csv"/>
            <param name="expected_cells" value="2"/>
            <param name="whitelist" value="incompatible_whitelist.txt"/>
            <conditional name="input_types">
                <param name="use" value="repeat"/>
                <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
                <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
            </conditional>
            <conditional name="params">
                <param name="chemistry" value="v2"/>
            </conditional>
            <output name="report">
                <assert_contents>
                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
                    <has_line line="Reads processed: 200"/>
                    <has_line line="Uncorrected cells: 0"/>
                </assert_contents>
            </output>
            <output name="output_barcodes">
                <assert_contents>
                    <has_line line="AAAAAAAAAAAAAAAA"/>
                    <has_line line="TTTTTTTTTTTTTTTT"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output name="output_features">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix">
                <assert_contents>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_barcodes_filtered">
                <assert_contents>
                    <has_line line="AAAAAAAAAAAAAAAA"/>
                    <has_line line="TTTTTTTTTTTTTTTT"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output name="output_features_filtered">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix_filtered">
                <assert_contents>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="7">
            <!-- test 5 -->
            <!-- umi_collapsing_dist 5 -->
            <param name="tags" value="tags.csv"/>
            <param name="expected_cells" value="2"/>
            <param name="whitelist" value="whitelist.txt"/>
            <conditional name="input_types">
                <param name="use" value="repeat"/>
                <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
                <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
            </conditional>
            <conditional name="params">
                <param name="chemistry" value="v2"/>
            </conditional>
            <param name="umi_collapsing_dist" value="5"/>
            <output name="report">
                <assert_contents>
                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
                    <has_line line="Reads processed: 200"/>
                    <has_line line="Uncorrected cells: 0"/>
                </assert_contents>
            </output>
            <output name="output_barcodes">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="output_features">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-4]\s+43"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="output_barcodes_filtered">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="output_features_filtered">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix_filtered">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-4]\s+10"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="7">
            <!-- test 6 -->
            <!-- multiple fastqs -->
            <param name="tags" value="tags.csv"/>
            <param name="expected_cells" value="2"/>
            <param name="whitelist" value="whitelist.txt"/>
            <conditional name="input_types">
                <param name="use" value="repeat"/>
                <param name="input1" value="correct_R1.fastq.gz,correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
                <param name="input2" value="correct_R2.fastq.gz,correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
            </conditional>
            <conditional name="params">
                <param name="chemistry" value="v2"/>
            </conditional>
            <output name="report">
                <assert_contents>
                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
                    <has_line line="Reads processed: 400"/>
                    <has_line line="Uncorrected cells: 0"/>
                </assert_contents>
            </output>
            <output name="output_barcodes">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="output_features">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-4]\s+86"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="output_barcodes_filtered">
                <assert_contents>
                    <has_line line="TAGAGGGAAGTCAAGC"/>
                    <has_line line="TACATATTCTTTACTG"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="output_features_filtered">
                <assert_contents>
                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
                    <has_line line="unmapped"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
            <output name="output_matrix_filtered">
                <assert_contents>
                    <has_line_matching expression="[1-2]\s+[1-4]\s+36"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="7">
            <!-- test 7 -->
            <!-- unknown TAG table requested: exercises the parameter nested in the "unmapped" conditional -->
            <param name="tags" value="tags.csv"/>
            <param name="expected_cells" value="2"/>
            <param name="whitelist" value="whitelist.txt"/>
            <conditional name="input_types">
                <param name="use" value="repeat"/>
                <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
                <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
            </conditional>
            <conditional name="params">
                <param name="chemistry" value="v2"/>
            </conditional>
            <conditional name="unmapped">
                <param name="output" value="true"/>
                <param name="unknown_top_tags" value="50"/>
            </conditional>
            <output name="report">
                <assert_contents>
                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
                    <has_line line="Reads processed: 200"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
CITE-seq-Count is a program that outputs UMI and read counts from raw fastq CITE-seq or hashing data.

Here is an image explaining the expected structure of read1 and read2 from the sequencer:

.. image:: read_structure.png
    :alt: Read1: --barcode--|--umi--|TTTT and Read2: --CMO/HTO--|AAA
    ]]></help>
    <citations>
        <citation type="doi">10.5281/zenodo.2585469</citation>
    </citations>
</tool>