changeset 0:3df3d1b51110 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/cite_seq_count commit 70fe293f480c4d0e9f14d4373ecac4cbcf4fe17f
author iuc
date Wed, 18 Jan 2023 16:04:20 +0000
parents
children
files cite_seq_count.xml static/images/read_structure.png test-data/correct_R1.fastq.gz test-data/correct_R2.fastq.gz test-data/incompatible_whitelist.txt test-data/tags.csv test-data/whitelist.txt
diffstat 7 files changed, 592 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cite_seq_count.xml	Wed Jan 18 16:04:20 2023 +0000
@@ -0,0 +1,584 @@
+<tool name="CITE-seq-Count" id="cite_seq_count" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>Count CMO/HTO</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.4.4</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <edam_topics>
+        <edam_topic>topic_3170</edam_topic>
+        <edam_topic>topic_4028</edam_topic>
+        <edam_topic>topic_3308</edam_topic>
+    </edam_topics>
+    <xrefs>
+        <xref type="bio.tools">CITE-seq-Count</xref>
+    </xrefs>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">cite-seq-count</requirement>
+        <requirement type="package" version="3.7.12">python</requirement>
+        <requirement type="package" version="1.0.0">umi_tools</requirement>
+        <requirement type="package" version="0.20.7">python-levenshtein</requirement>
+        <requirement type="package" version="0.20.7">levenshtein</requirement>
+        <requirement type="package" version="0.25.3">pandas</requirement>
+        <requirement type="package" version="1.0.0">umi_tools</requirement>
+        <!-- The next dependencies are here to help conda to solve the environment -->
+        <requirement type="package" version="1.0.8">bzip2</requirement>
+        <requirement type="package" version="2.5.0">expat</requirement>
+        <requirement type="package" version="0.70.14">multiprocess</requirement>
+        <requirement type="package" version="1.21.6">numpy</requirement>
+        <requirement type="package" version="0.16">pysam</requirement>
+        <requirement type="package" version="1.7.3">scipy</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" level="fatal"/>
+    </stdio>
+    <version_command>CITE-seq-Count --version</version_command>
+    <command><![CDATA[
+## This is taken from the star solo wrapper:
+## Check that the input pairs are of the same type.
+## We consume either repeats of two inputs R1 + R2
+## or a collection of paired reads.
+#if str($input_types.use) == "repeat":
+    #assert len(str($input_types.input1).split(",")) == len(str($input_types.input2).split(","))
+    #set $reads1 = $input_types.input1
+    #set $reads2 = $input_types.input2
+#elif str($input_types.use) == "list_paired":
+    #set $reads1 = $input_types.input_collection.forward
+    #set $reads2 = $input_types.input_collection.reverse
+#end if
+CITE-seq-Count 
+--threads \${GALAXY_SLOTS:-4}
+--read1 '$reads1'
+--read2 '$reads2'
+--tags '$tags'
+## Chemistry:
+#if str($params.chemistry) == "v2":
+--cell_barcode_first_base 1
+--cell_barcode_last_base 16
+--umi_first_base 17
+--umi_last_base 26
+#else if str($params.chemistry) == "v3":
+--cell_barcode_first_base 1
+--cell_barcode_last_base 16
+--umi_first_base 17
+--umi_last_base 28
+#else if str($params.chemistry) == "custom":
+--cell_barcode_first_base $params.cell_barcode_first_base
+--cell_barcode_last_base $params.cell_barcode_last_base
+--umi_first_base $params.umi_first_base
+--umi_last_base $params.umi_last_base
+#end if
+--bc_collapsing_dist $bc_collapsing_dist
+--umi_collapsing_dist $umi_collapsing_dist
+$no_umi_correction
+--expected_cells $expected_cells
+#if $whitelist:
+--whitelist '$whitelist'
+#end if
+--max-error $max_error
+#if str($start_trim) != "0":
+--start-trim $start_trim
+#end if
+$sliding_window
+$dense
+#if str($first_n) != "0":
+--first_n $first_n
+#end if
+#if str($unmapped.output) == "true":
+--unknown-top-tags $unknown_top_tags
+#end if
+
+## Outputs are gzip
+&& gunzip Results/read_count/barcodes.tsv.gz
+&& gunzip Results/read_count/features.tsv.gz
+&& gunzip Results/read_count/matrix.mtx.gz
+&& gunzip Results/umi_count/barcodes.tsv.gz
+&& gunzip Results/umi_count/features.tsv.gz
+&& gunzip Results/umi_count/matrix.mtx.gz
+]]>
+</command>
+    <inputs>
+        <conditional name="input_types" >
+            <param name="use" type="select" label="Input Type" >
+                <option value="repeat" >Separate barcode and CMO/HTO reads</option>
+                <option value="list_paired" >Paired collection of barcode and CMO/HTO reads</option>
+            </param>
+            <when value="repeat">
+                <param format="fastq.gz,fastqsanger.gz" name="input1" type="data"  multiple="true"
+                label="RNA-Seq FASTQ file, Barcode reads" />
+                <param format="fastq.gz,fastqsanger.gz" name="input2" type="data"  multiple="true"
+                label="RNA-Seq FASTQ file, HTO/CMO reads"/>
+            </when>
+            <when value="list_paired">
+                <param name="input_collection" collection_type="paired" type="data_collection" format="fastq.gz,fastqsanger.gz" label="Collection of Pairs" />
+            </when>
+        </conditional>
+        <param argument="--tags" type="data" format="csv" label="the CMO/HTO barcodes as well as their respective names" help="first column is the sequence and second column is the name" />
+        <conditional name="params" >
+            <param name="chemistry" type="select" label="Configure Chemistry Options">
+                <option value="v2" selected="true">Chromium Chemistry v2</option>
+                <option value="v3">Chromium Chemistry v3</option>
+                <option value="custom">Custom</option>
+            </param>
+            <when value="v2" />
+            <when value="v3" />
+            <when value="custom" >
+                <param argument="--cell_barcode_first_base" type="integer" min="1" value="1" label="Cell Barcode First Base" />
+                <param argument="--cell_barcode_last_base" type="integer" min="1" value="16" label="Cell Barcode Last Base" />
+                <param argument="--umi_first_base" type="integer" min="1" value="17" label="UMI First Base" />
+                <param argument="--umi_last_base" type="integer" min="1" value="26" label="UMI Last Base" />
+            </when>
+        </conditional>
+        <param argument="--bc_collapsing_dist" type="integer" min="0" value="1" label="How many errors are allowed between two cell barcodes to collapse them onto one cell." />
+        <param argument="--umi_collapsing_dist" type="integer" min="0" value="2" label="How many errors are allowed between two umi within the same cell and TAG to collapse." />
+        <param argument="--no_umi_correction" type="boolean" truevalue="--no_umi_correction" falsevalue="" checked="false" label="Deactivate UMI correction" />
+        <param argument="--expected_cells" type="integer" min="1" value="3000" label="How many cells you expect in your run" />
+        <param argument="--whitelist" type="data" format="txt,csv,tsv" label="Whitelist of cell barcodes" optional="true"/>
+        <param name="max_error" type="integer" min="0" value="2" label="Maximum Levenshtein distance allowed for CMO/HTO."/>
+        <param name="start_trim" type="integer" min="0" value="0" label="How many bases should be trimmed on CMO/HTO read before starting to map"/>
+        <param name="sliding_window" type="boolean" truevalue="--sliding-window" falsevalue="" checked="false" label="Activate sliding window alignement."/>
+        <param argument="--dense" type="boolean" truevalue="--dense" falsevalue="" checked="false" label="Output in tsv format (in addition to the dense format)"/>
+        <param argument="--first_n" type="integer" value="0" label="Select N reads to run on instead of all." />
+        <conditional name="unmapped">
+            <param name="output" type="select" label="Write table of unknown TAGs to file.">
+                <option value="false">No</option>
+                <option value="true">Yes</option>
+            </param>
+            <when value="false"/>
+            <when value="true">
+            <param name="unknown_top_tags" type="integer" min="1" value="100" label="Top n unmapped TAGs to output."/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="report" format="yaml" label="${tool.name} on ${on_string}: report" from_work_dir="Results/run_report.yaml" />
+        <data format="tsv" name="output_features" label="${tool.name} on ${on_string}: Features raw"
+            from_work_dir="Results/read_count/features.tsv" />
+        <data format="tsv" name="output_barcodes" label="${tool.name} on ${on_string}: Barcodes raw"
+            from_work_dir="Results/read_count/barcodes.tsv" />
+        <data format="mtx" name="output_matrix" label="${tool.name} on ${on_string}: Matrix Features Counts raw"
+            from_work_dir="Results/read_count/matrix.mtx" />
+        <data format="tsv" name="output_features_filtered" label="${tool.name} on ${on_string}: Features filtered"
+            from_work_dir="Results/umi_count/features.tsv" />
+        <data format="tsv" name="output_barcodes_filtered" label="${tool.name} on ${on_string}: Barcodes filtered"
+                from_work_dir="Results/umi_count/barcodes.tsv" />
+        <data format="mtx" name="output_matrix_filtered" label="${tool.name} on ${on_string}: Matrix Features Counts filtered (UMI)"
+            from_work_dir="Results/umi_count/matrix.mtx" />
+        <data format="tsv" name="dense_output_matrix" label="${tool.name} on ${on_string}: Dense Matrix With UMI"
+            from_work_dir="Results/dense_umis.tsv" >
+            <filter>dense</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="7">
+            <!-- test 1 -->
+            <param name="tags" value="tags.csv"/>
+            <param name="expected_cells" value="2"/>
+            <param name="whitelist" value="whitelist.txt"/>
+            <conditional name="input_types">
+                <param name="use" value="repeat"/>
+                <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
+                <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+            <conditional name="params" >
+                <param name="chemistry" value="v2"/>
+            </conditional>
+            <output name="report">
+                <assert_contents>
+                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
+                    <has_line line="Reads processed: 200"/>
+                    <has_line line="Uncorrected cells: 0"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+            <output name="output_features">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-4]\s+43"/>
+                    <has_n_lines n="9"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes_filtered">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+            <output name="output_features_filtered">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix_filtered">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-4]\s+36"/>
+                    <has_n_lines n="9"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="8">
+            <!-- test 2 -->
+            <!-- change input style use dense no whitelist-->
+            <param name="tags" value="tags.csv"/>
+            <param name="expected_cells" value="2"/>
+            <conditional name="input_types">
+                <param name="use" value="list_paired" />
+                <param name="input_collection" >
+                    <collection type="paired">
+                        <element name="forward" value="correct_R1.fastq.gz" ftype="fastqsanger.gz" />
+                        <element name="reverse" value="correct_R2.fastq.gz" ftype="fastqsanger.gz" />
+                    </collection>
+                </param>
+            </conditional>
+            <conditional name="params" >
+                <param name="chemistry" value="v2"/>
+            </conditional>
+            <param name="dense" value="true"/>
+            <output name="report">
+                <assert_contents>
+                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
+                    <has_line line="Reads processed: 200"/>
+                    <has_line line="Uncorrected cells: 0"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="2"/>
+                </assert_contents>
+            </output>
+            <output name="output_features">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-2]\s+43"/>
+                    <has_n_lines n="9"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes_filtered">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="2"/>
+                </assert_contents>
+            </output>
+            <output name="output_features_filtered">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix_filtered">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-2]\s+36"/>
+                    <has_n_lines n="9"/>
+                </assert_contents>
+            </output>
+            <output name="dense_output_matrix">
+                <assert_contents>
+                    <has_line_matching expression="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC\s+2[79]\s+2[79]"/>
+                    <has_line_matching expression="test1-CGTAGCTCG-CGTAGCTCG\s+[23][56]\s+[23][56]"/>
+                    <has_line_matching expression="unmapped\s+2[17]\s+2[17]"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="7">
+            <!-- test 3 -->
+            <!-- custom chemistry max_error 5 -->
+            <param name="tags" value="tags.csv"/>
+            <param name="expected_cells" value="2"/>
+            <param name="whitelist" value="whitelist.txt"/>
+            <conditional name="input_types">
+                <param name="use" value="repeat"/>
+                <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
+                <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+            <conditional name="params" >
+                <param name="chemistry" value="custom"/>
+                <param name="cell_barcode_first_base" value="1"/>
+                <param name="cell_barcode_last_base" value="16"/>
+                <param name="umi_first_base" value="17"/>
+                <param name="umi_last_base" value="26"/>
+            </conditional>
+            <param name="max_error" value="5"/>
+            <output name="report">
+                <assert_contents>
+                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
+                    <has_line line="Reads processed: 200"/>
+                    <has_line line="Uncorrected cells: 0"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+            <output name="output_features">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-4]\s+68"/>
+                    <has_n_lines n="7"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes_filtered">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+            <output name="output_features_filtered">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix_filtered">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-4]\s+52"/>
+                    <has_n_lines n="7"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="7">
+            <!-- test 4 -->
+            <!-- incompatible whitelist -->
+            <param name="tags" value="tags.csv"/>
+            <param name="expected_cells" value="2"/>
+            <param name="whitelist" value="incompatible_whitelist.txt"/>
+            <conditional name="input_types">
+                <param name="use" value="repeat"/>
+                <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
+                <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+            <conditional name="params" >
+                <param name="chemistry" value="v2"/>
+            </conditional>
+            <output name="report">
+                <assert_contents>
+                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
+                    <has_line line="Reads processed: 200"/>
+                    <has_line line="Uncorrected cells: 0"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes">
+                <assert_contents>
+                    <has_line line="AAAAAAAAAAAAAAAA"/>
+                    <has_line line="TTTTTTTTTTTTTTTT"/>
+                    <has_n_lines n="2"/>
+                </assert_contents>
+            </output>
+            <output name="output_features">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes_filtered">
+                <assert_contents>
+                    <has_line line="AAAAAAAAAAAAAAAA"/>
+                    <has_line line="TTTTTTTTTTTTTTTT"/>
+                    <has_n_lines n="2"/>
+                </assert_contents>
+            </output>
+            <output name="output_features_filtered">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix_filtered">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="7">
+            <!-- test 5 -->
+            <!-- umi_collapsing_dist 5 -->
+            <param name="tags" value="tags.csv"/>
+            <param name="expected_cells" value="2"/>
+            <param name="whitelist" value="whitelist.txt"/>
+            <conditional name="input_types">
+                <param name="use" value="repeat"/>
+                <param name="input1" value="correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
+                <param name="input2" value="correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+            <conditional name="params" >
+                <param name="chemistry" value="v2"/>
+            </conditional>
+            <param name="umi_collapsing_dist" value="5"/>
+            <output name="report">
+                <assert_contents>
+                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
+                    <has_line line="Reads processed: 200"/>
+                    <has_line line="Uncorrected cells: 0"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+            <output name="output_features">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-4]\s+43"/>
+                    <has_n_lines n="9"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes_filtered">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+            <output name="output_features_filtered">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix_filtered">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-4]\s+10"/>
+                    <has_n_lines n="9"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="7">
+            <!-- test 6 -->
+            <!-- multiple fastqs -->
+            <param name="tags" value="tags.csv"/>
+            <param name="expected_cells" value="2"/>
+            <param name="whitelist" value="whitelist.txt"/>
+            <conditional name="input_types">
+                <param name="use" value="repeat"/>
+                <param name="input1" value="correct_R1.fastq.gz,correct_R1.fastq.gz" ftype="fastqsanger.gz"/>
+                <param name="input2" value="correct_R2.fastq.gz,correct_R2.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+            <conditional name="params" >
+                <param name="chemistry" value="v2"/>
+            </conditional>
+            <output name="report">
+                <assert_contents>
+                    <has_line line="CITE-seq-Count Version: @TOOL_VERSION@"/>
+                    <has_line line="Reads processed: 400"/>
+                    <has_line line="Uncorrected cells: 0"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+            <output name="output_features">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-4]\s+86"/>
+                    <has_n_lines n="9"/>
+                </assert_contents>
+            </output>
+            <output name="output_barcodes_filtered">
+                <assert_contents>
+                    <has_line line="TAGAGGGAAGTCAAGC"/>
+                    <has_line line="TACATATTCTTTACTG"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+            <output name="output_features_filtered">
+                <assert_contents>
+                    <has_line line="test2-CGTACGTAGCCTAGC-CGTACGTAGCCTAGC"/>
+                    <has_line line="test1-CGTAGCTCG-CGTAGCTCG"/>
+                    <has_line line="unmapped"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+            <output name="output_matrix_filtered">
+                <assert_contents>
+                    <has_line_matching expression="[1-2]\s+[1-4]\s+36"/>
+                    <has_n_lines n="9"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+CITE-seq-Count is a program that outputs UMI and read counts from raw fastq CITE-seq or hashing data.
+
+Here is an image explaining the expected structure of read1 and read2 from the sequencer:
+
+.. image:: read_structure.png
+   :alt: Read1: --barcode--|--umi--|TTTT and Read2: --CMO/HTO--|AAA
+
+]]></help>
+    <citations>
+        <citation type="doi">10.5281/zenodo.2585469</citation>
+    </citations>
+</tool>
Binary file static/images/read_structure.png has changed
Binary file test-data/correct_R1.fastq.gz has changed
Binary file test-data/correct_R2.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/incompatible_whitelist.txt	Wed Jan 18 16:04:20 2023 +0000
@@ -0,0 +1,2 @@
+AAAAAAAAAAAAAAAA
+TTTTTTTTTTTTTTTT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tags.csv	Wed Jan 18 16:04:20 2023 +0000
@@ -0,0 +1,2 @@
+CGTACGTAGCCTAGC,test2-CGTACGTAGCCTAGC
+CGTAGCTCG,test1-CGTAGCTCG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/whitelist.txt	Wed Jan 18 16:04:20 2023 +0000
@@ -0,0 +1,4 @@
+TACATATTCTTTACTG
+TAGAGGGAAGTCAAGC
+AAAAAAAAAAAAAAAA
+TTTTTTTTTTTTTTTT