diff sequali.xml @ 0:8e2c32a7c0a9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sequali commit 035d59f19f3a55032be215a3a226c83ad167059c
author iuc
date Thu, 10 Apr 2025 16:42:32 +0000
parents
children abea23542a6b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sequali.xml	Thu Apr 10 16:42:32 2025 +0000
@@ -0,0 +1,242 @@
+<tool id="sequali" name="sequali" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
+    <description>Fast sequencing data quality metrics for short and long reads</description>
+    <macros>
+        <token name="@TOOL_VERSION@">0.12.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">sequali</xref>
+    </xrefs>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">sequali</requirement>
+    </requirements>
+    <version_command><![CDATA[
+        sequali --version
+        ]]>
+    </version_command>
+    <command detect_errors="exit_code"><![CDATA[
+    #import re
+    #if $input_reads.is_collection:
+        #set $input_1 = re.sub('[^\w\-_\.]', '_', $input_reads[0].element_identifier) 
+        #set $input_2 = re.sub('[^\w\-_\.]', '_', $input_reads[1].element_identifier)
+        ln -s '${input_reads[0]}' ${input_1} &&
+        ln -s '${input_reads[1]}' ${input_2} &&
+    #else if $input_type_selector == 'single':
+        #set $input_1 = re.sub('[^\w\-_\.]', '_', str($input_reads.element_identifier))
+        ln -s '${input_reads}' ${input_1} &&
+        #set $input_2 = ''
+    #else if $input_type_selector == 'paired':
+        #set $input_1 = re.sub('[^\w\-_\.]', '_', str($input_reads.element_identifier))
+        ln -s '${input_reads}' '$input_1' &&
+        #set $input_2 = re.sub('[^\w\-_\.]', '_', str($input_reads_rev.element_identifier))
+        ln -s '${input_reads_rev}' '$input_2' &&
+    #end if
+
+    mkdir -p '${html_report.files_path}' &&
+    sequali
+        #if $adapter:
+            --adapter-file '${adapter}'
+        #end if
+        #if str($overrepresentation_threshold) != '':
+            --overrepresentation-threshold-fraction '${overrepresentation_threshold}'
+        #end if
+        #if str($overrep_min_threshold) != '':
+            --overrepresentation-min-threshold '${overrep_min_threshold}'
+        #end if
+        #if str($overrep_max_threshold) != '':
+            --overrepresentation-max-threshold '${overrep_max_threshold}'
+        #end if
+        #if str($overrep_max_unique_fragments) != '':
+            --overrepresentation-max-unique-fragments '${overrep_max_unique_fragments}'
+        #end if
+        #if str($overrep_fragment_length) != '':
+            --overrepresentation-fragment-length '${overrep_fragment_length}'
+        #end if
+        #if str($overrep_sample_every) != '':
+            --overrepresentation-sample-every '${overrep_sample_every}'
+        #end if
+        #if str($dup_max_stored_fingerprints) != '':
+            --duplication-max-stored-fingerprints '${dup_max_stored_fingerprints}'
+        #end if
+        #if str($fp_front_length) != '':
+            --fingerprint-front-length '${fp_front_length}'
+        #end if
+        #if str($fp_back_length) != '':
+            --fingerprint-back-length '${fp_back_length}'
+        #end if
+        #if str($fp_front_offset) != '':
+            --fingerprint-front-offset '${fp_front_offset}'
+        #end if
+        #if str($fp_back_offset) != '':
+            --fingerprint-back-offset '${fp_back_offset}'
+        #end if
+        --html '$html_report'
+        --json '$json_report'
+        --threads \${GALAXY_SLOTS:-2}
+        '${input_1}'
+        #if $input_2:
+            '${input_2}'
+        #end if
+
+    ]]></command>
+    <inputs>
+        <conditional name="input_type">
+            <param name="input_type_selector" type="select" label="Single file or paired-end reads" help="Select between paired-end reads and single file (fastq, ubam) to process">
+                <option value="single">Single file</option>
+                <option value="paired">Paired-end</option>
+                <option value="paired_collection">Paired Collection</option>
+            </param>
+            <when value="single">
+                <param name="input_reads" type="data" format="fastq,fastq.gz,unsorted.bam" label="Select fastq dataset" help="Specify dataset with single reads (fastq or unaligned bam)."/>
+            </when>
+            <when value="paired">
+                <param name="input_reads" type="data" format="fastq,fastq.gz" label="Forward reads" help="Specify FASTQ(.gz) dataset with forward reads."/>
+                <param name="input_reads_rev" type="data" format="fastq,fastq.gz" label="Reverse reads" help="Specify FASTQ(.gz) dataset with reverse reads."/>
+            </when>
+            <when value="paired_collection">
+                <param name="input_reads" type="data_collection" format="fastq,fastq.gz" label="Collection with paired-end reads" help="Specify collection with paired-end reads."/>
+            </when>
+        </conditional>
+        <param argument="--adapter-file" name="adapter" format="tabular" type="data" optional= "true" label="Adapters to search for" 
+            help="TSV file with header: Name, Sequencing Technology, Probe sequence, sequence position. Default: https://github.com/rhpvorderman/sequali/tree/develop/src/sequali/adapters/adapter_list.tsv"/>
+        <section name="overrepresentation-param" title="Overrepresentation parameters" expanded="False">
+            <param argument="--overrepresentation-threshold-fraction" name="overrepresentation_threshold" type="float" optional="true" min="0" max="1"
+                    label="Fraction to be determined as overrepresented" help="The threshold is calculated as fraction times the number of sampled sequences. Default: 0.001 (1 in 1,000)."/>
+            <param argument="--overrepresentation-min-threshold" name="overrep_min_threshold" type="integer" optional="true" min="0"
+                    label="Minimum occurrences to be considered overrepresented" 
+                    help="The minimum amount of occurrences for a sequence to be considered overrepresented, regardless of the bound set by the threshold fraction. Useful for smaller files. Default: 100."/>
+            <param argument="--overrepresentation-max-threshold" name="overrep_max_threshold" type="integer" optional="true" min="0"
+                    label="Amount of occurrences to be considered overrepresented" 
+                    help="The amount of occurrences for a sequence to be considered overrepresented, regardless of the bound set by the threshold fraction. Useful for very large files. Default: unlimited."/>
+            <param argument="--overrepresentation-max-unique-fragments" name="overrep_max_unique_fragments" type="integer" optional="true" min="0"
+                    label="Maximum amount of unique fragments to store" 
+                    help="Larger amounts increase the sensitivity of finding overrepresented sequences at the cost of increasing memory usage. Default: 5,000,000."/>
+            <param argument="--overrepresentation-fragment-length" name="overrep_fragment_length" type="integer" optional="true" min="3" max="31"
+                    label="Length of fragments to sample" 
+                    help="The maximum is 31. Default: 21."/>
+            <param argument="--overrepresentation-sample-every" name="overrep_sample_every" type="integer" optional="true"
+                    label="Sample one every N sequences" 
+                    help="More sequences sampled leads to better precision, lower speed, and also more bias towards the beginning of the file as the fragment store gets filled up with more sequences from the beginning. Default is N=8, so one in 8 sequences is analysed."/>
+        </section>
+        <section name="duplication-param" title="Duplication parameters" expanded="False">
+            <param argument="--duplication-max-stored-fingerprints" name="dup_max_stored_fingerprints" type="integer" optional="true" min="0"
+                    label="Maximum number of fingerprints stored" 
+                    help="Determines how many fingerprints are maximally stored to estimate the duplication rate. More fingerprints leads to a more accurate estimate, but also more memory usage. Default: 1,000,000."/>
+            <param argument="--fingerprint-front-length" name="fp_front_length" type="integer" optional="true" min="0"
+                    label="Number of bases from the front of the sequence" 
+                    help="Number of bases to be taken for the deduplication fingerprint from the front of the sequence. Default: 8."/>
+            <param argument="--fingerprint-back-length" name="fp_back_length" type="integer" optional="true" min="0"
+                    label="Number of bases from the back of the sequence" 
+                    help="Number of bases to be taken for the deduplication fingerprint from the back of the sequence. Default: 8."/>
+            <param argument="--fingerprint-front-offset" name="fp_front_offset" type="integer" optional="true" min="0"
+                    label="Front offset of the deduplication fingerprint" 
+                    help="Useful for avoiding adapter sequences. Default: 64 for single end, 0 for paired sequences."/>
+            <param argument="--fingerprint-back-offset" name="fp_back_offset" type="integer" optional="true" min="0"
+                    label="Back offset of the deduplication fingerprint" 
+                    help="Useful for avoiding adapter sequences. Default: 64 for single end, 0 for paired sequences."/>
+        </section>     
+    </inputs>
+    <outputs>
+        <data format="html" name="html_report" label="${tool.name} on ${on_string}: HTML report" />
+        <data format="json" name="json_report" label="${tool.name} on ${on_string}: JSON report" />
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <conditional name="input_type">
+                <param name="input_type_selector" value="single" />
+                <param name="input_reads" value="input_fwd.fastq" />
+            </conditional>
+            <output name="html_report" ftype="html">
+                <assert_contents>
+                    <has_text text="Sequali report"/>
+                </assert_contents>            
+            </output>
+            <output name="json_report" ftype="json">
+                <assert_contents>
+                    <has_text text="sequali_version"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <conditional name="input_type">
+                <param name="input_type_selector" value="paired" />
+                <param name="input_reads" value="input_fwd.fastq" />
+                <param name="input_reads_rev" value="input_rev.fastq" />
+            </conditional>
+            <output name="html_report" ftype="html">
+                <assert_contents>
+                    <has_text text="Sequali report"/>
+                    <has_text text="Filename read 2"/>
+                </assert_contents>            
+            </output>
+            <output name="json_report" ftype="json">
+                <assert_contents>
+                    <has_text text="sequali_version"/>
+                    <has_text text="filename_read2"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <conditional name="input_type">
+                <param name="input_type_selector" value="single" />
+                <param name="input_reads" value="input_nanopore.fastq" />
+            </conditional>
+            <output name="html_report" ftype="html">
+                <assert_contents>
+                    <has_text text="Sequali report"/>
+                </assert_contents>
+            </output>
+            <output name="json_report" ftype="json">
+                <assert_contents>
+                    <has_text text="sequali_version"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_failure="true">
+            <conditional name="input_type">
+                <param name="input_type_selector" value="single" />
+                <param name="input_reads" value="input_fail.fastq" />
+            </conditional>          
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**Purpose**
+
+Sequali_ is a tool for fast sequencing data quality metrics for short and long reads. 
+
+Features:
+
+- Informative graphs that allow for judging the quality of a sequence at a quick glance.
+- Overrepresentation analysis using 21 bp sequence fragments. Overrepresented sequences are checked against the NCBI univec database.
+- Estimate duplication rate using a fingerprint subsampling technique which is also used in filesystem duplication estimation.
+- Checks for 6 illumina adapter sequences and 17 nanopore adapter sequences for single read data.
+- Determines adapters by overlap analysis for paired read data.
+- Insert size metrics for paired read data.
+- Per tile quality plots for illumina reads.
+- Channel and other plots for nanopore reads.
+
+-----
+
+**Supported formats**
+
+- FASTQ. Only the Sanger variation with a phred offset of 33 and the error rate calculation of 10 ^ (-phred/10) is supported. All sequencers use this format today.
+    - Paired end sequencing data is supported.
+    - For sequences called by illumina base callers an additional plot with the per tile quality will be provided.
+    - For sequences called by guppy additional plots for nanopore specific data will be provided.
+- (unaligned) BAM with single reads. Read-pair information is currently ignored.
+    - For BAM data as delivered by dorado additional nanopore plots will be provided.
+
+-----
+
+**Outputs**
+
+Sequali produces informative HTML report with dynamic plots for each quality metric.
+
+.. _Sequali: https://sequali.readthedocs.io/en/latest/
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioadv/vbaf010</citation>
+    </citations>
+</tool>
\ No newline at end of file