view pysradb.xml @ 2:b91e1e70f3a1 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pysradb commit 2d911e00c68a1be27d3627acd5020ee12378919b
author iuc
date Mon, 23 Jan 2023 16:10:06 +0000
parents f63cf0adfd87
children
line wrap: on
line source

<tool id='pysradb_search' name='pysradb search' version='@TOOL_VERSION@+galaxy@SUFFIX_VERSION@' profile='20.01'>
    <!-- Short description of what the tool retrieves -->
    <description>sequence metadata from SRA/ENA</description>
    <!-- The macros expanded in this file are imported from macros.xml -->
    <macros><import>macros.xml</import></macros>
    <expand macro="bio_tools" />
    <expand macro="requirements"/>
    <command detect_errors='exit_code'><![CDATA[
    ## Assemble the pysradb search call. When an instrument is selected it is
    ## prepended to the free-text query.
    pysradb search
    --db $database
    #if $conditional_platform.instrument
        --query '${conditional_platform.instrument} ${query}'
    #else
        --query '${query}'
    #end if
    --source '${conditional_mode.source}'
    ## The organism field only exists when the metagenomic selector is disabled
    #if $conditional_mode.selector == 'false'
        --organism '${conditional_mode.organism}'
    #end if
    ## The layout field only exists for the Illumina platform
    #if $conditional_platform.platform == 'illumina'
        #if $conditional_platform.layout
            --layout $conditional_platform.layout
        #end if
    #end if
    --platform '$conditional_platform.platform'
    #if $selection
        --selection '${selection}'
    #end if
    #if $strategy
        --strategy '${strategy}'
    #end if

    ## Optional fields
    --max $output_options.max
    #if $advanced.mbases
        --mbases $advanced.mbases
    #end if
    #if $advanced.accession
        --accession '${advanced.accession}'
    #end if
    #if $advanced.publication_date
        --publication-date '${advanced.publication_date}'
    #end if
    --verbosity $output_options.verbosity
    #if 'stats' in $output_options.output_files
        --stats
    #end if
    #if 'graphs' in $output_options.output_files
        --graphs 'daterange selection basecount'
    #end if
    --saveto 'output.tsv'
    ## With stats enabled, the standard output of pysradb is captured in stats.txt
    #if 'stats' in $output_options.output_files
        > stats.txt
        ## For verbosity 2 and 3, append the per-value counts of one column of
        ## output.tsv (column 11 for SRA, column 12 for ENA) under the
        ## sequencing instrument heading
        #if $output_options.verbosity == '2' or $output_options.verbosity == '3'
            && echo '  Sequencing instrument:' >> stats.txt
            #if $database == 'sra'
                &&  gawk -F '\t' '{print $11}' 'output.tsv' | tail -n +2 | sort | uniq -c | gawk '{ print "    ", $0 }' >> stats.txt
            #else
                &&  gawk -F '\t' '{print $12}' 'output.tsv' | tail -n +2 | sort | uniq -c | gawk '{ print "    ", $0 }' >> stats.txt
            #end if
        #end if
        && echo $'\n  Query keywords: ${query}\n'  >> stats.txt
    #end if
    ## With graphs enabled, convert the SVG histograms in search_plots to JPG
    ## and rename them to the fixed names used for the output collection
    #if 'graphs' in $output_options.output_files
        && convert 'search_plots/Histogram*.svg' -set filename:fn '%[basename]' 'search_plots/%[filename:fn].jpg'
        && mv 'search_plots/Histogram of Base Count'*.jpg 'search_plots/histogram_base_count.jpg'
        && mv 'search_plots/Histogram of Library'*.jpg 'search_plots/histogram_library.jpg'
        && mv 'search_plots/Histogram of Publication'*.jpg 'search_plots/histogram_publication.jpg'
    #end if
    ]]></command>
        <inputs>
            <!-- Archive to search; the selected value is passed to the db option of pysradb search -->
            <param name="database" type="select" label="Database to query"
                help="The Sequence Read Archive (SRA) is the largest publicly available repository of high-throughput sequencing data. The European Nucleotide Archive (ENA) provides a comprehensive record of nucleotide sequencing information.">
                <option value="sra">SRA: Sequence Read Archive</option>
                <option value="ena">ENA: European Nucleotide Archive</option>
            </param>
            <!-- Optional free-text search terms. Characters other than letters, digits, "_", "-" and spaces are replaced by the empty invalid_char. -->
            <param argument="--query"
                type="text"
                value=""
                optional="true"
                label="Query keywords/BioProject ID"
                help="Multiple keywords should be separated by spaces. Example: colorectal cancer">
                <sanitizer invalid_char="">
                    <valid initial="string.letters,string.digits">
                        <add value="_"/>
                        <add value="-"/>
                        <add value=" "/>
                    </valid>
                </sanitizer>
                <validator type="regex">[0-9a-zA-Z_ -]+</validator>
            </param>
            <!-- Switches between regular and metagenomic searches; the organism field is only defined for the regular case -->
            <conditional name="conditional_mode">
                <param name="selector"
                    type="select"
                    label="Metagenomic data"
                    help="Enable this option if you are interested in genetic material recovered directly from environmental or clinical samples. Default: disabled">
                    <option value="false" selected="true">Disabled</option>
                    <option value="true">Enabled</option>
                </param>
                <!-- Regular search: library source options plus the organism name -->
                <when value="false">
                    <expand macro="source_macro">
                        <option value="genomic">Genomic</option>
                        <option value="transcriptomic">Transcriptomic</option>
                        <option value="genomic single cell">Genomic single cell</option>
                        <option value="transcriptomic single cell">Transcriptomic single cell</option>
                        <option value="viral rna">Viral RNA</option>
                        <option value="synthetic">Synthetic</option>
                        <option value="other">Other</option>
                    </expand>
                    <param argument="--organism"
                        type="text"
                        value="Homo sapiens"
                        label="Scientific name of the sample organism"
                        help="The scientific name of a species that is set by binomial nomenclature entails two parts: generic name (or genus name) and specific name">
                        <sanitizer invalid_char="">
                            <valid initial="string.letters">
                                <add value=" "/>
                            </valid>
                        </sanitizer>
                        <validator type="regex" message="This field is compulsory. Please only use letters or whitespace">[a-zA-Z ]+</validator>
                    </param>
                </when>
                <!-- Metagenomic search: only the library source is offered -->
                <when value="true">
                    <expand macro="source_macro">
                        <option value="metagenomic">Metagenomic</option>
                        <option value="metatranscriptomic">Metatranscriptomic</option>
                    </expand>
                </when>
            </conditional>
            <!-- Sequencing platform; every platform gets its own instrument list, and the library layout is only offered for Illumina -->
            <conditional name="conditional_platform">
                <param argument="--platform" type="select" label="Platform" help="Sequencing platform used for generating the reads">
                    <option value="illumina">Illumina</option>
                    <option value="oxford nanopore">Oxford Nanopore</option>
                    <option value="pacbio smrt">PacBio SMRT</option>
                </param>
                <when value="illumina">
                    <expand macro="instrument_macro">
                        <option value="NovaSeq 6000">NovaSeq 6000</option>
                        <option value="HiSeq X Ten">HiSeq X Ten</option>
                        <option value="NextSeq 2000">NextSeq 2000</option>
                        <option value="NextSeq 550">NextSeq 550</option>
                        <option value="NextSeq 500">NextSeq 500</option>
                        <option value="HiSeq 4000">HiSeq 4000</option>
                        <option value="HiSeq 3000">HiSeq 3000</option>
                        <option value="HiSeq 2500">HiSeq 2500</option>
                        <option value="HiSeq 2000">HiSeq 2000</option>
                        <option value="MiSeq">MiSeq</option>
                        <option value="Genome Analyzer IIx">Genome Analyzer IIx</option>
                        <option value="Genome Analyzer">Genome Analyzer</option>
                    </expand>
                    <!-- The help text is kept on a single line so that no indentation whitespace ends up inside the attribute value -->
                    <param argument="--layout" type="select" optional="true" label="Library layout"
                        help="Paired-end reads improve the ability to identify the relative positions of various reads in the genome, making it much more effective than single-end reading in resolving structural rearrangements such as gene insertions, deletions, or inversions. Note: Only available for the Illumina platform">
                        <option value="single">Single reads</option>
                        <option value="paired">Paired reads</option>
                    </param>
                </when>
                <when value="pacbio smrt">
                    <expand macro="instrument_macro">
                        <option value="PacBio RS">PacBio RS</option>
                        <option value="PacBio RS II">PacBio RS II</option>
                        <option value="Sequel">Sequel</option>
                        <option value="Sequel II">Sequel II</option>
                    </expand>
                </when>
                <when value="oxford nanopore">
                    <expand macro="instrument_macro">
                        <option value="MinION">MinION</option>
                        <option value="PromethION">PromethION</option>
                        <option value="GridION">GridION</option>
                    </expand>
                </when>
            </conditional>

            <!-- Optional filter on the library preparation strategy; only passed to pysradb when a value is selected -->
            <param argument="--strategy" type="select" optional="true" label="Library preparation strategy" help="Sequencing technique intended for the library">
                <option value="amplicon">Amplicon</option>
                <option value="atac seq">ATAC-seq</option>
                <option value="bisulfite seq">Bisulfite-seq</option>
                <option value="chia pet">Chromatin interaction analysis with paired-end tag (ChIA-PET)</option>
                <option value="chip">ChIP</option>
                <option value="chip seq">ChIP-seq</option>
                <option value="cts">Coding transcriptome sequencing (CTS)</option>
                <option value="dnase hypersensitivity">DNase-seq</option>
                <option value="est">EST</option>
                <option value="faire seq">FAIRE-seq</option>
                <option value="fl cdna">FL-cDNA</option>
                <option value="gbs">Genotyping-by-sequencing (GBS)</option>
                <option value="hi c">Hi-C</option>
                <option value="medip seq">Methylated DNA immunoprecipitation sequencing (MeDIP-Seq)</option>
                <option value="mbd seq">Methyl-binding domain sequencing (MBD-seq)</option>
                <option value="mnase seq">Micrococcal nuclease sequencing (MNase-seq)</option>
                <option value="mirna seq">miRNA-seq</option>
                <option value="mre seq">Methylation sensitive restriction enzyme sequencing (MRE-seq)</option>
                <option value="ncrna seq">ncRNA-seq</option>
                <option value="other">Other</option>
                <option value="poolclone">POOLCLONE</option>
                <option value="rad seq">Restriction site-associated DNA sequencing (RAD-seq)</option>
                <option value="rip seq">RNA immunoprecipitation sequencing (RIP-seq)</option>
                <option value="rna seq">RNA-seq</option>
                <option value="selex">Systematic evolution of ligands by exponential enrichment (SELEX)</option>
                <option value="synthetic long read">Synthetic long-read sequencing</option>
                <option value="target capture">Target capture sequencing (TCS)</option>
                <option value="tn seq">Tn-seq</option>
                <option value="wcs">Whole chromosome shotgun (WCS)</option>
                <option value="wga">Whole genome amplification (WGA)</option>
                <option value="wgs">Whole genome sequencing (WGS)</option>
                <option value="wxs">Whole exome sequencing (WXS)</option>
            </param>
            <!-- Optional filter on the library selection method; only passed to pysradb when a value is selected -->
            <param argument="--selection" type="select" optional="true" label="Library selection" help="Method used to select and/or enrich the material being sequenced">
                <option value="5 methylcytidine antibody">5-methylcytidine antibody</option>
                <option value="cage">CAGE</option>
                <option value="cdna">cDNA</option>
                <option value="chip">ChIP</option>
                <option value="chip seq">ChIP-seq</option>
                <option value="dnase">DNAse</option>
                <option value="hmpr">Hypomethylated partial restriction (HMPR)</option>
                <option value="hybrid selection">Hybrid selection</option>
                <option value="inverse rrna">Inverse rRNA</option>
                <option value="mbd2 protein methyl cpg binding domain">Methylated DNA binding domain protein 2 (MBD2)</option>
                <option value="mda">Multiple displacement amplification (MDA)</option>
                <option value="mf">Mechanical fragmentation (MF)</option>
                <option value="mnase">Micrococcal nuclease (MNase)</option>
                <!-- NOTE(review): the SRA library selection vocabulary appears to spell this term "MSLL"; confirm that pysradb accepts the value below -->
                <option value="msII">MSII</option>
                <option value="oligo dt">Oligo(dT)</option>
                <!-- The SRA term is "padlock probes capture method" -->
                <option value="padlock probes capture method">Padlock probe capture</option>
                <option value="pcr">PCR</option>
                <option value="polya">PolyA</option>
                <option value="race">Rapid amplification of cDNA ends (RACE)</option>
                <option value="random">Random</option>
                <option value="random pcr">Random PCR</option>
                <option value="reduced representation">Reduced representation</option>
                <option value="repeat fractionation">Repeat fractionation</option>
                <option value="restriction digest">Restriction digest</option>
                <option value="rt pcr">RT-PCR</option>
                <option value="size fractionation">Size fractionation</option>
            </param>
            <!-- Optional filters; the command template only adds each of them when a value is supplied -->
            <section name="advanced" title="Advanced options">
                <param argument="--accession" type="text" value="" optional="true" label="Accession number" help="Relevant study/experiment/sample/run accession number">
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits"/>
                    </sanitizer>
                    <validator type="regex">[0-9a-zA-Z]+</validator>
                </param>
                <param argument="--mbases" type="integer" min="0" value="" optional="true"
                    label="Sample size" help="Size of the sample rounded to the nearest megabase" />
                <!-- The help text is kept on a single line so that no indentation whitespace ends up inside the attribute value -->
                <param argument="--publication-date" type="text" value="" optional="true"
                    label="Publication date"
                    help="Publication date of the run in the format dd-mm-yyyy. If a date range is desired, enter the start date, followed by end date, separated by a colon ':'. Example: 01-01-2010:31-12-2010">
                    <sanitizer invalid_char="">
                        <valid initial="string.digits">
                            <add value=":" />
                            <add value="-" />
                        </valid>
                    </sanitizer>
                    <validator type="regex">[0-9:-]+</validator>
                </param>
            </section>
            <!-- Level of detail, number of entries and choice of additional output files -->
            <section name="output_options" title="Output options">
                <param argument="--verbosity"
                    type="select"
                    label="Level of search result details"
                    help="Default: 2">
                    <option value="0">0: run accession only</option>
                    <option value="1">1: run accession and experiment title</option>
                    <option value="2" selected="true">2: accession numbers, titles and sequencing information</option>
                    <option value="3">3: all available metadata</option>
                </param>
                <param argument="--max"
                    type="integer"
                    min="1"
                    max="10000"
                    value="100"
                    label="Maximum number of entries"
                    help="Note: If the maximum number set is large, querying the SRA database will take significantly longer due to API limits"/>
                <!-- Both choices are referenced by the command template and by the output filters -->
                <param name="output_files"
                    type="select"
                    multiple="true"
                    display="checkboxes"
                    label="Additional output files">
                    <option value="stats" selected="true">Statistics for the search query (--stats)</option>
                    <option value="graphs">Generates graphs to illustrate the search result (--graphs)</option>
                </param>
            </section>
        </inputs>
        <outputs>
            <!-- Metadata table that the command saves as output.tsv -->
            <data name="metadata_file"
                format="tsv"
                from_work_dir="output.tsv"
                label="${tool.name} on ${on_string}: metadata"/>
            <!-- JPG files found in search_plots; only produced when graphs are requested -->
            <collection name="graphs_collection"
                type="list"
                label="${tool.name} on ${on_string}: search plots">
                <discover_datasets pattern="(?P&lt;name&gt;.+)\.jpg" directory="search_plots" format="jpg"/>
                <filter>'graphs' in output_options['output_files']</filter>
            </collection>
            <!-- stats.txt written by the command; only produced when stats are requested -->
            <data name="stats"
                format="txt"
                from_work_dir="stats.txt"
                label="${tool.name} on ${on_string}: stats">
                <filter>'stats' in output_options['output_files']</filter>
            </data>
        </outputs>
        <tests>
            <!-- Test 01: default options.
                 SRA query for "cancer" with both additional outputs (stats and graphs) requested,
                 hence three outputs: metadata table, stats file and the plot collection.
                 The outputs are checked with assertions (line/column counts, text, file sizes)
                 instead of being compared with stored files. -->
            <test expect_num_outputs="3">
                <param name="database" value="sra"/>
                <param name="query" value="cancer"/>
                <conditional name="conditional_mode">
                    <param name="selector" value="false"/>
                    <param name="organism" value="Homo sapiens"/>
                    <param name="source" value="transcriptomic"/>
                </conditional>
                <section name="output_options">
                    <param name="max" value="100"/>
                    <param name="output_files" value="stats,graphs"/>
                </section>
                <output name="metadata_file" ftype="tsv">
                    <assert_contents>
                        <has_n_lines n="101"/>
                        <has_n_columns n="16"/>
                    </assert_contents>
                </output>
                <output name="stats" ftype="txt">
                    <assert_contents>
                        <has_text text="Statistics for the search query:"/>
                        <has_text text="Query keywords: cancer"/>
                    </assert_contents>
                </output>
                <!-- Element names match the fixed file names that the command gives to the converted plots -->
                <output_collection name="graphs_collection" type="list" count="3">
                    <element name="histogram_base_count" ftype="jpg">
                        <assert_contents>
                            <has_size value="82507" delta="10000"/>
                        </assert_contents>
                    </element>
                    <element name="histogram_library" ftype="jpg">
                        <assert_contents>
                            <has_size value="49925" delta="10000"/>
                        </assert_contents>
                    </element>
                    <element name="histogram_publication" ftype="jpg">
                        <assert_contents>
                            <has_size value="46059" delta="10000"/>
                        </assert_contents>
                    </element>
                </output_collection>
            </test>
            <!-- Test 02: Specific search options.
                 No query keywords; the search is narrowed by instrument, strategy, selection,
                 library source, publication date and accession number. -->
            <test expect_num_outputs="2">
                <param name="database" value="sra"/>
                <conditional name="conditional_platform">
                    <param name="platform" value="illumina"/>
                    <param name="instrument" value="NovaSeq 6000"/>
                </conditional>
                <param name="strategy" value="wga"/>
                <param name="selection" value="pcr"/>
                <conditional name="conditional_mode">
                    <param name="selector" value="false"/>
                    <param name="source" value="genomic single cell"/>
                    <param name="organism" value="Homo sapiens"/>
                </conditional>
                <section name="advanced">
                    <param name="publication_date" value="01-11-2022"/>
                    <param name="accession" value="SRX18108950"/>
                    <!-- NOTE(review): "verbosity" is declared in the "output_options" section of the inputs, not in "advanced";
                         confirm whether this value is applied at all before moving it, since the expected files may depend on the current behaviour -->
                    <param name="verbosity" value="3"/>
                </section>
                <output name="metadata_file" file="test_02.tabular" ftype="tsv"/>
                <output name="stats" file="test_02_stats.txt" ftype="txt"/>
            </test>
            <!-- Test 03: ENA database and verbosity 1.
                 Query "cancer" limited to 50 entries, with the wxs strategy and random selection. -->
            <test expect_num_outputs="2">
                <param name="database" value="ena"/>
                <param name="query" value="cancer"/>
                <param name="strategy" value="wxs"/>
                <param name="selection" value="random"/>
                <section name="output_options">
                    <param name="verbosity" value="1"/>
                    <param name="max" value="50"/>
                </section>
                <conditional name="conditional_mode">
                    <param name="selector" value="false"/>
                    <param name="source" value="genomic"/>
                    <param name="organism" value="Homo sapiens"/>
                </conditional>
                <output name="metadata_file" file="test_03.tabular" ftype="tsv"/>
                <output name="stats" file="test_03.txt" ftype="txt"/>
            </test>
            <!-- Test 04: Metagenomic mode enabled.
                 Query "escherichia" with the metagenomic library source and a sample size (mbases) of 100. -->
            <test expect_num_outputs="2">
                <param name="database" value="sra"/>
                <param name="query" value="escherichia"/>
                <section name="advanced">
                    <param name="mbases" value="100"/>
                </section>
                <conditional name="conditional_mode">
                    <param name="selector" value="true"/>
                    <param name="source" value="metagenomic"/>
                </conditional>
                <output name="metadata_file" file="test_04.tabular" ftype="tsv"/>
                <output name="stats" file="test_04.txt" ftype="txt"/>
            </test>
            <!-- Test 05: Oxford Nanopore data.
                 Query "cancer" on the MinION instrument, with the rna seq strategy and cdna selection. -->
            <test expect_num_outputs="2">
                <param name="database" value="sra"/>
                <param name="query" value="cancer"/>
                <conditional name="conditional_platform">
                    <param name="platform" value="oxford nanopore"/>
                    <param name="instrument" value="MinION"/>
                </conditional>
                <param name="strategy" value="rna seq"/>
                <param name="selection" value="cdna"/>
                <conditional name="conditional_mode">
                    <param name="selector" value="false"/>
                    <param name="source" value="transcriptomic"/>
                    <param name="organism" value="Homo sapiens"/>
                </conditional>
                <output name="metadata_file" file="test_05.tabular" ftype="tsv"/>
                <output name="stats" file="test_05_stats.txt" ftype="txt"/>
            </test>
            <!-- Test 06: Different species.
                 Query "stress" for Arabidopsis thaliana on the NextSeq 500 instrument,
                 with verbosity 3 and at most 20 entries. -->
            <test expect_num_outputs="2">
                <param name="database" value="sra"/>
                <param name="query" value="stress"/>
                <conditional name="conditional_platform">
                    <param name="platform" value="illumina"/>
                    <param name="instrument" value="NextSeq 500"/>
                </conditional>
                <param name="strategy" value="rna seq"/>
                <param name="selection" value="random"/>
                <conditional name="conditional_mode">
                    <param name="selector" value="false"/>
                    <param name="source" value="transcriptomic"/>
                    <param name="organism" value="Arabidopsis thaliana"/>
                </conditional>
                <section name="output_options">
                    <param name="verbosity" value="3"/>
                    <param name="max" value="20"/>
                </section>
                <output name="metadata_file" file="test_06.tabular" ftype="tsv"/>
                <output name="stats" file="test_06_stats.txt" ftype="txt"/>
            </test>
        </tests>
        <help><![CDATA[
.. class:: infomark

**Purpose**

pysradb retrieves metadata, such as run accession numbers, from SRA and ENA based on multiple criteria:

- Database:	SRA or ENA
- Query keywords
- Accession number:	a relevant study/experiment/sample/run accession number
- Organism: scientific name of the sample organism
- Library layout: paired or single-end reads
- Sample size: rounded to the nearest megabase
- Publication date
- Sequencing platform: Illumina, Nanopore or PacBio
- Library selection: method used to select and/or enrich the material being sequenced
- Library source: Type of source material that is being sequenced
- Library preparation strategy: sequencing technique intended for the library

------

.. class:: infomark

**Outputs**

pysradb generates three different output types:

- Raw metadata file
- Statistics for the search query
- Graphs to illustrate the search results

------

.. class:: infomark

**Sequencing instruments**

**Comparisons between HiSeq instruments**

HiSeq 3000/4000 provides some improvements with respect to the previous model, HiSeq 2500:

- HiSeq 3000/4000 generate up to 1.5 Tb and 5 Tb reads per run.
- HiSeq 3000/4000 use patterned flow cell technology originally developed for HiSeq X platforms.
- HiSeq 3000/4000 run 3 times faster and yield 65% more reads per lane. 
- HiSeq 3000/4000 patterned flow cells contain billions of nanowells at fixed, known positions on the flow cell. The structured organization enables clustering at higher densities compared to non-pattern HiSeq designs.

However, the HiSeq 3000/4000 also have some limitations with respect to HiSeq 2500:

- HiSeq 3000/4000 are not recommended for low complexity sequencing. Applications such as non-unique amplicons, 16S, are currently not recommended. 
- Libraries with low complexity within the first 25 bases of a read are not expected to produce high quality data.
- Library size restrictions. Libraries that are too long can result in polyclonal clusters that span more than 1 well, these will not pass filter. Smaller libraries will preferentially amplify with Illumina's new kinetic exclusion amplification so tight library distributions ranging from 300-500 bp are recommended.
- Very low tolerance for adapter dimers. Even as little as 1% adapter dimer can take up ~6% of sequencing reads, 10% contamination will take up 84% of reads. Illumina recommends you keep adapter contamination below 0.5% of your entire library.
- Higher duplication rates as compared to HiSeq 2500.
- Low quality read 2 (entire HiSeq 3000 install base is affected).

HiSeq 3000/4000 support DNA-seq, RNA-seq, ChIP-Seq, mate-pair, small RNA and exome library preparation. Any library preparation where there is enough sequence diversity is currently supported. Amplicon, 16S and applications with low sequencing diversity are currently not supported on the HiSeq 3000/4000.

HiSeq 2500 is considered the most reliable model according to different sources.

**What type of read quality is expected from the HiSeq 3000/4000 ?**

- 2 x 50bp  ≥85% bases > Q30
- 2 x 75bp  ≥80% bases > Q30
- 2 x 150bp ≥75% of bases >Q30

**What is the difference between MiSeq and HiSeq?**

HiSeq and MiSeq platforms are among the most widely used platforms to study microbial communities, but the two platforms differ in the length and number of reads.
MiSeq can run 600 cycles to produce 200 million 300 bp reads, on the other hand, HiSeq 2500 can run 500 cycles to produce 120 million 250 bp.

**What are the differences between HiSeq and NovaSeq?**

The Illumina NovaSeq provides a massive upgrade in sequencing throughput compared to the HiSeq 4000. There are more stringent library requirements and requires a 
larger sample size. Due to the vast amount of data produced by the NovaSeq and the known issue of index swapping, unique dual-indexed libraries are required.

**What are the characteristics of HiSeq X instruments?**

- HiSeq X is recommended for whole genome sequencing only (including whole bisulfite sequencing). This means that it is not adequate for RNA-seq, exome, ChIP-seq or small RNA-seq applications. 
- Plant and animal samples can be sequenced on the HiSeq X.
- Expected coverage is over 30x, or approximately 375 million reads per lane, when loading one sample per lane.
- HiSeq X Ten uses 2x150 base pair read configurations and has slightly better GC coverage than the HiSeq 2500.

**What are the differences between MiSeq and NextSeq?**

The NextSeq Series of systems delivers the power of high-throughput sequencing with the simplicity of a desktop sequencer. NextSeq instruments represent an improvement when compared with MiSeq, despite generating shorter reads (150 bp, compared to 250 bp for MiSeq). NextSeq is recommended in
the following applications & methods:

- Exome & large panel sequencing (enrichment-based)
- Single-cell profiling (scRNA-Seq, scDNA-Seq, oligo tagging assays)
- Transcriptome sequencing (total RNA-Seq, mRNA-Seq, gene expression profiling)
- Methylation sequencing	
- Metagenomic profiling (shotgun metagenomics, metatranscriptomics)
- Cell-free sequencing & liquid biopsy analysis

Regarding the maximum number of reads per run, MiSeq can generate 25 million, vs 400 million generated by the NextSeq 550 instrument. MiSeq is recommended for sequencing samples of low diversity.

**What are the differences between HiSeq and NextSeq?**

The main technical difference between HiSeq and NextSeq is the number of dyes each machine uses. HiSeq uses traditional color coding with four different dyes, while NextSeq uses two dyes. This does not make any practical difference in terms of data quality, but the trend in Illumina sequencers is towards reducing the number of dyes.

**What is the difference between NextSeq and NovaSeq?**

The NovaSeq 6000 system offers deep and broad coverage and is recommended for large whole-genome sequencing (human, plant, animal) projects. It generates 250 bp reads,
with 20 billion maximum reads per run. NovaSeq 6000 instruments have no application-based restrictions.

**Illumina maximum read-length  summary**

- MiSeq: between 300 and 600 bp
- NextSeq: 300 bp
- HiSeq 2500: between 250 and 500 bp (depending on the software)
- HiSeq 4000: 150 bp
- HiSeq X: 150 bp

**Nanopore models - single-molecule ultra-long-read sequencing**

Nanopore sequencing provides the longest read lengths, from 500 bp to the current record of 2.3 Mb, with 10-30-kb genomic libraries being common. Even after error correction, sequencing error rates of corrected nanopore reads (1.5-9%) are still higher than those of corrected PacBio reads (<1%). 

**PacBio SMRT instruments - single-molecule long-read low-error rate sequencing**

PacBio Sequel II CLR sequencing represents a major advancement in sequencing throughput over previous PacBio platforms with the production of more sequencing data and longer reads versus RS II and the Sequel I.
The PacBio HiFi sequencing method yields highly accurate long-read sequencing datasets with read lengths averaging 10-25 kb and accuracies greater than 99.5%.


    ]]>    </help>
    <expand macro="citations" />
</tool>