Mercurial > repos > galaxy-australia > pbccs
changeset 0:f159c3da2ffc draft default tip
"planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/pbccs commit 5ba0af2de1e1ce7b7a8ed3eb4042f5fec366c234"
author | galaxy-australia |
---|---|
date | Fri, 18 Mar 2022 02:10:52 +0000 |
parents | |
children | |
files | README.md macros.xml pbccs.xml test-data/input.subreads.bam test-data/log.txt test-data/output.bam test-data/report.txt |
diffstat | 7 files changed, 207 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Fri Mar 18 02:10:52 2022 +0000 @@ -0,0 +1,7 @@ +ccs combines multiple subreads of the same SMRTbell molecule using a statistical model to produce one highly accurate consensus sequence, also called a HiFi read, along with base quality values. This tool powers the Circular Consensus Sequencing workflow in SMRT Link. + +This Galaxy wrapper uses ccs version 3.4.1 so that it can process the older BAM files generated by the PacBio RS II chemistry. + +The latest version of ccs (pbccs) is 6.2.0 and can be found on [conda](https://anaconda.org/bioconda/pbccs). + +See the [documentation](https://ccs.how/) for more details.
<!-- File: macros.xml (added in changeset 0:f159c3da2ffc). Shared macros and tokens imported by pbccs.xml. -->
<macros>
    <!-- Version of the pbccs package; substituted into the requirement below. -->
    <token name="@TOOL_VERSION@">3.4.1</token>

    <!-- Package dependency of the wrapper. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">pbccs</requirement>
        </requirements>
    </xml>

    <!-- Command used to report the installed ccs version. -->
    <xml name="version_command">
        <version_command>@HEADLESS@ ccs --version</version_command>
    </xml>

    <!-- Shell prefix that sets QT_QPA_PLATFORM to 'offscreen' before the command that follows it. -->
    <token name="@HEADLESS@"><![CDATA[export QT_QPA_PLATFORM='offscreen' &&]]></token>

    <!--
        reStructuredText overview that tool help sections can embed.
        The link is written as an anonymous embedded hyperlink instead of a
        named "here" target: a named target in a shared token collides with
        any same-named target defined by the help text that includes it.
        CDATA is required because the embedded URI uses angle brackets.
    -->
    <token name="@PBCCS_OVERVIEW@"><![CDATA[

**Generate Highly Accurate Single-Molecule Consensus Reads (CCS) Overview**

CCS combines multiple subreads of the same SMRTbell molecule using a statistical model to produce one highly accurate consensus sequence, also called a HiFi read, along with base quality values. This tool powers the Circular Consensus Sequencing workflow in SMRT Link.


CCS works with PacBio subreads bam (subreads.bam) files. For more information about this file format, see `here <https://ccs.how/how-does-ccs-work.html>`__

    ]]></token>

    <!-- Citation attached to the tool. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1016/j.gpb.2015.08.002</citation>
        </citations>
    </xml>
</macros>
<!-- File: pbccs.xml (added in changeset 0:f159c3da2ffc). Galaxy wrapper around the ccs command-line tool. -->
<tool id="pbccs" name="CCS" version="0.1.0" python_template_version="3.5">
    <description>Generate accurate consensus sequences from subreads</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Run inside a scratch directory on a copy of the input so that ccs
        ## reads and writes fixed, predictable file names.
        mkdir -p ./tmp &&
        cp '$input_file' ./tmp/input.bam &&
        cd ./tmp/ &&
        ccs
            ## Use the slots granted by Galaxy; fall back to the previous
            ## hard-coded value of 4 when GALAXY_SLOTS is not set.
            -j \${GALAXY_SLOTS:-4}

            ## The numeric filters are required parameters with defaults, so
            ## they are always passed. Guarding them with a truthiness test
            ## would silently drop an explicit 0 (for example the documented
            ## "0 disables this filter" setting of minIdentity).
            --minLength '$input_filter_options.minLength'
            --maxLength '$input_filter_options.maxLength'
            --minPasses '$input_filter_options.minPasses'
            --minIdentity '$input_filter_options.minIdentity'
            --minSnr '$input_filter_options.minSnr'
            #if $input_filter_options.zmws:
                --zmws '$input_filter_options.zmws'
            #end if

            #if $model_override_options.modelPath:
                --modelPath '$model_override_options.modelPath'
            #end if
            #if $model_override_options.modelSpec:
                --modelSpec '$model_override_options.modelSpec'
            #end if

            --minPredictedAccuracy '$output_filter_options.minPredictedAccuracy'
            --minReadScore '$output_filter_options.minReadScore'
            --maxDropFraction '$output_filter_options.maxDropFraction'

            ## Each boolean expands to its truevalue (the flag itself) or to
            ## an empty string, so it must not be prefixed with the flag again.
            $processing_options.byStrand
            $processing_options.noPolish
            $processing_options.richQVs

            #if $if_log.log_options == "yes":
                --logFile '$log_output'
                --logLevel '$if_log.loglevel'
            #end if
            #if $output_options.reportFile:
                --reportFile ccs_report.txt
            #end if
            input.bam
            output.bam
        && mv output.bam '$bam_output'
        && mv ccs_report.txt '$report_output'
        2>&1

    ]]></command>
    <inputs>
        <param name="input_file" type="data" format="bam" label="Input Subreads" help="Subreads (subreads.bam)"/>
        <section name="input_filter_options" title="Input Filter Options" expanded="False">
            <param argument="--minLength" type="integer" value="10" label="Minimum length" help="Minimum draft length before polishing. [10]" />
            <param argument="--maxLength" type="integer" value="21000" label="Maximum length" help="Maximum draft length before polishing. [21000]"/>
            <param argument="--minPasses" type="integer" value="3" label="Minimum number of subreads" help="Minimum number of subreads required to generate CCS. [3]" />
            <param argument="--minIdentity" type="float" value="0.82" label="Minimum identity" help="Minimum identity of a subread aligned to the draft consensus to use it for polishing. 0 disables this filter. [0.82]" />
            <param argument="--minSnr" type="float" value="2.5" label="Minimum SNR of subreads" help="Minimum SNR of subreads to use for generating CCS [2.5]"/>
            <param argument="--zmws" type="text" value="all" label="Generate CCS for the provided comma-separated hole number ranges only." help="Default=all" />
        </section>
        <section name="model_override_options" title="Model Override Options" expanded="False">
            <param argument="--modelPath" type="data" format="txt" optional="True" label="Model File" help="a model file or directory containing model files." />
            <param argument="--modelSpec" type="text" value="" optional="True" label="Chemistry" help="Name of chemistry or model to use, overriding default selection."/>
        </section>
        <section name="processing_options" title="Processing Options" expanded="False">
            <param argument="--byStrand" type="boolean" truevalue="--byStrand" falsevalue="" checked="false" label="Generate a consensus for each strand." />
            <param argument="--noPolish" type="boolean" truevalue="--noPolish" falsevalue="" checked="false" label="Only output the initial template derived from the POA (faster, less accurate)." />
            <param argument="--richQVs" type="boolean" truevalue="--richQVs" falsevalue="" checked="false" label="Emit dq, iq, and sq rich quality tracks." />
        </section>
        <section name="output_filter_options" title="Output Filter Options" expanded="False">
            <param argument="--minPredictedAccuracy" type="float" value="0.9" label="Minimum predicted accuracy" help="i.e 0.9"/>
            <param argument="--minReadScore" type="float" value="0.75" label="Minimum read score of input subreads" help="i.e 0.75"/>
            <param argument="--maxDropFraction" type="float" value="0.34" label="Maximum fraction of subreads dropped by polishing (not input filters) before skipping ZMW." help="i.e 0.34" />
        </section>
        <section name="output_options" title="Output Files Options" expanded="False">
            <param argument="--reportFile" type="boolean" truevalue="True" falsevalue="False" checked="False" label="write the results report."/>
        </section>
        <conditional name="if_log">
            <param type="select" name="log_options" label="show log option">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <param argument="--logLevel" name="loglevel" type="select" label="Log Level">
                    <option selected="True" value="WARN">WARN</option>
                    <option value="TRACE">TRACE</option>
                    <option value="DEBUG">DEBUG</option>
                    <option value="INFO">INFO</option>
                    <option value="FATAL">FATAL</option>
                </param>
            </when>
            <when value="no">
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="bam" name="bam_output" label="${tool.name} on ${on_string}: CCS reads in BAM format"/>
        <data format="txt" name="report_output" label="${tool.name} on ${on_string}: CCS reads report"/>
        <!-- The log dataset only exists when the user asked for a log. -->
        <data format="txt" name="log_output" label="${tool.name} on ${on_string}: log" >
            <filter>if_log['log_options'] == 'yes' </filter>
        </data>
    </outputs>
    <tests>
        <!--
            Default settings, with logging switched on: log_output is filtered
            out unless log_options is "yes", so asserting it requires enabling
            the log explicitly.
        -->
        <test>
            <param name="input_file" ftype="bam" value="input.subreads.bam"/>
            <conditional name="if_log">
                <param name="log_options" value="yes"/>
            </conditional>
            <output name="bam_output" ftype="bam" file="output.bam" />
            <output name="log_output" ftype="txt" file="log.txt" />
            <output name="report_output" ftype="txt" file="report.txt" />
        </test>
    </tests>
    <help><![CDATA[
@PBCCS_OVERVIEW@

For detailed information on pbccs, click `here <https://github.com/nlhepler/pbccs>`__, or view the `license <https://github.com/nlhepler/pbccs/blob/master/LICENSE>`__.

**Command Documentation**

CCS will generate consensus sequence.


    ]]></help>
    <expand macro="citations"/>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/log.txt Fri Mar 18 02:10:52 2022 +0000 @@ -0,0 +1,14 @@ +ZMW Yield +Success (without retry) -- CCS generated,2,100.00% +Success (with retry) -- CCS generated,0,0.00% +Failed -- Below SNR threshold,0,0.00% +Failed -- No usable subreads,0,0.00% +Failed -- Insert size too long,0,0.00% +Failed -- Insert size too small,0,0.00% +Failed -- Not enough full passes,0,0.00% +Failed -- Too many unusable subreads,0,0.00% +Failed -- CCS did not converge,0,0.00% +Failed -- CCS below minimum predicted accuracy,0,0.00% +Failed -- Unknown error during processing,0,0.00% + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/report.txt Fri Mar 18 02:10:52 2022 +0000 @@ -0,0 +1,14 @@ +ZMW Yield +Success (without retry) -- CCS generated,2,100.00% +Success (with retry) -- CCS generated,0,0.00% +Failed -- Below SNR threshold,0,0.00% +Failed -- No usable subreads,0,0.00% +Failed -- Insert size too long,0,0.00% +Failed -- Insert size too small,0,0.00% +Failed -- Not enough full passes,0,0.00% +Failed -- Too many unusable subreads,0,0.00% +Failed -- CCS did not converge,0,0.00% +Failed -- CCS below minimum predicted accuracy,0,0.00% +Failed -- Unknown error during processing,0,0.00% + +