diff porechop.xml @ 0:24822689acf8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/porechop commit b8cc84454aedcdbe22d385c1d7067fab599b9090
author iuc
date Tue, 18 Sep 2018 16:24:49 -0400
parents
children 93d623d9979c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/porechop.xml	Tue Sep 18 16:24:49 2018 -0400
@@ -0,0 +1,154 @@
+<tool id="porechop" name="Porechop" version="0.2.3">
+    <description>adapter trimmer for Oxford Nanopore reads</description>
+    <requirements>
+        <requirement type="package" version="0.2.3_seqan2.1.1">porechop</requirement>
+    </requirements>
+    <version_command>porechop --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+porechop
+    -i '$input_file'
+    --format '$format'
+    --barcode_threshold '$barcode_binning_settings.barcode_threshold'
+    --barcode_diff '$barcode_binning_settings.barcode_diff'
+    $barcode_binning_settings.require_two_barcodes
+    $barcode_binning_settings.discard_unassigned
+    --adapter_threshold '$adapter_search_settings.adapter_threshold'
+    --check_reads '$adapter_search_settings.check_reads'
+    --scoring_scheme '$adapter_search_settings.scoring_scheme'
+    --end_size '$end_adapter_settings.end_size'
+    --min_trim_size '$end_adapter_settings.min_trim_size'
+    --extra_end_trim '$end_adapter_settings.extra_end_trim'
+    --end_threshold '$end_adapter_settings.end_threshold'
+    $middle_adapter_settings.no_split
+    $middle_adapter_settings.discard_middle
+    --middle_threshold '$middle_adapter_settings.middle_threshold'
+    --extra_middle_trim_good_side '$middle_adapter_settings.extra_middle_trim_good_side'
+    --extra_middle_trim_bad_side '$middle_adapter_settings.extra_middle_trim_bad_side'
+    --min_split_read_size '$middle_adapter_settings.min_split_read_size'
+    -o 'out.$format'
+
+    ]]></command>
+    <inputs>
+        <param name="input_file" type="data" format="fasta,fastq" label="Input FASTA/FASTQ" help="FASTA/FASTQ of input reads" />
+        <param name="format" type="select" label="Output format for the reads" help="Output format for the reads - if auto, the format will be chosen based on the output filename or the input read format">
+            <option selected="True" value="fasta">FASTA</option>
+            <option value="fastq">FASTQ</option>
+            <option value="fasta.gz">FASTA.gz</option>
+            <option value="fastq.gz">FASTQ.gz</option>
+        </param>
+        <section name="barcode_binning_settings" title="Barcode binning settings">
+            <param argument="--barcode_threshold" type="float" min="0" max="100" value="75.0" optional="True" label="Percent identity for binning"
+                   help="A read must have at least this percent identity to a barcode to be binned (default: 75.0)"/>
+            <param argument="--barcode_diff" type="float" min="0" max="100" value="5.0" optional="True" label="Minimum difference in identity"
+                   help="If the difference between a read's best barcode identity and its second-best barcode identity is less than this value, it will not be put in a barcode bin (to exclude cases which are too close to call) (default: 5.0)"/>
+            <param argument="--require_two_barcodes" type="boolean" checked="false" truevalue="--require_two_barcodes" falsevalue="" label="Require two matches for binning"
+                   help="Reads will only be put in barcode bins if they have a strong match for the barcode on both their start and end (default: a read can be binned with a match at its start or end)"/>
+            <param argument="--discard_unassigned" type="boolean" checked="false" truevalue="--discard_unassigned" falsevalue="" label="Discard unassigned reads"
+                   help="Discard unassigned reads (instead of creating a 'none' bin) (default: False)"/>
+        </section>
+        <section name="adapter_search_settings" title="Adapter search settings">
+            <param argument="--adapter_threshold" type="float" min="0" max="100" value="90.0" optional="True" label="Minimum identity"
+                   help="An adapter set has to have at least this percent identity to be labelled as present and trimmed off (0 to 100) (default: 90.0)"/>
+            <param argument="--check_reads" type="integer" min="0" value="10000" optional="True" label="Number of alligned reads"
+                   help="This many reads will be aligned to all possible adapters to determine which adapter sets are present (default: 10000)"/>
+            <param argument="--scoring_scheme" type="text" value="3,-6,-5,-2" label="Scoring scheme"
+                   help="Comma-delimited string of alignment scores: match, mismatch, gap open, gap extend">
+                <validator type="regex" message="Scoring scheme must be comma-separated string of four positive/negative integers">^((-?\d+[,]){3})-?\d+$</validator>
+            </param>
+        </section>
+        <section name="end_adapter_settings" title="End adapter settings">
+            <param argument="--end_size" type="integer" min="0" value="150" optional="True" label="Number of bases"
+                   help="The number of base pairs at each end of the read which will be searched for adapter sequences (default: 150)"/>
+            <param argument="--min_trim_size" type="integer" min="0" value="4" optional="True" label="Minimum trim length"
+                   help="Adapter alignments smaller than this will be ignored (default: 4)"/>
+            <param argument="--extra_end_trim" type="integer" min="0" value="2" optional="True" label="Extra bases trimmed"
+                   help="This many additional bases will be removed next to adapters found at the ends of reads (default: 2)"/>
+            <param argument="--end_threshold" type="float" min="0" max="100" value="75.0" optional="True" label="Minimum percent identity"
+                   help="Adapters at the ends of reads must have at least this percent identity to be removed (0 to 100) (default: 75.0)"/>
+        </section>
+        <section name="middle_adapter_settings" title="Middle adapter settings">
+            <param argument="--no_split" type="boolean" checked="false" truevalue="--no_split" falsevalue="" label="Skip splitting"
+                   help="Skip splitting reads based on middle adapters (default: split reads when an adapter is found in the middle)"/>
+            <param argument="--discard_middle" type="boolean" checked="false" truevalue="--discard_middle" falsevalue="" label="Discard reads with middle adapter"
+                   help="Reads with middle adapters will be discarded (default: reads with middle adapters are split) (required for reads to be used with Nanopolish, this option is on by default when outputting reads into barcode bins)"/>
+            <param argument="--middle_threshold" type="float" min="0" max="100" value="85.0" optional="True" label="Minimum percent identity"
+                   help="Adapters in the middle of reads must have at least this percent identity to be found (0 to 100) (default: 85.0)"/>
+            <param argument="--extra_middle_trim_good_side" type="integer" min="0" value="10" optional="True" label="Extra trimming good side"
+                   help="This many additional bases will be removed next to middle adapters on their 'good' side (default: 10)"/>
+            <param argument="--extra_middle_trim_bad_side" type="integer" min="0" value="100" optional="True" label="Extra trimming bad side"
+                   help="This many additional bases will be removed next to middle adapters on their 'bad' side (default: 100)"/>
+            <param argument="--min_split_read_size" type="integer" min="0" value="1000" optional="True" label="Minimum length reads post-split"
+                   help="Post-split read pieces smaller than this many base pairs will not be outputted (default: 1000)"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="fasta" from_work_dir="out.*" label="${tool.name} on ${on_string}: Trimmed">
+            <change_format>
+                <when input="format" value="fastq" format="fastq"/>
+                <when input="format" value="fasta.gz" format="fasta.gz"/>
+                <when input="format" value="fastq.gz" format="fastq.gz"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
+            <param name="format" value="fasta"/>
+            <output name="outfile" ftype="fasta" file="out.fasta"/>
+        </test>
+        <test>
+            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
+            <param name="format" value="fastq"/>
+            <output name="outfile" ftype="fastq" file="out.fastq"/>
+        </test>
+        <test>
+            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
+            <param name="format" value="fasta.gz"/>
+            <output name="outfile" ftype="fasta.gz" file="out.fasta.gz" compare="sim_size"/>
+        </test>
+        <test>
+            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
+            <param name="format" value="fastq.gz"/>
+            <output name="outfile" ftype="fastq.gz" file="out.fastq.gz" compare="sim_size"/>
+        </test>
+        <test>
+            <param name="input_file" ftype="fasta" value="test_format.fasta"/>
+            <param name="format" value="fasta"/>
+            <param name="barcode_threshold" value="70"/>
+            <param name="barcode_diff" value="4"/>
+            <param name="require_two_barcodes" value="True"/>
+            <param name="discard_unassigned" value="True"/>
+            <param name="end_size" value="100"/>
+            <param name="min_trim_size" value="2"/>
+            <param name="extra_end_trim" value="1"/>
+            <param name="end_threshold" value="80"/>
+            <param name="discard_middle" value="True"/>
+            <param name="middle_threshold" value="90"/>
+            <param name="extra_middle_trim_good_size" value="3"/>
+            <param name="extra_middle_trim_bad_size" value="30"/>
+            <param name="min_split_read_size" value="1500"/>
+            <output name="outfile" ftype="fasta" file="out_advanced.fasta"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Porechop is a tool for finding and removing adapters from Oxford Nanopore reads.
+Adapters on the ends of reads are trimmed off, and when a read has an adapter in
+its middle, it is treated as chimeric and chopped into separate reads. Porechop
+performs thorough alignments to effectively find adapters, even at low sequence identity.
+
+Porechop also supports demultiplexing of Nanopore reads that were barcoded with
+the Native Barcoding Kit, PCR Barcoding Kit or Rapid Barcoding Kit.
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+            @misc{rrwick2017,
+                author = {Wick, Ryan},
+                year = {2017},
+                title = {Porechop},
+                publisher = {GitHub},
+                journal = {GitHub repository},
+                url = {https://github.com/rrwick/Porechop},
+            }
+        </citation>
+    </citations>
+</tool>