comparison fastp.xml @ 24:f875da9d433c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastp commit 3214ce465671de3c15da94f71f2c3558f332d39a
author iuc
date Sun, 19 Oct 2025 07:27:04 +0000
parents 1c183b0a6cfd
children
comparison
equal deleted inserted replaced
23:1c183b0a6cfd 24:f875da9d433c
1 <tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy2" profile="23.1"> 1 <tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.1">
2 <description>fast all-in-one preprocessing for FASTQ files</description> 2 <description>fast all-in-one preprocessing for FASTQ files</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="biotools" /> 6 <expand macro="biotools" />
144 144
145 $filter_options.low_complexity_filter.enable_low_complexity_filter 145 $filter_options.low_complexity_filter.enable_low_complexity_filter
146 146
147 #if str($filter_options.low_complexity_filter.complexity_threshold): 147 #if str($filter_options.low_complexity_filter.complexity_threshold):
148 -Y $filter_options.low_complexity_filter.complexity_threshold 148 -Y $filter_options.low_complexity_filter.complexity_threshold
149 #end if
150
151
152 ## Duplicate analysis / deduplication
153
154 $duplicated_reads.handling_options.eval_dups
155 #if not str($duplicated_reads.handling_options.eval_dups):
156 $duplicated_reads.handling_options.dedup
149 #end if 157 #end if
150 158
151 159
152 ## Read Modification Options 160 ## Read Modification Options
153 161
271 <section name="low_complexity_filter" title="Low complexity filtering options" expanded="True"> 279 <section name="low_complexity_filter" title="Low complexity filtering options" expanded="True">
272 <param name="enable_low_complexity_filter" argument="-y" type="boolean" truevalue="-y" falsevalue="" checked="false" label="Enable low complexity filter" help="The complexity is defined as the percentage of bases that differ from the next base. Default is No."/> 280 <param name="enable_low_complexity_filter" argument="-y" type="boolean" truevalue="-y" falsevalue="" checked="false" label="Enable low complexity filter" help="The complexity is defined as the percentage of bases that differ from the next base. Default is No."/>
273 <param name="complexity_threshold" argument="-Y" type="integer" optional="true" label="Complexity threshold" help="Threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required."/> 281 <param name="complexity_threshold" argument="-Y" type="integer" optional="true" label="Complexity threshold" help="Threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required."/>
274 </section> 282 </section>
275 </section> 283 </section>
276 284 <section name="duplicated_reads" title="Duplicated Reads Options">
285 <conditional name="handling_options">
286 <param name="eval_dups" type="select" label="Enable duplicated reads analysis" help="If enabled, calculate and report read duplication statistics. Enabling this is also a prerequisite for optional deduplication of reads. Duplicate detection relies exclusively on exact identity between read sequences (both for SE and PE data). It also increases tool memory requirements and running time moderately. NOTE: the default (no duplication analysis) is different from the command-line tool.">
287 <option value="">Enable</option>
288 <option value="--dont_eval_duplication" selected="true">Disable (--dont_eval_duplication)</option>
289 </param>
290 <when value="--dont_eval_duplication" />
291 <when value="">
292 <param argument="--dedup" type="boolean" truevalue="--dedup" falsevalue="" label="Drop duplicate reads/pairs"/>
293 </when>
294 </conditional>
295 </section>
277 <!-- Read Modification Options --> 296 <!-- Read Modification Options -->
278 <section name="read_mod_options" title="Read Modification Options"> 297 <section name="read_mod_options" title="Read Modification Options">
279 <conditional name="polyg_tail_trimming"> 298 <conditional name="polyg_tail_trimming">
280 <param name="trimming_select" type="select" label="PolyG tail trimming" help="This feature is enabled for NextSeq/NovaSeq data by default. NextSeq/NovaSeq data is detected by the machine ID in the FASTQ records."> 299 <param name="trimming_select" type="select" label="PolyG tail trimming" help="This feature is enabled for NextSeq/NovaSeq data by default. NextSeq/NovaSeq data is detected by the machine ID in the FASTQ records.">
281 <option value="" selected="true">Automatic trimming for Illumina NextSeq/NovaSeq data</option> 300 <option value="" selected="true">Automatic trimming for Illumina NextSeq/NovaSeq data</option>
310 <param argument="--umi_prefix" type="text" optional="true" label="UMI prefix" help="If specified, an underscore will be used to connect prefix and UMI (e.g. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default."/> 329 <param argument="--umi_prefix" type="text" optional="true" label="UMI prefix" help="If specified, an underscore will be used to connect prefix and UMI (e.g. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default."/>
311 </section> 330 </section>
312 331
313 <section name="cutting_by_quality_options" title="Per read cutting by quality options" expanded="True"> 332 <section name="cutting_by_quality_options" title="Per read cutting by quality options" expanded="True">
314 <conditional name="cut_front_select"> 333 <conditional name="cut_front_select">
315 <param argument="--cut_front" type="select" truevalue="--cut_front" falsevalue="" checked="false" label="Cut by quality in front (5')" help="Enable per read cutting by quality in front (5'), default is disabled (WARNING: this will interfere deduplication for both PE/SE data)."> 334 <param argument="--cut_front" type="select" truevalue="--cut_front" falsevalue="" checked="false" label="Cut by quality in front (5')" help="Enable per read cutting by quality in front (5'). (WARNING: this will interfere with deduplication of both PE/SE data if performed with downstream tools.)">
316 <option value="--cut_front">Yes</option> 335 <option value="--cut_front">Yes</option>
317 <option value="" selected="true">No</option> 336 <option value="" selected="true">No</option>
318 </param> 337 </param>
319 <when value="--cut_front"> 338 <when value="--cut_front">
320 <param argument="--cut_front_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Cutting window size for cut front" help="The size of the sliding window for sliding window trimming."/> 339 <param argument="--cut_front_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Cutting window size for cut front" help="The size of the sliding window for sliding window trimming."/>
322 </when> 341 </when>
323 <when value=""> 342 <when value="">
324 </when> 343 </when>
325 </conditional> 344 </conditional>
326 <conditional name="cut_tail_select"> 345 <conditional name="cut_tail_select">
327 <param argument="--cut_tail" type="select" truevalue="--cut_tail" falsevalue="" checked="false" label="Cut by quality in tail (3')" help="Enable per read cutting by quality in tail (3'), default is disabled (WARNING: this will interfere deduplication for SE data)."> 346 <param argument="--cut_tail" type="select" truevalue="--cut_tail" falsevalue="" checked="false" label="Cut by quality in tail (3')" help="Enable per read cutting by quality in tail (3'). (WARNING: this will interfere with deduplication of SE data if performed with downstream tools.)">
328 <option value="--cut_tail">Yes</option> 347 <option value="--cut_tail">Yes</option>
329 <option value="" selected="true">No</option> 348 <option value="" selected="true">No</option>
330 </param> 349 </param>
331 <when value="--cut_tail"> 350 <when value="--cut_tail">
332 <param argument="--cut_tail_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Cutting window size for cut tail" help="The size of the sliding window for sliding window trimming."/> 351 <param argument="--cut_tail_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Cutting window size for cut tail" help="The size of the sliding window for sliding window trimming."/>
334 </when> 353 </when>
335 <when value=""> 354 <when value="">
336 </when> 355 </when>
337 </conditional> 356 </conditional>
338 <conditional name="cut_right_select"> 357 <conditional name="cut_right_select">
339 <param argument="--cut_right" type="select" truevalue="--cut_right" falsevalue="" checked="false" label="Cut right by quality (sliding window from front to tail)" help="Move a sliding window from front to tail; when a window with mean quality &lt; threshold is found, drop the bases in that window and everything to its right, then stop."> 358 <param argument="--cut_right" type="select" truevalue="--cut_right" falsevalue="" checked="false" label="Cut right by quality (sliding window from front to tail)" help="Move a sliding window from front to tail; when a window with mean quality &lt; threshold is found, drop the bases in that window and everything to its right, then stop. (WARNING: this will interfere with deduplication of SE data if performed with downstream tools.)">
340 <option value="--cut_right">Yes</option> 359 <option value="--cut_right">Yes</option>
341 <option value="" selected="true">No</option> 360 <option value="" selected="true">No</option>
342 </param> 361 </param>
343 <when value="--cut_right"> 362 <when value="--cut_right">
344 <param argument="--cut_right_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Cutting window size for cut right" help="The size of the sliding window for sliding window trimming."/> 363 <param argument="--cut_right_window_size" type="integer" optional="true" value="4" min="1" max="1000" label="Cutting window size for cut right" help="The size of the sliding window for sliding window trimming."/>
394 </conditional> 413 </conditional>
395 <output name="out1" ftype="fastqsanger" file="out1.fq"/> 414 <output name="out1" ftype="fastqsanger" file="out1.fq"/>
396 <output name="report_html"> 415 <output name="report_html">
397 <assert_contents> 416 <assert_contents>
398 <has_text text="fastp report"/> 417 <has_text text="fastp report"/>
418 <not_has_text text="duplication rate:"/>
399 </assert_contents> 419 </assert_contents>
400 </output> 420 </output>
401 <output name="report_json"> 421 <output name="report_json">
402 <assert_contents> 422 <assert_contents>
403 <has_text text="fastp report"/> 423 <has_text text="fastp report"/>
424 <not_has_text text="&quot;duplication&quot;:"/>
404 </assert_contents> 425 </assert_contents>
405 </output> 426 </output>
406 </test> 427 </test>
407 <!-- 2. Ensure paired collection works --> 428 <!-- 2. Ensure paired collection works -->
408 <test expect_num_outputs="4"> 429 <test expect_num_outputs="4">
419 <param name="report_json" value="False" /> 440 <param name="report_json" value="False" />
420 </section> 441 </section>
421 <output name="report_html"> 442 <output name="report_html">
422 <assert_contents> 443 <assert_contents>
423 <has_text text="fastp report"/> 444 <has_text text="fastp report"/>
445 <not_has_text text="duplication rate:"/>
424 </assert_contents> 446 </assert_contents>
425 </output> 447 </output>
426 <output_collection name="output_paired_coll" type="paired"> 448 <output_collection name="output_paired_coll" type="paired">
427 <element name="forward" value="out_bwa1.fq" ftype="fastqsanger"/> 449 <element name="forward" value="out_bwa1.fq" ftype="fastqsanger"/>
428 <element name="reverse" value="out_bwa2.fq" ftype="fastqsanger"/> 450 <element name="reverse" value="out_bwa2.fq" ftype="fastqsanger"/>
530 <assert_contents> 552 <assert_contents>
531 <has_text text="fastp report"/> 553 <has_text text="fastp report"/>
532 </assert_contents> 554 </assert_contents>
533 </output> 555 </output>
534 </test> 556 </test>
535 <!-- 8. Ensure JSON report output works --> 557 <!-- 8. Ensure enabling duplicate analysis works -->
536 <test expect_num_outputs="2"> 558 <test expect_num_outputs="3">
537 <conditional name="single_paired"> 559 <conditional name="single_paired">
538 <param name="single_paired_selector" value="single"/> 560 <param name="single_paired_selector" value="single"/>
539 <param name="in1" ftype="fastqsanger" value="R1.fq"/> 561 <param name="in1" ftype="fastqsanger" value="R1.fq"/>
540 </conditional> 562 </conditional>
541 <section name="output_options"> 563 <section name="duplicated_reads">
542 <param name="report_html" value="False"/> 564 <conditional name="handling_options">
565 <param name="eval_dups" value=""/>
566 </conditional>
543 </section> 567 </section>
544 <output name="out1" ftype="fastqsanger" file="out1.fq"/> 568 <output name="out1" ftype="fastqsanger" file="out1.fq"/>
569 <output name="report_html">
570 <assert_contents>
571 <has_text text="fastp report"/>
572 <has_text text="duplication rate:"/>
573 </assert_contents>
574 </output>
545 <output name="report_json"> 575 <output name="report_json">
546 <assert_contents> 576 <assert_contents>
547 <has_text text="fastp report"/> 577 <has_text text="fastp report"/>
578 <has_text text="&quot;duplication&quot;:"/>
548 </assert_contents> 579 </assert_contents>
549 </output> 580 </output>
550 </test> 581 </test>
551 <!-- 9. Ensure polyG trimming works --> 582 <!-- 9. Ensure polyG trimming works -->
552 <test expect_num_outputs="3"> 583 <test expect_num_outputs="3">
790 <has_text text="--cut_right_window_size 6"/> 821 <has_text text="--cut_right_window_size 6"/>
791 <has_text text="--cut_right_mean_quality 7"/> 822 <has_text text="--cut_right_mean_quality 7"/>
792 </assert_contents> 823 </assert_contents>
793 </output> 824 </output>
794 </test> 825 </test>
826 <!-- 18. Ensure deduplication works -->
827 <test expect_num_outputs="2">
828 <conditional name="single_paired">
829 <param name="single_paired_selector" value="single"/>
830 <param name="in1" ftype="fastqsanger" value="R1_with_dup.fq"/>
831 </conditional>
832 <section name="duplicated_reads">
833 <conditional name="handling_options">
834 <param name="eval_dups" value=""/>
835 <param name="dedup" value="true"/>
836 </conditional>
837 </section>
838 <section name="output_options">
839 <param name="report_html" value="false"/>
840 </section>
841 <output name="out1" ftype="fastqsanger" file="out1.fq"/>
842 <output name="report_json">
843 <assert_contents>
844 <has_text text="fastp report"/>
845 <has_text text="&quot;duplication&quot;:"/>
846 </assert_contents>
847 </output>
848 </test>
795 </tests> 849 </tests>
796 <help><![CDATA[ 850 <help><![CDATA[
797 .. class:: infomark 851 .. class:: infomark
798 852
799 **What it does** 853 **What it does**
801 fastp_ is a tool designed to provide fast all-in-one preprocessing for FASTQ files. This tool is developed in C++ with multithreading supported to 855 fastp_ is a tool designed to provide fast all-in-one preprocessing for FASTQ files. This tool is developed in C++ with multithreading supported to
802 afford high performance. 856 afford high performance.
803 857
804 *Features* 858 *Features*
805 859
806 1. Filter out bad reads (too low quality, too short, or too many N...) 860 1. Filter out bad (too low quality, too short, or too many N...) and/or duplicate reads
807 861
808 2. Cut low quality bases for each read at its 5' and 3' ends by evaluating the mean quality in a sliding window (like Trimmomatic but faster) 862 2. Cut low quality bases for each read at its 5' and 3' ends by evaluating the mean quality in a sliding window (like Trimmomatic but faster)
809 863
810 3. Trim all reads in front and tail 864 3. Trim all reads in front and tail
811 865