Mercurial > repos > iuc > umi_tools_extract
view umi-tools_extract.xml @ 18:015cf18f4a15 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit be699fdf0360f0535f52564e5b59be9b84712b14
author | iuc |
---|---|
date | Sat, 28 Sep 2024 16:39:41 +0000 |
parents | 7accf7407811 |
children | 7a7e33d28f62 |
line wrap: on
line source
<tool id="umi_tools_extract" name="UMI-tools extract" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Extract UMI from fastq files</description>
    <!-- Shared tokens and macros used below are imported from macros.xml (not visible here). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
    ## NOTE(review): the gz flag tested below is not assigned in this file;
    ## presumably the command-link token expanded on the next line sets it. Confirm in macros.xml.
    @COMMAND_LINK@
    umi_tools extract
    @FASTQ_BARCODE_EXTRACTION_OPTIONS@
    ## Compressed runs write to local out*.gz files that are moved onto the
    ## Galaxy outputs at the end of the command; uncompressed runs write directly.
    #if $input_type_cond.input_type == 'single':
        #if $gz:
            --stdin=input_single.gz
            --stdout out.gz
        #else
            --stdin=input_single.txt
            --stdout '$out'
        #end if
    #else:
        #if $gz:
            --stdin=input_read1.gz
            --read2-in=input_read2.gz
            --stdout out1.gz
            --read2-out=out2.gz
        #else:
            --stdin=input_read1.txt
            --read2-in=input_read2.txt
            #if $input_type_cond.input_type == 'paired'
                --stdout '$out'
                --read2-out='$out2'
            #else
                --stdout '$out_paired_collection.forward'
                --read2-out='$out_paired_collection.reverse'
            #end if
        #end if
        $input_type_cond.reconcile_pairs
    #end if
    #if $whitelist
        --whitelist='$whitelist'
    #end if
    #if $blacklist
        --blacklist='$blacklist'
    #end if
    ## Render the boolean parameter itself so that its truevalue/falsevalue
    ## string is emitted rather than the raw Python value.
    $error_correct_cell
    #if $quality.quality_selector =='true':
        #if str($quality.quality_filter_threshold) != ''
            --quality-filter-threshold '$quality.quality_filter_threshold'
        #end if
        #if str($quality.quality_filter_mask) != ''
            --quality-filter-mask '$quality.quality_filter_mask'
        #end if
        ## The quality encoding is derived from the datatype of the (forward) input.
        #if $input_type_cond.input_type != 'paired_collection'
            #set input=$input_type_cond.input_read1
        #else
            #set input=$input_type_cond.input_readpair.forward
        #end if
        --quality-encoding
        #if $input.ext.startswith("fastqillumina")
            phred64
        #else if $input.ext.startswith("fastqsolexa")
            solexa
        #else
            phred33
        #end if
    #end if
    @LOG@
    #if $gz:
        #if $input_type_cond.input_type == 'single':
            && mv out.gz '$out'
        #else if $input_type_cond.input_type == 'paired'
            && mv out1.gz '$out'
            && mv out2.gz '$out2'
        #else
            && mv out1.gz '$out_paired_collection.forward'
            && mv out2.gz '$out_paired_collection.reverse'
        #end if
    #end if
    ]]></command>
    <inputs>
        <expand macro="input_types">
            <param argument="--reconcile-pairs" type="boolean" truevalue="--reconcile-pairs" falsevalue="" checked="false" label="Allow unpaired reads"
                help="Allow the presence of reads in read2 input that are not present in read1 input. This allows cell barcode filtering of read1s without considering read2s" />
        </expand>
        <expand macro="fastq_barcode_extraction_options_macro"/>
        <param argument="--whitelist" type="data" optional="true" format="txt,tabular,tsv" label="Allowlist of accepted barcodes" />
        <param argument="--blacklist" type="data" optional="true" format="txt,tabular,tsv" label="Denylist of rejected barcodes" />
        <param argument="--error-correct-cell" type="boolean" truevalue="--error-correct-cell" falsevalue="" checked="false" label="Apply correction to cell barcodes?"
            help="This only applies if your barcode file has two columns output from the umi_tools whitelist command" />
        <conditional name="quality">
            <param name="quality_selector" type="select" label="Enable quality filter?" >
                <option value="false">No</option>
                <option value="true">Yes</option>
            </param>
            <when value="false">
            </when>
            <when value="true">
                <param argument="--quality-filter-threshold" label="Phred score threshold" type="integer" value="" optional="true"
                    help="Remove reads where any UMI base quality score falls below this threshold" />
                <param argument="--quality-filter-mask" label="Mask UMI bases below threshold" type="integer" value="" optional="true"
                    help="If a UMI base has a quality below this threshold, replace the base with 'N'" />
            </when>
        </conditional>
        <expand macro="log_input_macro"/>
    </inputs>
    <outputs>
        <!-- Plain datasets are produced for single and paired inputs; a paired collection otherwise. -->
        <data name="out" format_source="input_read1" label="${tool.name} on ${on_string}: Reads" >
            <filter>input_type_cond['input_type'] in ['single', 'paired']</filter>
        </data>
        <data name="out2" format_source="input_read2" label="${tool.name} on ${on_string}: Reads2" >
            <filter>input_type_cond['input_type'] == 'paired'</filter>
        </data>
        <collection name="out_paired_collection" type="paired" label="${tool.name} on ${on_string}: Reads">
            <data name="forward" format_source="input_readpair" />
            <data name="reverse" format_source="input_readpair" />
            <filter>input_type_cond['input_type'] == 'paired_collection'</filter>
        </collection>
        <expand macro="log_output_macro"/>
    </outputs>
    <tests>
        <!-- Single-end input with the quality filter enabled -->
        <test expect_num_outputs="2">
            <conditional name="input_type_cond">
                <param name="input_type" value="single" />
                <param name="input_read1" value="t_R1.fastq" ftype="fastqsanger" />
                <param name="bc_pattern" value="XXXNNN" />
            </conditional>
            <conditional name="extract_method_cond">
                <param name="prime3" value="true" />
            </conditional>
            <param name="quality_selector" value="true" />
            <param name="quality_filter_threshold" value="10" />
            <!-- NOTE(review): no input named quality_encoding is declared in this file; the command derives the encoding from the input datatype. Confirm against macros.xml and drop this param if it is stale. -->
            <param name="quality_encoding" value="phred33" />
            <param name="log" value="true"/>
            <output name="out" file="out_SE.fastq" ftype="fastqsanger" />
            <output name="out_log" >
                <assert_contents>
                    <has_text text="Input Reads: 100" />
                    <has_text text="umi quality: 28" />
                    <has_text text="Reads output: 72" />
                </assert_contents>
            </output>
        </test>
        <!-- Paired, gzip-compressed input -->
        <test expect_num_outputs="3">
            <conditional name="input_type_cond">
                <param name="input_type" value="paired" />
                <param name="input_read1" value="t_R1.fastq.gz" ftype="fastqsanger.gz" />
                <param name="input_read2" value="t_R2.fastq.gz" ftype="fastqsanger.gz" />
                <param name="bc_pattern" value="NNNXXX" />
            </conditional>
            <param name="log" value="true"/>
            <output name="out" file="out_R1.fastq.gz" decompress="true" lines_diff="2" ftype="fastqsanger.gz" />
            <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" ftype="fastqsanger.gz" />
            <output name="out_log" >
                <assert_contents>
                    <has_text text="Input Reads: 100" />
                    <has_text text="Reads output: 100" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="4">
            <conditional name="input_type_cond">
                <param name="input_type" value="paired_collection" />
                <!-- same as before, but uncompressed -->
                <param name="paired_type" value="no" />
                <param name="input_readpair">
                    <collection type="paired" >
                        <element name="forward" ftype="fastqsanger" value="t_R1.fastq" />
                        <element name="reverse" ftype="fastqsanger" value="t_R2.fastq" />
                    </collection>
                </param>
                <param name="bc_pattern" value="NNNXXX" />
            </conditional>
            <param name="log" value="true"/>
            <output_collection name="out_paired_collection" type="paired">
                <element name="forward" file="out_R1.fastq" ftype="fastqsanger" />
                <element name="reverse" file="out_R2.fastq" ftype="fastqsanger" />
            </output_collection>
            <output name="out_log" >
                <assert_contents>
                    <has_text text="Input Reads: 100" />
                    <has_text text="Reads output: 100" />
                </assert_contents>
            </output>
        </test>
        <!-- Paired input filtered against a barcode allowlist, string barcode pattern -->
        <test expect_num_outputs="3">
            <conditional name="input_type_cond">
                <param name="input_type" value="paired" />
                <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastqsanger.gz" />
                <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastqsanger.gz" />
                <param name="bc_pattern" value="CCCCCCNNNNNNNNNN" />
            </conditional>
            <param name="extract_method" value="string" />
            <param name="whitelist" value="scrb_seq_barcodes" />
            <param name="log" value="true"/>
            <output name="out2" file="scrb_extract.fastq.gz" decompress="true" ftype="fastqsanger.gz" />
        </test>
        <test expect_num_outputs="3"><!-- same as above but with regex barcode-->
            <conditional name="input_type_cond">
                <param name="input_type" value="paired" />
                <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastqsanger.gz" />
                <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastqsanger.gz" />
                <!-- Angle brackets of the named groups must be escaped inside an attribute value. -->
                <param name="bc_pattern" value="^(?P&lt;cell_1&gt;.{6})(?P&lt;umi_1&gt;.{10})" />
            </conditional>
            <param name="extract_method" value="regex" />
            <param name="whitelist" value="scrb_seq_barcodes" />
            <param name="log" value="true"/>
            <output name="out2" file="scrb_extract.fastq.gz" decompress="true" ftype="fastqsanger.gz" />
        </test>
        <test expect_num_outputs="2"><!-- CelSeq2 example -->
            <conditional name="input_type_cond">
                <param name="input_type" value="paired" />
                <param name="input_read1" value="read_R1.200.gz" ftype="fastqsanger.gz" />
                <param name="input_read2" value="read_R2.200.gz" ftype="fastqsanger.gz" />
                <param name="bc_pattern" value="NNNNNNCCCCCC" />
            </conditional>
            <param name="extract_method" value="string" />
            <output name="out" file="read_R1.200_extracted.fastq.gz" ftype="fastqsanger.gz" decompress="true" lines_diff="1" />
            <output name="out2" file="read_R2.200_extracted.fastq.gz" ftype="fastqsanger.gz" decompress="true" lines_diff="1" />
        </test>
    </tests>
    <help><![CDATA[
extract - Extract UMI from fastq
================================

Extract UMI barcode from a read and add it to the read name, leaving
any sample barcode in place.

Can deal with paired end reads and UMIs split across the paired ends.
Can also optionally extract cell barcodes and append these to the read
name also. See the section below for an explanation for how to encode
the barcode pattern(s) to specify the position of the UMI +/- cell
barcode.

Filtering and correcting cell barcodes
--------------------------------------

``umi_tools extract`` can optionally filter cell barcodes against a
user-supplied whitelist (``--whitelist``). If a whitelist is not
available for your data, e.g. if you have performed droplet-based
scRNA-Seq, you can use the whitelist tool.

Cell barcodes which do not match the whitelist (user-generated or
automatically generated) can also be optionally corrected using the
``--error-correct-cell`` option.

The whitelist should be in the following format (tab-separated)::

    AAAAAA	AGAAAA
    AAAATC
    AAACAT
    AAACTA	AAACTN,GAACTA
    AAATAC
    AAATCA	GAATCA
    AAATGT	AAAGGT,CAATGT

Where column 1 is the whitelisted cell barcodes and column 2 is
the list (comma-separated) of other cell barcodes which should be
corrected to the barcode in column 1. If the ``--error-correct-cell``
option is not used, this column will be ignored. Any additional columns
in the whitelist input, such as the counts columns from the output of
umi_tools whitelist, will be ignored.

@FASTQ_BARCODE_EXTRACTION_HELP@
    ]]></help>
    <expand macro="citations" />
</tool>