Mercurial > repos > iuc > umi_tools_count
view umi-tools_counts.xml @ 11:9570563fb686 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit be699fdf0360f0535f52564e5b59be9b84712b14
author | iuc |
---|---|
date | Sat, 28 Sep 2024 16:40:55 +0000 |
parents | e654095ab143 |
children | 71ad4a56c40c |
line wrap: on
line source
<tool id="umi_tools_count" name="UMI-tools count" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper around "umi_tools count".
        Takes one SAM/BAM dataset and writes one tabular counts dataset.
        Optionally rewrites the header line of the counts file so that every
        barcode-like column name gets a label prefix.
        All tokens and expanded macros used below come from macros.xml,
        which is not part of this file.
    -->
    <description>performs quantification of UMIs from BAM files</description>
    <expand macro="bio_tools"/>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <!-- TODO see comment in LINK_SAM_BAM_INPUT -->
        <requirement type="package" version="1.12">samtools</requirement>
        <!-- sed performs the optional header relabelling at the end of the command -->
        <requirement type="package" version="4.7">sed</requirement>
    </expand>
    <command detect_errors="exit_code"><![CDATA[
        #import re
        ## The token below is defined in macros.xml; it is presumed to prepare the
        ## input and to define the input_file variable used by -I (confirm there).
        @LINK_SAM_BAM_INPUT@
        umi_tools count
            ## Boolean flag with a fixed truevalue: left unquoted on purpose so that
            ## the unchecked state does not pass an empty '' argument to umi_tools.
            $wide_format_cell_counts
            @BARCODE_OPTIONS@
            @UMI_GROUPING_OPTIONS@
            @SC_OPTIONS@
            @SAMBAM_OPTIONS@
            @ADVANCED_OPTIONS@
            -I '$input_file'
            -S '$out_counts'
            @LOG@

        #if str($cond_extra.prepender) != "none":
            #if str($cond_extra.prepender) == "string":
                #set $replacer = str($cond_extra.custom_label)
            #else
                ## Dataset name: drop everything after the last '.', then squash every
                ## run of non-word characters into a single underscore.
                #set $replacer = re.sub('[^\w\_]+', '_', str($input.element_identifier.rsplit('.',1)[0]))
            #end if
            ## Header line only: prefix each whole-word run of A/C/G/T with "<label>_".
            && sed -i -r '1s|\b([ACGT]+)\b|'"$replacer"'_\1|g' '$out_counts'
        #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="sam,bam" label="Reads to count in SAM or BAM format" help="Please use the samtools sort tool to ensure a correct BAM input" />
        <param argument="--wide-format-cell-counts" name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="true" label="Output a matrix of genes and cells, instead of a flat file" />
        <expand macro="barcode_options_macro"/>
        <expand macro="umi_grouping_options_macro"/>
        <expand macro="sambam_options_macro"/>
        <expand macro="sc_options_macro"/>
        <expand macro="advanced_options_macro"/>
        <!-- Optional relabelling of the output header; handled by sed in the command block -->
        <conditional name="cond_extra" >
            <param name="prepender" type="select" label="Prepend a label to all column headers" help="This preserves uniqueness when merging with other files with the same headers. Note: filename must not contain a '.' character" >
                <option value="none" selected="true" >No modifications</option>
                <option value="string">Custom Label</option>
                <option value="dataset name">Dataset Name</option>
            </param>
            <when value="none"></when>
            <when value="dataset name"></when>
            <when value="string">
                <param name="custom_label" type="text" label="Label to Prepend" >
                    <!-- Only letters, digits, '-', '_' and '.' survive; anything else is removed -->
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits">
                            <add value="-"/>
                            <add value="_"/>
                            <add value="."/>
                        </valid>
                    </sanitizer>
                </param>
            </when>
        </conditional>
        <expand macro="log_input_macro"/>
    </inputs>
    <outputs>
        <data name="out_counts" format="tabular" />
        <expand macro="log_output_macro"/>
    </outputs>
    <tests>
        <test><!--count_single_gene_tag:-->
            <param name="input" value="chr19_gene_tags.bam" />
            <section name="advanced">
                <param name="random_seed" value="123456789" />
            </section>
            <section name="sc">
                <param name="gene_tag" value="XF" />
                <param name="skip_tags_regex" value="^[__|Unassigned]" />
                <param name="per_cell" value="false" />
            </section>
            <conditional name="bc">
                <param name="extract_umi_method" value="umis" />
            </conditional>
            <section name="umi">
                <param name="method" value="directional" />
            </section>
            <param name="wide_format_cell_counts" value="false" />
            <output name="out_counts" value="count_single_gene_tag.tsv" />
        </test>
        <test><!--count_single_gene_tag .. with sam input-->
            <param name="input" value="chr19_gene_tags.sam" />
            <section name="advanced">
                <param name="random_seed" value="123456789" />
            </section>
            <section name="sc">
                <param name="gene_tag" value="XF" />
                <param name="skip_tags_regex" value="^[__|Unassigned]" />
                <param name="per_cell" value="false" />
            </section>
            <conditional name="bc">
                <param name="extract_umi_method" value="umis" />
            </conditional>
            <section name="umi">
                <param name="method" value="directional" />
            </section>
            <param name="wide_format_cell_counts" value="false" />
            <output name="out_counts" value="count_single_gene_tag.tsv" />
        </test>
        <test><!--count_single_cells_gene_tag:-->
            <param name="input" value="chr19_gene_tags.bam" />
            <section name="advanced">
                <param name="random_seed" value="123456789" />
            </section>
            <section name="sc">
                <param name="gene_tag" value="XF" />
                <param name="skip_tags_regex" value="^[__|Unassigned]" />
                <param name="per_cell" value="true" />
            </section>
            <conditional name="bc">
                <param name="extract_umi_method" value="umis" />
            </conditional>
            <section name="umi">
                <param name="method" value="directional" />
            </section>
            <param name="wide_format_cell_counts" value="false" />
            <output name="out_counts" value="count_single_cells_gene_tag.tsv" />
        </test>
        <test><!--count_single_cells_wide_gene_tag:-->
            <param name="input" value="chr19_gene_tags.bam" />
            <section name="advanced">
                <param name="random_seed" value="123456789" />
            </section>
            <section name="sc">
                <param name="gene_tag" value="XF" />
                <param name="skip_tags_regex" value="^[__|Unassigned]" />
                <param name="per_cell" value="true" />
            </section>
            <conditional name="bc">
                <param name="extract_umi_method" value="umis" />
            </conditional>
            <section name="umi">
                <param name="method" value="directional" />
            </section>
            <param name="wide_format_cell_counts" value="true" />
            <output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" />
        </test>
        <test><!-- count ENSDARG00000019692, with defaults -->
            <param name="input" value="fc.ENSDARG00000019692.bam" />
            <section name="advanced">
                <param name="random_seed" value="0" />
            </section>
            <section name="sc">
                <param name="gene_tag" value="XT" />
                <param name="per_cell" value="true" />
            </section>
            <section name="umi">
                <param name="method" value="unique" />
            </section>
            <output name="out_counts" value="fc.ENSDARG00000019692.counts" />
        </test>
        <test><!-- count ENSDARG00000019692, relabel string -->
            <param name="input" value="fc.ENSDARG00000019692.bam" />
            <section name="advanced">
                <param name="random_seed" value="0" />
            </section>
            <section name="sc">
                <param name="gene_tag" value="XT" />
                <param name="per_cell" value="true" />
            </section>
            <section name="umi">
                <param name="method" value="unique" />
            </section>
            <conditional name="cond_extra" >
                <param name="prepender" value="string" />
                <param name="custom_label" value="test" />
            </conditional>
            <output name="out_counts" value="fc.ENSDARG00000019692.counts.test" />
        </test>
        <test><!-- count ENSDARG00000019692, relabel filename -->
            <param name="input" value="fc.ENSDARG00000019692.bam" />
            <section name="advanced">
                <param name="random_seed" value="0" />
            </section>
            <section name="sc">
                <param name="gene_tag" value="XT" />
                <param name="per_cell" value="true" />
            </section>
            <section name="umi">
                <param name="method" value="unique" />
            </section>
            <conditional name="cond_extra" >
                <param name="prepender" value="dataset name" />
            </conditional>
            <output name="out_counts" value="fc.ENSDARG00000019692.counts.name" />
        </test>
    </tests>
    <help><![CDATA[
count - Count reads per gene from BAM using UMIs and mapping coordinates
========================================================================

This tool is only designed to work with library preparation
methods where the fragmentation occurs after amplification, as per
most single cell RNA-Seq methods (e.g. 10x, inDrop, Drop-seq, SCRB-seq
and CEL-seq2). Since the precise mapping co-ordinate is no longer
informative for such library preparations, it is simplified to the
gene. This is a reasonable approach providing the number of available
UMIs is sufficiently high and the sequencing depth is sufficiently low
that the probability of two reads from the same gene having the same
UMIs is acceptably low.

If you want to count reads per gene for library preparations which
fragment prior to amplification (e.g. bulk RNA-Seq), please use
``umi_tools dedup`` to remove the duplicate reads as this will use the
full information from the mapping co-ordinate. Then use a read
counting tool such as FeatureCounts or HTSeq to count the reads per
gene.

In the rare case of bulk RNA-Seq using a library preparation method
with fragmentation after amplification, one can still use ``count``
but note that it has not been tested on bulk RNA-Seq.

This tool deviates from group and dedup in that the ``--per-gene``
option is hardcoded on.

@BARCODE_HELP@
@UMI_GROUPING_HELP@
    ]]></help>
    <expand macro="citations" />
</tool>