Mercurial > repos > iuc > umi_tools_count
comparison umi-tools_counts.xml @ 0:8db56d2f8b72 draft
planemo upload commit c79a5f4a05156bb2a6035a844aa9ad8f0e59ecb5
author | iuc |
---|---|
date | Thu, 21 Jun 2018 15:20:14 -0400 |
parents | |
children | 3c932ad4a174 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8db56d2f8b72 |
---|---|
<tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.0">
    <!-- Galaxy wrapper for `umi_tools count`: counts UMIs per gene from a sorted BAM file. -->
    <description>Count UMIs from BAM files</description>
    <macros>
        <!-- The "requirements" and "citations" macros expanded in this tool are not defined here;
             presumably they (and the @VERSION@ token) come from macros.xml. TODO confirm. -->
        <import>macros.xml</import>
        <!-- Sanitizer for BAM tag names: every character that is not a letter or a digit is removed. -->
        <xml name="sanitize_tag">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits" />
            </sanitizer>
        </xml>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## Link the BAM and its index into the working directory under fixed names,
        ## so the index sits next to the BAM as input.bam.bai.
        ln -s '${input_bam}' 'input.bam' &&
        ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' &&

        umi_tools count
            -I input.bam
            ## Boolean flags are emitted unquoted: their falsevalue is an empty string and
            ## quoting it would pass a literal '' argument to umi_tools.
            $bam_paired
            --extract-umi-method='$barcodes.extract_umi_method.value'
            #if $barcodes.extract_umi_method == 'read_id':
                --umi-separator='$barcodes.delimiter'
            #else if $barcodes.extract_umi_method == 'tag':
                --umi-tag='$barcodes.umi_tag'
                ## Only pass a cell tag when one was actually entered.
                #if $barcodes.cell_tag:
                    --cell-tag='$barcodes.cell_tag'
                #end if
            #end if
            --method='$grouping_method.value'
            --edit-distance-threshold='$hamming_distance'
            --mapping-quality='$advanced.mapping_quality'
            ## Counting is always performed per gene (hard-coded).
            --per-gene
            $wide_format_cell_counts
            $advanced.per_contig
            $advanced.per_cell
            #if $advanced.gene_tag:
                --gene-tag='$advanced.gene_tag'
            #end if
            #if $advanced.skip_tags_regex.value:
                --skip-tags-regex='$advanced.skip_tags_regex'
            #end if
            ## A seed of 0 means "no explicit seed": the option is omitted.
            #if $advanced.random_seed != 0:
                --random-seed='$advanced.random_seed'
            #end if
            -S '$out_counts'
            -L '$out_log'
    ]]></command>
    <inputs>
        <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" />

        <param name="bam_paired" type="boolean" truevalue="--paired" falsevalue="" checked="false"
               label="Bam is paired-end"
               help="both read pairs will be output. This will also force the use of the template length to determine reads with the same mapping coordinates." />

        <!-- Where the UMI (and optionally the cell barcode) is stored for each read. -->
        <conditional name="barcodes">
            <param name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?">
                <option value="read_id" selected="true">Barcodes are contained at the end of the read separated by a delimiter</option>
                <option value="tag">Barcodes are contained in tags</option>
                <option value="umis">Barcodes were extracted using umis</option>
            </param>
            <when value="read_id">
                <param name="delimiter" type="text" label="Delimiter between read id and the UMI" value="_">
                    <!-- Dedicated sanitizer: the letters/digits-only "sanitize_tag" macro would strip
                         the default "_" (and ":") and leave an empty separator. -->
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits">
                            <add value="_"/>
                            <add value=":"/>
                            <add value="-"/>
                            <add value="."/>
                        </valid>
                    </sanitizer>
                </param>
            </when>
            <when value="tag">
                <param name="umi_tag" type="text" label="Tag which contains the UMI">
                    <expand macro="sanitize_tag" />
                </param>
                <param name="cell_tag" type="text" label="Tag which contains the cell barcode">
                    <expand macro="sanitize_tag" />
                </param>
            </when>
            <when value="umis"></when>
        </conditional>

        <param name="grouping_method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods 'unique' and 'percentile' group identical UMIs, however 'cluster', 'adjacency', and 'directional' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. Percentile: Reads group share the same UMI, and UMIs with counts &lt; 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as cluster, but considers only directly adjacent UMIs in the cluster. Directional: Identify cluster of connected UMIs based on hamming distance and umi.">
            <option value="unique">Unique</option>
            <option value="percentile">Percentile</option>
            <option value="cluster">Cluster</option>
            <option value="adjacency">Adjacency</option>
            <option value="directional" selected="true">Directional</option>
        </param>

        <param name="hamming_distance" type="integer" label="Edit distance threshold" min="0" value="1" />
        <param name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="false" label="Output a matrix of genes and cells, instead of a flat file" />

        <section name="advanced" title="Extra parameters">
            <param name="mapping_quality" type="integer" min="0" value="0" label="Minimum mapping quality" />
            <!-- Currently hard-coded parameter. Leave here if useful to future wrapper -->
            <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either -\-gene-tag or -\-per-contig option" /> -->
            <param name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="">
                <expand macro="sanitize_tag" />
            </param>
            <param name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="">
                <!-- Regex characters allowed through to the command line; anything else is removed. -->
                <sanitizer invalid_char="">
                    <valid initial="string.letters,string.digits">
                        <add value="!="/>
                        <add value="-"/>
                        <add value="_"/>
                        <add value="."/>
                        <add value="?"/>
                        <add value="|"/> <!-- pipe; needed by the regex used in the tests of this tool -->
                        <add value="&lt;"/> <!-- left triangle bracket -->
                        <add value="&gt;"/> <!-- right triangle bracket -->
                        <add value="["/> <!-- left square bracket -->
                        <add value="]"/> <!-- right square bracket -->
                        <add value="^"/> <!-- caret -->
                        <add value="{"/> <!-- left curly -->
                        <add value="}"/> <!-- right curly -->
                        <add value="("/> <!-- left parenthesis -->
                        <add value=")"/> <!-- right parenthesis -->
                    </valid>
                </sanitizer>
            </param>
            <param name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false"
                   label="Deduplicate per contig (field 3 in BAM; RNAME)"
                   help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." />
            <param name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="false"
                   label="Group reads only if they have the same cell barcode." />
            <!-- 0 (the default) means the random seed option is not passed at all. -->
            <param name="random_seed" type="integer" min="0" value="0" label="Random Seed" />
        </section>
    </inputs>
    <outputs>
        <!-- Written via -S (counts table) and -L (log) in the command above. -->
        <data name="out_counts" format="tsv" />
        <data name="out_log" format="txt" />
    </outputs>
    <tests>
        <!-- All tests share the same input BAM, seed, grouping method, gene tag and skip regex;
             they differ only in the per-cell and wide-format switches. -->
        <test><!--count_single_gene_tag:-->
            <param name="input_bam" value="chr19_gene_tags.bam" />
            <param name="random_seed" value="123456789" />
            <param name="grouping_method" value="directional" />
            <param name="gene_tag" value="XF" />
            <param name="skip_tags_regex" value="^[__|Unassigned]" />
            <param name="extract_umi_method" value="umis" />
            <output name="out_counts" value="count_single_gene_tag.tsv" />
        </test>
        <test><!--count_single_cells_gene_tag:-->
            <param name="input_bam" value="chr19_gene_tags.bam" />
            <param name="random_seed" value="123456789" />
            <param name="grouping_method" value="directional" />
            <param name="gene_tag" value="XF" />
            <param name="skip_tags_regex" value="^[__|Unassigned]" />
            <param name="per_cell" value="true" /><!-- new -->
            <param name="extract_umi_method" value="umis" />
            <output name="out_counts" value="count_single_cells_gene_tag.tsv" />
        </test>
        <test><!--count_single_cells_wide_gene_tag:-->
            <param name="input_bam" value="chr19_gene_tags.bam" />
            <param name="random_seed" value="123456789" />
            <param name="grouping_method" value="directional" />
            <param name="gene_tag" value="XF" />
            <param name="skip_tags_regex" value="^[__|Unassigned]" />
            <param name="per_cell" value="true" /><!-- new -->
            <param name="extract_umi_method" value="umis" />
            <param name="wide_format_cell_counts" value="true" />
            <output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" />
        </test>
    </tests>
    <help><![CDATA[

UMI Tools count - Count reads per gene from BAM using UMIs
----------------------------------------------------------

Purpose
-------

The purpose of this command is to count the number of reads per gene based
on the mapping co-ordinate and the UMI attached to the read.


It is assumed that the FASTQ files were processed with extract_umi.py
before mapping and thus the UMI is the last word of the read name. e.g:

@HISEQ:87:00000000_AATT

where AATT is the UMI sequence.

If you have used an alternative method which does not separate the
read id and UMI with a "_", such as bcl2fastq which uses ":", you can
specify the separator, or if your UMIs are encoded in a tag you can also specify this.

    ]]></help>
    <expand macro="citations" />
</tool>