Mercurial > repos > iuc > umi_tools_count
comparison umi-tools_counts.xml @ 1:3c932ad4a174 draft
planemo upload commit 9a3aeb2c588f9f67824ea5568923ce70b048499a
author | iuc |
---|---|
date | Sat, 14 Jul 2018 06:14:24 -0400 |
parents | 8db56d2f8b72 |
children | b557acca0b56 |
comparison
equal
deleted
inserted
replaced
0:8db56d2f8b72 | 1:3c932ad4a174 |
---|---|
1 <tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.0"> | 1 <tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.1"> |
2 <description>Count UMIs from BAM files</description> | 2 <description>performs quantification of UMIs from BAM files</description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 <xml name="sanitize_tag" > | 5 <xml name="sanitize_tag" > |
6 <sanitizer invalid_char=""> | 6 <sanitizer invalid_char=""> |
7 <valid initial="string.letters,string.digits" /> | 7 <valid initial="string.letters,string.digits" /> |
8 </sanitizer> | 8 </sanitizer> |
9 </xml> | 9 </xml> |
10 </macros> | 10 </macros> |
11 <expand macro="requirements" /> | 11 <expand macro="requirements" /> |
12 <command detect_errors="exit_code"><![CDATA[ | 12 <command detect_errors="exit_code"><![CDATA[ |
13 | |
14 ln -s '${input_bam}' 'input.bam' && | 13 ln -s '${input_bam}' 'input.bam' && |
15 ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' && | 14 ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' && |
16 | 15 |
17 umi_tools count | 16 umi_tools count |
18 -I input.bam | 17 -I input.bam |
19 '$bam_paired' | 18 '$paired' |
20 --extract-umi-method='$barcodes.extract_umi_method.value' | 19 --extract-umi-method='$barcodes.extract_umi_method.value' |
21 #if $barcodes.extract_umi_method == 'read_id': | 20 #if str($barcodes.extract_umi_method) == 'read_id': |
22 --umi-separator='$barcodes.delimiter' | 21 --umi-separator='$barcodes.umi_separator.value' |
23 #else if $barcodes.extract_umi_method == 'tag': | 22 #else if str($barcodes.extract_umi_method) == 'tag': |
24 --umi-tag='$barcodes.umi_tag' | 23 --umi-tag='$barcodes.umi_tag.value' |
25 --cell-tag='$barcodes.cell_tag' | 24 --cell-tag='$barcodes.cell_tag.value' |
26 #end if | 25 #end if |
27 --method='$grouping_method.value' | 26 --method='$method.value' |
28 --edit-distance-threshold='$hamming_distance' | 27 --edit-distance-threshold='$edit_distance_threshold' |
29 --mapping-quality='$advanced.mapping_quality' | 28 --mapping-quality='$advanced.mapping_quality' |
30 --per-gene | 29 --per-gene |
31 $wide_format_cell_counts | 30 '$wide_format_cell_counts' |
32 $advanced.per_contig | 31 '$advanced.per_contig' |
33 '$advanced.per_cell' | 32 '$advanced.per_cell' |
34 #if $advanced.gene_tag: | 33 #if str($advanced.gene_tag) != "": |
35 --gene-tag='$advanced.gene_tag' | 34 --gene-tag='$advanced.gene_tag.value' |
36 #end if | 35 #end if |
37 #if $advanced.skip_tags_regex.value: | 36 #if str($advanced.skip_tags_regex) != "": |
38 --skip-tags-regex='$advanced.skip_tags_regex' | 37 --skip-tags-regex='$advanced.skip_tags_regex.value' |
39 #end if | 38 #end if |
40 #if $advanced.random_seed != 0: | 39 #if '$advanced.random_seed' != 0: |
41 --random-seed='$advanced.random_seed' | 40 --random-seed='$advanced.random_seed' |
42 #end if | 41 #end if |
43 -S '$out_counts' | 42 -S '$out_counts' |
44 -L '$out_log' | |
45 ]]></command> | 43 ]]></command> |
46 <inputs> | 44 <inputs> |
47 <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" /> | 45 <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" /> |
48 | 46 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" checked="false" label="Bam is paired-end" help="both read pairs will be output. This will also force the use of the template length to determine reads with the same mapping coordinates." /> |
49 <param name="bam_paired" type="boolean" truevalue="--paired" falsevalue="" checked="false" | |
50 label="Bam is paired-end" | |
51 help="both read pairs will be output. This will also force the use of the template length to determine | |
52 reads with the same mapping coordinates." /> | |
53 | |
54 <conditional name="barcodes" > | 47 <conditional name="barcodes" > |
55 <param name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" > | 48 <param argument="--extract-umi-method" name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" > |
56 <option value="read_id" selected="true">Barcodes are contained at the end of the read seperated by a delimiter</option> | 49 <option value="read_id" selected="true">Barcodes are contained at the end of the read seperated by a delimiter</option> |
57 <option value="tag" >Barcodes are contained in tags</option> | 50 <option value="tag" >Barcodes are contained in tags</option> |
58 <option value="umis" >Barcodes were extracted using umis</option> | 51 <option value="umis" >Barcodes were extracted using umis</option> |
59 </param> | 52 </param> |
60 <when value="read_id" > | 53 <when value="read_id" > |
61 <param name="delimiter" type="text" label="Delimiter between read id and the UMI" value="_" > | 54 <param argument="--umi-separator" name="umi_separator" type="text" label="Delimiter between read id and the UMI" value="_" > |
62 <expand macro="sanitize_tag" /> | 55 <sanitizer invalid_char="" > |
56 <valid initial="string.punctuation" /> | |
57 </sanitizer> | |
63 </param> | 58 </param> |
64 </when> | 59 </when> |
65 <when value="tag" > | 60 <when value="tag" > |
66 <param name="umi_tag" type="text" label="Tag which contains the UMI" > | 61 <param argument="--umi-tag" name="umi_tag" type="text" label="Tag which contains the UMI" > |
67 <expand macro="sanitize_tag" /> | 62 <expand macro="sanitize_tag" /> |
68 </param> | 63 </param> |
69 <param name="cell_tag" type="text" label="Tag which contains the cell barcode" > | 64 <param argument="--cell-tag" name="cell_tag" type="text" label="Tag which contains the cell barcode" > |
70 <expand macro="sanitize_tag" /> | 65 <expand macro="sanitize_tag" /> |
71 </param> | 66 </param> |
72 </when> | 67 </when> |
73 <when value="umis"></when> | 68 <when value="umis"></when> |
74 </conditional> | 69 </conditional> |
75 | 70 <param argument="--method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods 'unique' and 'percentile' group identical |
76 <param name="grouping_method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods 'unique' and 'percentile' group identical | 71 UMIs, however 'cluster', 'adjacency', and 'directional' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. Percentile: Reads group share the same UMI, and UMIs with |
77 UMIs, however 'cluster', 'adjacency', and 'directional' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. Percentile: Reads group share the same UMI, and UMIs with | 72 counts < 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as cluster, but considers only directly ajacent UMIs in the cluster. Directional: Identify cluster of connected UMIs based on hamming distance and umi." > |
78 counts < 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as cluster, but considers only directly ajacent | |
79 UMIs in the cluster. Directional: Identify cluster of connected UMIs based on hamming distance and umi." > | |
80 <option value="unique" >Unique</option> | 73 <option value="unique" >Unique</option> |
81 <option value="percentile">Percentile</option> | 74 <option value="percentile">Percentile</option> |
82 <option value="cluster">Cluster</option> | 75 <option value="cluster">Cluster</option> |
83 <option value="adjacency">Adjacency</option> | 76 <option value="adjacency">Adjacency</option> |
84 <option value="directional" selected="true" >Directional</option> | 77 <option value="directional" selected="true" >Directional</option> |
85 </param> | 78 </param> |
86 | 79 <param argument="--edit-distance-threshold" name="edit_distance_threshold" type="integer" label="Edit distance threshold" min="0" value="1" /> |
87 <param name="hamming_distance" type="integer" label="Edit distance threshold" min="0" value="1" /> | 80 <param argument="--wide-format-cell-counts" name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="true" label="Output a matrix of genes and cells, instead of a flat file" /> |
88 <param name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="false" label="Output a mtrix of genes and cells, instead of a flat file" /> | |
89 | |
90 <section name="advanced" title="Extra parameters" > | 81 <section name="advanced" title="Extra parameters" > |
91 <param name="mapping_quality" type="integer" min="0" value="0" label="Minimum mapping quality" /> | 82 <param argument="--mapping-quality" name="mapping_quality" type="integer" min="0" value="0" label="Minimum mapping quality" /> |
92 <!-- Currently hard-coded parameter. Leave here if useful to future wrapper --> | 83 <!-- Currently hard-coded parameter. Leave here if useful to future wrapper --> |
93 <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library | 84 <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library |
94 prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either | 85 prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either |
95 -\-gene-tag or -\-per-contig option" /> --> | 86 -\-gene-tag or -\-per-contig option" /> --> |
96 <param name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="" > | 87 <param argument="--gene-tag" name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="XT" > |
97 <expand macro="sanitize_tag" /> | 88 <expand macro="sanitize_tag" /> |
98 </param> | 89 </param> |
99 <param name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" > | 90 <param argument="--skip-tags-regex" name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" > |
100 <sanitizer invalid_char=""> | 91 <sanitizer invalid_char=""> |
101 <valid initial="string.letters,string.digits"> | 92 <valid initial="string.letters,string.digits"> |
102 <add value="!="/> | 93 <add value="!="/> |
103 <add value="-"/> | 94 <add value="-"/> |
104 <add value="_"/> | 95 <add value="_"/> |
114 <add value="("/> <!-- left parenthesis --> | 105 <add value="("/> <!-- left parenthesis --> |
115 <add value=")"/> <!-- right parenthesis --> | 106 <add value=")"/> <!-- right parenthesis --> |
116 </valid> | 107 </valid> |
117 </sanitizer> | 108 </sanitizer> |
118 </param> | 109 </param> |
119 <param name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false" | 110 <param argument="--per-contig" name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false" label="Deduplicate per contig (field 3 in BAM; RNAME)" help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." /> |
120 label="Deduplicate per contig (field 3 in BAM; RNAME)" | 111 <param argument="--per-cell" name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="true" label="Group reads only if they have the same cell barcode." /> |
121 help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." /> | 112 <param argument="--random-seed" name="random_seed" type="integer" min="0" value="0" label="Random Seed" /> |
122 <param name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="false" | 113 </section> |
123 label="Group reads only if they have the same cell barcode." /> | |
124 <param name="random_seed" type="integer" min="0" value="0" label="Random Seed" /> | |
125 </section> | |
126 </inputs> | 114 </inputs> |
127 <outputs> | 115 <outputs> |
128 <data name="out_counts" format="tsv" /> | 116 <data name="out_counts" format="tabular" /> |
129 <data name="out_log" format="txt" /> | |
130 </outputs> | 117 </outputs> |
131 <tests> | 118 <tests> |
132 <test><!--count_single_gene_tag:--> | 119 <test><!--count_single_gene_tag:--> |
133 <param name="input_bam" value="chr19_gene_tags.bam" /> | 120 <param name="input_bam" value="chr19_gene_tags.bam" /> |
134 <param name="random_seed" value="123456789" /> | 121 <param name="random_seed" value="123456789" /> |
135 <param name="grouping_method" value="directional" /> | 122 <param name="method" value="directional" /> |
136 <param name="gene_tag" value="XF" /> | 123 <param name="gene_tag" value="XF" /> |
137 <param name="skip_tags_regex" value="^[__|Unassigned]" /> | 124 <param name="skip_tags_regex" value="^[__|Unassigned]" /> |
138 <param name="extract_umi_method" value="umis" /> | 125 <param name="extract_umi_method" value="umis" /> |
126 <param name="wide_format_cell_counts" value="false" /> | |
127 <param name="per_cell" value="false" /> | |
139 <output name="out_counts" value="count_single_gene_tag.tsv" /> | 128 <output name="out_counts" value="count_single_gene_tag.tsv" /> |
140 </test> | 129 </test> |
141 <test><!--count_single_cells_gene_tag:--> | 130 <test><!--count_single_cells_gene_tag:--> |
142 <param name="input_bam" value="chr19_gene_tags.bam" /> | 131 <param name="input_bam" value="chr19_gene_tags.bam" /> |
143 <param name="random_seed" value="123456789" /> | 132 <param name="random_seed" value="123456789" /> |
144 <param name="grouping_method" value="directional" /> | 133 <param name="method" value="directional" /> |
145 <param name="gene_tag" value="XF" /> | 134 <param name="gene_tag" value="XF" /> |
146 <param name="skip_tags_regex" value="^[__|Unassigned]" /> | 135 <param name="skip_tags_regex" value="^[__|Unassigned]" /> |
147 <param name="per_cell" value="true" /><!-- new --> | 136 <param name="per_cell" value="true" /> |
148 <param name="extract_umi_method" value="umis" /> | 137 <param name="extract_umi_method" value="umis" /> |
138 <param name="wide_format_cell_counts" value="false" /> | |
149 <output name="out_counts" value="count_single_cells_gene_tag.tsv" /> | 139 <output name="out_counts" value="count_single_cells_gene_tag.tsv" /> |
150 </test> | 140 </test> |
151 <test><!--count_single_cells_wide_gene_tag:--> | 141 <test><!--count_single_cells_wide_gene_tag:--> |
152 <param name="input_bam" value="chr19_gene_tags.bam" /> | 142 <param name="input_bam" value="chr19_gene_tags.bam" /> |
153 <param name="random_seed" value="123456789" /> | 143 <param name="random_seed" value="123456789" /> |
154 <param name="grouping_method" value="directional" /> | 144 <param name="method" value="directional" /> |
155 <param name="gene_tag" value="XF" /> | 145 <param name="gene_tag" value="XF" /> |
156 <param name="skip_tags_regex" value="^[__|Unassigned]" /> | 146 <param name="skip_tags_regex" value="^[__|Unassigned]" /> |
157 <param name="per_cell" value="true" /><!-- new --> | 147 <param name="per_cell" value="true" /> |
158 <param name="extract_umi_method" value="umis" /> | 148 <param name="extract_umi_method" value="umis" /> |
159 <param name="wide_format_cell_counts" value="true" /> | 149 <param name="wide_format_cell_counts" value="true" /> |
160 <output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" /> | 150 <output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" /> |
151 </test> | |
152 <test><!-- count ENSDARG00000019692, with defaults --> | |
153 <param name="input_bam" value="fc.ENSDARG00000019692.bam" /> | |
154 <param name="method" value="unique" /> | |
155 <output name="out_counts" value="fc.ENSDARG00000019692.counts" /> | |
161 </test> | 156 </test> |
162 </tests> | 157 </tests> |
163 <help><![CDATA[ | 158 <help><![CDATA[ |
164 | 159 |
165 UMI Tools count - Count reads per gene from BAM using UMIs | 160 UMI Tools count - Count reads per gene from BAM using UMIs |