Mercurial > repos > iuc > umi_tools_group
annotate umi-tools_group.xml @ 11:30c3906fbf43 draft
"planemo upload commit 6ba769440f8f6a62e9ebfac069a30edc541bac0a"
author | iuc |
---|---|
date | Thu, 05 Dec 2019 01:31:32 -0500 |
parents | a24f5b991320 |
children | cf25b50eff0a |
rev | line source |
---|---|
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
1 <tool id="umi_tools_group" name="UMI-tools group" version="@VERSION@.0"> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
2 <description>Group reads based on their UMI</description> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
3 <macros> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
4 <import>macros.xml</import> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
5 </macros> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
6 <expand macro="requirements"> |
11
30c3906fbf43
"planemo upload commit 6ba769440f8f6a62e9ebfac069a30edc541bac0a"
iuc
parents:
10
diff
changeset
|
7 <requirement type="package" version="1.9">samtools</requirement> |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
8 </expand> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
9 <command detect_errors="exit_code"><![CDATA[ |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
10 #if $input.is_of_type("sam"): |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
11 #set $input_file = $input |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
12 #else: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
13 ln -sf '${input}' 'input.bam' && |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' && |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
15 #set $input_file = 'input.bam' |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
16 #end if |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
17 |
1
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
18 umi_tools group |
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
19 --random-seed 0 |
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
20 --extract-umi-method $extract_umi_method |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
21 #if str($extract_umi_method) != 'read_id': |
1
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
22 --umi-separator '$umi_separator' --umi-tag '$umi_tag' |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
23 #end if |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
24 --method $method --edit-distance-threshold $edit_distance_threshold |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
25 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
26 $read_length $whole_contig --subset $subset $per_contig $per_gene |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
27 #if $gene_transcript_map: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
28 --gene-transcript-map '$gene_transcript_map' |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
29 #end if |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
30 #if len(str($gene_tag)) > 0: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
31 --gene-tag '$gene_tag' |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
32 #end if |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
33 #if $group_output: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
34 --group-out '$group_out' |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
35 #end if |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
36 #if $input.is_of_type("sam"): |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
37 --in-sam |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
38 #end if |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
39 --output-bam |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
40 -I '$input_file' -S grouped.bam && |
10
a24f5b991320
"planemo upload commit 5d3fc4232e0e036ac1ed9e2c36adc41d6af4987f"
iuc
parents:
1
diff
changeset
|
41 samtools sort grouped.bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o '$output' -O BAM |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
42 ]]></command> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
43 <inputs> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
44 <param name="input" type="data" format="sam,bam" label="Reads to group in SAM or BAM format" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
45 <param name="extract_umi_method" argument="--extract-umi-method" type="select"> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
46 <option value="read_id" selected="True">Read ID</option> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
47 <option value="tag">Tag</option> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
48 </param> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
49 <param name="group_output" argument="--group-out" type="boolean" truevalue="--group-out" falsevalue="" label="Output a flatfile describing the read groups" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
50 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
51 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
52 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position"> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
53 <option value="unique">Reads group share the exact same UMI</option> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
54 <option value="cluster">Identify clusters based on hamming distance</option> |
1
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
55 <option value="directional">Identify clusters based on distance and counts, restrict network expansion by threshold</option> |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
56 </param> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
57 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (>14bp)" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
58 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
59 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
60 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
61 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as a criterion when deduping" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
62 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" /> |
1
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
63 <param argument="--subset" type="float" min="0.0" max="1.0" value="1.0" label="Only consider a random selection of the reads" /> |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
64 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
65 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
66 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
67 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transcripts" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
68 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
69 </inputs> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
70 <outputs> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
71 <data format="bam" name="output" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
72 <data format="tabular" name="group_out"> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
73 <filter>group_out</filter> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
74 </data> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
75 </outputs> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
76 <tests> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
77 <test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
78 <param name="input" value="group_in2.bam" ftype="bam" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
79 <param name="extract_umi_method" value="read_id" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
80 <param name="paired" value="True" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
81 <param name="method" value="unique" /> |
1
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
82 <output name="output" file="group_out2.bam" ftype="bam" sort="True" /> |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
83 </test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
84 <test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
85 <param name="input" value="group_in3.bam" ftype="bam" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
86 <param name="extract_umi_method" value="read_id" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
87 <param name="group_output" value="True" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
88 <param name="method" value="unique" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
89 <output name="group_out" file="group_out3.tab" /> |
1
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
90 <output name="output" file="group_out3.bam" ftype="bam" sort="True" /> |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
91 </test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
92 <test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
93 <param name="input" value="group_in4.bam" ftype="bam" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
94 <param name="extract_umi_method" value="tag" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
95 <param name="umi_tag" value="BX" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
96 <param name="method" value="unique" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
97 <output name="group_out" file="group_out4.tab" /> |
1
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
98 <output name="output" file="group_out4.bam" ftype="bam" sort="True" /> |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
99 </test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
100 <test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
101 <param name="input" value="group_in5.bam" ftype="bam" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
102 <param name="extract_umi_method" value="read_id" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
103 <param name="umi_tag" value="BX" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
104 <param name="method" value="cluster" /> |
1
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
105 <output name="output" file="group_out5.bam" ftype="bam" sort="True" /> |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
106 </test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
107 <test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
108 <param name="input" value="group_in6.bam" ftype="bam" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
109 <param name="extract_umi_method" value="read_id" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
110 <param name="umi_tag" value="BX" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
111 <param name="method" value="directional" /> |
1
f73f13641bb6
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
0
diff
changeset
|
112 <output name="output" file="group_out6.bam" ftype="bam" sort="True" /> |
0
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
113 </test> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
114 </tests> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
115 <help><![CDATA[ |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
116 umi_tools group - Group reads based on their UMI |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
117 ================================================ |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
118 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
119 Purpose |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
120 ------- |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
121 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
122 The purpose of this command is to identify groups of reads based on |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
123 their genomic coordinate and UMI. It is assumed that the FASTQ files |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
124 were processed with umi_tools extract before mapping and thus the UMI is |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
125 the last word of the read name. e.g: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
126 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
127 @HISEQ:87:00000000_AATT |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
128 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
129 where AATT is the UMI sequence. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
130 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
131 If you have used an alternative method which does not separate the |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
132 read id and UMI with a "_", such as bcl2fastq which uses ":", you can |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
133 specify the separator with the option "--umi-separator=<sep>", |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
134 replacing <sep> with e.g ":". |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
135 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
136 Alternatively, if your UMIs are encoded in a tag, you can specify this |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
137 by setting the option --extract-umi-method=tag and set the tag name |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
138 with the --umi-tag option. For example, if your UMIs are encoded in |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
139 the 'UM' tag, provide the following options: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
140 "--extract-umi-method=tag --umi-tag=UM" |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
141 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
142 By default, reads are considered identical if they have the same start |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
143 coordinate, are on the same strand, and have the same UMI. Optionally, |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
144 splicing status can be considered (see below). |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
145 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
146 The start position of a read is considered to be the start of its alignment |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
147 minus any soft clipped bases. A read aligned at position 500 with |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
148 cigar 2S98M will be assumed to start at position 498. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
149 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
150 Methods |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
151 ------- |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
152 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
153 group can be run with multiple methods to identify groups of reads with |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
154 the same (or similar) UMI(s). All methods start by identifying the |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
155 reads with the same mapping position. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
156 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
157 The simplest method, "unique", groups reads with the exact same |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
158 UMI. The network-based methods, "cluster", "adjacency" and |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
159 "directional", build networks where nodes are UMIs and edges connect |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
160 UMIs with an edit distance <= threshold (usually 1). The groups of |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
161 reads are then defined from the network in a method-specific manner. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
162 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
163 Note that the "percentile" method used with the dedup command is not |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
164 available with group. This is because this method does not group |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
165 similar UMIs as per the network methods. Instead it applies a |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
166 threshold for inclusion of the UMI in the output and excluded UMIs are |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
167 not assigned to a "true" UMI. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
168 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
169 "unique" |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
170 Reads group share the exact same UMI |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
171 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
172 "cluster" |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
173 Identify clusters of connected UMIs (based on hamming distance |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
174 threshold). Each network is a read group |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
175 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
176 "directional" |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
177 Identify clusters of connected UMIs (based on hamming distance |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
178 threshold) and umi A counts >= (2* umi B counts) - 1. Each |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
179 network is a read group. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
180 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
181 The group command can be used to create two types of outfile: a tagged |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
182 BAM or a flatfile describing the read groups |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
183 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
184 To generate the tagged-BAM file, use the option --output-bam and |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
185 provide a filename with the -S option. Alternatively, if you do not |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
186 provide a filename, the bam file will be outputted to the stdout. If |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
187 you have provided the --log/-L option to send the logging output |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
188 elsewhere, you can pipe the output from the group command directly to |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
189 e.g samtools sort like so: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
190 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
191 ``umi_tools group -I inf.bam --group-out=grouped.tsv --output-bam --log=group.log --paired | samtools sort - -o grouped_sorted.bam`` |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
192 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
193 The tagged-BAM file will have two tags per read: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
194 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
195 - UG = Unique_id. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
196 0-indexed unique id number for each group of reads with the same genomic position and UMI or UMIs inferred to be from the same true UMI + errors |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
197 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
198 - BX = Final UMI. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
199 The inferred true UMI for the group |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
200 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
201 To generate the flatfile describing the read groups, include the |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
202 --group-out=<filename> option. The columns of the read groups file are |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
203 below. The first five columns relate to the read. The final 3 columns |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
204 relate to the group. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
205 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
206 - read_id |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
207 read identifier |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
208 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
209 - contig |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
210 alignment contig |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
211 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
212 - position |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
213 Alignment position. Note that this position is not the start position of the read in the BAM file but the start of the read taking into account the read strand and cigar |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
214 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
215 - umi |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
216 The read UMI |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
217 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
218 - umi_count |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
219 The number of times this UMI is observed for reads at the same position |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
220 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
221 - final_umi |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
222 The inferred true UMI for the group |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
223 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
224 - final_umi_count |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
225 The total number of reads within the group |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
226 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
227 - unique_id |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
228 The unique id for the group |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
229 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
230 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
231 Options |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
232 ------- |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
233 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
234 --extract-umi-method (choice) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
235 How are the UMIs encoded in the read? |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
236 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
237 Options are: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
238 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
239 - "read_id" (default) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
240 UMIs contained at the end of the read id, separated as |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
241 specified with --umi-separator option |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
242 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
243 - "tag" |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
244 UMIs contained in a tag, see --umi-tag option |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
245 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
246 --umi-separator (string) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
247 Separator between read id and UMI. See --extract-umi-method above |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
248 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
249 --umi-tag (string) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
250 Tag which contains UMI. See --extract-umi-method above |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
251 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
252 --method (choice, string) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
253 Method used to identify PCR duplicates within reads. All methods |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
254 start by identifying the reads with the same mapping position |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
255 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
256 Options are: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
257 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
258 - "unique" |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
259 Reads in a group share the exact same UMI |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
260 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
261 - "cluster" |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
262 Identify clusters of connected UMIs (based on edit distance |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
263 threshold). Each network is a read group |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
264 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
265 - "directional" |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
266 Identify clusters of connected UMIs (based on edit distance |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
267 threshold) and umi A counts >= (2* umi B counts) - 1. Each |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
268 network is a read group. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
269 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
270 --edit-distance-threshold (int) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
271 For the adjacency and cluster methods the threshold for the |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
272 edit distance to connect two UMIs in the network can be |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
273 increased. The default value of 1 works best unless the UMI is |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
274 very long (>14bp) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
275 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
276 --paired |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
277 BAM is paired end - output both read pairs. This will also |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
278 force the use of the template length to determine reads with |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
279 the same mapping coordinates. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
280 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
281 --spliced-is-unique |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
282 Causes two reads that start in the same position on the same |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
283 strand and having the same UMI to be considered unique if one is |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
284 spliced and the other is not. (Uses the 'N' cigar operation to test |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
285 for splicing) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
286 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
287 --soft-clip-threshold (int) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
288 Mappers that soft clip will sometimes do so rather than mapping a |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
289 spliced read if there is only a small overhang over the exon |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
290 junction. By setting this option, you can treat reads with at least |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
291 this many bases soft-clipped at the 3' end as spliced. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
292 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
293 --multimapping-detection-method (string, choice) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
294 If the sam/bam contains tags to identify multimapping reads, you can |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
295 specify which to use when selecting the best read at a given locus. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
296 Supported tags are "NH", "X0" and "XT". If not specified, the read |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
297 with the highest mapping quality will be selected |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
298 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
299 --read-length |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
300 Use the read length as a criterion when deduping, e.g. for sRNA-Seq |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
301 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
302 --whole-contig |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
303 Consider all alignments to a single contig together. This is useful if |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
304 you have aligned to a transcriptome multi-fasta |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
305 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
306 --subset (float, [0-1]) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
307 Only consider a fraction of the reads, chosen at random. This is useful |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
308 for doing saturation analyses. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
309 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
310 --chrom |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
311 Only consider a single chromosome. This is useful for debugging purposes |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
312 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
313 --per-contig (string) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
314 Deduplicate per contig (field 3 in BAM; RNAME). |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
315 All reads with the same contig will be |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
316 considered to have the same alignment position. This is useful |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
317 if your library prep generates PCR duplicates with non identical |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
318 alignment positions such as CEL-Seq. In this case, you would |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
319 align to a reference transcriptome with one transcript per gene |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
320 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
321 --per-gene (string) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
322 Deduplicate per gene. As above except with this option you can |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
323 align to a reference transcriptome with more than one transcript |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
324 per gene. You need to also provide --gene-transcript-map option. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
325 This will also add a metacontig ('MC') tag to the reads if used |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
326 in conjunction with --output-bam |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
327 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
328 --gene-transcript-map (string) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
329 File mapping genes to transcripts (tab separated), e.g.: |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
330 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
331 gene1 transcript1 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
332 gene1 transcript2 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
333 gene2 transcript3 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
334 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
335 --gene-tag (string) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
336 Deduplicate per gene. As per --per-gene except here the gene |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
337 information is encoded in the bam read tag specified so you do |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
338 not need to supply --gene-transcript-map |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
339 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
340 --group-out (string, filename) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
341 Output a flatfile describing the read groups |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
342 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
343 --output-bam (string, filename) |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
344 Output a tagged bam file to stdout or -S <filename> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
345 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
346 -i, --in-sam/-o, --out-sam |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
347 By default, inputs are assumed to be in BAM format and outputs are written |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
348 in BAM format. Use these options to specify the use of SAM format for |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
349 inputs or outputs. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
350 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
351 -I (string, filename) input file name |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
352 The input file must be sorted and indexed. |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
353 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
354 -S (string, filename) output file name |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
355 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
356 -L (string, filename) log file name |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
357 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
358 Usage |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
359 ----- |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
360 umi_tools group -I infile.bam --output-bam -S grouped.bam -L group.log [OPTIONS] |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
361 |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
362 ]]></help> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
363 <expand macro="citations" /> |
860bc357b678
planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff
changeset
|
364 </tool> |