annotate umi-tools_group.xml @ 0:860bc357b678 draft

planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
author iuc
date Tue, 29 Aug 2017 17:37:21 -0400
parents
children f73f13641bb6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
1 <tool id="umi_tools_group" name="UMI-tools group" version="@VERSION@.0">
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
2 <description>Extract UMI from fastq files</description>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
3 <macros>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
4 <import>macros.xml</import>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
5 </macros>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
6 <expand macro="requirements">
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
7 <requirement type="package" version="1.5">samtools</requirement>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
8 </expand>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
9 <command detect_errors="exit_code"><![CDATA[
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
10 #if $input.is_of_type("sam"):
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
11 #set $input_file = $input
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
12 #else:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
13 ln -sf '${input}' 'input.bam' &&
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' &&
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
15 #set $input_file = 'input.bam'
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
16 #end if
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
17
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
18 umi_tools group --extract-umi-method $extract_umi_method
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
19 #if str($extract_umi_method) != 'read_id':
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
20 --umi-separator '$umi_separator' --umi-tag $umi_tag
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
21 #end if
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
22 --method $method --edit-distance-threshold $edit_distance_threshold
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
23 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
24 $read_length $whole_contig --subset $subset $per_contig $per_gene
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
25 #if $gene_transcript_map:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
26 --gene-transcript-map '$gene_transcript_map'
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
27 #end if
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
28 #if len(str($gene_tag)) > 0:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
29 --gene-tag '$gene_tag'
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
30 #end if
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
31 #if $group_output:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
32 --group-out '$group_out'
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
33 #end if
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
34 #if $input.is_of_type("sam"):
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
35 --in-sam
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
36 #end if
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
37 --output-bam
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
38 -I '$input_file' -S grouped.bam &&
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
39 samtools sort grouped.bam -@ \${GALAXY_SLOTS:-1} -o '$output' -O BAM
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
40 ]]></command>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
41 <inputs>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
42 <param name="input" type="data" format="sam,bam" label="Reads to group in SAM or BAM format" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
43 <param name="extract_umi_method" argument="--extract-umi-method" type="select">
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
44 <option value="read_id" selected="True">Read ID</option>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
45 <option value="tag">Tag</option>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
46 </param>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
47 <param name="group_output" argument="--group-out" type="boolean" truevalue="--group-out" falsevalue="" label="Output a flatfile describing the read groups" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
48 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
49 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
50 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position">
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
51 <option value="unique">Reads group share the exact same UMI</option>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
52 <option value="cluster">Identify clusters based on hamming distance</option>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
53 <option value="directional">Identify clusters based on distance and counts</option>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
54 </param>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
55 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (&gt;14bp)" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
56 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
57 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
58 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
59 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as as a criterion when deduping" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
60 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
61 <param argument="--subset" type="float" min="0" max="1" value="1" label="Only consider a random selection of the reads" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
62 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
63 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
64 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
65 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transripts" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
66 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
67 </inputs>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
68 <outputs>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
69 <data format="bam" name="output" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
70 <data format="tabular" name="group_out">
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
71 <filter>group_out</filter>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
72 </data>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
73 </outputs>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
74 <tests>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
75 <test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
76 <param name="input" value="group_in1.sam" ftype="sam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
77 <param name="extract_umi_method" value="read_id" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
78 <param name="method" value="unique" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
79 <output name="output" file="group_out1.bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
80 </test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
81 <test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
82 <param name="input" value="group_in2.bam" ftype="bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
83 <param name="extract_umi_method" value="read_id" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
84 <param name="paired" value="True" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
85 <param name="method" value="unique" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
86 <output name="output" file="group_out2.bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
87 </test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
88 <test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
89 <param name="input" value="group_in3.bam" ftype="bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
90 <param name="extract_umi_method" value="read_id" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
91 <param name="group_output" value="True" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
92 <param name="method" value="unique" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
93 <output name="group_out" file="group_out3.tab" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
94 <output name="output" file="group_out3.bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
95 </test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
96 <test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
97 <param name="input" value="group_in4.bam" ftype="bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
98 <param name="extract_umi_method" value="tag" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
99 <param name="umi_tag" value="BX" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
100 <param name="method" value="unique" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
101 <output name="group_out" file="group_out4.tab" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
102 <output name="output" file="group_out4.bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
103 </test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
104 <test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
105 <param name="input" value="group_in5.bam" ftype="bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
106 <param name="extract_umi_method" value="read_id" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
107 <param name="umi_tag" value="BX" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
108 <param name="method" value="cluster" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
109 <output name="output" file="group_out5.bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
110 </test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
111 <test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
112 <param name="input" value="group_in6.bam" ftype="bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
113 <param name="extract_umi_method" value="read_id" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
114 <param name="umi_tag" value="BX" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
115 <param name="method" value="directional" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
116 <output name="output" file="group_out6.bam" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
117 </test>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
118 </tests>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
119 <help><![CDATA[
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
120 umi_tools group - Group reads based on their UMI
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
121 ================================================
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
122
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
123 Purpose
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
124 -------
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
125
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
126 The purpose of this command is to identify groups of reads based on
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
127 their genomic coordinate and UMI. It is assumed that the FASTQ files
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
128 were processed with umi_tools extract before mapping and thus the UMI is
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
129 the last word of the read name. e.g:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
130
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
131 @HISEQ:87:00000000_AATT
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
132
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
133 where AATT is the UMI sequence.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
134
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
135 If you have used an alternative method which does not separate the
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
136 read id and UMI with a "_", such as bcl2fastq which uses ":", you can
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
137 specify the separator with the option "--umi-separator=<sep>",
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
138 replacing <sep> with e.g ":".
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
139
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
140 Alternatively, if your UMIs are encoded in a tag, you can specify this
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
141 by setting the option --extract-umi-method=tag and set the tag name
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
142 with the --umi-tag option. For example, if your UMIs are encoded in
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
143 the 'UM' tag, provide the following options:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
144 "--extract-umi-method=tag --umi-tag=UM"
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
145
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
146 By default, reads are considered identical if they have the same start
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
147 coordinate, are on the same strand, and have the same UMI. Optionally,
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
148 splicing status can be considered (see below).
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
149
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
150 The start position of a read is considered to be the start of its alignment
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
151 minus any soft clipped bases. A read aligned at position 500 with
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
152 cigar 2S98M will be assumed to start at position 498.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
153
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
154 Methods
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
155 -------
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
156
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
157 group can be run with multiple methods to identify groups of reads with
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
158 the same (or similar) UMI(s). All methods start by identifying the
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
159 reads with the same mapping position.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
160
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
161 The simplest method, "unique", groups reads with the exact same
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
162 UMI. The network-based methods, "cluster", "adjacency" and
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
163 "directional", build networks where nodes are UMIs and edges connect
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
164 UMIs with an edit distance <= threshold (usually 1). The groups of
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
165 reads are then defined from the network in a method-specific manner.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
166
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
167 Note that the "percentile" method used with the dedup command is not
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
168 available with group. This is because this method does not group
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
169 similar UMIs as per the network methods. Instead it applies a
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
170 threshold for inclusion of the UMI in the output and excluded UMIs are
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
171 not assigned to a "true" UMI.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
172
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
173 "unique"
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
174 Reads in a group share the exact same UMI
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
175
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
176 "cluster"
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
177 Identify clusters of connected UMIs (based on hamming distance
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
178 threshold). Each network is a read group
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
179
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
180 "directional"
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
181 Identify clusters of connected UMIs (based on hamming distance
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
182 threshold) and umi A counts >= (2* umi B counts) - 1. Each
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
183 network is a read group.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
184
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
185 The group command can be used to create two types of outfile: a tagged
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
186 BAM or a flatfile describing the read groups
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
187
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
188 To generate the tagged-BAM file, use the option --output-bam and
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
189 provide a filename with the -S option. Alternatively, if you do not
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
190 provide a filename, the bam file will be outputted to the stdout. If
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
191 you have provided the --log/-L option to send the logging output
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
192 elsewhere, you can pipe the output from the group command directly to
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
193 e.g samtools sort like so:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
194
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
195 ``umi_tools group -I inf.bam --group-out=grouped.tsv --output-bam --log=group.log --paired | samtools sort - -o grouped_sorted.bam``
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
196
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
197 The tagged-BAM file will have two tags per read:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
198
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
199 - UG = Unique_id.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
200 0-indexed unique id number for each group of reads with the same genomic position and UMI or UMIs inferred to be from the same true UMI + errors
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
201
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
202 - BX = Final UMI.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
203 The inferred true UMI for the group
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
204
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
205 To generate the flatfile describing the read groups, include the
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
206 --group-out=<filename> option. The columns of the read groups file are
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
207 below. The first five columns relate to the read. The final three columns
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
208 relate to the group.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
209
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
210 - read_id
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
211 read identifier
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
212
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
213 - contig
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
214 alignment contig
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
215
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
216 - position
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
217 Alignment position. Note that this position is not the start position of the read in the BAM file but the start of the read taking into account the read strand and cigar
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
218
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
219 - umi
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
220 The read UMI
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
221
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
222 - umi_count
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
223 The number of times this UMI is observed for reads at the same position
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
224
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
225 - final_umi
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
226 The inferred true UMI for the group
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
227
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
228 - final_umi_count
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
229 The total number of reads within the group
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
230
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
231 - unique_id
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
232 The unique id for the group
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
233
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
234
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
235 Options
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
236 -------
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
237
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
238 --extract-umi-method (choice)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
239 How are the UMIs encoded in the read?
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
240
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
241 Options are:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
242
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
243 - "read_id" (default)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
244 UMIs contained at the end of the read separated as
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
245 specified with --umi-separator option
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
246
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
247 - "tag"
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
248 UMIs contained in a tag, see --umi-tag option
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
249
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
250 --umi-separator (string)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
251 Separator between read id and UMI. See --extract-umi-method above
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
252
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
253 --umi-tag (string)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
254 Tag which contains UMI. See --extract-umi-method above
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
255
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
256 --method (choice, string)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
257 Method used to identify PCR duplicates within reads. All methods
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
258 start by identifying the reads with the same mapping position
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
259
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
260 Options are:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
261
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
262 - "unique"
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
263 Reads in a group share the exact same UMI
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
264
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
265 - "cluster"
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
266 Identify clusters of connected UMIs (based on edit distance
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
267 threshold). Each network is a read group
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
268
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
269 - "directional"
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
270 Identify clusters of connected UMIs (based on edit distance
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
271 threshold) and umi A counts >= (2* umi B counts) - 1. Each
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
272 network is a read group.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
273
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
274 --edit-distance-threshold (int)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
275 For the adjacency and cluster methods the threshold for the
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
276 edit distance to connect two UMIs in the network can be
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
277 increased. The default value of 1 works best unless the UMI is
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
278 very long (>14bp)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
279
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
280 --paired
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
281 BAM is paired end - output both read pairs. This will also
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
282 force the use of the template length to determine reads with
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
283 the same mapping coordinates.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
284
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
285 --spliced-is-unique
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
286 Causes two reads that start in the same position on the same
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
287 strand and having the same UMI to be considered unique if one is
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
288 spliced and the other is not. (Uses the 'N' cigar operation to test
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
289 for splicing)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
290
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
291 --soft-clip-threshold (int)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
292 Mappers that soft clip will sometimes do so rather than mapping a
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
293 spliced read if there is only a small overhang over the exon
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
294 junction. By setting this option, you can treat reads with at least
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
295 this many bases soft-clipped at the 3' end as spliced.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
296
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
297 --multimapping-detection-method (string, choice)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
298 If the sam/bam contains tags to identify multimapping reads, you can
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
299 specify for use when selecting the best read at a given locus.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
300 Supported tags are "NH", "X0" and "XT". If not specified, the read
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
301 with the highest mapping quality will be selected
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
302
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
303 --read-length
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
304 Use the read length as a criterion when deduping, e.g. for sRNA-Seq
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
305
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
306 --whole-contig
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
307 Consider all alignments to a single contig together. This is useful if
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
308 you have aligned to a transcriptome multi-fasta
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
309
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
310 --subset (float, [0-1])
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
311 Only consider a fraction of the reads, chosen at random. This is useful
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
312 for doing saturation analyses.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
313
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
314 --chrom
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
315 Only consider a single chromosome. This is useful for debugging purposes
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
316
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
317 --per-contig (string)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
318 Deduplicate per contig (field 3 in BAM; RNAME).
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
319 All reads with the same contig will be
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
320 considered to have the same alignment position. This is useful
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
321 if your library prep generates PCR duplicates with non identical
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
322 alignment positions such as CEL-Seq. In this case, you would
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
323 align to a reference transcriptome with one transcript per gene
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
324
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
325 --per-gene (string)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
326 Deduplicate per gene. As above except with this option you can
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
327 align to a reference transcriptome with more than one transcript
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
328 per gene. You need to also provide --gene-transcript-map option.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
329 This will also add a metacontig ('MC') tag to the reads if used
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
330 in conjunction with --output-bam
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
331
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
332 --gene-transcript-map (string)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
333 File mapping genes to transcripts (tab separated), e.g.:
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
334
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
335 gene1 transcript1
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
336 gene1 transcript2
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
337 gene2 transcript3
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
338
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
339 --gene-tag (string)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
340 Deduplicate per gene. As per --per-gene except here the gene
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
341 information is encoded in the bam read tag specified so you do
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
342 not need to supply --gene-transcript-map
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
343
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
344 --group-out (string, filename)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
345 Output a flatfile describing the read groups
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
346
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
347 --output-bam (string, filename)
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
348 Output a tagged bam file to stdout or -S <filename>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
349
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
350 -i, --in-sam/-o, --out-sam
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
351 By default, inputs are assumed to be in BAM format and outputs are written
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
352 in BAM format. Use these options to specify the use of SAM format for
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
353 inputs or outputs.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
354
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
355 -I (string, filename) input file name
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
356 The input file must be sorted and indexed.
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
357
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
358 -S (string, filename) output file name
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
359
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
360 -L (string, filename) log file name
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
361
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
362 Usage
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
363 -----
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
364 umi_tools group -I infile.bam --output-bam -S grouped.bam -L group.log --
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
365
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
366 ]]></help>
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
367 <expand macro="citations" />
860bc357b678 planemo upload commit c8e46ecad0b1473097517e582ed6c43eb0635b36
iuc
parents:
diff changeset
368 </tool>