comparison umi-tools_group.xml @ 1:f73f13641bb6 draft

planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
author iuc
date Wed, 10 Jan 2018 19:09:28 -0500
parents 860bc357b678
children a24f5b991320
comparison
equal deleted inserted replaced
0:860bc357b678 1:f73f13641bb6
2 <description>Extract UMI from fastq files</description> 2 <description>Extract UMI from fastq files</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"> 6 <expand macro="requirements">
7 <requirement type="package" version="1.5">samtools</requirement> 7 <requirement type="package" version="1.6">samtools</requirement>
8 </expand> 8 </expand>
9 <command detect_errors="exit_code"><![CDATA[ 9 <command detect_errors="exit_code"><![CDATA[
10 #if $input.is_of_type("sam"): 10 #if $input.is_of_type("sam"):
11 #set $input_file = $input 11 #set $input_file = $input
12 #else: 12 #else:
13 ln -sf '${input}' 'input.bam' && 13 ln -sf '${input}' 'input.bam' &&
14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' && 14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' &&
15 #set $input_file = 'input.bam' 15 #set $input_file = 'input.bam'
16 #end if 16 #end if
17 17
18 umi_tools group --extract-umi-method $extract_umi_method 18 umi_tools group
19 --random-seed 0
20 --extract-umi-method $extract_umi_method
19 #if str($extract_umi_method) != 'read_id': 21 #if str($extract_umi_method) != 'read_id':
20 --umi-separator '$umi_separator' --umi-tag $umi_tag 22 --umi-separator '$umi_separator' --umi-tag '$umi_tag'
21 #end if 23 #end if
22 --method $method --edit-distance-threshold $edit_distance_threshold 24 --method $method --edit-distance-threshold $edit_distance_threshold
23 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold 25 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold
24 $read_length $whole_contig --subset $subset $per_contig $per_gene 26 $read_length $whole_contig --subset $subset $per_contig $per_gene
25 #if $gene_transcript_map: 27 #if $gene_transcript_map:
48 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" /> 50 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" />
49 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." /> 51 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." />
50 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position"> 52 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position">
51 <option value="unique">Reads group share the exact same UMI</option> 53 <option value="unique">Reads group share the exact same UMI</option>
52 <option value="cluster">Identify clusters based on hamming distance</option> 54 <option value="cluster">Identify clusters based on hamming distance</option>
53 <option value="directional">Identify clusters based on distance and counts</option> 55 <option value="directional">Identify clusters based on distance and counts, restrict network expansion by threshold</option>
54 </param> 56 </param>
55 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (&gt;14bp)" /> 57 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (&gt;14bp)" />
56 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." /> 58 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." />
57 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" /> 59 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" />
58 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." /> 60 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." />
59 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as a criterion when deduping" /> 61 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as a criterion when deduping" />
60 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" /> 62 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" />
61 <param argument="--subset" type="float" min="0" max="1" value="1" label="Only consider a random selection of the reads" /> 63 <param argument="--subset" type="float" min="0.0" max="1.0" value="1.0" label="Only consider a random selection of the reads" />
62 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" /> 64 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" />
63 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" /> 65 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" />
64 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." /> 66 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." />
65 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transcripts" /> 67 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transcripts" />
66 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." /> 68 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." />
71 <filter>group_out</filter> 73 <filter>group_out</filter>
72 </data> 74 </data>
73 </outputs> 75 </outputs>
74 <tests> 76 <tests>
75 <test> 77 <test>
76 <param name="input" value="group_in1.sam" ftype="sam" />
77 <param name="extract_umi_method" value="read_id" />
78 <param name="method" value="unique" />
79 <output name="output" file="group_out1.bam" />
80 </test>
81 <test>
82 <param name="input" value="group_in2.bam" ftype="bam" /> 78 <param name="input" value="group_in2.bam" ftype="bam" />
83 <param name="extract_umi_method" value="read_id" /> 79 <param name="extract_umi_method" value="read_id" />
84 <param name="paired" value="True" /> 80 <param name="paired" value="True" />
85 <param name="method" value="unique" /> 81 <param name="method" value="unique" />
86 <output name="output" file="group_out2.bam" /> 82 <output name="output" file="group_out2.bam" ftype="bam" sort="True" />
87 </test> 83 </test>
88 <test> 84 <test>
89 <param name="input" value="group_in3.bam" ftype="bam" /> 85 <param name="input" value="group_in3.bam" ftype="bam" />
90 <param name="extract_umi_method" value="read_id" /> 86 <param name="extract_umi_method" value="read_id" />
91 <param name="group_output" value="True" /> 87 <param name="group_output" value="True" />
92 <param name="method" value="unique" /> 88 <param name="method" value="unique" />
93 <output name="group_out" file="group_out3.tab" /> 89 <output name="group_out" file="group_out3.tab" />
94 <output name="output" file="group_out3.bam" /> 90 <output name="output" file="group_out3.bam" ftype="bam" sort="True" />
95 </test> 91 </test>
96 <test> 92 <test>
97 <param name="input" value="group_in4.bam" ftype="bam" /> 93 <param name="input" value="group_in4.bam" ftype="bam" />
98 <param name="extract_umi_method" value="tag" /> 94 <param name="extract_umi_method" value="tag" />
99 <param name="umi_tag" value="BX" /> 95 <param name="umi_tag" value="BX" />
100 <param name="method" value="unique" /> 96 <param name="method" value="unique" />
101 <output name="group_out" file="group_out4.tab" /> 97 <output name="group_out" file="group_out4.tab" />
102 <output name="output" file="group_out4.bam" /> 98 <output name="output" file="group_out4.bam" ftype="bam" sort="True" />
103 </test> 99 </test>
104 <test> 100 <test>
105 <param name="input" value="group_in5.bam" ftype="bam" /> 101 <param name="input" value="group_in5.bam" ftype="bam" />
106 <param name="extract_umi_method" value="read_id" /> 102 <param name="extract_umi_method" value="read_id" />
107 <param name="umi_tag" value="BX" /> 103 <param name="umi_tag" value="BX" />
108 <param name="method" value="cluster" /> 104 <param name="method" value="cluster" />
109 <output name="output" file="group_out5.bam" /> 105 <output name="output" file="group_out5.bam" ftype="bam" sort="True" />
110 </test> 106 </test>
111 <test> 107 <test>
112 <param name="input" value="group_in6.bam" ftype="bam" /> 108 <param name="input" value="group_in6.bam" ftype="bam" />
113 <param name="extract_umi_method" value="read_id" /> 109 <param name="extract_umi_method" value="read_id" />
114 <param name="umi_tag" value="BX" /> 110 <param name="umi_tag" value="BX" />
115 <param name="method" value="directional" /> 111 <param name="method" value="directional" />
116 <output name="output" file="group_out6.bam" /> 112 <output name="output" file="group_out6.bam" ftype="bam" sort="True" />
117 </test> 113 </test>
118 </tests> 114 </tests>
119 <help><![CDATA[ 115 <help><![CDATA[
120 umi_tools group - Group reads based on their UMI 116 umi_tools group - Group reads based on their UMI
121 ================================================ 117 ================================================