0
|
1 <tool id="indel_sam2interval" name="Extract indels" version="1.0.0">
|
|
2 <description>from SAM</description>
|
|
<!-- Command template for indel_sam2interval.py. Dataset paths are single-quoted
     so that a path containing spaces or shell metacharacters reaches the script
     as one argument. The two boolean flags only ever expand to "true" or "false"
     and are left unquoted. When an optional BED output is switched off, the
     literal string "None" is passed in place of a path. -->
3 <command interpreter="python">
|
|
4 indel_sam2interval.py
|
|
5 --input='$input1'
|
|
6 --include_base=$include_base
|
|
7 --collapse=$collapse
|
|
8 --int_out='$output1'
|
|
9 #if $ins_out.include_ins_out == "true"
|
|
10 --bed_ins_out='$output2'
|
|
11 #else
|
|
12 --bed_ins_out="None"
|
|
13 #end if
|
|
14 #if $del_out.include_del_out == "true"
|
|
15 --bed_del_out='$output3'
|
|
16 #else
|
|
17 --bed_del_out="None"
|
|
18 #end if
|
|
19 </command>
|
|
<!-- User-facing parameters. Each one is referenced by name in the command
     template: input1, include_base, collapse, and the two Yes/No selects. -->
20 <inputs>
|
|
21 <param format="sam" name="input1" type="data" label="Select dataset to convert" />
|
|
<!-- Both checkboxes default to checked and expand to the strings "true" or "false". -->
22 <param name="include_base" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include the relevant base(s) for each insertion (and a dash (-) for deletions)" />
|
|
23 <param name="collapse" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Collapse repeated locations onto single line with counts" />
|
|
<!-- Switch for the insertions BED file; its value is tested by the command
     template and by the filter on output2. Neither branch adds parameters. -->
24 <conditional name="ins_out">
|
|
25 <param name="include_ins_out" type="select" label="Include insertions output bed file?">
|
|
26 <option value="true">Yes</option>
|
|
27 <option value="false">No</option>
|
|
28 </param>
|
|
29 <when value="true" />
|
|
30 <when value="false" />
|
|
31 </conditional>
|
|
<!-- Switch for the deletions BED file; its value is tested by the command
     template and by the filter on output3. Neither branch adds parameters. -->
32 <conditional name="del_out">
|
|
33 <param name="include_del_out" type="select" label="Include deletions output bed file?">
|
|
34 <option value="true">Yes</option>
|
|
35 <option value="false">No</option>
|
|
36 </param>
|
|
37 <when value="true" />
|
|
38 <when value="false" />
|
|
39 </conditional>
|
|
40 </inputs>
|
|
<!-- output1 (interval) has no filter. output2 (insertions BED) and output3
     (deletions BED) each carry a filter tied to the matching Yes/No select,
     so they are kept only when that select is "true". -->
41 <outputs>
|
|
42 <data format="interval" name="output1" />
|
|
43 <data format="bed" name="output2">
|
|
44 <filter>ins_out[ "include_ins_out" ] == "true"</filter>
|
|
45 </data>
|
|
46 <data format="bed" name="output3">
|
|
47 <filter>del_out[ "include_del_out" ] == "true"</filter>
|
|
48 </data>
|
|
49 </outputs>
|
|
<!-- Single functional test: every option is set to "true", so all three
     outputs are produced and each is compared against an expected file. -->
50 <tests>
|
|
51 <test>
|
|
52 <param name="input1" value="indel_sam2interval_in1.sam" ftype="sam"/>
|
|
53 <param name="include_base" value="true"/>
|
|
54 <param name="collapse" value="true"/>
|
|
55 <param name="include_ins_out" value="true" />
|
|
56 <param name="include_del_out" value="true" />
|
|
57 <output name="output1" file="indel_sam2interval_out1.interval" ftype="interval"/>
|
|
58 <output name="output2" file="indel_sam2interval_out2.bed" ftype="bed"/>
|
|
59 <output name="output3" file="indel_sam2interval_out3.bed" ftype="bed"/>
|
|
60 </test>
|
|
61 </tests>
|
|
62 <help>
|
|
63
|
|
64 **What it does**
|
|
65
|
|
66 Given a SAM file containing indels, this tool converts them to an interval file with a column indicating whether each entry is an insertion or a deletion. It can optionally also create a BED file for each type (one for insertions, one for deletions). The interval file can be combined with other similar files to create a table useful for analysis with the Indel Analysis Table tool. The BED files can be useful for visualizing the reads.
|
|
67
|
|
68 -----
|
|
69
|
|
70 **Example**
|
|
71
|
|
72 Suppose you have the following mapping results::
|
|
73
|
|
74 r327 16 chrM 11 37 8M1D10M * 0 0 CTTACCAGATAGTCATCA -+<2;?@BA@?-,.+4=4 XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:41^C35
|
|
75 r457 0 chr1 14 37 14M * 0 0 ACCTGACAGATATC =/DF;?@1A@?-,. XT:A:U NM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
|
|
76 r501 16 chrM 6 23 7M1I13M * 0 0 TCTGTGCCTACCAGACATTCA +=$2;?@BA@?-,.+4=4=4A XT:A:U NM:i:3 X0:i:1 X1:i:1 XM:i:2 XO:i:1 XG:i:1 MD:Z:28C36G9 XA:Z:chrM,+134263658,14M1I61M,4;
|
|
77 r1288 16 chrM 8 37 11M1I7M * 0 0 TCACTTACCTGTACACACA /*F2;?@%A@?-,.+4=4= XT:A:U NM:i:4 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:2T0T1A69
|
|
78 r1902 0 chr1 4 37 7M2D18M * 0 0 AGTCTCTTACCTGACGGTTATGA <2;?@BA@?-,.+4=4=4AA663 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:17^CA58A0
|
|
79 r2204 16 chrM 9 0 19M * 0 0 CTGGTACCTGACAGGTATC 2;?@BA@?-,.+4=4=4AA XT:A:R NM:i:1 X0:i:2 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:0T75 XA:Z:chrM,-564927,76M,1;
|
|
80 r2314 16 chrM 6 37 10M2D8M * 0 0 TCACTCTTACGTCTGA <2;?@BA@?-,.+4=4 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:25A5^CA45
|
|
81 r3001 0 chrM 13 37 3M1D5M2I7M * 0 0 TACAGTCACCCTCATCA <2;?@BA/(@?-,$& XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:17^CA58A0
|
|
82 r3218 0 chr1 13 37 8M1D7M * 0 0 TACAGTCACTCATCA <2;?@BA/(@?-,$& XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:17^CA58A0
|
|
83 r4767 16 chr2 3 37 15M2I7M * 0 0 CAGACTCTCTTACCAAAGACAGAC <2;?@BA/(@?-,.+4=4=4AA66 XT:A:U NM:i:4 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:2T1A4T65
|
|
84 r5333 0 chrM 5 37 17M1D8M * 0 0 GTCTCTCATACCAGACAACGGCAT FB3$@BA/(@?-,.+4=4=4AA66 XT:A:U NM:i:4 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:45C10^C0C5C13
|
|
85 r6690 16 chrM 7 23 20M * 0 0 CTCTCTTACCAGACAGACAT 2;?@BA/(@?-,.+4=4=4A XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76 XA:Z:chrM,-568532,76M,1;
|
|
86 r7211 0 chrM 7 37 24M * 0 0 CGACAGAGACAAAATAACATTTAA //<2;?@BA@?-,.+4=442;;6: XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:2 XO:i:1 XG:i:1 MD:Z:73G0G0
|
|
87 r7899 69 * 0 0 * * 0 0 CTGCGTGTTGGTGTCTACTGGGGT #%#'##$#$##&%#%$$$%#%#'#
|
|
88 r9192 133 * 0 0 * * 0 0 GTGCGTCGGGGAGGGTGCTGTCGG ######%#$%#$$###($###&&%
|
|
89 r9922 16 chrM 4 0 7M3I9M * 0 0 CCAGACATTTGAAATCAGG F/D4=44^D++26632;;6 XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
|
|
90 r9987 16 chrM 4 0 9M1I18M * 0 0 AGGTTCTCATTACCTGACACTCATCTTG G/AD6"/+4=4426632;;6:<2;?@BA XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
|
|
91 r10145 16 chr1 16 0 5M2D7M * 0 0 CACATTGTTGTA G//+4=44=4AA XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
|
|
92 r10324 16 chrM 15 0 6M1D5M * 0 0 CCGTTCTACTTG A@??8.G//+4= XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
|
|
93 r12331 16 chrM 17 0 4M2I6M * 0 0 AGTCGAATACGTG 632;;6:<2;?@B XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
|
|
94 r12914 16 chr2 24 0 4M3I3M * 0 0 ACTACCCCAA G//+4=42,. XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
|
|
95 r13452 16 chrM 13 0 3M1D11M * 0 0 TACGTCACTCATCA IIIABCCCICCCCI XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
|
|
96
|
|
97
|
|
98 The following three files will be produced, shown here in order and separated by blank lines (Interval, Insertions BED and Deletions BED)::
|
|
99
|
|
100 chr1 11 13 D - 1
|
|
101 chr1 21 22 D - 1
|
|
102 chr1 21 23 D - 1
|
|
103 chr2 18 19 I AA 1
|
|
104 chr2 28 29 I CCC 1
|
|
105 chrM 11 12 I TTT 1
|
|
106 chrM 13 14 I C 1
|
|
107 chrM 13 14 I T 1
|
|
108 chrM 16 17 D - 1
|
|
109 chrM 16 18 D - 1
|
|
110 chrM 19 20 D - 1
|
|
111 chrM 19 20 I T 1
|
|
112 chrM 21 22 D - 1
|
|
113 chrM 21 22 I GA 1
|
|
114 chrM 22 23 D - 1
|
|
115
|
|
116 chr2 18 19
|
|
117 chr2 28 29
|
|
118 chrM 11 12
|
|
119 chrM 13 14
|
|
120 chrM 13 14
|
|
121 chrM 19 20
|
|
122 chrM 21 22
|
|
123
|
|
124 chr1 11 13
|
|
125 chr1 21 22
|
|
126 chr1 21 23
|
|
127 chrM 16 17
|
|
128 chrM 16 18
|
|
129 chrM 19 20
|
|
130 chrM 21 22
|
|
131 chrM 22 23
|
|
132
|
|
133 For more information on SAM, please consult the `SAM format description`__.
|
|
134
|
|
135 .. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
|
|
136
|
|
137
|
|
138 </help>
|
|
139 </tool>
|