annotate RaGOO/sam2delta.py @ 13:b9a3aeb162ab draft default tip

Uploaded
author dereeper
date Mon, 26 Jul 2021 18:22:37 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
1 #!/usr/bin/env python
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
2 import argparse
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
3 from collections import defaultdict
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
4
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
5 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
6 This utility converts a SAM file to a nucmer delta file.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
7
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
8 SAM files must contain an NM tag, which is default in minimap2 alignments.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
9 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
10
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
11
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
12 class SAMAlignment:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
13
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
14 def __init__(self, in_ref_header, in_query_header, in_ref_start, in_cigar, in_flag, in_seq_len, in_num_mismatches):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
15 self.num_mismatches = in_num_mismatches
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
16 self.seq_len = in_seq_len
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
17 self.flag = int(in_flag)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
18 self.is_rc = False
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
19
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
20 # Check if the query was reverse complemented
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
21 if self.flag & 16:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
22 self.is_rc = True
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
23
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
24 self.cigar = in_cigar
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
25 self.parsed_cigar = []
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
26 self._parse_cigar()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
27
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
28 if self.is_rc:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
29 self.parsed_cigar = self.parsed_cigar[::-1]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
30
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
31 self.ref_header = in_ref_header
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
32 self.query_header = in_query_header
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
33 self.ref_start = int(in_ref_start)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
34 self.ref_end = None
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
35 self.query_start = None
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
36 self.query_end = None
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
37 self.query_aln_len = 0
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
38 self.ref_aln_len = 0
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
39
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
40 num_hard_clipped = 0
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
41 for i in self.parsed_cigar:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
42 if i[-1] == 'H':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
43 num_hard_clipped += int(i[:-1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
44
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
45 if self.seq_len == 1:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
46 self.seq_len = 0
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
47 for i in self.parsed_cigar:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
48 if i[-1] == 'M' or i[-1] == 'I' or i[-1] == 'S' or i[-1] == 'X' or i[-1] == '=':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
49 self.seq_len += int(i[:-1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
50
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
51 self.query_len = self.seq_len + num_hard_clipped
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
52
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
53 self._get_query_start()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
54 self._get_query_aln_len()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
55 self._get_ref_aln_len()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
56 self._get_query_end()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
57 self._get_ref_end()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
58
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
59 if self.is_rc:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
60 self.query_start, self.query_end = self.query_end, self.query_start
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
61 # Lazily taking care of off-by-one errors
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
62 self.query_end += 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
63 self.query_start -= 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
64
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
65 self.query_end -= 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
66 self.ref_end -= 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
67
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
68 def __str__(self):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
69 return ' '.join([self.ref_header, self.query_header])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
70
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
71 def __repr__(self):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
72 return '<' + ' '.join([self.ref_header, self.query_header]) + '>'
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
73
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
74 def _get_query_start(self):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
75 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
76 An alignment will either start with soft clipping, hard clipping, or with one of the alignment
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
77 codes (e.g. Match/Mismatch (M) or InDel (I/D)). If hard or soft clipped, the start of the alignment is
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
78 the very next base after clipping. If the alignment has commenced, then the start of the alignment is 1.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
79
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
80 Notes:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
81 At least for now, I am only specifying this for minimap2 output, which I believe only has the following
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
82 characters in cigar strings:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
83 {'I', 'S', 'M', 'D', 'H'}
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
84 so I will only handle these cases.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
85 """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
86 if self.parsed_cigar[0][-1] == 'I' or self.parsed_cigar[0][-1] == 'D' or self.parsed_cigar[0][-1] == 'M':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
87 self.query_start = 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
88 else:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
89 # The alignment has started with either soft or hard clipping
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
90 # Need to double check if a 1 needs to be added here
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
91 self.query_start = int(self.parsed_cigar[0][:-1]) + 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
92
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
93 def _get_query_aln_len(self):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
94 """ Just the addition of all cigar codes which "consume" the query (M, I)"""
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
95 for i in self.parsed_cigar:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
96 if i[-1] == 'M' or i[-1] == 'I':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
97 self.query_aln_len += int(i[:-1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
98
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
99 def _get_ref_aln_len(self):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
100 for i in self.parsed_cigar:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
101 if i[-1] == 'M' or i[-1] == 'D':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
102 self.ref_aln_len += int(i[:-1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
103
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
104 def _get_query_end(self):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
105 """ This is the length of the alignment + query start """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
106 self.query_end = self.query_start + self.query_aln_len
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
107
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
108 def _get_ref_end(self):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
109 """ This is the length of the alignment + query start """
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
110 self.ref_end = self.ref_start + self.ref_aln_len
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
111
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
112 def _parse_cigar(self):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
113 cigar_chars = {
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
114 'M',
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
115 'I',
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
116 'D',
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
117 'N',
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
118 'S',
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
119 'H',
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
120 'P',
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
121 '=',
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
122 'X'
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
123 }
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
124
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
125 this_field = ''
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
126 for char in self.cigar:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
127 this_field += char
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
128 if char in cigar_chars:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
129 self.parsed_cigar.append(this_field)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
130 this_field = ''
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
131
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
132
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
133 def write_delta(in_alns, in_file_name):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
134 with open(in_file_name, 'w') as f:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
135 f.write('file1 file2\n')
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
136 f.write('NUCMER\n')
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
137 for aln in in_alns.keys():
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
138 query_len = in_alns[aln][0].query_len
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
139 #print (query_len)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
140 f.write('>%s %s %r %r\n' % (aln[0], aln[1], ref_chr_lens[aln[0]], query_len))
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
141 for i in in_alns[aln]:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
142 f.write('%r %r %r %r %r %r %r\n' % (
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
143 i.ref_start,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
144 i.ref_end,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
145 i.query_start,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
146 i.query_end,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
147 i.num_mismatches,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
148 i.num_mismatches,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
149 0
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
150 ))
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
151 # Continue with the cigar string
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
152 offsets = []
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
153 cigar = i.parsed_cigar
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
154 if cigar[0][-1] == 'S' or cigar[0][-1] == 'H':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
155 cigar = cigar[1:-1]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
156 else:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
157 cigar = cigar[:-1]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
158
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
159 counter = 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
160 for code in cigar:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
161 if code[-1] == 'M':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
162 counter += int(code[:-1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
163 elif code[-1] == 'D':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
164 offsets.append(counter)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
165 num_I = int(code[:-1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
166 for i in range(1, num_I):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
167 offsets.append(1)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
168 counter = 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
169 elif code[-1] == 'I':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
170 offsets.append(-1*counter)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
171 num_I = int(code[:-1])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
172 for i in range(1, num_I):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
173 offsets.append(-1)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
174 counter = 1
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
175 else:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
176 raise ValueError('Unexpected CIGAR code')
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
177 offsets.append(0)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
178 offsets = [str(i) for i in offsets]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
179 f.write('\n'.join(offsets) + '\n')
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
180
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
181
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
182 parser = argparse.ArgumentParser(description='Convert a SAM file to a nucmer delta file.')
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
183 parser.add_argument("sam_file", metavar="<alns.sam>", type=str, help="SAM file to convert")
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
184
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
185 args = parser.parse_args()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
186 sam_file = args.sam_file
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
187
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
188 # Make a dictionary storing all alignments between and query and a reference sequence.
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
189 # key = (reference_header, query_header)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
190 # value = list of alignments between those sequences. only store info needed for the delta file
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
191
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
192 alns = defaultdict(list)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
193
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
194 # Read through the sam file
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
195 ref_chr_lens = dict()
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
196 with open(sam_file) as f:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
197 for line in f:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
198 # Skip SAM headers
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
199 if line.startswith('@SQ'):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
200 header_list = line.split('\t')
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
201 chrom = header_list[1].replace('SN:', '')
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
202 chr_len = int(header_list[2].replace('LN:', ''))
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
203 ref_chr_lens[chrom] = chr_len
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
204 continue
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
205
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
206 if line.startswith('@'):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
207 continue
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
208
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
209 fields = line.split('\t')
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
210 ref_header = fields[2]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
211 query_header = fields[0]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
212 ref_start = fields[3]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
213 cigar = fields[5]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
214 flag = fields[1]
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
215 seq_len = len(fields[9])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
216
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
217 if not cigar == '*':
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
218 # Get the NM flag
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
219 nm = None
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
220 for i in fields:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
221 if i.startswith('NM:i'):
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
222 nm = int(i[5:])
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
223 continue
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
224
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
225 if nm is None:
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
226 raise ValueError('SAM file must include NM tag.')
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
227
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
228 x = SAMAlignment(
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
229 ref_header,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
230 query_header,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
231 ref_start,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
232 cigar,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
233 flag,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
234 seq_len,
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
235 nm
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
236 )
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
237 alns[(ref_header, query_header)].append(x)
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
238
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
239 write_delta(alns, sam_file + '.delta')