annotate RaGOO/ragoo_utilities/PAFReader.py @ 13:b9a3aeb162ab draft default tip

Uploaded
author dereeper
date Mon, 26 Jul 2021 18:22:37 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
class PAFLine:
    """Object to represent a single alignment in a minimap PAF file.

    The raw record is split on tabs and kept in ``self.line``; the first
    twelve columns are additionally exposed as named attributes, with the
    length, coordinate, match-count and mapping-quality columns converted
    to ``int``.
    """

    def __init__(self, in_line):
        """Parse one tab-separated PAF record.

        Start positions should be before (or equal to) end positions for
        both query and target.

        Args:
            in_line: One line of PAF text. Trailing whitespace, including
                the newline, is stripped before the line is split on tabs.

        Raises:
            IndexError: If the line has fewer than twelve columns.
            ValueError: If a numeric column cannot be converted to ``int``.
            AssertionError: If a start coordinate is greater than the
                matching end coordinate.
        """
        self.line = in_line.rstrip().split('\t')
        self.contig = self.line[0]
        self.query_len = int(self.line[1])
        self.query_start = int(self.line[2])
        self.query_end = int(self.line[3])
        self.strand = self.line[4]
        self.ref_header = self.line[5]
        self.ref_len = int(self.line[6])
        self.ref_start = int(self.line[7])
        self.ref_end = int(self.line[8])
        self.num_match = int(self.line[9])
        self.aln_len = int(self.line[10])
        self.mapq = int(self.line[11])

        # Raise explicitly instead of using ``assert`` statements: asserts are
        # removed when Python runs with -O, which would let malformed
        # coordinates through. AssertionError is kept as the exception type
        # so existing callers observe the same failure as before.
        if self.query_start > self.query_end:
            raise AssertionError(
                'query start (%d) is after query end (%d)'
                % (self.query_start, self.query_end)
            )
        if self.ref_start > self.ref_end:
            raise AssertionError(
                'reference start (%d) is after reference end (%d)'
                % (self.ref_start, self.ref_end)
            )

    def __str__(self):
        """Return the record as tab-joined text, without a trailing newline."""
        return '\t'.join(self.line)

    def __eq__(self, other):
        """Compare two records by their split column lists.

        Returns ``NotImplemented`` when ``other`` has no ``line`` attribute,
        so comparing against an unrelated object evaluates to ``False``
        instead of raising ``AttributeError``.
        """
        try:
            other_fields = other.line
        except AttributeError:
            return NotImplemented
        return self.line == other_fields
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
30
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
31
b9a3aeb162ab Uploaded
dereeper
parents:
diff changeset
class PAFReader:
    """Stream the alignments of a PAF file as PAFLine objects."""

    def __init__(self, paf_file):
        """Store the value that will later be handed to ``open()``."""
        self.paf_file = paf_file

    def parse_paf(self):
        """Lazily yield one PAFLine for every line of the file.

        This is a generator: the file is opened on the first iteration and
        is closed when the generator is exhausted or closed.
        """
        with open(self.paf_file) as handle:
            # map() is lazy in Python 3, so each line is parsed only when
            # the consumer asks for the next record.
            yield from map(PAFLine, handle)