annotate getreads.py @ 0:7f170cb06e2e draft

planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
author nick
date Tue, 01 Dec 2015 21:33:27 -0500
parents
children 464aee13e2df
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
1 """A simple parser for FASTA, FASTQ, SAM, etc. Create generators that just return the read name and
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
2 sequence.
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
3 All format parsers follow this API:
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
4 with open('sequence.fasta') as fasta:
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
5 for read in getreads.getparser(fasta, filetype='fasta'):
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
6 print "There is a sequence with this FASTA identifier: "+read.id
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
7 print "Its sequence is "+read.seq
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
8 The properties of Read are:
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
9 name: The entire FASTA header line, SAM column 1, etc.
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
10 id: The first whitespace-delimited part of the name.
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
11 seq: The sequence.
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
12 qual: The quality scores (unless the format is FASTA).
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
13 """
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
14
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
15
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
def getparser(filehandle, filetype='fasta'):
    """Return the reader object that parses `filehandle` as `filetype`.

    Args:
        filehandle: An open file-like object; it is handed unchanged to the reader.
        filetype: One of 'fasta', 'fastq', 'sam', or 'tsv'.

    Returns:
        A FastaReader, FastqReader, SamReader, or TsvReader wrapping `filehandle`.

    Raises:
        ValueError: If `filetype` is not one of the supported names.
    """
    if filetype == 'fasta':
        return FastaReader(filehandle)
    if filetype == 'fastq':
        return FastqReader(filehandle)
    if filetype == 'sam':
        return SamReader(filehandle)
    if filetype == 'tsv':
        return TsvReader(filehandle)
    # Use %-formatting rather than string concatenation so that a non-string
    # filetype (e.g. None) still raises ValueError instead of a TypeError.
    # The message for string values is unchanged.
    raise ValueError("Illegal argument: filetype='%s'" % (filetype,))
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
27
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
28
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
class FormatError(Exception):
    """Exception raised by the parsers when the input violates the expected format."""

    def __init__(self, message=None):
        """Store `message` on the exception, but only when a truthy one was given."""
        if not message:
            # Nothing to record: skip the base-class initializer entirely.
            return
        super(FormatError, self).__init__(message)
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
33
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
34
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
class Read(object):
    """Plain container for one parsed record.

    Attributes:
        name: The full record name.
        id: The identifier part of the name (stored from the `id_` argument).
        seq: The sequence string.
        qual: The quality string.
    All four default to the empty string.
    """

    def __init__(self, name='', seq='', id_='', qual=''):
        # `id_` carries a trailing underscore to avoid shadowing the builtin;
        # it is exposed as the `id` attribute.
        self.name, self.seq = name, seq
        self.id, self.qual = id_, qual
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
41
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
42
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
class Reader(object):
    """Common base for the format-specific parsers.

    Subclasses supply a `parser()` method; iterating over a reader simply
    iterates over whatever `parser()` returns.
    """

    def __init__(self, filehandle):
        """Remember the file-like object that `parser()` will read from."""
        self.filehandle = filehandle

    def __iter__(self):
        """Start a fresh `parser()` run and hand it back as the iterator."""
        records = self.parser()
        return records
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
49
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
50
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
class TsvReader(Reader):
    """A parser for a simple tab-delimited format.

    Column 1: name
    Column 2: sequence
    Column 3: quality scores (optional)

    Lines with fewer than two tab-delimited columns are skipped.
    """

    def parser(self):
        """Yield one Read per usable line of self.filehandle."""
        for line in self.filehandle:
            # Strip only the line terminator so empty trailing columns survive.
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 2:
                continue
            read = Read()
            read.name = fields[0]
            # split() returns [] for an empty OR whitespace-only name; guard on
            # the result so a name like " " leaves id empty instead of raising
            # IndexError.
            name_parts = read.name.split()
            if name_parts:
                read.id = name_parts[0]
            read.seq = fields[1]
            if len(fields) >= 3:
                read.qual = fields[2]
            yield read
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
69
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
70
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
class SamReader(Reader):
    """A simple SAM parser.

    Assumptions:
    Lines with fewer than 11 tab-delimited fields are skipped.
    Lines with 11 or more fields whose first field starts with "@" and is
    exactly 3 characters long (e.g. "@SQ") are treated as headers and skipped.
    All other lines are alignments. Note that an alignment whose read name
    happens to match that header pattern would also be skipped.
    """

    def parser(self):
        """Yield one Read per alignment line (name: column 1, seq: column 10, qual: column 11)."""
        for line in self.filehandle:
            fields = line.split('\t')
            if len(fields) < 11:
                continue
            # Skip headers.
            if fields[0].startswith('@') and len(fields[0]) == 3:
                continue
            read = Read()
            read.name = fields[0]
            # split() returns [] for an empty OR whitespace-only name; guard on
            # the result so such a name leaves id empty instead of raising
            # IndexError.
            name_parts = read.name.split()
            if name_parts:
                read.id = name_parts[0]
            read.seq = fields[9]
            # The line was not stripped before splitting, so when column 11 is
            # the last field it still carries the line terminator.
            read.qual = fields[10].rstrip('\r\n')
            yield read
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
92
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
93
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
class FastaReader(Reader):
    """A simple FASTA parser that reads one sequence at a time into memory.

    Blank lines are ignored and multi-line sequences are concatenated.
    Records whose sequence is empty are not yielded.
    """

    def parser(self):
        """Yield one Read per FASTA record found in self.filehandle."""
        read = Read()
        while True:
            line_raw = self.filehandle.readline()
            if not line_raw:
                # End of input: emit the record still being built, if any.
                if read.seq:
                    yield read
                # End the generator with a plain return. Raising StopIteration
                # inside a generator is turned into RuntimeError by PEP 479
                # (Python 3.7+); return behaves the same on older versions.
                return
            line = line_raw.strip()
            # Allow empty lines.
            if not line:
                continue
            if line.startswith('>'):
                # A new header closes the previous record.
                if read.seq:
                    yield read
                read = Read()
                read.name = line[1:]  # remove ">"
                if read.name:
                    read.id = read.name.split()[0]
            else:
                read.seq += line
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
118
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
119
7f170cb06e2e planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff changeset
class FastqReader(Reader):
    """A simple FASTQ parser. Can handle multi-line sequences, though.

    Parsing is a small state machine: header -> sequence -> plus -> quality.
    Blank lines are ignored in every state. The quality block is considered
    complete once it is at least as long as the sequence, and quality
    characters beyond the sequence length are dropped.
    """

    def parser(self):
        """Yield one Read per FASTQ record found in self.filehandle.

        Raises:
            FormatError: If a line expected to be a record header does not
                start with "@".
        """
        read = Read()
        state = 'header'
        while True:
            line_raw = self.filehandle.readline()
            if not line_raw:
                # End of input: emit the record still being built, if any.
                if read.seq:
                    yield read
                # End the generator with a plain return. Raising StopIteration
                # inside a generator is turned into RuntimeError by PEP 479
                # (Python 3.7+); return behaves the same on older versions.
                return
            line = line_raw.strip()
            # Allow empty lines.
            if not line:
                continue
            if state == 'header':
                if not line.startswith('@'):
                    raise FormatError('line state = "header" but line does not start with "@"')
                # A new header closes the previous record.
                if read.seq:
                    yield read
                read = Read()
                read.name = line[1:]  # remove '@'
                if read.name:
                    read.id = read.name.split()[0]
                state = 'sequence'
            elif state == 'sequence':
                if line.startswith('+'):
                    state = 'plus'
                else:
                    read.seq += line
            elif state == 'plus' or state == 'quality':
                state = 'quality'
                # NOTE(review): for a record with an empty sequence togo is 0,
                # so the next non-blank line (normally the following record's
                # header) is consumed here as quality and discarded.
                togo = len(read.seq) - len(read.qual)
                read.qual += line[:togo]
                # The end of the quality lines is when we have a quality string as long as the
                # sequence.
                if len(read.qual) >= len(read.seq):
                    state = 'header'