Mercurial > repos > nick > sequence_content_trimmer
annotate getreads.py @ 0:7f170cb06e2e draft
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
author | nick |
---|---|
date | Tue, 01 Dec 2015 21:33:27 -0500 |
parents | |
children | 464aee13e2df |
rev | line source |
---|---|
0
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
1 """A simple parser for FASTA, FASTQ, SAM, etc. Create generators that just return the read name and |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
2 sequence. |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
3 All format parsers follow this API: |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
4 with open('sequence.fasta') as fasta: |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
5 for read in getreads.getparser(fasta, filetype='fasta'): |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
6 print "There is a sequence with this FASTA identifier: "+read.id |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
7 print "Its sequence is "+read.seq |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
8 The properties of Read are: |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
9 name: The entire FASTA header line, SAM column 1, etc. |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
10 id: The first whitespace-delimited part of the name. |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
11 seq: The sequence. |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
12 qual: The quality scores (unless the format is FASTA). |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
13 """ |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
14 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
15 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
def getparser(filehandle, filetype='fasta'):
    """Return a reader object that iterates over the reads in an open file.

    Args:
        filehandle: The open file-like object to parse. It is handed to the reader unchanged.
        filetype: The input format: 'fasta', 'fastq', 'sam', or 'tsv'.

    Returns:
        A FastaReader, FastqReader, SamReader, or TsvReader wrapping filehandle.

    Raises:
        ValueError: If filetype is not one of the supported format names.
    """
    if filetype == 'fasta':
        return FastaReader(filehandle)
    if filetype == 'fastq':
        return FastqReader(filehandle)
    if filetype == 'sam':
        return SamReader(filehandle)
    if filetype == 'tsv':
        return TsvReader(filehandle)
    # Format the value instead of concatenating it, so that a non-string filetype (e.g. None)
    # still produces the intended ValueError rather than a TypeError from str + non-str.
    raise ValueError("Illegal argument: filetype='{0}'".format(filetype))
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
27 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
28 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
class FormatError(Exception):
    """Raised when the input does not match the format a parser expects."""

    def __init__(self, message=None):
        # With no (or an empty) message there is nothing to pass on to the base initializer.
        if not message:
            return
        Exception.__init__(self, message)
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
33 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
34 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
class Read(object):
    """A single parsed record: its name, id, sequence, and quality string."""

    def __init__(self, name='', seq='', id_='', qual=''):
        # Every field defaults to the empty string; the parsers fill them in after construction.
        self.name, self.seq = name, seq
        self.id, self.qual = id_, qual
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
41 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
42 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
class Reader(object):
    """Base class for all other parsers.

    Iterating over a reader returns whatever its parser() method returns. Each format-specific
    subclass supplies parser() as a generator over self.filehandle.
    """

    def __init__(self, filehandle):
        # The handle is stored as-is; nothing is read from it here.
        self.filehandle = filehandle

    def __iter__(self):
        return self.parser()

    def parser(self):
        """Produce the reads from self.filehandle. Must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, since the base class has no format to parse.
        """
        # Without this stub, iterating a bare Reader fails with an opaque AttributeError.
        raise NotImplementedError('Reader subclasses must implement parser()')
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
49 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
50 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
class TsvReader(Reader):
    """A parser for a simple tab-delimited format.
    Column 1: name
    Column 2: sequence
    Column 3: quality scores (optional)

    Lines with fewer than two tab-delimited columns are skipped.
    """

    def parser(self):
        """Yield one Read for each line of self.filehandle that has at least two columns."""
        for line in self.filehandle:
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 2:
                continue
            read = Read()
            read.name = fields[0]
            # A whitespace-only name is truthy but splits into an empty list, so test the split
            # result instead of the raw string before taking the first token.
            name_parts = read.name.split()
            if name_parts:
                read.id = name_parts[0]
            read.seq = fields[1]
            if len(fields) >= 3:
                read.qual = fields[2]
            yield read
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
69 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
70 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
class SamReader(Reader):
    """A simple SAM parser.
    Assumptions:
    Lines with fewer than 11 tab-delimited fields are not alignments and are skipped.
    Lines whose first field starts with "@" and is exactly 3 characters long are headers and
    are skipped as well. All other lines are alignments.
    """

    def parser(self):
        """Yield one Read per alignment line, using columns 1, 10, and 11 of the line."""
        for line in self.filehandle:
            fields = line.split('\t')
            if len(fields) < 11:
                continue
            # Skip headers.
            if fields[0].startswith('@') and len(fields[0]) == 3:
                continue
            read = Read()
            read.name = fields[0]
            # A whitespace-only name is truthy but splits into an empty list, so test the split
            # result instead of the raw string before taking the first token.
            name_parts = read.name.split()
            if name_parts:
                read.id = name_parts[0]
            read.seq = fields[9]
            # The line was not stripped before splitting, so when the quality column is the
            # last one it still carries the line ending.
            read.qual = fields[10].rstrip('\r\n')
            yield read
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
92 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
93 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
class FastaReader(Reader):
    """A simple FASTA parser that reads one sequence at a time into memory."""

    def parser(self):
        """Yield one Read per FASTA record that has a non-empty sequence.

        Blank lines are ignored. Sequence lines are stripped of surrounding whitespace and
        concatenated. Sequence lines that appear before the first ">" header are yielded as a
        read with an empty name. Records with no sequence lines are dropped.
        """
        read = Read()
        # Collect the sequence lines and join them once per record; repeatedly doing
        # `read.seq += line` copies the whole sequence on every line of a long record.
        chunks = []
        while True:
            line_raw = self.filehandle.readline()
            if not line_raw:
                break
            line = line_raw.strip()
            # Allow empty lines.
            if not line:
                continue
            if line.startswith('>'):
                # A new header closes the previous record, if it had any sequence.
                if chunks:
                    read.seq = ''.join(chunks)
                    yield read
                read = Read()
                chunks = []
                read.name = line[1:]  # remove ">"
                name_parts = read.name.split()
                if name_parts:
                    read.id = name_parts[0]
            else:
                chunks.append(line)
        # End of file: emit the final record. Falling off the end finishes the generator;
        # `raise StopIteration` here would become a RuntimeError under PEP 479.
        if chunks:
            read.seq = ''.join(chunks)
            yield read
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
118 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
119 |
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
class FastqReader(Reader):
    """A simple FASTQ parser. Can handle multi-line sequences, though."""

    def parser(self):
        """Yield one Read per FASTQ record that has a non-empty sequence.

        Blank lines are ignored. The parser moves through the states 'header', 'sequence',
        'plus', and 'quality'. Quality lines are consumed until the quality string is as long
        as the sequence; only then is the next line treated as a header.

        Raises:
            FormatError: If a line in the 'header' state does not start with "@".
        """
        read = Read()
        state = 'header'
        while True:
            line_raw = self.filehandle.readline()
            if not line_raw:
                break
            line = line_raw.strip()
            # Allow empty lines.
            if not line:
                continue
            if state == 'header':
                if not line.startswith('@'):
                    raise FormatError('line state = "header" but line does not start with "@"')
                # A new header closes the previous record, if it had any sequence.
                if read.seq:
                    yield read
                read = Read()
                read.name = line[1:]  # remove '@'
                name_parts = read.name.split()
                if name_parts:
                    read.id = name_parts[0]
                state = 'sequence'
            elif state == 'sequence':
                if line.startswith('+'):
                    state = 'plus'
                else:
                    read.seq += line
            elif state == 'plus' or state == 'quality':
                state = 'quality'
                # Take only as many quality characters as the sequence still needs.
                togo = len(read.seq) - len(read.qual)
                read.qual += line[:togo]
                # The end of the quality lines is when we have a quality string as long as the
                # sequence.
                if len(read.qual) >= len(read.seq):
                    state = 'header'
        # End of file: emit the final record. Falling off the end finishes the generator;
        # `raise StopIteration` here would become a RuntimeError under PEP 479.
        if read.seq:
            yield read
7f170cb06e2e
planemo upload commit d76a1cf04f3e4bc735d320ccccbf7aecbc193395
nick
parents:
diff
changeset
|
156 state = 'header' |