Mercurial > repos > davidmurphy > codonlogo
comparison corebio/seq_io/plain_io.py @ 7:8d676bbd1f2d
Uploaded
author | davidmurphy |
---|---|
date | Mon, 16 Jan 2012 07:03:36 -0500 |
parents | c55bdc2fb9fa |
children |
comparison
equal
deleted
inserted
replaced
6:4a4aca3d57c9 | 7:8d676bbd1f2d |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com> | |
4 # | |
5 # This software is distributed under the MIT Open Source License. | |
6 # <http://www.opensource.org/licenses/mit-license.html> | |
7 # | |
8 # Permission is hereby granted, free of charge, to any person obtaining a | |
9 # copy of this software and associated documentation files (the "Software"), | |
10 # to deal in the Software without restriction, including without limitation | |
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
12 # and/or sell copies of the Software, and to permit persons to whom the | |
13 # Software is furnished to do so, subject to the following conditions: | |
14 # | |
15 # The above copyright notice and this permission notice shall be included | |
16 # in all copies or substantial portions of the Software. | |
17 # | |
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
24 # THE SOFTWARE. | |
25 # | |
26 | |
27 """Read and write raw, unformatted sequence data. The whole file is read | |
28 in as a sequence. Whitespace is removed. | |
29 | |
30 | |
31 --- Example Plain/Raw/Text File --- | |
32 | |
33 --------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF | |
34 --------------------------LENLEDLF-WELDRLD------NYNDTSLVENH- | |
35 --------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY | |
36 -----------------------------------YTSDN---------YSGSGDYDSNK | |
37 -SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD | |
38 --LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD | |
39 -SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD | |
40 -EPC-RDENVHFNRIFLPTIYFIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVAD | |
41 """ | |
42 | |
43 from corebio.seq import * | |
44 from corebio.utils import remove_whitespace | |
45 | |
# Sample data in the plain/raw format handled by this module. Note that the
# final row is shorter than the corresponding row of the example shown in the
# module docstring.
example = """
--------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF
--------------------------LENLEDLF-WELDRLD------NYNDTSLVENH-
--------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY
-----------------------------------YTSDN---------YSGSGDYDSNK
-SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD
--LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD
-SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD
-EPC-RDENVHFNRIFLPTIYFIIFLTGIV
"""

# Names by which this format is identified.
# NOTE(review): presumably looked up by the seq_io format registry -- confirm.
names = ("plain","raw")
# File extensions associated with this format (given without a leading dot).
extensions = ('txt', )
59 | |
def read(fin, alphabet=None):
    """Parse a stream of raw sequence data into a SeqList.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Returns:
        SeqList -- Holds every sequence produced by iterseq()
    Raises:
        ValueError -- If the file is unparsable
    """
    # Drain the generator into a plain list before wrapping it.
    return SeqList(list(iterseq(fin, alphabet)))
73 | |
74 | |
def iterseq(fin, alphabet=None):
    """Read raw sequence data and yield one (and only one) sequence.

    Blank lines are skipped; every other line has its whitespace removed and
    the pieces are concatenated into a single sequence.

    Args:
        fin -- A stream or file to read (any iterable of text lines)
        alphabet -- The expected alphabet of the data, if given
    Yields:
        Seq -- A single sequence built from the whole input.
    Raises:
        ValueError -- If a line starts with '>' or contains characters
                      that are not in the alphabet.
    """
    alphabet = Alphabet(alphabet)
    lines = []
    # linenum is the zero-based index produced by enumerate(); it is what
    # appears in the error messages below.
    for linenum, line in enumerate(fin):
        line = line.strip()
        if not line:
            # Blank line. Testing the stripped text also covers the empty
            # string, for which str.isspace() is False and indexing the
            # first character would raise IndexError.
            continue

        if line.startswith('>'):  # Probably a fasta file. Fail.
            raise ValueError(
                "Parse Error on input line: %d " % (linenum))
        line = remove_whitespace(line)

        if not alphabet.alphabetic(line):
            raise ValueError(
                "Character on line: %d not in alphabet: %s : %s" %
                (linenum, alphabet, line))
        lines.append(line)

    yield Seq(''.join(lines), alphabet)
106 | |
107 | |
108 | |
def write(afile, seqs):
    """Emit each sequence of a collection in raw format, one per line.

    arguments:
        afile -- A writable stream.
        seqs -- A list of Seq's
    """
    # Delegate the formatting of each entry to writeseq().
    for sequence in seqs:
        writeseq(afile, sequence)
118 | |
119 | |
def writeseq(afile, seq):
    """Write a single sequence in raw format, followed by a newline.

    arguments:
        afile -- A writable stream.
        seq -- A Seq instance
    """
    # An explicit write() replaces the Python 2-only "print >>stream"
    # statement, which raises TypeError when run under Python 3. The text
    # written is the same: str(seq) plus a trailing newline.
    afile.write(str(seq) + "\n")
128 | |
129 |