Mercurial > repos > davidmurphy > codonlogo
comparison corebio/seq_io/array_io.py @ 0:c55bdc2fb9fa
Uploaded
author | davidmurphy |
---|---|
date | Thu, 27 Oct 2011 12:09:09 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c55bdc2fb9fa |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com> | |
4 # | |
5 # This software is distributed under the MIT Open Source License. | |
6 # <http://www.opensource.org/licenses/mit-license.html> | |
7 # | |
8 # Permission is hereby granted, free of charge, to any person obtaining a | |
9 # copy of this software and associated documentation files (the "Software"), | |
10 # to deal in the Software without restriction, including without limitation | |
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
12 # and/or sell copies of the Software, and to permit persons to whom the | |
13 # Software is furnished to do so, subject to the following conditions: | |
14 # | |
15 # The above copyright notice and this permission notice shall be included | |
16 # in all copies or substantial portions of the Software. | |
17 # | |
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
24 # THE SOFTWARE. | |
25 # | |
26 | |
27 """Read and write a rectangular array of sequence data. | |
28 | |
29 One sequence per line and nothing else. Each line must contain the same number | |
30 of characters. Blank lines and white space are ignored. | |
31 | |
32 --- Example Array --- | |
33 | |
34 --------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF | |
35 --------------------------LENLEDLF-WELDRLD------NYNDTSLVENH- | |
36 --------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY | |
37 -----------------------------------YTSDN---------YSGSGDYDSNK | |
38 -SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD | |
39 --LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD | |
40 -SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD | |
41 -EPC-RDENVHFNRIFLPTIYFIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVAD | |
42 """ | |
43 | |
44 from corebio.seq import * | |
45 from corebio.utils import * | |
46 | |
# Sample input in this module's format: one gapped sequence per line, all
# lines the same length. The same alignment is shown in the module docstring.
example = """
--------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF
--------------------------LENLEDLF-WELDRLD------NYNDTSLVENH-
--------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY
-----------------------------------YTSDN---------YSGSGDYDSNK
-SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD
--LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD
-SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD
-EPC-RDENVHFNRIFLPTIYFIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVAD
"""

# Names identifying this format.
# NOTE(review): presumably used by the seq_io package to look up this
# reader/writer by name -- confirm against the caller.
names = ("array",'flatfile')
# No file extensions are declared for this format.
extensions = ()
60 | |
def read(fin, alphabet=None):
    """Parse a complete file of raw sequence alignment data.

    Every sequence produced by iterseq() is collected before the result
    is built, so the whole input is consumed by this call.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Returns:
        SeqList -- A list of sequences
    Raises:
        ValueError -- If the file is unparsable
    """
    return SeqList(list(iterseq(fin, alphabet)))
74 | |
75 | |
def iterseq(fin, alphabet=None):
    """Read raw sequence data line by line, yielding one sequence per line.

    Blank lines (including empty strings, e.g. when `fin` is a list produced
    by str.splitlines()) are skipped, and all whitespace inside a line is
    removed before validation.

    Args:
        fin -- A stream or file to read (iterated line by line)
        alphabet -- The expected alphabet of the data, if given
    Yields:
        Seq -- One alphabetic sequence at a time.
    Raises:
        ValueError -- If the file is unparsable: a line starts with '>',
            a line contains characters outside the alphabet, or a line's
            length differs from that of the previous sequence line.
    """
    alphabet = Alphabet(alphabet)
    line_length = 0

    # linenum is zero-based, exactly as produced by enumerate(); the error
    # messages below report that zero-based index.
    for linenum, line in enumerate(fin):
        line = line.strip()
        # Strip first, then test for emptiness: this skips whitespace-only
        # lines AND empty strings. ''.isspace() is False, so an isspace()
        # guard alone lets '' through to the '>' check.
        if not line:
            continue

        if line.startswith('>'):  # Probably a fasta file. Fail.
            raise ValueError(
                "Parse Error on input line: %d " % (linenum))

        line = remove_whitespace(line)

        if not alphabet.alphabetic(line):
            raise ValueError(
                "Character on line: %d not in alphabet: %s : %s" %
                (linenum, alphabet, line))

        # The array must be rectangular: every sequence line is compared
        # against the length of the sequence line before it.
        if line_length and line_length != len(line):
            raise ValueError("Line %d has a incommensurate length." % linenum)
        line_length = len(line)

        yield Seq(line, alphabet)
111 | |
112 | |
def write(afile, seqs):
    """Emit each sequence in raw format, one sequence per output line.

    Each item of `seqs` is handed to writeseq() in iteration order.

    arguments:
        afile -- A writable stream.
        seqs  -- A list of Seq's
    """
    for record in seqs:
        writeseq(afile, record)
122 | |
123 | |
def writeseq(afile, seq):
    """Write a single sequence in raw format, followed by a newline.

    arguments:
        afile -- A writable stream.
        seq   -- A Seq instance
    """
    # Use the stream's write() rather than the Python 2 only
    # `print >>afile, seq` statement, which raises TypeError under Python 3.
    # The emitted text is the same: str(seq) followed by a newline.
    afile.write("%s\n" % (seq,))
132 | |
133 |