Mercurial > repos > davidmurphy > codonlogo
comparison corebio/seq_io/array_io.py @ 0:c55bdc2fb9fa
Uploaded
| author | davidmurphy | 
|---|---|
| date | Thu, 27 Oct 2011 12:09:09 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:c55bdc2fb9fa | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com> | |
| 4 # | |
| 5 # This software is distributed under the MIT Open Source License. | |
| 6 # <http://www.opensource.org/licenses/mit-license.html> | |
| 7 # | |
| 8 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 9 # copy of this software and associated documentation files (the "Software"), | |
| 10 # to deal in the Software without restriction, including without limitation | |
| 11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
| 12 # and/or sell copies of the Software, and to permit persons to whom the | |
| 13 # Software is furnished to do so, subject to the following conditions: | |
| 14 # | |
| 15 # The above copyright notice and this permission notice shall be included | |
| 16 # in all copies or substantial portions of the Software. | |
| 17 # | |
| 18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 24 # THE SOFTWARE. | |
| 25 # | |
| 26 | |
| 27 """Read and write a rectangular array of sequence data. | |
| 28 | |
| 29 One sequence per line and nothing else. Each line must contain the same number | |
| 30 of characters. Blank lines and white space are ignored. | |
| 31 | |
| 32 --- Example Array --- | |
| 33 | |
| 34 --------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF | |
| 35 --------------------------LENLEDLF-WELDRLD------NYNDTSLVENH- | |
| 36 --------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY | |
| 37 -----------------------------------YTSDN---------YSGSGDYDSNK | |
| 38 -SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD | |
| 39 --LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD | |
| 40 -SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD | |
| 41 -EPC-RDENVHFNRIFLPTIYFIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVAD | |
| 42 """ | |
| 43 | |
| 44 from corebio.seq import * | |
| 45 from corebio.utils import * | |
| 46 | |
# A sample alignment in array format: one sequence per line, every line the
# same length, with '-' marking gap positions.
example = """
--------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF
--------------------------LENLEDLF-WELDRLD------NYNDTSLVENH-
--------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY
-----------------------------------YTSDN---------YSGSGDYDSNK
-SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD
--LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD
-SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD
-EPC-RDENVHFNRIFLPTIYFIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVAD
"""

# Names by which this format is known.
names = ("array", "flatfile")
# No file extensions are associated with this format.
extensions = ()
| 60 | |
def read(fin, alphabet=None):
    """Read a file of raw sequence alignment data.

    Every sequence produced by iterseq() is collected and the result is
    returned wrapped in a SeqList.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Returns:
        SeqList -- A list of sequences
    Raises:
        ValueError -- If the file is unparsable
    """
    return SeqList(list(iterseq(fin, alphabet)))
| 74 | |
| 75 | |
def iterseq(fin, alphabet=None):
    """Parse raw alignment data, yielding one sequence per non-blank line.

    Args:
        fin -- A stream, file, or other iterable of text lines
        alphabet -- The expected alphabet of the data, if given
    Yields:
        Seq -- One alphabetic sequence at a time.
    Raises:
        ValueError -- If a line starts with '>' (probably FASTA data),
            contains a character outside the alphabet, or has a different
            length from the previous sequence. The line numbers reported
            in the messages are zero-based.
    """
    alphabet = Alphabet(alphabet)
    line_length = 0

    for linenum, line in enumerate(fin):
        line = line.strip()
        if not line:
            # Blank line. Testing the stripped text also skips empty
            # strings (e.g. from str.splitlines()), for which
            # str.isspace() is False and indexing line[0] would raise
            # IndexError.
            continue

        if line.startswith('>'):  # Probably a fasta file. Fail.
            raise ValueError(
                "Parse Error on input line: %d " % (linenum))

        # Interior white space is ignored as well.
        line = remove_whitespace(line)

        if not alphabet.alphabetic(line):
            raise ValueError(
                "Character on line: %d not in alphabet: %s : %s" %
                (linenum, alphabet, line))

        # The array must be rectangular: each sequence has to match the
        # length of the one before it.
        if line_length and line_length != len(line):
            raise ValueError("Line %d has a incommensurate length." % linenum)
        line_length = len(line)

        yield Seq(line, alphabet)
| 111 | |
| 112 | |
def write(afile, seqs):
    """Write raw sequence data, one sequence per line.

    Each sequence is passed to writeseq() in iteration order.

    arguments:
        afile -- A writable stream.
        seqs -- A list of Seq's
    """
    for sequence in seqs:
        writeseq(afile, sequence)
| 122 | |
| 123 | |
def writeseq(afile, seq):
    """Write a single sequence in raw format, followed by a newline.

    arguments:
        afile -- A writable stream.
        seq -- A Seq instance
    """
    # Use the stream's write() rather than the Python 2 only
    # "print >>afile, seq" statement, which raises TypeError under
    # Python 3. The output is the same: str(seq) plus a newline.
    afile.write(str(seq) + "\n")
| 132 | |
| 133 | 
