Mercurial > repos > davidmurphy > codonlogo
comparison corebio/seq_io/plain_io.py @ 7:8d676bbd1f2d
Uploaded
| author | davidmurphy |
|---|---|
| date | Mon, 16 Jan 2012 07:03:36 -0500 |
| parents | c55bdc2fb9fa |
| children |
comparison
equal
deleted
inserted
replaced
| 6:4a4aca3d57c9 | 7:8d676bbd1f2d |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 # Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com> | |
| 4 # | |
| 5 # This software is distributed under the MIT Open Source License. | |
| 6 # <http://www.opensource.org/licenses/mit-license.html> | |
| 7 # | |
| 8 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 9 # copy of this software and associated documentation files (the "Software"), | |
| 10 # to deal in the Software without restriction, including without limitation | |
| 11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
| 12 # and/or sell copies of the Software, and to permit persons to whom the | |
| 13 # Software is furnished to do so, subject to the following conditions: | |
| 14 # | |
| 15 # The above copyright notice and this permission notice shall be included | |
| 16 # in all copies or substantial portions of the Software. | |
| 17 # | |
| 18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
| 24 # THE SOFTWARE. | |
| 25 # | |
| 26 | |
| 27 """Read and write raw, unformatted sequence data. The whole file is read | |
| 28 in as a sequence. Whitespace is removed. | |
| 29 | |
| 30 | |
| 31 --- Example Plain/Raw/Text File --- | |
| 32 | |
| 33 --------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF | |
| 34 --------------------------LENLEDLF-WELDRLD------NYNDTSLVENH- | |
| 35 --------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY | |
| 36 -----------------------------------YTSDN---------YSGSGDYDSNK | |
| 37 -SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD | |
| 38 --LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD | |
| 39 -SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD | |
| 40 -EPC-RDENVHFNRIFLPTIYFIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVAD | |
| 41 """ | |
| 42 | |
| 43 from corebio.seq import * | |
| 44 from corebio.utils import remove_whitespace | |
| 45 | |
# Sample input in plain/raw format. It mirrors the example shown in the
# module docstring, except that the final line is shorter.
example = """
--------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF
--------------------------LENLEDLF-WELDRLD------NYNDTSLVENH-
--------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY
-----------------------------------YTSDN---------YSGSGDYDSNK
-SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD
--LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD
-SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD
-EPC-RDENVHFNRIFLPTIYFIIFLTGIV
"""

# Names identifying this format.
# NOTE(review): presumably used by the seq_io package to look the format up
# by name -- confirm against the caller.
names = ("plain","raw")
# File extensions associated with this format.
# NOTE(review): presumably used for format detection by file name -- confirm.
extensions = ('txt', )
| 59 | |
def read(fin, alphabet=None):
    """Parse a file of raw sequence data into a SeqList.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Returns:
        SeqList -- A list of sequences
    Raises:
        ValueError -- If the file is unparsable
    """
    # Drain the iterseq() generator and wrap everything it produced.
    return SeqList(list(iterseq(fin, alphabet)))
| 73 | |
| 74 | |
def iterseq(fin, alphabet=None):
    """Read the sequence data and yield one (and only one) sequence.

    Every non-blank input line has its whitespace removed, and the lines
    are concatenated into a single sequence.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Yields:
        Seq -- A single sequence built from the whole input.
    Raises:
        ValueError -- If the file is unparsable (a line starts with '>',
                      or contains characters outside the alphabet).
    """
    alphabet = Alphabet(alphabet)
    lines = []
    # Count from 1 so the line numbers reported in errors match what a
    # text editor shows.
    for linenum, line in enumerate(fin, 1):
        line = line.strip()
        if not line:
            # Blank line. This also covers the empty string, for which
            # str.isspace() is False and line[0] would raise IndexError.
            continue

        if line.startswith('>'):  # Probably a fasta file. Fail.
            raise ValueError(
                "Parse Error on input line: %d " % (linenum))
        line = remove_whitespace(line)

        if not alphabet.alphabetic(line):
            raise ValueError(
                "Character on line: %d not in alphabet: %s : %s" %
                (linenum, alphabet, line))
        lines.append(line)

    yield Seq(''.join(lines), alphabet)
| 106 | |
| 107 | |
| 108 | |
def write(afile, seqs):
    """Write each sequence to the stream in raw format, one per line.

    arguments:
        afile -- A writable stream.
        seqs  -- A list of Seq's
    """
    # Delegate the per-sequence formatting to writeseq().
    for record in seqs:
        writeseq(afile, record)
| 118 | |
| 119 | |
def writeseq(afile, seq):
    """Write a single sequence in raw format, followed by a newline.

    arguments:
        afile -- A writable stream.
        seq   -- A Seq instance
    """
    # Use explicit write() calls instead of the Python 2 only statement
    # `print >>afile, seq`: under Python 3 that form parses as a shift
    # expression and fails at runtime. The output is the same: the string
    # form of the sequence, then a newline.
    afile.write(str(seq))
    afile.write("\n")
| 128 | |
| 129 |
