Mercurial > repos > davidmurphy > codonlogo
diff corebio/seq_io/array_io.py @ 0:c55bdc2fb9fa
Uploaded
author | davidmurphy |
---|---|
date | Thu, 27 Oct 2011 12:09:09 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python

# Copyright (c) 2005 Gavin E. Crooks <gec@threeplusone.com>
#
# This software is distributed under the MIT Open Source License.
# <http://www.opensource.org/licenses/mit-license.html>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#

"""Read and write a rectangular array of sequence data.

One sequence per line and nothing else. Each line must contain the same number
of characters. Blank lines and white space are ignored.

--- Example Array ---

--------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF
--------------------------LENLEDLF-WELDRLD------NYNDTSLVENH-
--------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY
-----------------------------------YTSDN---------YSGSGDYDSNK
-SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD
--LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD
-SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD
-EPC-RDENVHFNRIFLPTIYFIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVAD
"""

from corebio.seq import *
from corebio.utils import *

# Sample input in this format (same data as shown in the module docstring).
example = """
--------------------------LENSTSPYDYGENESD-------FSDSPPCPQDF
--------------------------LENLEDLF-WELDRLD------NYNDTSLVENH-
--------------------------MSNITDPQMWDFDDLN-------FTGMPPADEDY
-----------------------------------YTSDN---------YSGSGDYDSNK
-SL-------NFDRTFLPALYSLLFLLGLLGNGAVAAVLLSQRTALSSTDTFLLHLAVAD
--LC-PATMASFKAVFVPVAYSLIFLLGVIGNVLVLVILERHRQTRSSTETFLFHLAVAD
-SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD
-EPC-RDENVHFNRIFLPTIYFIIFLTGIVGNGLVILVMGYQKKLRSMTDKYRLHLSVAD
"""

# Names this format is known by, and the file extensions associated with it
# (none).  NOTE(review): presumably consumed by the seq_io format registry --
# confirm against the package's __init__.
names = ("array", 'flatfile')
extensions = ()


def read(fin, alphabet=None):
    """Read a file of raw sequence alignment data.

    Args:
        fin -- A stream or file to read
        alphabet -- The expected alphabet of the data, if given
    Returns:
        SeqList -- A list of sequences
    Raises:
        ValueError -- If the file is unparsable
    """
    # Drain the generator eagerly so parse errors surface here, not later.
    return SeqList(list(iterseq(fin, alphabet)))


def iterseq(fin, alphabet=None):
    """Read sequence data one line at a time and yield each sequence.

    This is a generator: parsing (and therefore any ValueError) happens
    lazily, as the caller iterates.

    Args:
        fin -- A stream or file to read (any iterable of text lines)
        alphabet -- The expected alphabet of the data, if given
    Yields:
        Seq -- One alphabetic sequence at a time.
    Raises:
        ValueError -- If the file is unparsable: a line starts with '>',
                      contains characters outside the alphabet, or differs
                      in length from the preceding sequence lines.
    """
    alphabet = Alphabet(alphabet)
    line_length = 0  # Length of the previous sequence line; 0 until one is seen.

    for index, raw_line in enumerate(fin):
        # enumerate() counts from zero; report human-friendly 1-based lines.
        linenum = index + 1

        line = raw_line.strip()
        if not line:
            # Blank line.  Testing the stripped text (rather than isspace())
            # also skips a truly empty string, which would otherwise make the
            # line[0] lookup below raise IndexError.
            continue

        if line[0] == '>':  # Probably a FASTA file. Fail.
            raise ValueError(
                "Parse Error on input line: %d " % (linenum))

        # White space inside a line is ignored by this format.
        line = remove_whitespace(line)

        if not alphabet.alphabetic(line):
            raise ValueError(
                "Character on line: %d not in alphabet: %s : %s" %
                (linenum, alphabet, line))

        # The array must be rectangular: every sequence has the same length.
        if line_length and line_length != len(line):
            raise ValueError("Line %d has a incommensurate length." % linenum)
        line_length = len(line)

        yield Seq(line, alphabet)


def write(afile, seqs):
    """Write raw sequence data, one line per sequence.

    arguments:
        afile -- A writable stream.
        seqs -- A list of Seq's
    """
    for s in seqs:
        writeseq(afile, s)


def writeseq(afile, seq):
    """Write a single sequence in raw format, followed by a newline.

    arguments:
        afile -- A writable stream.
        seq -- A Seq instance
    """
    # Equivalent to the Python 2 statement "print >>afile, seq", but written
    # as a plain write() so it also works under Python 3, where that statement
    # parses as an expression and fails at run time.
    afile.write(str(seq) + "\n")