annotate corebio/seq_io/nexus_io.py @ 15:981eb8c3a756 default tip

Uploaded
author davidmurphy
date Sat, 31 Mar 2012 16:07:07 -0400
parents c55bdc2fb9fa
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
1 #!/usr/bin/env python
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
2
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
3 # Copyright 2005 Gavin E. Crooks <gec@threeplusone.com>
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
4 # Copyright 2005-2006 The Regents of the University of California.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
5 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
6 # This software is distributed under the MIT Open Source License.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
7 # <http://www.opensource.org/licenses/mit-license.html>
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
8 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
9 # Permission is hereby granted, free of charge, to any person obtaining a
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
10 # copy of this software and associated documentation files (the "Software"),
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
11 # to deal in the Software without restriction, including without limitation
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
12 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
13 # and/or sell copies of the Software, and to permit persons to whom the
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
14 # Software is furnished to do so, subject to the following conditions:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
15 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
16 # The above copyright notice and this permission notice shall be included
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
17 # in all copies or substantial portions of the Software.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
18 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
20 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
21 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
22 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
23 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
24 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
25 # THE SOFTWARE.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
26 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
27
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
28 """Read the sequence data from a nexus file.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
29
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
30 This IO code only gives read access to the sequence data.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
31
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
32 Reference:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
33 'NEXUS: An extensible file format for systematic information'
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
34 Maddison, Swofford, Maddison. 1997. Syst. Biol. 46(4):590-621
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
35 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
36
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
37 from corebio.seq import Seq, SeqList, Alphabet
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
38 from corebio.seq_io._nexus import Nexus, safename
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
39
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
40
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
41
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
42
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
43
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
# Identifiers for the format handled by this module.
# NOTE(review): presumably consulted by the seq_io package when selecting a
# reader by format name -- confirm against the package's registry code.
names = (
    'nexus',
    'paup',
)

# File name extensions associated with the format (listed without a dot).
extensions = (
    'nex',
    'nexus',
    'paup',
    'nxs',
)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
46
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
def iterseq(fin, alphabet=None):
    """Return an iterator over the sequences found in a nexus file.

    This is the default, non-streaming implementation: read() builds the
    complete sequence collection first, and the result is then wrapped in
    an iterator. `fin` and `alphabet` are passed to read() unchanged.
    """
    sequences = read(fin, alphabet)
    return iter(sequences)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
51
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
52
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
def read(fin, alphabet=None):
    """Build a SeqList from the character matrix of a nexus file.

    `fin` is handed to the Nexus parser. Every taxon label the parser
    reports becomes one Seq, named with the safename() form of that label.

    If `alphabet` is None, each sequence takes the alphabet attached to its
    own matrix row; otherwise the supplied alphabet is used for all of them.

    Raises ValueError("Cannot parse file") if no sequences were produced.
    """
    nexus = Nexus(fin)

    def make_seq(label):
        # Compute the safe name before touching the matrix, then look up
        # the row of characters recorded for this taxon.
        seq_name = safename(label)
        row = nexus.matrix[label]
        # Only consult the row's own alphabet when the caller gave none.
        chosen = row.alphabet if alphabet is None else alphabet
        return Seq(row, name=seq_name, alphabet=chosen)

    records = [make_seq(label) for label in nexus.taxlabels]

    if not records:
        # No taxa at all: treat the input as unparseable.
        raise ValueError("Cannot parse file")

    return SeqList(records)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
72
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
73
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
74
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
75
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
76