0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 # Copyright 2005 Gavin E. Crooks <gec@threeplusone.com>
|
|
4 # Copyright 2005-2006 The Regents of the University of California.
|
|
5 #
|
|
6 # This software is distributed under the MIT Open Source License.
|
|
7 # <http://www.opensource.org/licenses/mit-license.html>
|
|
8 #
|
|
9 # Permission is hereby granted, free of charge, to any person obtaining a
|
|
10 # copy of this software and associated documentation files (the "Software"),
|
|
11 # to deal in the Software without restriction, including without limitation
|
|
12 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
13 # and/or sell copies of the Software, and to permit persons to whom the
|
|
14 # Software is furnished to do so, subject to the following conditions:
|
|
15 #
|
|
16 # The above copyright notice and this permission notice shall be included
|
|
17 # in all copies or substantial portions of the Software.
|
|
18 #
|
|
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
25 # THE SOFTWARE.
|
|
26 #
|
|
27
|
|
28 """Read the sequence data from a nexus file.
|
|
29
|
|
30 This IO code only gives read access to the sequence data.
|
|
31
|
|
32 Reference:
|
|
33 'NEXUS: An extensible file format for systematic information'
|
|
34 Maddison, Swofford, Maddison. 1997. Syst. Biol. 46(4):590-621
|
|
35 """
|
|
36
|
|
37 from corebio.seq import Seq, SeqList, Alphabet
|
|
38 from corebio.seq_io._nexus import Nexus, safename
|
|
39
|
|
40
|
|
41
|
|
42
|
|
43
|
|
# Format names under which this reader is known.
# NOTE(review): presumably looked up by the enclosing seq_io package when
# selecting a parser -- confirm against the caller.
names = ("nexus", "paup")

# File name extensions (without the leading dot) associated with this format.
extensions = ("nex", "nexus", "paup", "nxs")
|
|
46
|
|
def iterseq(fin, alphabet=None):
    """Return an iterator over the sequences in a nexus file.

    This is not a streaming parser: the input is parsed in full by
    ``read`` and the resulting collection is then wrapped in an iterator.

    Args:
        fin: The nexus input, forwarded unchanged to ``read``.
        alphabet: Optional alphabet, forwarded unchanged to ``read``.

    Returns:
        An iterator over the sequences produced by ``read``.

    Raises:
        ValueError: Propagated from ``read`` when no sequences are found.
    """
    sequences = read(fin, alphabet)
    return iter(sequences)
|
|
51
|
|
52
|
|
def read(fin, alphabet=None):
    """Extract sequence data from a nexus file.

    Args:
        fin: The nexus input. It is handed straight to ``Nexus``
            (presumably an open file-like object -- confirm against
            ``Nexus``).
        alphabet: Alphabet to attach to every sequence. When None, each
            sequence uses the ``alphabet`` attribute of its own matrix
            entry instead.

    Returns:
        A ``SeqList`` holding one ``Seq`` per taxon label, in the order
        given by the parsed file's ``taxlabels``.

    Raises:
        ValueError: If the parsed file yields no sequences at all.
    """
    nexus = Nexus(fin)

    def to_seq(taxon):
        # Build one named Seq from the matrix entry for this taxon.
        label = safename(taxon)
        row = nexus.matrix[taxon]
        chosen = row.alphabet if alphabet is None else alphabet
        return Seq(row, name=label, alphabet=chosen)

    records = [to_seq(taxon) for taxon in nexus.taxlabels]

    if not records:
        # No taxon labels means no sequence data could be recovered.
        raise ValueError("Cannot parse file")

    return SeqList(records)
|
|
72
|
|
73
|
|
74
|
|
75
|
|
76
|