diff Population.py @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents 2c498d40ecde
children 8997f2ca8c7a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Population.py	Wed Sep 12 17:10:26 2012 -0400
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+
+from OrderedDict import OrderedDict
+
+class Individual(object):
+    __slots__ = ['_column', '_name', '_alias']
+
+    def __init__(self, column, name, alias=None):
+        self._column = column
+        self._name = name
+        self._alias = alias
+
+    @property
+    def column(self):
+        return self._column
+
+    @property
+    def name(self):
+        return self._name if self._alias is None else self._alias
+
+    @property
+    def alias(self):
+        return self._alias
+
+    @alias.setter
+    def alias(self, alias):
+        self._alias = alias
+
+    @property
+    def real_name(self):
+        return self._name
+
+    def __eq__(self, other):
+        return self._column == other._column and self._name == other._name
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __repr__(self):
+        return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias)
+
+
+class Population(object):
+    def __init__(self, name=None):
+        self._columns = OrderedDict()
+        self._name = name
+
+    @property
+    def name(self):
+        return self._name
+
+    @name.setter
+    def name(self, name):
+        self._name = name
+
+    def add_individual(self, individual, alias=None):
+        if individual.column not in self._columns:
+            self._columns[individual.column] = individual
+        elif self._columns[individual.column] == individual:
+            # should should this be an error?
+            # should we replace the alias using this entry?
+            pass
+        else:
+            raise 'Duplicate column: {0}'.format(individual)
+
+    def is_superset(self, other):
+        for column, other_individual in other._columns.items():
+            our_individual = self._columns.get(column)
+            if our_individual is None or our_individual != other_individual:
+                return False
+        return True
+
+    def is_disjoint(self, other):
+        for column, our_individual in self._columns.items():
+            other_individual = other._columns.get(column)
+            if other_individual is not None and other_individual == our_individual:
+                return False
+        return True
+
+    def column_list(self):
+        return self._columns.keys()
+
+    def individual_with_column(self, column):
+        if column in self._columns:
+            return self._columns[column]
+        return None
+
+    def tag_list(self, delimiter=':'):
+        entries = []
+        for column, individual in self._columns.items():
+            entry = '{0}{1}{2}'.format(column, delimiter, individual.name)
+            entries.append(entry)
+        return entries
+
+    def to_string(self, delimiter=':', separator=' ', replace_names_with=None):
+        entries = []
+        for column, individual in self._columns.items():
+            value = individual.name
+            if replace_names_with is not None:
+                value = replace_names_with
+            entry = '{0}{1}{2}'.format(column, delimiter, value)
+            entries.append(entry)
+        return separator.join(entries)
+
+    def __str__(self):
+        return self.to_string()
+
+    def from_population_file(self, filename):
+        with open(filename) as fh:
+            for line in fh:
+                line = line.rstrip('\r\n')
+                column, name, alias = line.split('\t')
+                alias = alias.strip()
+                individual = Individual(column, name)
+                if alias:
+                    individual.alias = alias
+                self.add_individual(individual)
+
+    def from_tag_list(self, tag_list):
+        for tag in tag_list:
+            column, name = tag.split(':')
+            individual = Individual(column, name)
+            self.add_individual(individual)
+
+    def individual_names(self):
+        for column, individual in self._columns.items():
+            yield individual.name
+