# HG changeset patch
# User miller-lab
# Date 1348846556 14400
# Node ID 8ae67e9fb6ffce9dc7e5721e2d6b276d34b47296
# Parent  fdb4240fb5654872ff7d4364745a58e920371a44
Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]

diff -r fdb4240fb565 -r 8ae67e9fb6ff BeautifulSoup.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BeautifulSoup.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,2014 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup parses a (possibly invalid) XML or HTML document into a
+tree representation. It provides methods and Pythonic idioms that make
+it easy to navigate, search, and modify the tree.
+
+A well-formed XML/HTML document yields a well-formed data
+structure. An ill-formed XML/HTML document yields a correspondingly
+ill-formed data structure. If your document is only locally
+well-formed, you can use this library to find and process the
+well-formed part of it.
+
+Beautiful Soup works with Python 2.2 and up. It has no external
+dependencies, but you'll have more success at converting data to UTF-8
+if you also install these three packages:
+
+* chardet, for auto-detecting character encodings
+  http://chardet.feedparser.org/
+* cjkcodecs and iconv_codec, which add more encodings to the ones supported
+  by stock Python.
+  http://cjkpython.i18n.org/
+
+Beautiful Soup defines classes for two main parsing strategies:
+
+ * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
+   language that kind of looks like XML.
+
+ * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
+   or invalid. This class has web browser-like heuristics for
+   obtaining a sensible parse tree in the face of common HTML errors.
+
+Beautiful Soup also defines a class (UnicodeDammit) for autodetecting
+the encoding of an HTML or XML document, and converting it to
+Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser.
+
+For more than you ever wanted to know about Beautiful Soup, see the
+documentation:
+http://www.crummy.com/software/BeautifulSoup/documentation.html
+
+Here, have some legalese:
+
+Copyright (c) 2004-2010, Leonard Richardson
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following
+    disclaimer in the documentation and/or other materials provided
+    with the distribution.
+
+  * Neither the name of the the Beautiful Soup Consortium and All
+    Night Kosher Bakery nor the names of its contributors may be
+    used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
+
+"""
+from __future__ import generators
+
+__author__ = "Leonard Richardson (leonardr@segfault.org)"
+__version__ = "3.2.0"
+__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson"
+__license__ = "New-style BSD"
+
+from sgmllib import SGMLParser, SGMLParseError
+import codecs
+import markupbase
+import types
+import re
+import sgmllib
+try:
+  from htmlentitydefs import name2codepoint
+except ImportError:
+  name2codepoint = {}
+try:
+    set
+except NameError:
+    from sets import Set as set
+
+#These hacks make Beautiful Soup able to parse XML with namespaces
+sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
+markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
+
+DEFAULT_OUTPUT_ENCODING = "utf-8"
+
+def _match_css_class(str):
+    """Build a RE to match the given CSS class, taken literally."""
+    return re.compile(r"(^|.*\s)%s($|\s)" % re.escape(str))
+
+# First, the classes that represent markup elements.
+
+class PageElement(object):
+    """Contains the navigational information for some part of the page
+    (either a tag or a piece of text)"""
+
+    def setup(self, parent=None, previous=None):
+        """Sets up the initial relations between this element and
+        other elements."""
+        self.parent = parent
+        self.previous = previous
+        self.next = None
+        self.previousSibling = None
+        self.nextSibling = None
+        if self.parent and self.parent.contents:
+            self.previousSibling = self.parent.contents[-1]
+            self.previousSibling.nextSibling = self
+
+    def replaceWith(self, replaceWith):
+        "Swaps this element out of its parent for replaceWith."
+        oldParent = self.parent
+        myIndex = self.parent.index(self)
+        if hasattr(replaceWith, "parent")\
+                  and replaceWith.parent is self.parent:
+            # We're replacing this element with one of its siblings.
+            index = replaceWith.parent.index(replaceWith)
+            if index < myIndex:
+                # It comes before this element (possibly at index 0),
+                # so once insert() extracts it our slot is one lower.
+                myIndex = myIndex - 1
+        self.extract()
+        oldParent.insert(myIndex, replaceWith)
+
+    def replaceWithChildren(self):
+        myParent = self.parent
+        myIndex = self.parent.index(self)
+        self.extract()
+        reversedChildren = list(self.contents)
+        reversedChildren.reverse()
+        for child in reversedChildren:
+            myParent.insert(myIndex, child)
+
+    def extract(self):
+        """Destructively rips this element out of the tree."""
+        if self.parent:
+            try:
+                del self.parent.contents[self.parent.index(self)]
+            except ValueError:
+                pass
+
+        #Find the two elements that would be next to each other if
+        #this element (and any children) hadn't been parsed. Connect
+        #the two.
+        lastChild = self._lastRecursiveChild()
+        nextElement = lastChild.next
+
+        if self.previous:
+            self.previous.next = nextElement
+        if nextElement:
+            nextElement.previous = self.previous
+        self.previous = None
+        lastChild.next = None
+
+        self.parent = None
+        if self.previousSibling:
+            self.previousSibling.nextSibling = self.nextSibling
+        if self.nextSibling:
+            self.nextSibling.previousSibling = self.previousSibling
+        self.previousSibling = self.nextSibling = None
+        return self
+
+    def _lastRecursiveChild(self):
+        "Follows the last entry of each 'contents' list down to the end."
+        node = self
+        while getattr(node, 'contents', None):
+            node = node.contents[-1]
+        return node
+
+    def insert(self, position, newChild):
+        """Inserts newChild into this object's contents at the given
+        position, detaching it from its current parent first. A
+        position past the end of the contents appends."""
+        if isinstance(newChild, basestring) \
+            and not isinstance(newChild, NavigableString):
+            newChild = NavigableString(newChild)
+
+        if hasattr(newChild, 'parent') and newChild.parent is not None:
+            # The element is already part of a tree, possibly as one
+            # of this object's own children. Detach it before
+            # touching 'position': the index is interpreted against
+            # the contents as they are once the element is gone.
+            newChild.extract()
+        # Clamp only after the extraction above, which may have
+        # shortened self.contents; clamping earlier let append()ing an
+        # existing child index past the end of the list.
+        position = min(position, len(self.contents))
+
+        newChild.parent = self
+        previousChild = None
+        if position == 0:
+            newChild.previousSibling = None
+            newChild.previous = self
+        else:
+            previousChild = self.contents[position-1]
+            newChild.previousSibling = previousChild
+            newChild.previousSibling.nextSibling = newChild
+            newChild.previous = previousChild._lastRecursiveChild()
+        if newChild.previous:
+            newChild.previous.next = newChild
+
+        newChildsLastElement = newChild._lastRecursiveChild()
+
+        if position >= len(self.contents):
+            newChild.nextSibling = None
+
+            parent = self
+            parentsNextSibling = None
+            while not parentsNextSibling:
+                parentsNextSibling = parent.nextSibling
+                parent = parent.parent
+                if not parent: # This is the last element in the document.
+                    break
+            if parentsNextSibling:
+                newChildsLastElement.next = parentsNextSibling
+            else:
+                newChildsLastElement.next = None
+        else:
+            nextChild = self.contents[position]
+            newChild.nextSibling = nextChild
+            if newChild.nextSibling:
+                newChild.nextSibling.previousSibling = newChild
+            newChildsLastElement.next = nextChild
+
+        if newChildsLastElement.next:
+            newChildsLastElement.next.previous = newChildsLastElement
+        self.contents.insert(position, newChild)
+
+    def append(self, tag):
+        """Appends the given tag to the contents of this tag."""
+        self.insert(len(self.contents), tag)
+
+    def findNext(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the first item that matches the given criteria and
+        appears after this Tag in the document."""
+        return self._findOne(self.findAllNext, name, attrs, text, **kwargs)
+
+    def findAllNext(self, name=None, attrs={}, text=None, limit=None,
+                    **kwargs):
+        """Returns all items that match the given criteria and appear
+        after this Tag in the document."""
+        return self._findAll(name, attrs, text, limit, self.nextGenerator,
+                             **kwargs)
+
+    def findNextSibling(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the closest sibling to this Tag that matches the
+        given criteria and appears after this Tag in the document."""
+        return self._findOne(self.findNextSiblings, name, attrs, text,
+                             **kwargs)
+
+    def findNextSiblings(self, name=None, attrs={}, text=None, limit=None,
+                         **kwargs):
+        """Returns the siblings of this Tag that match the given
+        criteria and appear after this Tag in the document."""
+        return self._findAll(name, attrs, text, limit,
+                             self.nextSiblingGenerator, **kwargs)
+    fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
+
+    def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the first item that matches the given criteria and
+        appears before this Tag in the document."""
+        return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs)
+
+    def findAllPrevious(self, name=None, attrs={}, text=None, limit=None,
+                        **kwargs):
+        """Returns all items that match the given criteria and appear
+        before this Tag in the document."""
+        return self._findAll(name, attrs, text, limit, self.previousGenerator,
+                           **kwargs)
+    fetchPrevious = findAllPrevious # Compatibility with pre-3.x
+
+    def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
+        """Returns the closest sibling to this Tag that matches the
+        given criteria and appears before this Tag in the document."""
+        return self._findOne(self.findPreviousSiblings, name, attrs, text,
+                             **kwargs)
+
+    def findPreviousSiblings(self, name=None, attrs={}, text=None,
+                             limit=None, **kwargs):
+        """Returns the siblings of this Tag that match the given
+        criteria and appear before this Tag in the document."""
+        return self._findAll(name, attrs, text, limit,
+                             self.previousSiblingGenerator, **kwargs)
+    fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
+
+    def findParent(self, name=None, attrs={}, **kwargs):
+        """Returns the closest parent of this Tag that matches the given
+        criteria (keyword arguments are passed on as attribute filters)."""
+        # NOTE: We can't use _findOne because findParents takes a different
+        # set of arguments.
+        r = None
+        l = self.findParents(name, attrs, 1, **kwargs)
+        if l:
+            r = l[0]
+        return r
+
+    def findParents(self, name=None, attrs={}, limit=None, **kwargs):
+        """Returns the parents of this Tag that match the given
+        criteria."""
+
+        return self._findAll(name, attrs, None, limit, self.parentGenerator,
+                             **kwargs)
+    fetchParents = findParents # Compatibility with pre-3.x
+
+    #These methods do the real heavy lifting.
+
+    def _findOne(self, method, name, attrs, text, **kwargs):
+        "Runs method with a limit of 1; returns its first result or None."
+        matches = method(name, attrs, text, 1, **kwargs)
+        if matches:
+            return matches[0]
+        return None
+
+    def _findAll(self, name, attrs, text, limit, generator, **kwargs):
+        "Iterates over a generator looking for things that match."
+
+        if isinstance(name, SoupStrainer):
+            strainer = name
+        # (Possibly) special case some findAll*(...) searches
+        elif text is None and not limit and not attrs and not kwargs:
+            # findAll*(True)
+            if name is True:
+                return [element for element in generator()
+                        if isinstance(element, Tag)]
+            # findAll*('tag-name')
+            elif isinstance(name, basestring):
+                return [element for element in generator()
+                        if isinstance(element, Tag) and
+                        element.name == name]
+            else:
+                strainer = SoupStrainer(name, attrs, text, **kwargs)
+        # Build a SoupStrainer
+        else:
+            strainer = SoupStrainer(name, attrs, text, **kwargs)
+        results = ResultSet(strainer)
+        g = generator()
+        while True:
+            try:
+                i = g.next()
+            except StopIteration:
+                break
+            if i:
+                found = strainer.search(i)
+                if found:
+                    results.append(found)
+                    if limit and len(results) >= limit:
+                        break
+        return results
+
+    #These Generators can be used to navigate starting from both
+    #NavigableStrings and Tags.
+    def nextGenerator(self):
+        i = self
+        while i is not None:
+            i = i.next
+            yield i
+
+    def nextSiblingGenerator(self):
+        i = self
+        while i is not None:
+            i = i.nextSibling
+            yield i
+
+    def previousGenerator(self):
+        i = self
+        while i is not None:
+            i = i.previous
+            yield i
+
+    def previousSiblingGenerator(self):
+        i = self
+        while i is not None:
+            i = i.previousSibling
+            yield i
+
+    def parentGenerator(self):
+        i = self
+        while i is not None:
+            i = i.parent
+            yield i
+
+    # Utility methods
+    def substituteEncoding(self, str, encoding=None):
+        encoding = encoding or "utf-8"
+        return str.replace("%SOUP-ENCODING%", encoding)
+
+    def toEncoding(self, s, encoding=None):
+        """Encodes an object to a string in some encoding, or to Unicode
+        when no encoding is given."""
+        if isinstance(s, unicode):
+            if encoding:
+                s = s.encode(encoding)
+        elif isinstance(s, str):
+            if encoding:
+                s = s.encode(encoding)
+            else:
+                s = unicode(s)
+        else:
+            if encoding:
+                s  = self.toEncoding(str(s), encoding)
+            else:
+                s = unicode(s)
+        return s
+
+class NavigableString(unicode, PageElement):
+
+    def __new__(cls, value):
+        """Create a new NavigableString.
+
+        When unpickling a NavigableString, this method is called with
+        the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
+        passed in to the superclass's __new__ or the superclass won't know
+        how to handle non-ASCII characters.
+        """
+        if isinstance(value, unicode):
+            return unicode.__new__(cls, value)
+        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+
+    def __getnewargs__(self):
+        return (NavigableString.__str__(self),)
+
+    def __getattr__(self, attr):
+        """text.string gives you text. This is for backwards
+        compatibility for Navigable*String, but for CData* it lets you
+        get the string without the CData wrapper."""
+        if attr == 'string':
+            return self
+        else:
+            raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
+
+    def __unicode__(self):
+        return str(self).decode(DEFAULT_OUTPUT_ENCODING)
+
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        if encoding:
+            return self.encode(encoding)
+        else:
+            return self
+
+class CData(NavigableString):
+
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding)
+
+class ProcessingInstruction(NavigableString):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        output = self
+        if "%SOUP-ENCODING%" in output:
+            output = self.substituteEncoding(output, encoding)
+        return "<?%s?>" % self.toEncoding(output, encoding)
+
+class Comment(NavigableString):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return "<!--%s-->" % NavigableString.__str__(self, encoding)
+
+class Declaration(NavigableString):
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return "<!%s>" % NavigableString.__str__(self, encoding)
+
+class Tag(PageElement):
+
+    """Represents a found HTML tag with its attributes and contents."""
+
+    def _invert(h):
+        "Cheap function to invert a hash."
+        i = {}
+        for k,v in h.items():
+            i[v] = k
+        return i
+
+    XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
+                                      "quot" : '"',
+                                      "amp" : "&",
+                                      "lt" : "<",
+                                      "gt" : ">" }
+
+    XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
+
+    def _convertEntities(self, match):
+        """Used in a call to re.sub to replace HTML, XML, and numeric
+        entities with the appropriate Unicode characters. If HTML
+        entities are being converted, any unrecognized entities are
+        escaped."""
+        x = match.group(1)
+        if self.convertHTMLEntities and x in name2codepoint:
+            return unichr(name2codepoint[x])
+        elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
+            if self.convertXMLEntities:
+                return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
+            else:
+                return u'&%s;' % x
+        elif len(x) > 0 and x[0] == '#':
+            # Handle numeric entities
+            if len(x) > 1 and x[1] == 'x':
+                return unichr(int(x[2:], 16))
+            else:
+                return unichr(int(x[1:]))
+
+        elif self.escapeUnrecognizedEntities:
+            return u'&amp;%s;' % x
+        else:
+            return u'&%s;' % x
+
+    def __init__(self, parser, name, attrs=None, parent=None,
+                 previous=None):
+        "Basic constructor."
+
+        # We don't actually store the parser object: that lets extracted
+        # chunks be garbage-collected
+        self.parserClass = parser.__class__
+        self.isSelfClosing = parser.isSelfClosingTag(name)
+        self.name = name
+        if attrs is None:
+            attrs = []
+        elif isinstance(attrs, dict):
+            attrs = attrs.items()
+        self.attrs = attrs
+        self.contents = []
+        self.setup(parent, previous)
+        self.hidden = False
+        self.containsSubstitutions = False
+        self.convertHTMLEntities = parser.convertHTMLEntities
+        self.convertXMLEntities = parser.convertXMLEntities
+        self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities
+
+        # Convert any HTML, XML, or numeric entities in the attribute values.
+        convert = lambda(k, val): (k,
+                                   re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",
+                                          self._convertEntities,
+                                          val))
+        self.attrs = map(convert, self.attrs)
+
+    def getString(self):
+        if (len(self.contents) == 1
+            and isinstance(self.contents[0], NavigableString)):
+            return self.contents[0]
+
+    def setString(self, string):
+        """Replace the contents of the tag with a string"""
+        self.clear()
+        self.append(string)
+
+    string = property(getString, setString)
+
+    def getText(self, separator=u""):
+        if not len(self.contents):
+            return u""
+        stopNode = self._lastRecursiveChild().next
+        strings = []
+        current = self.contents[0]
+        while current is not stopNode:
+            if isinstance(current, NavigableString):
+                strings.append(current.strip())
+            current = current.next
+        return separator.join(strings)
+
+    text = property(getText)
+
+    def get(self, key, default=None):
+        """Returns the value of the 'key' attribute for the tag, or
+        the value given for 'default' if it doesn't have that
+        attribute."""
+        return self._getAttrMap().get(key, default)
+
+    def clear(self):
+        """Extract all children."""
+        for child in self.contents[:]:
+            child.extract()
+
+    def index(self, element):
+        for i, child in enumerate(self.contents):
+            if child is element:
+                return i
+        raise ValueError("Tag.index: element not in tag")
+
+    def has_key(self, key):
+        return self._getAttrMap().has_key(key)
+
+    def __getitem__(self, key):
+        """tag[key] returns the value of the 'key' attribute for the tag,
+        and throws an exception if it's not there."""
+        return self._getAttrMap()[key]
+
+    def __iter__(self):
+        "Iterating over a tag iterates over its contents."
+        return iter(self.contents)
+
+    def __len__(self):
+        "The length of a tag is the length of its list of contents."
+        return len(self.contents)
+
+    def __contains__(self, x):
+        return x in self.contents
+
+    def __nonzero__(self):
+        "A tag is non-None even if it has no contents."
+        return True
+
+    def __setitem__(self, key, value):
+        """Setting tag[key] sets the value of the 'key' attribute for the
+        tag."""
+        self._getAttrMap()
+        self.attrMap[key] = value
+        found = False
+        for i in range(0, len(self.attrs)):
+            if self.attrs[i][0] == key:
+                self.attrs[i] = (key, value)
+                found = True
+        if not found:
+            self.attrs.append((key, value))
+        self._getAttrMap()[key] = value
+
+    def __delitem__(self, key):
+        "Deleting tag[key] deletes all 'key' attributes for the tag."
+        # Rebuild the list instead of calling remove() while iterating
+        # over it: that skipped the element following each removal, so
+        # adjacent duplicate attributes (which bad HTML can define)
+        # were left behind.
+        self.attrs[:] = [item for item in self.attrs if item[0] != key]
+        self._getAttrMap()
+        if self.attrMap.has_key(key):
+            del self.attrMap[key]
+
+    def __call__(self, *args, **kwargs):
+        """Calling a tag like a function is shorthand for calling its
+        findAll() method with the same arguments. Eg. tag('a') returns
+        a list of all the A tags found within this tag."""
+        return self.findAll(*args, **kwargs)
+
+    def __getattr__(self, tag):
+        # tag.fooTag and tag.foo are both shorthand for tag.find('foo').
+        if len(tag) > 3 and tag.endswith('Tag'):
+            return self.find(tag[:-3])
+        elif not tag.startswith('__'):
+            return self.find(tag)
+        raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
+
+    def __eq__(self, other):
+        """Returns true iff this tag has the same name, the same attributes,
+        and the same contents (recursively) as the given tag.
+
+        NOTE: right now this will return false if two tags have the
+        same attributes in a different order. Should this be fixed?"""
+        if other is self:
+            return True
+        if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
+            return False
+        for i in range(0, len(self.contents)):
+            if self.contents[i] != other.contents[i]:
+                return False
+        return True
+
+    def __ne__(self, other):
+        """Returns true iff this tag is not identical to the other tag,
+        as defined in __eq__."""
+        return not self == other
+
+    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        """Renders this tag as a string."""
+        return self.__str__(encoding)
+
+    def __unicode__(self):
+        return self.__str__(None)
+
+    BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
+                                           + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+                                           + ")")
+
+    def _sub_entity(self, x):
+        """Used with a regular expression to substitute the
+        appropriate XML entity for an XML special character."""
+        return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
+
+    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                prettyPrint=False, indentLevel=0):
+        """Returns a string or Unicode representation of this tag and
+        its contents. To get Unicode, pass None for encoding.
+
+        NOTE: since Python's HTML parser consumes whitespace, this
+        method is not certain to reproduce the whitespace present in
+        the original string."""
+
+        encodedName = self.toEncoding(self.name, encoding)
+
+        attrs = []
+        if self.attrs:
+            for key, val in self.attrs:
+                fmt = '%s="%s"'
+                if isinstance(val, basestring):
+                    if self.containsSubstitutions and '%SOUP-ENCODING%' in val:
+                        val = self.substituteEncoding(val, encoding)
+
+                    # The attribute value either:
+                    #
+                    # * Contains no embedded double quotes or single quotes.
+                    #   No problem: we enclose it in double quotes.
+                    # * Contains embedded single quotes. No problem:
+                    #   double quotes work here too.
+                    # * Contains embedded double quotes. No problem:
+                    #   we enclose it in single quotes.
+                    # * Embeds both single _and_ double quotes. This
+                    #   can't happen naturally, but it can happen if
+                    #   you modify an attribute value after parsing
+                    #   the document. Now we have a bit of a
+                    #   problem. We solve it by enclosing the
+                    #   attribute in single quotes, and escaping any
+                    #   embedded single quotes to XML entities.
+                    if '"' in val:
+                        fmt = "%s='%s'"
+                        if "'" in val:
+                            # TODO: replace with apos when
+                            # appropriate.
+                            val = val.replace("'", "&squot;")
+
+                    # Now we're okay w/r/t quotes. But the attribute
+                    # value might also contain angle brackets, or
+                    # ampersands that aren't part of entities. We need
+                    # to escape those to XML entities too.
+                    val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
+
+                attrs.append(fmt % (self.toEncoding(key, encoding),
+                                    self.toEncoding(val, encoding)))
+        close = ''
+        closeTag = ''
+        if self.isSelfClosing:
+            close = ' /'
+        else:
+            closeTag = '</%s>' % encodedName
+
+        indentTag, indentContents = 0, 0
+        if prettyPrint:
+            indentTag = indentLevel
+            space = (' ' * (indentTag-1))
+            indentContents = indentTag + 1
+        contents = self.renderContents(encoding, prettyPrint, indentContents)
+        if self.hidden:
+            s = contents
+        else:
+            s = []
+            attributeString = ''
+            if attrs:
+                attributeString = ' ' + ' '.join(attrs)
+            if prettyPrint:
+                s.append(space)
+            s.append('<%s%s%s>' % (encodedName, attributeString, close))
+            if prettyPrint:
+                s.append("\n")
+            s.append(contents)
+            if prettyPrint and contents and contents[-1] != "\n":
+                s.append("\n")
+            if prettyPrint and closeTag:
+                s.append(space)
+            s.append(closeTag)
+            if prettyPrint and closeTag and self.nextSibling:
+                s.append("\n")
+            s = ''.join(s)
+        return s
+
+    def decompose(self):
+        """Recursively destroys the contents of this tree."""
+        self.extract()
+        if len(self.contents) == 0:
+            return
+        current = self.contents[0]
+        while current is not None:
+            next = current.next
+            if isinstance(current, Tag):
+                del current.contents[:]
+            current.parent = None
+            current.previous = None
+            current.previousSibling = None
+            current.next = None
+            current.nextSibling = None
+            current = next
+
+    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return self.__str__(encoding, True)
+
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                       prettyPrint=False, indentLevel=0):
+        """Renders the contents of this tag as a string in the given
+        encoding. If encoding is None, returns a Unicode string."""
+        s=[]
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.__str__(encoding)
+            elif isinstance(c, Tag):
+                s.append(c.__str__(encoding, prettyPrint, indentLevel))
+            if text and prettyPrint:
+                text = text.strip()
+            if text:
+                if prettyPrint:
+                    s.append(" " * (indentLevel-1))
+                s.append(text)
+                if prettyPrint:
+                    s.append("\n")
+        return ''.join(s)
+
+    #Soup methods
+
+    def find(self, name=None, attrs={}, recursive=True, text=None,
+             **kwargs):
+        """Returns the first result findAll reports for the given
+        criteria, or None when there is no result."""
+        # Ask findAll for at most one result (limit=1).
+        matches = self.findAll(name, attrs, recursive, text, 1, **kwargs)
+        if not matches:
+            return None
+        return matches[0]
+    findChild = find
+
+    def findAll(self, name=None, attrs={}, recursive=True, text=None,
+                limit=None, **kwargs):
+        """Collects the elements below this Tag that satisfy the given
+        criteria: a tag name, attribute values, and/or text.
+
+        A criterion (the tag name, or a value in the 'attrs' map) may
+        be a string, a list of strings, a regular expression object,
+        or a callable that takes a string and returns whether that
+        string counts as a match. With recursive=False only the items
+        of self.contents are examined; 'limit' is passed to _findAll."""
+        if recursive:
+            source = self.recursiveChildGenerator
+        else:
+            source = self.childGenerator
+        return self._findAll(name, attrs, text, limit, source, **kwargs)
+    findChildren = findAll
+
+    # Pre-3.x compatibility methods
+    first = find
+    fetch = findAll
+
+    def fetchText(self, text=None, recursive=True, limit=None):
+        return self.findAll(text=text, recursive=recursive, limit=limit)
+
+    def firstText(self, text=None, recursive=True):
+        return self.find(text=text, recursive=recursive)
+
+    #Private methods
+
+    def _getAttrMap(self):
+        """Initializes a map representation of this tag's attributes,
+        if not already initialized."""
+        if not getattr(self, 'attrMap'):  # also true for an empty map, which is then rebuilt
+            self.attrMap = {}
+            for (key, value) in self.attrs:  # attrs is iterated as (key, value) pairs
+                self.attrMap[key] = value  # a repeated key keeps its last value
+        return self.attrMap
+
+    #Generator methods
+    def childGenerator(self):
+        # Just use the iterator from the contents
+        return iter(self.contents)
+
+    def recursiveChildGenerator(self):
+        """Yields each node from contents[0] up to the last recursive child."""
+        if self.contents:  # empty tag: just end; raising StopIteration breaks under PEP 479
+            stopNode = self._lastRecursiveChild().next
+            current = self.contents[0]
+            while current is not stopNode:
+                yield current
+                current = current.next
+
+
+# Next, a couple classes to represent queries and their results.
+class SoupStrainer:
+    """Encapsulates a number of ways of matching a markup element (tag or
+    text)."""
+
+    def __init__(self, name=None, attrs={}, text=None, **kwargs):
+        self.name = name  # tag-name criterion, consulted by searchTag
+        if isinstance(attrs, basestring):
+            kwargs['class'] = _match_css_class(attrs)  # a bare string is a CSS class criterion
+            attrs = None
+        if kwargs:
+            if attrs:
+                attrs = attrs.copy()  # leave the caller's (or the shared default) dict untouched
+                attrs.update(kwargs)
+            else:
+                attrs = kwargs
+        self.attrs = attrs or {}  # searchTag calls .items() on this, so never store None
+        self.text = text  # text criterion, consulted by search
+
+    def __str__(self):
+        if self.text:
+            return self.text
+        else:
+            return "%s|%s" % (self.name, self.attrs)
+
+    def searchTag(self, markupName=None, markupAttrs={}):
+        found = None
+        markup = None
+        if isinstance(markupName, Tag):
+            markup = markupName
+            markupAttrs = markup
+        callFunctionWithTagData = callable(self.name) \
+                                and not isinstance(markupName, Tag)
+
+        if (not self.name) \
+               or callFunctionWithTagData \
+               or (markup and self._matches(markup, self.name)) \
+               or (not markup and self._matches(markupName, self.name)):
+            if callFunctionWithTagData:
+                match = self.name(markupName, markupAttrs)
+            else:
+                match = True
+                markupAttrMap = None
+                for attr, matchAgainst in self.attrs.items():
+                    if not markupAttrMap:
+                         if hasattr(markupAttrs, 'get'):
+                            markupAttrMap = markupAttrs
+                         else:
+                            markupAttrMap = {}
+                            for k,v in markupAttrs:
+                                markupAttrMap[k] = v
+                    attrValue = markupAttrMap.get(attr)
+                    if not self._matches(attrValue, matchAgainst):
+                        match = False
+                        break
+            if match:
+                if markup:
+                    found = markup
+                else:
+                    found = markupName
+        return found
+
+    def search(self, markup):
+        #print 'looking for %s in %s' % (self, markup)
+        found = None
+        # If given a list of items, scan it for a text element that
+        # matches.
+        if hasattr(markup, "__iter__") \
+                and not isinstance(markup, Tag):
+            for element in markup:
+                if isinstance(element, NavigableString) \
+                       and self.search(element):
+                    found = element
+                    break
+        # If it's a Tag, make sure its name or attributes match.
+        # Don't bother with Tags if we're searching for text.
+        elif isinstance(markup, Tag):
+            if not self.text:
+                found = self.searchTag(markup)
+        # If it's text, make sure the text matches.
+        elif isinstance(markup, NavigableString) or \
+                 isinstance(markup, basestring):
+            if self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception, "I don't know how to match against a %s" \
+                  % markup.__class__
+        return found
+
+    def _matches(self, markup, matchAgainst):
+        #print "Matching %s against %s" % (markup, matchAgainst)
+        result = False
+        if matchAgainst is True:
+            result = markup is not None
+        elif callable(matchAgainst):
+            result = matchAgainst(markup)
+        else:
+            #Custom match methods take the tag as an argument, but all
+            #other ways of matching match the tag name as a string.
+            if isinstance(markup, Tag):
+                markup = markup.name
+            if markup and not isinstance(markup, basestring):
+                markup = unicode(markup)
+            #Now we know that markup is either a string, or None.
+            if hasattr(matchAgainst, 'match'):
+                # It's a regexp object.
+                result = markup and matchAgainst.search(markup)
+            elif hasattr(matchAgainst, '__iter__'): # list-like
+                result = markup in matchAgainst
+            elif hasattr(matchAgainst, 'items'):
+                result = markup.has_key(matchAgainst)
+            elif matchAgainst and isinstance(markup, basestring):
+                if isinstance(markup, unicode):
+                    matchAgainst = unicode(matchAgainst)
+                else:
+                    matchAgainst = str(matchAgainst)
+
+            if not result:
+                result = matchAgainst == markup
+        return result
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it. It always starts out empty."""
+    def __init__(self, source):
+        list.__init__(self)  # initialise this list, not a throwaway one
+        self.source = source
+
+# Now, some helper functions.
+
+def buildTagMap(default, *args):
+    """Flattens any mix of maps, lists and scalars into one map.
+    Map entries are copied as-is; list items and scalars are mapped to
+    `default`. Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
+    NESTING_RESET_TAGS maps out of lists and partial maps."""
+    built = {}
+    for portion in args:
+        if hasattr(portion, 'items'):
+            # A map: merge its key/value pairs unchanged.
+            built.update(portion.items())
+            continue
+        if not hasattr(portion, '__iter__'):
+            # A scalar: wrap it so the loop below treats it like a list.
+            portion = (portion,)
+        # Every remaining item is mapped to the default.
+        for k in portion:
+            built[k] = default
+    return built
+
+# Now, the parser classes.
+
+class BeautifulStoneSoup(Tag, SGMLParser):
+
+    """This class contains the basic parser and search code. It defines
+    a parser that knows nothing about tag behavior except for the
+    following:
+
+      You can't close a tag without closing all the tags it encloses.
+      That is, "<foo><bar></foo>" actually means
+      "<foo><bar></bar></foo>".
+
+    [Another possible explanation is "<foo><bar /></foo>", but since
+    this class defines no SELF_CLOSING_TAGS, it will never use that
+    explanation.]
+
+    This class is useful for parsing XML or made-up markup languages,
+    or when BeautifulSoup makes an assumption counter to what you were
+    expecting."""
+
+    SELF_CLOSING_TAGS = {}
+    NESTABLE_TAGS = {}
+    RESET_NESTING_TAGS = {}
+    QUOTE_TAGS = {}
+    PRESERVE_WHITESPACE_TAGS = []
+
+    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+                       lambda x: x.group(1) + ' />'),
+                      (re.compile('<!\s+([^<>]*)>'),
+                       lambda x: '<!' + x.group(1) + '>')
+                      ]
+
+    ROOT_TAG_NAME = u'[document]'
+
+    HTML_ENTITIES = "html"
+    XML_ENTITIES = "xml"
+    XHTML_ENTITIES = "xhtml"
+    # TODO: This only exists for backwards-compatibility
+    ALL_ENTITIES = XHTML_ENTITIES
+
+    # Used when determining whether a text node is all whitespace and
+    # can be replaced with a single space. A text node that contains
+    # fancy Unicode spaces (usually non-breaking) should be left
+    # alone.
+    STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
+
+    def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
+                 markupMassage=True, smartQuotesTo=XML_ENTITIES,
+                 convertEntities=None, selfClosingTags=None, isHTML=False):
+        """The Soup object is initialized as the 'root tag', and the
+        provided markup (which can be a string or a file-like object)
+        is fed into the underlying parser.
+
+        sgmllib will process most bad HTML, and the BeautifulSoup
+        class has some tricks for dealing with some HTML that kills
+        sgmllib, but Beautiful Soup can nonetheless choke or lose data
+        if your data uses self-closing tags or declarations
+        incorrectly.
+
+        By default, Beautiful Soup uses regexes to sanitize input,
+        avoiding the vast majority of these problems. If the problems
+        don't apply to you, pass in False for markupMassage, and
+        you'll get better performance.
+
+        The default parser massage techniques fix the two most common
+        instances of invalid HTML that choke sgmllib:
+
+         <br/> (No space between name of closing tag and tag close)
+         <! --Comment--> (Extraneous whitespace in declaration)
+
+        You can pass in a custom list of (RE object, replace method)
+        tuples to get Beautiful Soup to scrub your input the way you
+        want."""
+
+        self.parseOnlyThese = parseOnlyThese
+        self.fromEncoding = fromEncoding
+        self.smartQuotesTo = smartQuotesTo
+        self.convertEntities = convertEntities
+        # Set the rules for how we'll deal with the entities we
+        # encounter
+        if self.convertEntities:
+            # It doesn't make sense to convert encoded characters to
+            # entities even while you're converting entities to Unicode.
+            # Just convert it all to Unicode.
+            self.smartQuotesTo = None
+            if convertEntities == self.HTML_ENTITIES:
+                self.convertXMLEntities = False
+                self.convertHTMLEntities = True
+                self.escapeUnrecognizedEntities = True
+            elif convertEntities == self.XHTML_ENTITIES:
+                self.convertXMLEntities = True
+                self.convertHTMLEntities = True
+                self.escapeUnrecognizedEntities = False
+            elif convertEntities == self.XML_ENTITIES:
+                self.convertXMLEntities = True
+                self.convertHTMLEntities = False
+                self.escapeUnrecognizedEntities = False
+        else:
+            self.convertXMLEntities = False
+            self.convertHTMLEntities = False
+            self.escapeUnrecognizedEntities = False
+
+        self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
+        SGMLParser.__init__(self)
+
+        if hasattr(markup, 'read'):        # It's a file-type object.
+            markup = markup.read()
+        self.markup = markup
+        self.markupMassage = markupMassage
+        try:
+            self._feed(isHTML=isHTML)
+        except StopParsing:
+            pass
+        self.markup = None                 # The markup can now be GCed
+
+    def convert_charref(self, name):
+        """This method fixes a bug in Python's SGMLParser."""
+        try:
+            n = int(name)
+        except ValueError:
+            return
+        if not 0 <= n <= 127 : # ASCII ends at 127, not 255
+            return
+        return self.convert_codepoint(n)
+
+    def _feed(self, inDocumentEncoding=None, isHTML=False):
+        # Convert the document to Unicode.
+        markup = self.markup
+        if isinstance(markup, unicode):
+            if not hasattr(self, 'originalEncoding'):
+                self.originalEncoding = None
+        else:
+            dammit = UnicodeDammit\
+                     (markup, [self.fromEncoding, inDocumentEncoding],
+                      smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
+            markup = dammit.unicode
+            self.originalEncoding = dammit.originalEncoding
+            self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
+        if markup:
+            if self.markupMassage:
+                if not hasattr(self.markupMassage, "__iter__"):
+                    self.markupMassage = self.MARKUP_MASSAGE
+                for fix, m in self.markupMassage:
+                    markup = fix.sub(m, markup)
+                # TODO: We get rid of markupMassage so that the
+                # soup object can be deepcopied later on. Some
+                # Python installations can't copy regexes. If anyone
+                # was relying on the existence of markupMassage, this
+                # might cause problems.
+                del(self.markupMassage)
+        self.reset()
+
+        SGMLParser.feed(self, markup)
+        # Close out any unfinished strings and close all the open tags.
+        self.endData()
+        while self.currentTag.name != self.ROOT_TAG_NAME:
+            self.popTag()
+
+    def __getattr__(self, methodName):
+        """Handles attribute names that normal lookup could not resolve.
+        Parser handler names (start_*, end_*, do_*) and dunder names raise
+        AttributeError; every other name is delegated to Tag.__getattr__."""
+        if methodName.startswith('start_') or methodName.startswith('end_') \
+               or methodName.startswith('do_'):
+            # SGMLParser defines no __getattr__ to delegate to; report the missing handler.
+            raise AttributeError(methodName)
+        elif not methodName.startswith('__'):
+            return Tag.__getattr__(self, methodName)
+        else:
+            raise AttributeError(methodName)
+
+    def isSelfClosingTag(self, name):
+        """Reports whether this parser treats the named tag as
+        self-closing, by class-level or per-instance configuration."""
+        return (name in self.SELF_CLOSING_TAGS
+                or name in self.instanceSelfClosingTags)
+
+    def reset(self):
+        Tag.__init__(self, self, self.ROOT_TAG_NAME)
+        self.hidden = 1
+        SGMLParser.reset(self)
+        self.currentData = []
+        self.currentTag = None
+        self.tagStack = []
+        self.quoteStack = []
+        self.pushTag(self)
+
+    def popTag(self):
+        """Drops the top of the tag stack and returns the new current tag;
+        currentTag is left as it was if the stack becomes empty."""
+        self.tagStack.pop()
+        if self.tagStack:
+            self.currentTag = self.tagStack[-1]
+        return self.currentTag
+
+    def pushTag(self, tag):
+        #print "Push", tag.name
+        if self.currentTag:
+            self.currentTag.contents.append(tag)
+        self.tagStack.append(tag)
+        self.currentTag = self.tagStack[-1]
+
+    def endData(self, containerClass=NavigableString):
+        if self.currentData:
+            currentData = u''.join(self.currentData)
+            if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
+                not set([tag.name for tag in self.tagStack]).intersection(
+                    self.PRESERVE_WHITESPACE_TAGS)):
+                if '\n' in currentData:
+                    currentData = '\n'
+                else:
+                    currentData = ' '
+            self.currentData = []
+            if self.parseOnlyThese and len(self.tagStack) <= 1 and \
+                   (not self.parseOnlyThese.text or \
+                    not self.parseOnlyThese.search(currentData)):
+                return
+            o = containerClass(currentData)
+            o.setup(self.currentTag, self.previous)
+            if self.previous:
+                self.previous.next = o
+            self.previous = o
+            self.currentTag.contents.append(o)
+
+
+    def _popToTag(self, name, inclusivePop=True):
+        """Pops the tag stack up to and including the most recent
+        instance of the given tag. If inclusivePop is false, pops the tag
+        stack up to but *not* including the most recent instance of
+        the given tag."""
+        #print "Popping to %s" % name
+        if name == self.ROOT_TAG_NAME:
+            return
+
+        numPops = 0
+        mostRecentTag = None
+        for i in range(len(self.tagStack)-1, 0, -1):
+            if name == self.tagStack[i].name:
+                numPops = len(self.tagStack)-i
+                break
+        if not inclusivePop:
+            numPops = numPops - 1
+
+        for i in range(0, numPops):
+            mostRecentTag = self.popTag()
+        return mostRecentTag
+
+    def _smartPop(self, name):
+
+        """We need to pop up to the previous tag of this type, unless
+        one of this tag's nesting reset triggers comes between this
+        tag and the previous tag of this type, OR unless this tag is a
+        generic nesting trigger and another generic nesting trigger
+        comes between this tag and the previous tag of this type.
+
+        Examples:
+         <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
+         <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+        """
+
+        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+        isNestable = nestingResetTriggers is not None
+        isResetNesting = name in self.RESET_NESTING_TAGS
+        popTo = None
+        inclusive = True
+        # Scan the open tags innermost-first; index 0 of the stack is never examined.
+        for i in range(len(self.tagStack)-1, 0, -1):
+            p = self.tagStack[i]
+            if (not p or p.name == name) and not isNestable:
+                #Non-nestable tags get popped to the top or to their
+                #last occurrence.
+                popTo = name
+                break
+            if (nestingResetTriggers is not None
+                and p.name in nestingResetTriggers) \
+                or (nestingResetTriggers is None and isResetNesting
+                    and p.name in self.RESET_NESTING_TAGS):
+
+                #If we encounter one of the nesting reset triggers
+                #peculiar to this tag, or we encounter another tag
+                #that causes nesting to reset, pop up to but not
+                #including that tag.
+                popTo = p.name
+                inclusive = False
+                break
+        if popTo:
+            self._popToTag(popTo, inclusive)
+
+    def unknown_starttag(self, name, attrs, selfClosing=0):
+        #print "Start tag %s: %s" % (name, attrs)
+        if self.quoteStack:
+            #This is not a real tag.
+            #print "<%s> is not real!" % name
+            attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs])
+            self.handle_data('<%s%s>' % (name, attrs))
+            return
+        self.endData()
+
+        if not self.isSelfClosingTag(name) and not selfClosing:
+            self._smartPop(name)
+
+        if self.parseOnlyThese and len(self.tagStack) <= 1 \
+               and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
+            return
+
+        tag = Tag(self, name, attrs, self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = tag
+        self.previous = tag
+        self.pushTag(tag)
+        if selfClosing or self.isSelfClosingTag(name):
+            self.popTag()
+        if name in self.QUOTE_TAGS:
+            #print "Beginning quote (%s)" % name
+            self.quoteStack.append(name)
+            self.literal = 1
+        return tag
+
+    def unknown_endtag(self, name):
+        #print "End tag %s" % name
+        if self.quoteStack and self.quoteStack[-1] != name:
+            #This is not a real end tag.
+            #print "</%s> is not real!" % name
+            self.handle_data('</%s>' % name)
+            return
+        self.endData()
+        self._popToTag(name)
+        if self.quoteStack and self.quoteStack[-1] == name:
+            self.quoteStack.pop()
+            self.literal = (len(self.quoteStack) > 0)
+
+    def handle_data(self, data):
+        self.currentData.append(data)
+
+    def _toStringSubclass(self, text, subclass):
+        """Adds a certain piece of text to the tree as a NavigableString
+        subclass."""
+        self.endData()
+        self.handle_data(text)
+        self.endData(subclass)
+
+    def handle_pi(self, text):
+        """Handle a processing instruction as a ProcessingInstruction
+        object, possibly one with a %SOUP-ENCODING% slot into which an
+        encoding will be plugged later."""
+        if text[:3] == "xml":
+            text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
+        self._toStringSubclass(text, ProcessingInstruction)
+
+    def handle_comment(self, text):
+        "Handle comments as Comment objects."
+        self._toStringSubclass(text, Comment)
+
+    def handle_charref(self, ref):
+        "Handle character references (decimal, or hex such as 'x41') as data."
+        data = '&#%s;' % ref  # passed through untouched unless entities are converted
+        if self.convertEntities:
+            base = (10, 16)[ref[:1] in ('x', 'X')]  # a leading x/X marks a hex reference
+            data = unichr(int(ref.lstrip('xX'), base))
+        self.handle_data(data)
+
+    def handle_entityref(self, ref):
+        """Handle entity references as data, possibly converting known
+        HTML and/or XML entity references to the corresponding Unicode
+        characters."""
+        data = None
+        if self.convertHTMLEntities:
+            try:
+                data = unichr(name2codepoint[ref])
+            except KeyError:
+                pass
+
+        if not data and self.convertXMLEntities:
+                data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
+
+        if not data and self.convertHTMLEntities and \
+            not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
+                # TODO: We've got a problem here. We're told this is
+                # an entity reference, but it's not an XML entity
+                # reference or an HTML entity reference. Nonetheless,
+                # the logical thing to do is to pass it through as an
+                # unrecognized entity reference.
+                #
+                # Except: when the input is "&carol;" this function
+                # will be called with input "carol". When the input is
+                # "AT&T", this function will be called with input
+                # "T". We have no way of knowing whether a semicolon
+                # was present originally, so we don't know whether
+                # this is an unknown entity or just a misplaced
+                # ampersand.
+                #
+                # The more common case is a misplaced ampersand, so I
+                # escape the ampersand and omit the trailing semicolon.
+                data = "&amp;%s" % ref
+        if not data:
+            # This case is different from the one above, because we
+            # haven't already gone through a supposedly comprehensive
+            # mapping of entities to Unicode characters. We might not
+            # have gone through any mapping at all. So the chances are
+            # very high that this is a real entity, and not a
+            # misplaced ampersand.
+            data = "&%s;" % ref
+        self.handle_data(data)
+
+    def handle_decl(self, data):
+        "Handle DOCTYPEs and the like as Declaration objects."
+        self._toStringSubclass(data, Declaration)
+
+    def parse_declaration(self, i):
+        """Treat a bogus SGML declaration as raw data. Treat a CDATA
+        declaration as a CData object."""
+        j = None
+        if self.rawdata[i:i+9] == '<![CDATA[':
+             k = self.rawdata.find(']]>', i)
+             if k == -1:
+                 k = len(self.rawdata)
+             data = self.rawdata[i+9:k]
+             j = k+3
+             self._toStringSubclass(data, CData)
+        else:
+            try:
+                j = SGMLParser.parse_declaration(self, i)
+            except SGMLParseError:
+                toHandle = self.rawdata[i:]
+                self.handle_data(toHandle)
+                j = i + len(toHandle)
+        return j
+
+class BeautifulSoup(BeautifulStoneSoup):
+
+    """This parser knows the following facts about HTML:
+
+    * Some tags have no closing tag and should be interpreted as being
+      closed as soon as they are encountered.
+
+    * The text inside some tags (ie. 'script') may contain tags which
+      are not really part of the document and which should be parsed
+      as text, not tags. If you want to parse the text as tags, you can
+      always fetch it and parse it explicitly.
+
+    * Tag nesting rules:
+
+      Most tags can't be nested at all. For instance, the occurrence of
+      a <p> tag should implicitly close the previous <p> tag.
+
+       <p>Para1<p>Para2
+        should be transformed into:
+       <p>Para1</p><p>Para2
+
+      Some tags can be nested arbitrarily. For instance, the occurrence
+      of a <blockquote> tag should _not_ implicitly close the previous
+      <blockquote> tag.
+
+       Alice said: <blockquote>Bob said: <blockquote>Blah
+        should NOT be transformed into:
+       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah
+
+      Some tags can be nested, but the nesting is reset by the
+      interposition of other tags. For instance, a <tr> tag should
+      implicitly close the previous <tr> tag within the same <table>,
+      but not close a <tr> tag in another table.
+
+       <table><tr>Blah<tr>Blah
+        should be transformed into:
+       <table><tr>Blah</tr><tr>Blah
+        but,
+       <tr>Blah<table><tr>Blah
+        should NOT be transformed into
+       <tr>Blah<table></tr><tr>Blah
+
+    Differing assumptions about tag nesting rules are a major source
+    of problems with the BeautifulSoup class. If BeautifulSoup is not
+    treating as nestable a tag your page author treats as nestable,
+    try ICantBelieveItsBeautifulSoup, MinimalSoup, or
+    BeautifulStoneSoup before writing your own subclass."""
+
+    def __init__(self, *args, **kwargs):
+        # Smart quotes default to HTML entities; isHTML is always forced on.
+        kwargs.setdefault('smartQuotesTo', self.HTML_ENTITIES)
+        kwargs['isHTML'] = True
+        BeautifulStoneSoup.__init__(self, *args, **kwargs)
+
+    SELF_CLOSING_TAGS = buildTagMap(None,
+                                    ('br' , 'hr', 'input', 'img', 'meta',
+                                    'spacer', 'link', 'frame', 'base', 'col'))
+
+    PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
+
+    QUOTE_TAGS = {'script' : None, 'textarea' : None}
+
+    #According to the HTML standard, each of these inline tags can
+    #contain another tag of the same type. Furthermore, it's common
+    #to actually use these tags this way.
+    NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
+                            'center')
+
+    #According to the HTML standard, these block tags can contain
+    #another tag of the same type. Furthermore, it's common
+    #to actually use these tags this way.
+    NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
+
+    #Lists can contain other lists, but there are restrictions.
+    NESTABLE_LIST_TAGS = { 'ol' : [],
+                           'ul' : [],
+                           'li' : ['ul', 'ol'],
+                           'dl' : [],
+                           'dd' : ['dl'],
+                           'dt' : ['dl'] }
+
+    #Tables can contain other tables, but there are restrictions.
+    NESTABLE_TABLE_TAGS = {'table' : [],
+                           'tr' : ['table', 'tbody', 'tfoot', 'thead'],
+                           'td' : ['tr'],
+                           'th' : ['tr'],
+                           'thead' : ['table'],
+                           'tbody' : ['table'],
+                           'tfoot' : ['table'],
+                           }
+
+    NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre')
+
+    #If one of these tags is encountered, all tags up to the next tag of
+    #this type are popped.
+    RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
+                                     NON_NESTABLE_BLOCK_TAGS,
+                                     NESTABLE_LIST_TAGS,
+                                     NESTABLE_TABLE_TAGS)
+
+    NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
+                                NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)
+
+    # Used to detect the charset in a META tag; see start_meta
+    CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
+
+    def start_meta(self, attrs):
+        """Handle a META start tag, acting on any charset it declares.
+
+        Only a tag with both an http-equiv and a content attribute whose
+        content matches CHARSET_RE is treated specially. If an encoding
+        is already settled, the charset value is swapped for the
+        %SOUP-ENCODING% placeholder and the new tag is flagged as
+        containing substitutions. Otherwise a charset that differs from
+        originalEncoding is stored in declaredHTMLEncoding, passed to
+        self._feed, and StopParsing is raised."""
+        equivValue = contentValue = contentPos = None
+        needsSubstitution = False
+        for pos in range(len(attrs)):
+            name, val = attrs[pos]
+            name = name.lower()
+            if name == 'http-equiv':
+                equivValue = val
+            elif name == 'content':
+                contentValue, contentPos = val, pos
+
+        found = None
+        if equivValue and contentValue: # It's an interesting meta tag.
+            found = self.CHARSET_RE.search(contentValue)
+        if found:
+            # Settled means: an HTML encoding was sniffed while
+            # converting the document to Unicode or on a previous pass,
+            # or an encoding was specified explicitly and it worked.
+            settled = (self.declaredHTMLEncoding is not None or
+                       self.originalEncoding == self.fromEncoding)
+            if settled:
+                # Rewrite the meta tag, keeping the "charset=" prefix.
+                keepPrefix = lambda m: m.group(1) + "%SOUP-ENCODING%"
+                newContent = self.CHARSET_RE.sub(keepPrefix, contentValue)
+                attrs[contentPos] = (attrs[contentPos][0], newContent)
+                needsSubstitution = True
+            else:
+                # This is our first pass through the document.
+                # Go through it again with the encoding information.
+                declared = found.group(3)
+                if declared and declared != self.originalEncoding:
+                    self.declaredHTMLEncoding = declared
+                    self._feed(self.declaredHTMLEncoding)
+                    raise StopParsing
+        metaTag = self.unknown_starttag("meta", attrs)
+        if metaTag and needsSubstitution:
+            metaTag.containsSubstitutions = True
+
+class StopParsing(Exception):
+    """Raised by start_meta right after it calls _feed with a new encoding."""
+
+class ICantBelieveItsBeautifulSoup(BeautifulSoup):
+
+    """The BeautifulSoup class is oriented towards skipping over
+    common HTML errors like unclosed tags. However, sometimes it makes
+    errors of its own. For instance, consider this fragment:
+
+     <b>Foo<b>Bar</b></b>
+
+    This is perfectly valid (if bizarre) HTML. However, the
+    BeautifulSoup class will implicitly close the first b tag when it
+    encounters the second 'b'. It will think the author wrote
+    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
+    there's no real-world reason to bold something that's already
+    bold. When it encounters '</b></b>' it will close two more 'b'
+    tags, for a grand total of three tags closed instead of two. This
+    can throw off the rest of your document structure. The same is
+    true of a number of other tags, listed below.
+
+    It's much more common for someone to forget to close a 'b' tag
+    than to actually use nested 'b' tags, and the BeautifulSoup class
+    handles the common case. This class handles the not-so-common
+    case: where you can't believe someone wrote what they did, but
+    it's valid HTML and BeautifulSoup screwed up by assuming it
+    wouldn't be."""
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
+     ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
+      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
+      'big')  # NOTE: 'strong' and 'big' are each listed twice
+
+    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',)
+
+    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
+                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
+
+class MinimalSoup(BeautifulSoup):
+    """The MinimalSoup class is for parsing HTML that contains
+    pathologically bad markup. It makes no assumptions about tag
+    nesting, but it does know which tags are self-closing, that
+    <script> tags contain Javascript and should not be parsed, that
+    META tags may contain encoding information, and so on.
+
+    This also makes it better for subclassing than BeautifulStoneSoup
+    or BeautifulSoup."""
+    # NOTE(review): sibling buildTagMap calls pass None/[] first; confirm 'noscript'.
+    RESET_NESTING_TAGS = buildTagMap('noscript')
+    NESTABLE_TAGS = {}
+
+class BeautifulSOAP(BeautifulStoneSoup):
+    """A parser that mirrors single-string child tags as attributes.
+
+    When a tag whose only child is a string is popped, that string is
+    also stored on the tag's parent as an attribute named after the
+    tag, unless the parent already has an attribute of that name:
+
+    <foo><bar>baz</bar></foo>
+     =>
+    <foo bar="baz"><bar>baz</bar></foo>
+
+    You can then access fooTag['bar'] instead of fooTag.barTag.string.
+
+    This suits documents that favour subelements over attributes,
+    such as SOAP messages. Note that it modifies its input, so don't
+    print the modified version out.
+
+    The original author was not sure how many people want this class
+    and mainly likes the name."""
+
+    def popTag(self):
+        stack = self.tagStack
+        if len(stack) > 1:
+            child, owner = stack[-1], stack[-2]
+            owner._getAttrMap()
+            soleString = (isinstance(child, Tag) and len(child.contents) == 1
+                          and isinstance(child.contents[0], NavigableString))
+            if soleString and not owner.attrMap.has_key(child.name):
+                owner[child.name] = child.contents[0]
+        BeautifulStoneSoup.popTag(self)
+
+#Enterprise class names! It has come to our attention that some people
+#think the names of the Beautiful Soup parser classes are too silly
+#and "unprofessional" for use in enterprise screen-scraping. We feel
+#your pain! For such-minded folk, the Beautiful Soup Consortium And
+#All-Night Kosher Bakery recommends renaming this file to
+#"RobustParser.py" (or, in cases of extreme enterprisiness,
+#"RobustParserBeanInterface.class") and using the following
+#enterprise-friendly class aliases:
+class RobustXMLParser(BeautifulStoneSoup):
+    """Enterprise-friendly name for BeautifulStoneSoup."""
+class RobustHTMLParser(BeautifulSoup):
+    """Enterprise-friendly name for BeautifulSoup."""
+class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
+    """Enterprise-friendly name for ICantBelieveItsBeautifulSoup."""
+class RobustInsanelyWackAssHTMLParser(MinimalSoup):
+    """Enterprise-friendly name for MinimalSoup."""
+class SimplifyingSOAPParser(BeautifulSOAP):
+    """Enterprise-friendly name for BeautifulSOAP."""
+
+######################################################
+#
+# Bonus library: Unicode, Dammit
+#
+# This class forces XML data into a standard format (usually to UTF-8
+# or Unicode).  It is heavily based on code from Mark Pilgrim's
+# Universal Feed Parser. It does not rewrite the XML or HTML to
+# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi
+# (XML) and BeautifulSoup.start_meta (HTML).
+
+# Autodetects character encodings.
+# Download from http://chardet.feedparser.org/
+try:
+    import chardet
+#    import chardet.constants
+#    chardet.constants._debug = 1
+except ImportError:
+    chardet = None
+
+# cjkcodecs and iconv_codec make Python know about more character encodings.
+# Both are available from http://cjkpython.i18n.org/
+# They're built in if you use Python 2.4.
+try:
+    import cjkcodecs.aliases
+except ImportError:
+    pass
+try:
+    import iconv_codec
+except ImportError:
+    pass
+
+class UnicodeDammit:
+    """A class for detecting the encoding of a *ML document and
+    converting it to a Unicode string. If the source encoding is
+    windows-1252, can replace MS smart quotes with their HTML or XML
+    equivalents."""
+
+    # This dictionary maps commonly seen values for "charset" in HTML
+    # meta tags to the corresponding Python codec names. It only covers
+    # values that aren't in Python's aliases and can't be determined
+    # by the heuristics in find_codec.
+    CHARSET_ALIASES = { "macintosh" : "mac-roman",
+                        "x-sjis" : "shift-jis" }
+
+    def __init__(self, markup, overrideEncodings=[],
+                 smartQuotesTo='xml', isHTML=False):
+        self.declaredHTMLEncoding = None
+        self.markup, documentEncoding, sniffedEncoding = \
+                     self._detectEncoding(markup, isHTML)
+        self.smartQuotesTo = smartQuotesTo
+        self.triedEncodings = []  # codecs already attempted by _convertFrom
+        if markup == '' or isinstance(markup, unicode):  # nothing to decode
+            self.originalEncoding = None
+            self.unicode = unicode(markup)
+            return
+        # Try the caller's encodings, then the declared and sniffed ones.
+        u = None
+        for proposedEncoding in overrideEncodings:
+            u = self._convertFrom(proposedEncoding)
+            if u: break
+        if not u:
+            for proposedEncoding in (documentEncoding, sniffedEncoding):
+                u = self._convertFrom(proposedEncoding)
+                if u: break
+
+        # If no luck and we have auto-detection library, try that:
+        if not u and chardet and not isinstance(self.markup, unicode):
+            u = self._convertFrom(chardet.detect(self.markup)['encoding'])
+
+        # As a last resort, try utf-8 and windows-1252:
+        if not u:
+            for proposed_encoding in ("utf-8", "windows-1252"):
+                u = self._convertFrom(proposed_encoding)
+                if u: break
+
+        self.unicode = u  # the decoded text, or a false value if nothing worked
+        if not u: self.originalEncoding = None
+
+    def _subMSChar(self, orig):
+        """Returns the replacement for a MS smart quote character.
+        Tuple entries of MS_CHARS become an XML (numeric) or HTML
+        (named) entity; any other entry is returned as is."""
+        replacement = self.MS_CHARS.get(orig)
+        if not isinstance(replacement, tuple):
+            return replacement
+        if self.smartQuotesTo == 'xml':
+            return '&#x%s;' % replacement[1]
+        return '&%s;' % replacement[0]
+
+    def _convertFrom(self, proposed):  # returns decoded markup, or None on failure
+        proposed = self.find_codec(proposed)
+        if not proposed or proposed in self.triedEncodings:
+            return None  # no usable codec name, or this one was tried before
+        self.triedEncodings.append(proposed)
+        markup = self.markup
+
+        # Convert smart quotes to HTML if coming from an encoding
+        # that might have them.
+        if self.smartQuotesTo and proposed.lower() in("windows-1252",
+                                                      "iso-8859-1",
+                                                      "iso-8859-2"):
+            markup = re.compile("([\x80-\x9f])").sub \
+                     (lambda(x): self._subMSChar(x.group(1)),
+                      markup)
+        # On success the decoded text replaces self.markup (see below).
+        try:
+            # print "Trying to convert document to %s" % proposed
+            u = self._toUnicode(markup, proposed)
+            self.markup = u
+            self.originalEncoding = proposed
+        except Exception, e:
+            # print "That didn't work!"
+            # print e
+            return None
+        #print "Correct encoding: %s" % proposed
+        return self.markup
+
+    def _toUnicode(self, data, encoding):
+        '''Decodes the string data into Unicode and returns it.
+        %encoding is a string recognized by encodings.aliases; it is
+        overridden if data starts with a Byte Order Mark, which is
+        then stripped before decoding.'''
+        bom = None
+        # A two-byte UTF-16 mark is honoured only when the data is at
+        # least 4 bytes long and bytes 2-3 are not both NUL.
+        if len(data) >= 4 and data[2:4] != '\x00\x00':
+            if data[:2] == '\xfe\xff':
+                bom, encoding = 2, 'utf-16be'
+            elif data[:2] == '\xff\xfe':
+                bom, encoding = 2, 'utf-16le'
+        if bom is None:
+            for mark, codec in (('\xef\xbb\xbf', 'utf-8'),
+                                ('\x00\x00\xfe\xff', 'utf-32be'),
+                                ('\xff\xfe\x00\x00', 'utf-32le')):
+                if data[:len(mark)] == mark:
+                    bom, encoding = len(mark), codec
+                    break
+        # strip Byte Order Mark (if present)
+        if bom:
+            data = data[bom:]
+        return unicode(data, encoding)
+
+    def _detectEncoding(self, xml_data, isHTML=False):
+        """Sniffs a document's encoding; returns (data, declared, sniffed)."""
+        xml_encoding = sniffed_xml_encoding = None
+        try:
+            if xml_data[:4] == '\x4c\x6f\xa7\x94':
+                # EBCDIC
+                xml_data = self._ebcdic_to_ascii(xml_data)
+            elif xml_data[:4] == '\x00\x3c\x00\x3f':
+                # UTF-16BE
+                sniffed_xml_encoding = 'utf-16be'
+                xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
+            elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
+                     and (xml_data[2:4] != '\x00\x00'):
+                # UTF-16BE with BOM
+                sniffed_xml_encoding = 'utf-16be'
+                xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
+            elif xml_data[:4] == '\x3c\x00\x3f\x00':
+                # UTF-16LE
+                sniffed_xml_encoding = 'utf-16le'
+                xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
+            elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
+                     (xml_data[2:4] != '\x00\x00'):
+                # UTF-16LE with BOM
+                sniffed_xml_encoding = 'utf-16le'
+                xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
+            elif xml_data[:4] == '\x00\x00\x00\x3c':
+                # UTF-32BE
+                sniffed_xml_encoding = 'utf-32be'
+                xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
+            elif xml_data[:4] == '\x3c\x00\x00\x00':
+                # UTF-32LE
+                sniffed_xml_encoding = 'utf-32le'
+                xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
+            elif xml_data[:4] == '\x00\x00\xfe\xff':
+                # UTF-32BE with BOM
+                sniffed_xml_encoding = 'utf-32be'
+                xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
+            elif xml_data[:4] == '\xff\xfe\x00\x00':
+                # UTF-32LE with BOM
+                sniffed_xml_encoding = 'utf-32le'
+                xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
+            elif xml_data[:3] == '\xef\xbb\xbf':
+                # UTF-8 with BOM
+                sniffed_xml_encoding = 'utf-8'
+                xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
+            else:
+                sniffed_xml_encoding = 'ascii'
+                pass
+        except Exception:
+            pass # best effort: on a decode error keep xml_data unchanged
+        xml_encoding_match = re.compile(
+            '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data)
+        if not xml_encoding_match and isHTML:
+            # The optional quote also covers the <meta charset="..."> form.
+            regexp = re.compile('<\s*meta[^>]+charset=[\'"]?([^>]*?)[;\'">]', re.I)
+            xml_encoding_match = regexp.search(xml_data)
+        if xml_encoding_match is not None:
+            xml_encoding = xml_encoding_match.groups()[0].lower()
+            if isHTML:
+                self.declaredHTMLEncoding = xml_encoding
+            # A generic UTF-16/32 label gives way to the sniffed byte order.
+            if sniffed_xml_encoding and \
+               (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode',
+                                 'iso-10646-ucs-4', 'ucs-4', 'csucs4',
+                                 'utf-16', 'utf-32', 'utf_16', 'utf_32',
+                                 'utf16', 'u16')):
+                xml_encoding = sniffed_xml_encoding
+        return xml_data, xml_encoding, sniffed_xml_encoding
+
+
+    def find_codec(self, charset):  # first spelling of charset that _codec accepts
+        return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
+               or (charset and self._codec(charset.replace("-", ""))) \
+               or (charset and self._codec(charset.replace("-", "_"))) \
+               or charset  # nothing matched: hand the name back unchanged
+
+    def _codec(self, charset):
+        # Returns charset if codecs.lookup accepts it, else None;
+        # a false charset (None, '') is handed straight back.
+        if not charset: return charset
+        try:
+            codecs.lookup(charset)
+        except (LookupError, ValueError):
+            return None
+        return charset
+
+    EBCDIC_TO_ASCII_MAP = None  # translation table; built on first use, cached on the class
+    def _ebcdic_to_ascii(self, s):  # returns s mapped byte-for-byte through emap
+        c = self.__class__
+        if not c.EBCDIC_TO_ASCII_MAP:
+            emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
+                    16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
+                    128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
+                    144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
+                    32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
+                    38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
+                    45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
+                    186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
+                    195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,
+                    201,202,106,107,108,109,110,111,112,113,114,203,204,205,
+                    206,207,208,209,126,115,116,117,118,119,120,121,122,210,
+                    211,212,213,214,215,216,217,218,219,220,221,222,223,224,
+                    225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,
+                    73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,
+                    82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,
+                    90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,
+                    250,251,252,253,254,255)
+            import string
+            c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
+            ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+        return s.translate(c.EBCDIC_TO_ASCII_MAP)
+
+    MS_CHARS = { '\x80' : ('euro', '20AC'),  # byte -> (HTML entity name, hex code point) or literal text
+                 '\x81' : ' ',
+                 '\x82' : ('sbquo', '201A'),
+                 '\x83' : ('fnof', '192'),
+                 '\x84' : ('bdquo', '201E'),
+                 '\x85' : ('hellip', '2026'),
+                 '\x86' : ('dagger', '2020'),
+                 '\x87' : ('Dagger', '2021'),
+                 '\x88' : ('circ', '2C6'),
+                 '\x89' : ('permil', '2030'),
+                 '\x8A' : ('Scaron', '160'),
+                 '\x8B' : ('lsaquo', '2039'),
+                 '\x8C' : ('OElig', '152'),
+                 '\x8D' : '?',
+                 '\x8E' : ('#x17D', '17D'),
+                 '\x8F' : '?',
+                 '\x90' : '?',
+                 '\x91' : ('lsquo', '2018'),
+                 '\x92' : ('rsquo', '2019'),
+                 '\x93' : ('ldquo', '201C'),
+                 '\x94' : ('rdquo', '201D'),
+                 '\x95' : ('bull', '2022'),
+                 '\x96' : ('ndash', '2013'),
+                 '\x97' : ('mdash', '2014'),
+                 '\x98' : ('tilde', '2DC'),
+                 '\x99' : ('trade', '2122'),
+                 '\x9a' : ('scaron', '161'),
+                 '\x9b' : ('rsaquo', '203A'),
+                 '\x9c' : ('oelig', '153'),
+                 '\x9d' : '?',
+                 '\x9e' : ('#x17E', '17E'),
+                 '\x9f' : ('Yuml', '178'),}  # U+0178; was '', which produced "&#x;"
+
+#######################################################################
+
+
+# When run as a script, pretty-print the HTML read from standard input.
+if __name__ == '__main__':
+    import sys
+    soup = BeautifulSoup(sys.stdin)
+    print soup.prettify()
diff -r fdb4240fb565 -r 8ae67e9fb6ff LocationFile.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LocationFile.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+import sys
+
+def die( message ):  # print message to stderr, then exit with status 1
+    print >> sys.stderr, message
+    sys.exit(1)
+
+def open_or_die( filename, mode='r', message=None ):
+    # Return the opened file; on IOError report it through die() instead.
+    try:
+        return open( filename, mode )
+    except IOError, err:
+        if message is None:
+            message = 'Error opening {0}'.format( filename )
+        die( '{0}: {1}'.format( message, err.strerror ) )
+
+class LocationFile( object ):
+    """Lookup table loaded from a delimited location file.
+
+    Every line not starting with a comment prefix is split on `delimiter`;
+    the field at `key_column` is the key and the other fields, in order,
+    are its values. Any problem with the file is reported through die().
+    """
+    def __init__( self, filename, comment_chars=None, delimiter='\t', key_column=0 ):
+        self.filename = filename
+        if comment_chars is None:
+            comment_chars = ( '#', )
+        # Always a tuple, so startswith() can test every prefix in one call.
+        self.comment_chars = tuple( comment_chars )
+        self.delimiter = delimiter
+        self.key_column = key_column
+        self._map = {}
+        self._populate_map()
+
+    def _populate_map( self ):
+        """Fill self._map from the file, calling die() on the first bad line."""
+        try:
+            with open( self.filename ) as fh:
+                for line_number, line in enumerate( fh, 1 ):
+                    line = line.rstrip( '\r\n' )
+                    if line.startswith( self.comment_chars ):
+                        continue
+                    elems = line.split( self.delimiter )
+                    if len( elems ) <= self.key_column:
+                        die( 'Location file {0} line {1}: less than {2} columns'.format( self.filename, line_number, self.key_column + 1 ) )
+                    key = elems.pop( self.key_column )
+                    # A repeated key is an error only if its values differ.
+                    if self._map.setdefault( key, elems ) != elems:
+                        die( 'Location file {0} line {1}: duplicate key "{2}"'.format( self.filename, line_number, key ) )
+        except IOError, err:
+            die( 'Error opening location file {0}: {1}'.format( self.filename, err.strerror ) )
+
+    def get_values( self, key ):
+        """Return the values for key; die() if the key is not present."""
+        if key not in self._map:
+            die( 'key "{0}" not found in location file {1}'.format( key, self.filename ) )
+        return self.get_values_if_exists( key )
+
+    def get_values_if_exists( self, key ):
+        """Return the values for key, or None if the key is not present.
+
+        A single value is returned bare; otherwise the list of values
+        (possibly empty) is returned.
+        """
+        rval = self._map.get( key )
+        if rval is not None and len( rval ) == 1:
+            return rval[0]
+        return rval
diff -r fdb4240fb565 -r 8ae67e9fb6ff OrderedDict.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/OrderedDict.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,259 @@
+# http://code.activestate.com/recipes/576693/
+# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
+# Passes Python2.7's test suite and incorporates all the latest updates.
+
+try:
+    from thread import get_ident as _get_ident
+except ImportError:
+    from dummy_thread import get_ident as _get_ident
+
+try:
+    from _abcoll import KeysView, ValuesView, ItemsView
+except ImportError:
+    pass
+
+
+class OrderedDict(dict):
+    'Dictionary that remembers insertion order'
+    # An inherited dict maps keys to values.
+    # The inherited dict provides __getitem__, __len__, __contains__, and get.
+    # The remaining methods are order-aware.
+    # Big-O running times for all methods are the same as for regular dictionaries.
+
+    # The internal self.__map dictionary maps keys to links in a doubly linked list.
+    # The circular doubly linked list starts and ends with a sentinel element.
+    # The sentinel element never gets deleted (this simplifies the algorithm).
+    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].
+
+    def __init__(self, *args, **kwds):
+        '''Initialize an ordered dictionary.  Signature is the same as for
+        regular dictionaries, but keyword arguments are not recommended
+        because their insertion order is arbitrary.
+
+        '''
+        if len(args) > 1:
+            raise TypeError('expected at most 1 arguments, got %d' % len(args))
+        try:
+            self.__root
+        except AttributeError:
+            self.__root = root = []                     # sentinel node
+            root[:] = [root, root, None]
+            self.__map = {}
+        self.__update(*args, **kwds)
+
+    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
+        'od.__setitem__(i, y) <==> od[i]=y'
+        # Setting a new item creates a new link which goes at the end of the linked
+        # list, and the inherited dictionary is updated with the new key/value pair.
+        if key not in self:
+            root = self.__root
+            last = root[0]
+            last[1] = root[0] = self.__map[key] = [last, root, key]
+        dict_setitem(self, key, value)
+
+    def __delitem__(self, key, dict_delitem=dict.__delitem__):
+        'od.__delitem__(y) <==> del od[y]'
+        # Deleting an existing item uses self.__map to find the link which is
+        # then removed by updating the links in the predecessor and successor nodes.
+        dict_delitem(self, key)
+        link_prev, link_next, key = self.__map.pop(key)
+        link_prev[1] = link_next
+        link_next[0] = link_prev
+
+    def __iter__(self):
+        'od.__iter__() <==> iter(od)'
+        root = self.__root
+        curr = root[1]
+        while curr is not root:
+            yield curr[2]
+            curr = curr[1]
+
+    def __reversed__(self):
+        'od.__reversed__() <==> reversed(od)'
+        root = self.__root
+        curr = root[0]
+        while curr is not root:
+            yield curr[2]
+            curr = curr[0]
+
+    def clear(self):
+        'od.clear() -> None.  Remove all items from od.'
+        try:
+            for node in self.__map.itervalues():
+                del node[:]
+            root = self.__root
+            root[:] = [root, root, None]
+            self.__map.clear()
+        except AttributeError:
+            pass
+        dict.clear(self)
+
+    def popitem(self, last=True):
+        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
+        Pairs are returned in LIFO order if last is true or FIFO order if false.
+
+        '''
+        if not self:
+            raise KeyError('dictionary is empty')
+        root = self.__root
+        if last:
+            link = root[0]
+            link_prev = link[0]
+            link_prev[1] = root
+            root[0] = link_prev
+        else:
+            link = root[1]
+            link_next = link[1]
+            root[1] = link_next
+            link_next[0] = root
+        key = link[2]
+        del self.__map[key]
+        value = dict.pop(self, key)
+        return key, value
+
+    # -- the following methods do not depend on the internal structure --
+
+    def keys(self):
+        'od.keys() -> list of keys in od'
+        return list(self)
+
+    def values(self):
+        'od.values() -> list of values in od'
+        return [self[key] for key in self]
+
+    def items(self):
+        'od.items() -> list of (key, value) pairs in od'
+        return [(key, self[key]) for key in self]
+
+    def iterkeys(self):
+        'od.iterkeys() -> an iterator over the keys in od'
+        return iter(self)
+
+    def itervalues(self):
+        'od.itervalues -> an iterator over the values in od'
+        for k in self:
+            yield self[k]
+
+    def iteritems(self):
+        'od.iteritems -> an iterator over the (key, value) items in od'
+        for k in self:
+            yield (k, self[k])
+
+    def update(*args, **kwds):
+        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.
+
+        If E is a dict instance, does:           for k in E: od[k] = E[k]
+        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
+        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
+        In either case, this is followed by:     for k, v in F.items(): od[k] = v
+
+        '''
+        if len(args) > 2:
+            raise TypeError('update() takes at most 2 positional '
+                            'arguments (%d given)' % (len(args),))
+        elif not args:
+            raise TypeError('update() takes at least 1 argument (0 given)')
+        self = args[0]
+        # Make progressively weaker assumptions about "other"
+        other = ()
+        if len(args) == 2:
+            other = args[1]
+        if isinstance(other, dict):
+            for key in other:
+                self[key] = other[key]
+        elif hasattr(other, 'keys'):
+            for key in other.keys():
+                self[key] = other[key]
+        else:
+            for key, value in other:
+                self[key] = value
+        for key, value in kwds.items():
+            self[key] = value
+
+    __update = update  # let subclasses override update without breaking __init__
+
+    __marker = object()
+
+    def pop(self, key, default=__marker):
+        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
+        If key is not found, d is returned if given, otherwise KeyError is raised.
+
+        '''
+        if key in self:
+            result = self[key]
+            del self[key]
+            return result
+        if default is self.__marker:
+            raise KeyError(key)
+        return default
+
+    def setdefault(self, key, default=None):
+        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
+        if key in self:
+            return self[key]
+        self[key] = default
+        return default
+
+    def __repr__(self, _repr_running={}):
+        'od.__repr__() <==> repr(od)'
+        call_key = id(self), _get_ident()
+        if call_key in _repr_running:
+            return '...'
+        _repr_running[call_key] = 1
+        try:
+            if not self:
+                return '%s()' % (self.__class__.__name__,)
+            return '%s(%r)' % (self.__class__.__name__, self.items())
+        finally:
+            del _repr_running[call_key]
+
+    def __reduce__(self):
+        'Return state information for pickling'
+        items = [[k, self[k]] for k in self]
+        inst_dict = vars(self).copy()
+        for k in vars(OrderedDict()):
+            inst_dict.pop(k, None)
+        if inst_dict:
+            return (self.__class__, (items,), inst_dict)
+        return self.__class__, (items,)
+
+    def copy(self):
+        'od.copy() -> a shallow copy of od'
+        return self.__class__(self)
+
+    @classmethod
+    def fromkeys(cls, iterable, value=None):
+        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
+        and values equal to v (which defaults to None).
+
+        '''
+        d = cls()
+        for key in iterable:
+            d[key] = value
+        return d
+
+    def __eq__(self, other):
+        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
+        while comparison to a regular mapping is order-insensitive.
+
+        '''
+        if isinstance(other, OrderedDict):
+            return len(self)==len(other) and self.items() == other.items()
+        return dict.__eq__(self, other)
+
+    def __ne__(self, other):
+        return not self == other
+
+    # -- the following methods are only used in Python 2.7 --
+
+    def viewkeys(self):
+        "od.viewkeys() -> a set-like object providing a view on od's keys"
+        return KeysView(self)
+
+    def viewvalues(self):
+        "od.viewvalues() -> an object providing a view on od's values"
+        return ValuesView(self)
+
+    def viewitems(self):
+        "od.viewitems() -> a set-like object providing a view on od's items"
+        return ItemsView(self)
diff -r fdb4240fb565 -r 8ae67e9fb6ff Population.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Population.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+
+from OrderedDict import OrderedDict
+
+class Individual(object):
+    """An individual's column, real name and optional alias; == ignores alias."""
+    __slots__ = ('_column', '_name', '_alias')
+
+    def __init__(self, column, name, alias=None):
+        self._column, self._name, self._alias = column, name, alias
+
+    @property
+    def column(self):
+        return self._column
+
+    @property
+    def name(self):
+        # The alias, when one is set, is reported instead of the real name.
+        return self._alias if self._alias is not None else self._name
+
+    @property
+    def alias(self):
+        return self._alias
+
+    @alias.setter
+    def alias(self, alias):
+        self._alias = alias
+
+    @property
+    def real_name(self):
+        return self._name
+
+    def __eq__(self, other):
+        return (self._column, self._name) == (other._column, other._name)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __repr__(self):
+        return 'Individual: column={0} name={1} alias={2}'.format(self._column, self._name, self._alias)
+
+
+class Population(object):
+    """An insertion-ordered collection of individuals keyed by column."""
+    def __init__(self, name=None):
+        self._columns = OrderedDict()
+        self._name = name
+
+    @property
+    def name(self):
+        return self._name
+
+    @name.setter
+    def name(self, name):
+        self._name = name
+
+    def add_individual(self, individual, alias=None):
+        """Store individual under its column; `alias` is accepted but unused.
+
+        Re-adding an equal individual changes nothing. Raises ValueError
+        if the column is already held by a different individual.
+        """
+        existing = self._columns.setdefault(individual.column, individual)
+        if existing != individual:
+            raise ValueError('Duplicate column: {0}'.format(individual))
+
+    def is_superset(self, other):
+        """Return True if each individual in other equals ours in that column."""
+        for column, other_individual in other._columns.items():
+            our_individual = self._columns.get(column)
+            if our_individual is None or our_individual != other_individual:
+                return False
+        return True
+
+    def is_disjoint(self, other):
+        """Return True if no column holds equal individuals in self and other."""
+        for column, our_individual in self._columns.items():
+            other_individual = other._columns.get(column)
+            if other_individual is not None and other_individual == our_individual:
+                return False
+        return True
+
+    def column_list(self):
+        """Return the columns as a list, in insertion order."""
+        return list(self._columns)
+
+    def individual_with_column(self, column):
+        """Return the individual stored under column, or None."""
+        return self._columns.get(column)
+
+    def tag_list(self, delimiter=':'):
+        """Return a 'column<delimiter>name' string for every individual."""
+        return ['{0}{1}{2}'.format(column, delimiter, individual.name)
+                for column, individual in self._columns.items()]
+
+    def to_string(self, delimiter=':', separator=' ', replace_names_with=None):
+        """Return the tags joined by separator, optionally overriding names."""
+        entries = []
+        for column, individual in self._columns.items():
+            value = individual.name if replace_names_with is None else replace_names_with
+            entries.append('{0}{1}{2}'.format(column, delimiter, value))
+        return separator.join(entries)
+
+    def __str__(self):
+        return self.to_string()
+
+    def from_population_file(self, filename):
+        """Add one individual per line of a tab-separated file.
+
+        Every line must have exactly three fields: column, name, alias.
+        An alias that is empty after stripping is not applied.
+        """
+        with open(filename) as fh:
+            for line in fh:
+                column, name, alias = line.rstrip('\r\n').split('\t')
+                self.add_individual(Individual(column, name, alias.strip() or None))
+
+    def from_tag_list(self, tag_list):
+        """Add one individual per 'column:name' tag."""
+        for tag in tag_list:
+            column, name = tag.split(':')
+            self.add_individual(Individual(column, name))
+
+    def individual_names(self):
+        """Yield every individual's name (alias if set), in insertion order."""
+        for individual in self._columns.values():
+            yield individual.name
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,13 @@
+Source code for the executables needed by these tools can be found in
+the genome_diversity directory.
+
+Additionally, you'll need the following Python modules:
+    matplotlib (we used version 1.1.0) http://pypi.python.org/packages/source/m/matplotlib/
+    mechanize  (we used version 0.2.5) http://pypi.python.org/packages/source/m/mechanize/
+    networkx   (we used version 1.6)   http://pypi.python.org/packages/source/n/networkx/
+
+And the following software:
+    ADMIXTURE  (we used version 1.22)  http://www.genetics.ucla.edu/software/admixture/
+    EIGENSOFT  (we used version 3.0)   http://genepath.med.harvard.edu/~reich/Software.htm
+    PHAST      (we used version 1.2.1) http://compgen.bscb.cornell.edu/phast/
+    QuickTree  (we used version 1.1)   http://www.sanger.ac.uk/resources/software/quicktree/
diff -r fdb4240fb565 -r 8ae67e9fb6ff add_fst_column.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/add_fst_column.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+#  <command interpreter="python">
+#    add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source" "$min_reads" "$min_qual" "$retain" "$discard_fixed" "$biased" "$output"
+#    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+#        #set $arg = '%s:%s' % ($individual_col, $individual)
+#        "$arg"
+#    #end for
+#  </command>
+
+import sys
+import subprocess
+from Population import Population
+
+################################################################################
+
+# Command line: ten fixed arguments followed by one or more
+# "column:name" tags, one for every individual in the SNP table
+# (at least twelve entries in sys.argv, counting the script name).
+if len(sys.argv) < 12:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+(snp_table, p1_input, p2_input, genotypes, min_reads, min_qual,
+ retain, discard_fixed, biased, output) = sys.argv[1:11]
+individual_metadata = sys.argv[11:]
+
+p_total = Population()
+p_total.from_tag_list(individual_metadata)
+
+# Both populations must be drawn from the individuals of the SNP table.
+p1 = Population()
+p1.from_population_file(p1_input)
+if not p_total.is_superset(p1):
+    print >> sys.stderr, 'There is an individual in population 1 that is not in the SNP table'
+    sys.exit(1)
+
+p2 = Population()
+p2.from_population_file(p2_input)
+if not p_total.is_superset(p2):
+    print >> sys.stderr, 'There is an individual in population 2 that is not in the SNP table'
+    sys.exit(1)
+
+################################################################################
+
+prog = 'Fst_column'
+
+# Fixed arguments first, then one "<column>:<population number>" entry
+# for each individual of population 1 and of population 2.
+args = [prog, snp_table, genotypes, min_reads, min_qual,
+        retain, discard_fixed, biased]
+args.extend('{0}:1'.format(column) for column in p1.column_list())
+args.extend('{0}:2'.format(column) for column in p2.column_list())
+
+# The child's stdout is written to the output file; its stderr is passed
+# through unchanged.  The file is closed even if the program cannot be
+# started.
+fh = open(output, 'w')
+try:
+    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
+    rc = p.wait()
+finally:
+    fh.close()
+
+# Exit with the child's status instead of always reporting success, so
+# that a failed Fst_column run is visible to whoever launched this
+# wrapper.
+sys.exit(rc)
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff add_fst_column.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/add_fst_column.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,91 @@
+<tool id="gd_add_fst_column" name="Per-SNP FSTs" version="1.0.0">
+  <description>: Compute a fixation index score for each SNP</description>
+
+  <command interpreter="python">
+    add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source" "$min_reads" "$min_qual" "$retain" "$discard_fixed" "$biased" "$output"
+    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+        #set $arg = '%s:%s' % ($individual_col, $individual)
+        "$arg"
+    #end for
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="SNP table" />
+    <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" />
+    <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" />
+
+    <param name="data_source" type="select" format="integer" label="Data source">
+      <option value="0" selected="true">sequence coverage</option>
+      <option value="1">estimated genotype</option>
+    </param>
+
+    <param name="min_reads" type="integer" min="0" value="0" label="Minimum total read count for a population" />
+    <param name="min_qual" type="integer" min="0" value="0" label="Minimum individual genotype quality" />
+
+    <param name="retain" type="select" label="Special treatment">
+      <option value="0" selected="true">Skip row</option>
+      <option value="1">Set FST = -1</option>
+    </param>
+
+    <param name="discard_fixed" type="select" label="Apparently fixed SNPs">
+      <option value="0">Retain SNPs that appear fixed in the two populations</option>
+      <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option>
+    </param>
+
+    <param name="biased" type="select" label="FST estimator">
+      <option value="0" selected="true">Wright's original definition</option>
+      <option value="1">Weir's unbiased estimator</option>
+    </param>
+
+  </inputs>
+
+  <outputs>
+    <data name="output" format="gd_snp" metadata_source="input" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" />
+      <param name="data_source" value="0" />
+      <param name="min_reads" value="3" />
+      <param name="min_qual" value="0" />
+      <param name="retain" value="0" />
+      <param name="discard_fixed" value="1" />
+      <param name="biased" value="0" />
+      <output name="output" file="test_out/add_fst_column/add_fst_column.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+
+**What it does**
+
+The user specifies a SNP table and two "populations" of individuals,
+both previously defined using the Specify Individuals tool.
+No individual can be in both populations.  Other choices are as follows.
+
+Data source.  The allele frequencies of a SNP in the two populations can be
+estimated either by the total number of reads of each allele, or by adding
+the frequencies inferred from genotypes of individuals in the populations.
+
+After specifying the data source, the user sets lower bounds on the amount
+of data required at a SNP.  For estimating the Fst using read counts,
+the bound is the minimum count of reads of the two alleles in a population.
+For estimations based on genotype, the bound is the minimum reported genotype
+quality per individual.
+
+The user specifies whether the SNPs that violate the lower bound should be
+ignored or the Fst set to -1.
+
+The user specifies whether SNPs where both populations appear to be fixed
+for the same allele should be retained or discarded.
+
+Finally, the user chooses which definition of Fst to use:  Wright's original
+definition or Weir's unbiased estimator.
+
+A column is appended to the SNP table giving the Fst for each retained SNP.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff aggregate_gd_indivs.xml
--- a/aggregate_gd_indivs.xml	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-<tool id="gd_sum_gd_snp" name="Aggregate Individuals" version="1.0.0">
-  <description>: Append summary columns for a population</description>
-
-  <command interpreter="python">
-    modify_snp_table.py "$input" "$p1_input" "$output" "-1" "-1" "-1" "-1"
-    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
-        #set $arg = '%s:%s' % ($individual_col, $individual)
-        "$arg"
-    #end for
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="gd_snp" label="SNP dataset" />
-    <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
-  </inputs>
-
-  <outputs>
-    <data name="output" format="gd_snp" metadata_source="input" />
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
-      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
-      <param name="choice" value="1" />
-      <param name="lo_coverage" value="0" />
-      <param name="hi_coverage" value="1000" />
-      <param name="low_ind_cov" value="3" />
-      <param name="lo_quality" value="30" />
-      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
-    </test>
-  </tests>
-
-  <help>
-
-**Dataset formats**
-
-The input datasets are in gd_snp_ and gd_indivs_ formats.
-The output dataset is in gd_snp_ format.  (`Dataset missing?`_)
-
-.. _gd_snp: ./static/formatHelp.html#gd_snp
-.. _gd_indivs: ./static/formatHelp.html#gd_indivs
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-The user specifies that some of the individuals in a gd_snp dataset form a
-"population", by supplying a list that has been previously created using the
-Specify Individuals tool.  The program appends a
-new "entity" (set of four columns) to the gd_snp table, analogous to the columns
-for an individual but containing summary data for the population as a group.
-These four columns give the total counts for the two alleles, the "genotype" for
-the population, and the maximum quality value, taken over all individuals in the
-population.  If all defined genotypes in the population are 2 (agree with the
-reference), then the population's genotype is 2, and similarly for 0; otherwise
-the genotype is 1 (unless all individuals have undefined genotype, in which case
-it is -1).
-
------
-
-**Example**
-
-- input gd_snp::
-
-    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0
-    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30    1  0  2  30   Y  22  +99.   0
-    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0
-    etc.
-
-- input individuals::
-
-    9   PB1
-    13  PB2
-    17  PB3
-
-- output::
-
-    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0   29  0  2  72
-    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30    1  0  2  30   Y  22  +99.   0    3  0  2  30
-    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0   13  0  2  42
-    etc.
-
-  </help>
-</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff average_fst.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/average_fst.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+import sys
+import subprocess
+from Population import Population
+
+################################################################################
+
+# Command line: ten fixed arguments followed by one or more
+# "column:name" tags, one for every individual in the SNP table
+# (at least twelve entries in sys.argv, counting the script name).
+if len(sys.argv) < 12:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+(snp_table, p1_input, p2_input, data_source, min_total_count,
+ discard_fixed, biased, output, shuffles, p0_input) = sys.argv[1:11]
+individual_metadata = sys.argv[11:]
+
+# A shuffle count that is not an integer means population 0 is not read.
+# Only ValueError is caught so unrelated errors are not swallowed.
+try:
+    shuffle_count = int(shuffles)
+except ValueError:
+    shuffle_count = 0
+
+p_total = Population()
+p_total.from_tag_list(individual_metadata)
+
+# Every population must be drawn from the individuals of the SNP table.
+p1 = Population()
+p1.from_population_file(p1_input)
+if not p_total.is_superset(p1):
+    print >> sys.stderr, 'There is an individual in population 1 that is not in the SNP table'
+    sys.exit(1)
+
+p2 = Population()
+p2.from_population_file(p2_input)
+if not p_total.is_superset(p2):
+    print >> sys.stderr, 'There is an individual in population 2 that is not in the SNP table'
+    sys.exit(1)
+
+# Population 0 is only read when a positive shuffle count was given.
+p0 = None
+if shuffle_count > 0:
+    p0 = Population()
+    p0.from_population_file(p0_input)
+    if not p_total.is_superset(p0):
+        print >> sys.stderr, 'There is an individual in population 0 that is not in the SNP table'
+        sys.exit(1)
+
+################################################################################
+
+prog = 'Fst_ave'
+
+# Fixed arguments first, then one "<column>:<population number>" entry
+# for each individual of populations 1, 2 and (when loaded) 0.
+args = [prog, snp_table, data_source, min_total_count, discard_fixed,
+        biased, shuffles]
+args.extend('{0}:1'.format(column) for column in p1.column_list())
+args.extend('{0}:2'.format(column) for column in p2.column_list())
+if p0 is not None:
+    args.extend('{0}:0'.format(column) for column in p0.column_list())
+
+# The child's stdout is written to the output file; its stderr is passed
+# through unchanged.  The file is closed even if the program cannot be
+# started.
+fh = open(output, 'w')
+try:
+    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
+    rc = p.wait()
+finally:
+    fh.close()
+
+# Exit with the child's status instead of always reporting success, so
+# that a failed Fst_ave run is visible to whoever launched this wrapper.
+sys.exit(rc)
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff average_fst.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/average_fst.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,113 @@
+<tool id="gd_average_fst" name="Overall FST" version="1.0.0">
+  <description>: Estimate the relative fixation index between two populations</description>
+
+  <command interpreter="python">
+    average_fst.py "$input" "$p1_input" "$p2_input" "$data_source.ds_choice" "$data_source.min_value" "$discard_fixed" "$biased" "$output"
+    #if $use_randomization.ur_choice == '1'
+      "$use_randomization.shuffles" "$use_randomization.p0_input"
+    #else
+      "0" "/dev/null"
+    #end if
+    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+        #set $arg = '%s:%s' % ($individual_col, $individual)
+        "$arg"
+    #end for
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="SNP table" />
+    <param name="p1_input" type="data" format="gd_indivs" label="Population 1 individuals" />
+    <param name="p2_input" type="data" format="gd_indivs" label="Population 2 individuals" />
+
+    <conditional name="data_source">
+      <param name="ds_choice" type="select" format="integer" label="Data source">
+          <option value="0" selected="true">sequence coverage and ..</option>
+          <option value="1">estimated genotype and ..</option>
+      </param>
+      <when value="0">
+        <param name="min_value" type="integer" min="1" value="1" label="Minimum total read count for a population" />
+      </when>
+      <when value="1">
+        <param name="min_value" type="integer" min="1" value="1" label="Minimum individual genotype quality" />
+      </when>
+    </conditional>
+
+    <param name="discard_fixed" type="select" label="Apparently fixed SNPs">
+      <option value="0">Retain SNPs that appear fixed in the two populations</option>
+      <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option>
+    </param>
+
+    <param name="biased" type="select" label="FST estimator">
+      <option value="0" selected="true">Wright's original definition</option>
+      <option value="1">Weir's unbiased estimator</option>
+    </param>
+
+    <conditional name="use_randomization">
+      <param name="ur_choice" type="select" format="integer" label="Use randomization">
+        <option value="0" selected="true">No</option>
+        <option value="1">Yes</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="shuffles" type="integer" min="0" value="0" label="Shuffles" />
+        <param name="p0_input" type="data" format="gd_indivs" label="Individuals for randomization" />
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="p2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" />
+      <param name="ds_choice" value="0" />
+      <param name="min_value" value="3" />
+      <param name="discard_fixed" value="1" />
+      <param name="biased" value="0" />
+      <param name="ur_choice" value="0" />
+      <output name="output" file="test_out/average_fst/average_fst.txt" />
+    </test>
+  </tests>
+
+  <help>
+
+**What it does**
+
+The user specifies a SNP table and two "populations" of individuals,
+both previously defined using the Specify Individuals tool.
+No individual can be in both populations.  Other choices are as follows.
+
+Data source.  The allele frequencies of a SNP in the two populations can be
+estimated either by the total number of reads of each allele, or by adding
+the frequencies inferred from genotypes of individuals in the populations.
+
+After specifying the data source, the user sets lower bounds on the amount
+of data required at a SNP.  For estimating the Fst using read counts,
+the bound is the minimum count of reads of the two alleles in a population.
+For estimations based on genotype, the bound is the minimum reported genotype
+quality per individual.  SNPs not meeting these lower bounds are ignored.
+
+The user specifies whether SNPs where both populations appear to be fixed
+for the same allele should be retained or discarded.
+
+The user chooses which definition of Fst to use: Wright's original definition
+or Weir's unbiased estimator.
+
+Finally, the user decides whether to use randomizations.  If so, then the
+user specifies how many randomly generated population pairs (retaining
+the numbers of individuals of the originals) to generate, as well as the
+"population" of additional individuals (not in the first two populations)
+that can be used in the randomization process.
+
+The program prints the average Fst for the original populations and the
+number of SNPs used to compute it.  If randomizations were requested,
+it prints the average Fst for each randomly generated population pair,
+ending with a summary that includes the maximum and average value, and the
+highest-scoring population pair.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff calclenchange.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/calclenchange.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,280 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       calclenchange.py
+#       
+#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
+#       
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#       
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#       
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse,mechanize,os,sys
+from decimal import Decimal,getcontext
+from xml.etree.ElementTree import ElementTree,tostring
+import networkx as nx
+from copy import copy
+
+#rank the pathways by the change in their number of paths
+def rankdN(ltfreqs):
+	"""Rank (change,original,mutated,pathway) records, largest change first.
+
+	Returns a list of [change,original,mutated,rank,pathway] rows in which
+	everything but pathway has been converted with str().  Records with an
+	equal change share a rank; the next distinct change gets its 1-based
+	position in the output (1, 1, 3, ...).
+
+	When original is 'C' the change is replaced by 'C-<mutated>', or by
+	Decimal('0') if mutated is 'C' too.  An empty ltfreqs gives an empty
+	list; previously the unconditional pop() raised IndexError.
+	"""
+	outrnk=[]
+	crank=1
+	prevChng=None#change of the previously ranked record
+	#~ sorted ascending and walked backwards: the highest value comes first
+	for tmpChng,tmpOri,tmpMut,tmpPthw in reversed(sorted(ltfreqs)):
+		if tmpOri=='C':
+			if tmpMut!='C':
+				tmpChng='C-%s'%tmpMut
+			else:
+				tmpChng=Decimal('0')
+		if outrnk and tmpChng!=prevChng:#new value: rank is its position
+			crank=len(outrnk)+1
+		prevChng=tmpChng
+		outrnk.append([str(tmpChng),str(tmpOri),str(tmpMut),str(crank),tmpPthw])
+	return outrnk
+
+#rank the pathways by the change in their average path length
+def rankdAvr(ltfreqs):
+	"""Rank (change,original,mutated,pathway) records, largest change first.
+
+	Returns a dictionary mapping each pathway to the tab-joined string
+	'change<TAB>original<TAB>mutated<TAB>rank'.  Records with an equal
+	change share a rank; the next distinct change is ranked one more than
+	the number of pathways stored so far (1, 1, 3, ...).
+
+	When original is 'I' the change is replaced by 'I-<mutated>', or by
+	Decimal('0') if mutated is 'I' too.  An empty ltfreqs gives an empty
+	dictionary; previously the unconditional pop() raised IndexError.
+	"""
+	outrnk={}
+	crank=1
+	prevChng=None#change of the previously ranked record
+	#~ sorted ascending and walked backwards: the highest value comes first
+	for tmpChng,tmpOri,tmpMut,tmpPthw in reversed(sorted(ltfreqs)):
+		if tmpOri=='I':
+			if tmpMut!='I':
+				tmpChng='I-%s'%tmpMut
+			else:
+				tmpChng=Decimal('0')
+		if outrnk and tmpChng!=prevChng:#new value: rank is its position
+			crank=len(outrnk)+1
+		prevChng=tmpChng
+		outrnk[tmpPthw]='\t'.join([str(tmpChng),str(tmpOri),str(tmpMut),str(crank)])
+	return outrnk
+
+#split the nodes of an edge list into pure start nodes and pure end nodes
+def returnstartanendnodes(edges):
+	"""Return (start nodes, end nodes) for (beginNod,endNod) pairs.
+
+	Start nodes only ever begin an edge (indegree 0); end nodes only ever
+	finish one (outdegree 0).  Both results are sets.
+	"""
+	pairs=[(beginNod,endNod) for beginNod,endNod in edges]#single pass over edges
+	begins,ends=set(p[0] for p in pairs),set(p[1] for p in pairs)
+	return begins-ends,ends-begins
+
+#~ read the reaction graph and the reacting entries of one pathway XML file
+def returnNodesNEdgesfKXML(fpthwKGXML):
+	"""Parse a pathway XML file and return (dnodes,edges,title).
+
+	fpthwKGXML is whatever ElementTree.parse() accepts (file name or
+	file object).
+
+	edges is a set of directed (from id,to id) pairs.  Every <reaction>
+	contributes substrate->reaction and reaction->product pairs when its
+	type is 'irreversible' or 'reversible'; a 'reversible' reaction adds
+	the opposite pairs as well.  Any other type adds no edges.
+
+	dnodes maps the id of each <entry> that is also the id of a reaction,
+	substrate or product to the set of whitespace-separated words of the
+	entry's name attribute.
+
+	title is the title attribute of the root element.
+	"""
+	root=ElementTree().parse(fpthwKGXML)
+	edges=set()
+	usedIds=set()#ids of every reaction, substrate and product
+	for reactn in root.findall('reaction'):
+		mainid=reactn.get('id')
+		usedIds.add(mainid)
+		reacttyp=reactn.get('type')
+		forward=reacttyp in ('irreversible','reversible')
+		backward=reacttyp=='reversible'
+		#~ substrates point at the reaction
+		for sbstrt in reactn.findall('substrate'):
+			sbstrtid=sbstrt.get('id')
+			usedIds.add(sbstrtid)
+			if forward:
+				edges.add((sbstrtid,mainid))
+			if backward:
+				edges.add((mainid,sbstrtid))
+		#~ the reaction points at its products
+		for prdct in reactn.findall('product'):
+			prodctid=prdct.get('id')
+			usedIds.add(prodctid)
+			if forward:
+				edges.add((mainid,prodctid))
+			if backward:
+				edges.add((prodctid,mainid))
+	#~ Nodes: entries are visited last to first, so when an id is repeated
+	#~ the entry that comes first in the file is the one that is kept
+	dnodes={}
+	for prot in reversed(root.findall('entry')):
+		protid=prot.get('id')
+		if protid in usedIds:
+			dnodes[protid]=set(prot.get('name').split())#each genename for each Id
+	return dnodes,edges,root.get('title')
+
+#~ count the start->end node connections of a graph and average their shortest path lengths
+def rtrnAvrgLen(edges,strNds,endNds):
+	wG=nx.DiGraph()#directed graph built from the (begin,end) pairs
+	wG.add_edges_from(edges)
+	dPairsSrcSnks=nx.all_pairs_shortest_path_length(wG)#used below as {start node:{reachable node:shortest path length}}
+	nstartNdsID0=len(strNds)
+	cstrtNds=0
+	nPaths=0
+	lPathLen=[]
+	while nstartNdsID0>cstrtNds:
+		cStartNd=strNds.pop()#current start node; NOTE: strNds is emptied in place, so the caller's collection is consumed
+		dEndNdsLen=dPairsSrcSnks.pop(cStartNd)
+		for cendNd in dEndNdsLen:
+			if cendNd in endNds:#only reachable nodes that are end nodes count as a path
+				lPathLen.append(dEndNdsLen[cendNd])
+				nPaths+=1
+		cstrtNds+=1
+	AvrgPthLen=0#stays 0 when no start node reaches an end node
+	if nPaths!=0:
+		AvrgPthLen=Decimal(sum(lPathLen))/Decimal(str(nPaths))
+	return nPaths,AvrgPthLen
+
+def main():
+	parser = argparse.ArgumentParser(description='Rank pathways based on the change in length and number of paths connecting sources and sinks.')
+	parser.add_argument('--loc_file',metavar='correlational database',type=str,help='correlational database')
+	parser.add_argument('--species',metavar='species name',type=str,help='the species of interest in loc_file')
+	parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format. Column 1 is the diference between column 2 and column 3, Column 2 is the pathway average length (between sources and sinks) including the genes in the input list, Column 3 is the pathway average length EXCLUDING the genes in the input list, Column 4 is the rank based on column 1. Column 5 is the diference between column 6 and column 7, Column 6 is the number of paths between sources and sinks, including the genes in the input list, Column 7 is the number of paths between sources and sinks EXCLUDING the genes in the input list, Column 8 is the rank based on column 5. Column 9 I the pathway name' )
+	parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name')
+	parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
+	#~ parse sys.argv into a plain attribute holder
+	#~ (C is an empty class used only as the argparse namespace)
+	class C(object):
+		pass
+	fulargs=C()
+	parser.parse_args(sys.argv[1:],namespace=fulargs)
+	#unpack the parsed arguments into local names
+	inputf,loc_file,species,output,posKEGGclmn,Kgeneposcolmn=fulargs.input,fulargs.loc_file,fulargs.species,fulargs.output,fulargs.posKEGGclmn,fulargs.KEGGgeneposcolmn
+	posKEGGclmn-=1#column numbers are given 1-based; convert to 0-based indexes
+	Kgeneposcolmn-=1
+	#~ first loc_file line whose first whitespace-separated field is the species: fields 2 and 3 are the gene prefix and the directory of pathway XML files
+	crDB=[x.split() for x in open(loc_file).read().splitlines() if x.split()[0]==species][0]
+	sppPrefx,dinput=crDB[1],crDB[2]
+	#~ Decimal arithmetic is limited to 3 significant digits from here on
+	getcontext().prec = 3
+	#map the gene field of every non-blank input line (first line skipped) to the set of pathway codes, i.e. the text before '=' of each '.'-separated item of the pathway field
+	dKEGGcPthws=dict([(x.split('\t')[Kgeneposcolmn],set([y.split('=')[0] for y in x.split('\t')[posKEGGclmn].split('.')])) for x in open(inputf).read().splitlines()[1:] if x.strip()])
+	sdGenes=set([x for x in dKEGGcPthws.keys() if x.find('.')>-1])
+	while True:#replace each key holding several '.'-joined genes by one key per gene; the loop ends when sdGenes.pop() fails on the empty set
+		try:
+			mgenes=sdGenes.pop()
+			pthwsAssotd=dKEGGcPthws.pop(mgenes)
+			mgenes=mgenes.split('.')
+			for eachg in mgenes:
+				dKEGGcPthws[eachg]=pthwsAssotd
+		except:
+			break
+	#~ every directory entry whose name contains '.xml', except cfa04070.xml, is processed as a pathway
+	lPthwsF=[x for x in os.listdir(dinput) if x.find('.xml')>-1 if x not in ['cfa04070.xml']]
+	nPthws=len(lPthwsF)
+	cPthw=0
+	lPthwPthN=[]#the output list for number of paths
+	lPthwPthAvr=[]#the output list for the length of paths
+	#~ one iteration per pathway file
+	while cPthw<nPthws:
+		cPthw+=1
+		KEGGpathw=lPthwsF.pop()
+		comdKEGGpathw=KEGGpathw.split('.')[0]#pathway code = file name up to the first '.'
+		tmpddGenrcgenPresent=set()#'prefix:gene' names of the input genes assigned to this pathway
+		sKEGGc=dKEGGcPthws.keys()
+		lsKEGGc=len(sKEGGc)
+		ctPthw=0
+		while ctPthw < lsKEGGc:#to save memory
+			eachK=sKEGGc.pop()
+			alPthws=dKEGGcPthws[eachK]
+			if comdKEGGpathw in alPthws:
+				tmpddGenrcgenPresent.add(':'.join([sppPrefx,eachK]))
+			ctPthw+=1
+		#~ Make graph calculations on the complete pathway
+		dnodes,edges,title=returnNodesNEdgesfKXML(open(os.path.join(dinput,KEGGpathw)))
+		startNdsID0,endNdsOD0=returnstartanendnodes(edges)
+		startNdsOri=copy(startNdsID0)#rtrnAvrgLen empties the start-node set it is given, so keep a copy
+		#~ defaults used when the graph has no pure start node or no pure end node
+		nPaths='C'#stands for circuit
+		AvrgPthLen='I'#stand for infinite
+		if len(startNdsID0)>0 and len(endNdsOD0)>0:
+			nPaths,AvrgPthLen=rtrnAvrgLen(edges,startNdsID0,endNdsOD0)
+		#~ collect the nodes all of whose gene names are in the input list
+		genestodel=set()
+		lnodes=len(dnodes)
+		sNds=set(dnodes)
+		ctPthw=0
+		while ctPthw<lnodes:
+			ctPthw+=1
+			cNod=sNds.pop()
+			sgenes=dnodes.pop(cNod)
+			if len(sgenes.intersection(tmpddGenrcgenPresent))==len(sgenes):
+				genestodel.add(cNod)
+		#~ drop every edge touching such a node and redo the calculation; without any such node the original values are reused
+		wnPaths,wAvrgPthLen=copy(nPaths),copy(AvrgPthLen)
+		if len(genestodel)>0:
+			wedges=set([x for x in edges if len(set(x).intersection(genestodel))==0])
+			wstartNds,wendNds=returnstartanendnodes(wedges)
+			if nPaths!='C':#keep only nodes that were already start/end nodes of the complete graph
+				wstartNds=[x for x in wstartNds if x in startNdsOri]
+				wendNds=[x for x in wendNds if x in endNdsOD0]
+			if len(wstartNds)>0 and len(wendNds)>0:
+				wnPaths,wAvrgPthLen=rtrnAvrgLen(wedges,wstartNds,wendNds)
+		#~ Calculate the differences; a 'C' result is counted as 1000 for both the path number and the length
+		orNP,mutNP,oriLen,mutLen=nPaths,wnPaths,AvrgPthLen,wAvrgPthLen
+		if nPaths=='C':
+			orNP=Decimal('1000')
+			oriLen=Decimal('1000')
+		if wnPaths=='C':
+			mutNP=Decimal('1000')
+			mutLen=Decimal('1000')
+		lPthwPthN.append([orNP-mutNP,nPaths,wnPaths,'='.join([comdKEGGpathw,title])])#record: difference, original, mutated, 'code=title'
+		lPthwPthAvr.append([oriLen-mutLen,AvrgPthLen,wAvrgPthLen,'='.join([comdKEGGpathw,title])])#same layout for the average lengths
+	doutrnkPthN=rankdN(lPthwPthN)
+	doutrnkPthAvr=rankdAvr(lPthwPthAvr)
+	#~ one output line per pathway: the average-length columns, then the path-number columns ending with the pathway name
+	sall=['\t'.join([doutrnkPthAvr[x[4]],'\t'.join(x)]) for x in doutrnkPthN]
+	salef=open(output,'w')
+	salef.write('\n'.join(sall))
+	salef.close()
+	return 0
+	
+
+if __name__ == '__main__':
+	main()#runs only when executed as a script; the value returned by main() is not used as the exit status
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff calctfreq.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/calctfreq.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       calcfreq.py
+#       
+#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
+#       
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#       
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#       
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse,os,sys
+from decimal import Decimal,getcontext
+from LocationFile import LocationFile
+
+#rank the pathways by the fraction of their genes found in the list
+def rankd(ltfreqs):
+	"""Rank (frequency,count,pathway) records, highest frequency first.
+
+	Returns tab-joined 'count, frequency, rank, pathway' strings.  Records
+	with equal frequency share a rank; the next distinct frequency gets its
+	1-based position (1, 1, 3, ...).  An empty input gives an empty list
+	instead of the IndexError the unconditional pop() used to raise.
+	"""
+	outrnk=[]
+	crank=1
+	prevFreq=None#frequency of the previously ranked record
+	for tmpFreq,tmpCount,tmpPthw in reversed(sorted(ltfreqs)):
+		if outrnk and tmpFreq!=prevFreq:#new value: rank is its position
+			crank=len(outrnk)+1
+		prevFreq=tmpFreq
+		outrnk.append('\t'.join([str(tmpCount),str(tmpFreq),str(crank),tmpPthw]))
+	return outrnk
+		
+
+def main():
+	parser = argparse.ArgumentParser(description='Obtain KEGG images from a list of genes.')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
+	parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format. Column 1 is the count of genes in the list, Column 2 is the percentage of the pathway genes present on the list. Column 3 is the rank based on column 2')
+	parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name')
+	parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code')
+	parser.add_argument('--loc_file',metavar='location file',type=str,help='location file')
+	parser.add_argument('--species',metavar='species',type=str,help='species')
+	#~ parse sys.argv into a plain attribute holder (C is an empty class used only as the argparse namespace)
+	class C(object):
+		pass
+	fulargs=C()
+	parser.parse_args(sys.argv[1:],namespace=fulargs)
+	#unpack the parsed arguments into local names
+	inputf,outputf,posKEGGclmn,Kgeneposcolmn=fulargs.input,fulargs.output,fulargs.posKEGGclmn,fulargs.KEGGgeneposcolmn
+	locf,species=fulargs.loc_file,fulargs.species
+	#column numbers are given 1-based (converted here); then map the gene field of every non-blank input line (first line skipped) to the set of '.'-separated items of its pathway field
+	posKEGGclmn-=1
+	Kgeneposcolmn-=1
+	dKEGGcPthws=dict([(x.split('\t')[Kgeneposcolmn],set(x.split('\t')[posKEGGclmn].split('.'))) for x in open(inputf).read().splitlines()[1:] if x.strip()])
+	sdGenes=set([x for x in dKEGGcPthws.keys() if x.find('.')>-1])
+	while True:#replace each key holding several '.'-joined genes by one key per gene; the loop ends when sdGenes.pop() fails on the empty set
+		try:
+			mgenes=sdGenes.pop()
+			pthwsAssotd=dKEGGcPthws.pop(mgenes)
+			mgenes=mgenes.split('.')
+			for eachg in mgenes:
+				dKEGGcPthws[eachg]=pthwsAssotd
+		except:
+			break
+	#~ Count genes
+	getcontext().prec=2#Decimal results are rounded to 2 significant digits (not 2 decimal places)
+
+	location_file = LocationFile(locf)
+	prefix, kxml_dir_path, dict_file = location_file.get_values(species)
+	dPthContsTotls = {}  # pathway -> integer read from dict_file ('value<TAB>key' lines); used below as the divisor
+	try:
+	    with open(dict_file) as fh:
+	        for line in fh:
+	            line = line.rstrip('\r\n')
+	            value, key = line.split('\t')
+	            dPthContsTotls[key] = int(value)
+	except IOError, err:
+	    print >> sys.stderr, 'Error opening dict file {0}: {1}'.format(dict_file, err.strerror)
+	    sys.exit(1)
+
+	dPthContsTmp=dict([(x,0) for x in dPthContsTotls.keys()])#one counter per pathway of dict_file, starting at 0
+	sdGenes=set([x for x in dKEGGcPthws.keys()])#set of all gene keys
+	cntGens=0
+	ltGens=len(sdGenes)
+	while cntGens<ltGens:
+		cGen=sdGenes.pop()
+		sKEGGcPthws=dKEGGcPthws.pop(cGen)
+		for eachP in sKEGGcPthws:
+			if eachP!='N':#'N' items are not counted (presumably "no pathway" -- confirm against the input format)
+				dPthContsTmp[eachP]+=1#NOTE(review): KeyError if the pathway is missing from dict_file
+		cntGens+=1
+	#~ Calculate Freqs.: (count/total, count, pathway) for every pathway of dict_file
+	ltfreqs=[((Decimal(dPthContsTmp[x])/Decimal(dPthContsTotls[x])),Decimal(dPthContsTmp[x]),x) for x in dPthContsTotls]
+	tabllfreqs='\n'.join(rankd(ltfreqs))
+	salef=open(outputf,'w')
+	salef.write(tabllfreqs)
+	salef.close()
+	return 0
+	
+
+if __name__ == '__main__':
+	main()#runs only when executed as a script; the value returned by main() is not used as the exit status
diff -r fdb4240fb565 -r 8ae67e9fb6ff cdblib.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cdblib.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+
+'''
+Manipulate DJB's Constant Databases. These are 2 level disk-based hash tables
+that efficiently handle many keys, while remaining space-efficient.
+
+    http://cr.yp.to/cdb.html
+
+When generated databases are only used with Python code, consider using hash()
+rather than djb_hash() for a tidy speedup.
+'''
+
+from _struct import Struct
+from itertools import chain
+
+
+def py_djb_hash(s):
+    '''Return the value of DJB's hash function for the given 8-bit string.'''
+    h = 5381
+    for c in s:
+        h = (((h << 5) + h) ^ ord(c)) & 0xffffffff
+    return h
+
+try:
+    from _cdblib import djb_hash
+except ImportError:
+    djb_hash = py_djb_hash
+
+read_2_le4 = Struct('<LL').unpack
+write_2_le4 = Struct('<LL').pack
+
+
+class Reader(object):
+    '''A dictionary-like object for reading a Constant Database accessed
+    through a string or string-like sequence, such as mmap.mmap().'''
+
+    def __init__(self, data, hashfn=djb_hash):
+        '''Create an instance reading from a sequence and using hashfn to hash
+        keys.'''
+        if len(data) < 2048:
+            raise IOError('CDB too small')
+
+        self.data = data
+        self.hashfn = hashfn
+
+        self.index = [read_2_le4(data[i:i+8]) for i in xrange(0, 2048, 8)]
+        self.table_start = min(p[0] for p in self.index)
+        # Assume load load factor is 0.5 like official CDB.
+        self.length = sum(p[1] >> 1 for p in self.index)
+
+    def iteritems(self):
+        '''Like dict.iteritems(). Items are returned in insertion order.'''
+        pos = 2048
+        while pos < self.table_start:
+            klen, dlen = read_2_le4(self.data[pos:pos+8])
+            pos += 8
+
+            key = self.data[pos:pos+klen]
+            pos += klen
+
+            data = self.data[pos:pos+dlen]
+            pos += dlen
+
+            yield key, data
+
+    def items(self):
+        '''Like dict.items().'''
+        return list(self.iteritems())
+
+    def iterkeys(self):
+        '''Like dict.iterkeys().'''
+        return (p[0] for p in self.iteritems())
+    __iter__ = iterkeys
+
+    def itervalues(self):
+        '''Like dict.itervalues().'''
+        return (p[1] for p in self.iteritems())
+
+    def keys(self):
+        '''Like dict.keys().'''
+        return [p[0] for p in self.iteritems()]
+
+    def values(self):
+        '''Like dict.values().'''
+        return [p[1] for p in self.iteritems()]
+
+    def __getitem__(self, key):
+        '''Like dict.__getitem__().'''
+        value = self.get(key)
+        if value is None:
+            raise KeyError(key)
+        return value
+
+    def has_key(self, key):
+        '''Return True if key exists in the database.'''
+        return self.get(key) is not None
+    __contains__ = has_key
+
+    def __len__(self):
+        '''Return the number of records in the database.'''
+        return self.length
+
+    def gets(self, key):
+        '''Yield values for key in insertion order.'''
+        # Truncate to 32 bits and remove sign.
+        h = self.hashfn(key) & 0xffffffff
+        start, nslots = self.index[h & 0xff]
+
+        if nslots:
+            end = start + (nslots << 3)
+            slot_off = start + (((h >> 8) % nslots) << 3)
+
+            for pos in chain(xrange(slot_off, end, 8),
+                             xrange(start, slot_off, 8)):
+                rec_h, rec_pos = read_2_le4(self.data[pos:pos+8])
+
+                if not rec_h:
+                    break
+                elif rec_h == h:
+                    klen, dlen = read_2_le4(self.data[rec_pos:rec_pos+8])
+                    rec_pos += 8
+
+                    if self.data[rec_pos:rec_pos+klen] == key:
+                        rec_pos += klen
+                        yield self.data[rec_pos:rec_pos+dlen]
+
+    def get(self, key, default=None):
+        '''Get the first value for key, returning default if missing.'''
+        # Avoid exception catch when handling default case; much faster.
+        return chain(self.gets(key), (default,)).next()
+
+    def getint(self, key, default=None, base=0):
+        '''Get the first value for key converted it to an int, returning
+        default if missing.'''
+        value = self.get(key, default)
+        if value is not default:
+            return int(value, base)
+        return value
+
+    def getints(self, key, base=0):
+        '''Yield values for key in insertion order after converting to int.'''
+        return (int(v, base) for v in self.gets(key))
+
+    def getstring(self, key, default=None, encoding='utf-8'):
+        '''Get the first value for key decoded as unicode, returning default if
+        not found.'''
+        value = self.get(key, default)
+        if value is not default:
+            return value.decode(encoding)
+        return value
+
+    def getstrings(self, key, encoding='utf-8'):
+        '''Yield values for key in insertion order after decoding as
+        unicode.'''
+        return (v.decode(encoding) for v in self.gets(key))
+
+
+class Writer(object):
+    '''Object for building new Constant Databases, and writing them to a
+    seekable file-like object.'''
+
+    def __init__(self, fp, hashfn=djb_hash):
+        '''Create an instance writing to a file-like object, using hashfn to
+        hash keys.'''
+        self.fp = fp
+        self.hashfn = hashfn
+
+        fp.write('\x00' * 2048)
+        self._unordered = [[] for i in xrange(256)]
+
+    def put(self, key, value=''):
+        '''Write a string key/value pair to the output file.'''
+        assert type(key) is str and type(value) is str
+
+        pos = self.fp.tell()
+        self.fp.write(write_2_le4(len(key), len(value)))
+        self.fp.write(key)
+        self.fp.write(value)
+
+        h = self.hashfn(key) & 0xffffffff
+        self._unordered[h & 0xff].append((h, pos))
+
+    def puts(self, key, values):
+        '''Write more than one value for the same key to the output file.
+        Equivalent to calling put() in a loop.'''
+        for value in values:
+            self.put(key, value)
+
+    def putint(self, key, value):
+        '''Write an integer as a base-10 string associated with the given key
+        to the output file.'''
+        self.put(key, str(value))
+
+    def putints(self, key, values):
+        '''Write zero or more integers for the same key to the output file.
+        Equivalent to calling putint() in a loop.'''
+        self.puts(key, (str(value) for value in values))
+
+    def putstring(self, key, value, encoding='utf-8'):
+        '''Write a unicode string associated with the given key to the output
+        file after encoding it as UTF-8 or the given encoding.'''
+        self.put(key, unicode.encode(value, encoding))
+
+    def putstrings(self, key, values, encoding='utf-8'):
+        '''Write zero or more unicode strings to the output file. Equivalent to
+        calling putstring() in a loop.'''
+        self.puts(key, (unicode.encode(value, encoding) for value in values))
+
+    def finalize(self):
+        '''Write the final hash tables to the output file, and write out its
+        index. The output file remains open upon return.'''
+        index = []
+        for tbl in self._unordered:
+            length = len(tbl) << 1
+            ordered = [(0, 0)] * length
+            for pair in tbl:
+                where = (pair[0] >> 8) % length
+                for i in chain(xrange(where, length), xrange(0, where)):
+                    if not ordered[i][0]:
+                        ordered[i] = pair
+                        break
+
+            index.append((self.fp.tell(), length))
+            for pair in ordered:
+                self.fp.write(write_2_le4(*pair))
+
+        self.fp.seek(0)
+        for pair in index:
+            self.fp.write(write_2_le4(*pair))
+        self.fp = None # prevent double finalize()
diff -r fdb4240fb565 -r 8ae67e9fb6ff commits.log
--- a/commits.log	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-
-:7b775e5b68b4
-cathy  2012-09-28  00:55
-Galaxy didn't like my RST syntax.  :-/
-
-:9b5b4f73bd98
-cathy  2012-09-28  00:08
-Tweaks by Cathy, e.g. adjusting text where renamed tools are mentioned.
-Also riemerized through first section, "Initial Analysis".
-
-:93eeef51be96
-cathy  2012-09-27  14:03
-Fixed datatype bugs in the Filter SNPs and Aggregate Individuals tools.
-
-:119e1e904cc4
-cathy  2012-09-26  15:38
-Restored modify_snp_table.py from the archive, since it's still used by the
-Filter SNPs and Aggregate Individuals tools.
-
-:cc508d55cc9d
-cathy  2012-09-26  15:16
-Tweaked description for the Prepare Input tool.
-
-:cdb8430b1659
-cathy  2012-09-26  15:10
-Added ": " at the beginning of each description to separate it from the name.
-
-:3286bdea6b3d
-cathy  2012-09-26  13:01
-Clarified tool names and descriptions.
-
-:8a9bdfc0d31f
-cathy  2012-09-19  17:15
-Edited docs in aggregate_gd_indivs.xml: clarified "What it does", reformatted
-example data.
-
-:f7c6a18af605
-cathy  2012-09-19  11:31
-Edited docs in specify.xml: clarified "What it does", reformatted example data.
-
diff -r fdb4240fb565 -r 8ae67e9fb6ff coverage_distributions.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coverage_distributions.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+
+import os
+import errno
+import sys
+import shutil
+import subprocess
+from Population import Population
+import gd_composite
+
+################################################################################
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+################################################################################
+
+if len(sys.argv) < 7:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+input, data_source, output, extra_files_path = sys.argv[1:5]
+
+individual_metadata = []
+population_info = []
+p1_input = None
+all_individuals = False
+
+for arg in sys.argv[5:]:
+    if arg == 'all_individuals':
+        all_individuals = True
+    elif len(arg) > 12 and arg[:12] == 'individuals:':
+        p1_input = arg[12:]
+    elif len(arg) > 11:
+        if arg[:11] == 'population:':
+            file, name = arg[11:].split(':', 1)
+            population_info.append((file, name))
+        elif arg[:11] == 'individual:':
+            individual_metadata.append(arg[11:])
+
+p_total = Population()
+p_total.from_tag_list(individual_metadata)
+
+################################################################################
+
+mkdir_p(extra_files_path)
+
+################################################################################
+
+prog = 'coverage'
+
+args = []
+args.append(prog)
+args.append(input)
+args.append(data_source)
+
+user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
+args.append(user_coverage_file)
+
+population_list = []
+
+if all_individuals:
+    tags = p_total.tag_list()
+elif p1_input is not None:
+    p1 = Population()
+    this_pop = Population()
+    this_pop.from_population_file(p1_input)
+    population_list.append(this_pop)
+    p1.from_population_file(p1_input)
+    if not p_total.is_superset(p1):
+        print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
+        sys.exit(1)
+    tags = p1.tag_list()
+else:
+    tags = []
+    for population_file, population_name in population_info:
+        population = Population()
+        this_pop = Population()
+        this_pop.from_population_file(population_file)
+        population_list.append(this_pop)
+        population.from_population_file(population_file)
+        if not p_total.is_superset(population):
+            print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name)
+            sys.exit(1)
+        columns = population.column_list()
+        for column in columns:
+            tags.append('{0}:{1}'.format(column, population_name))
+
+for tag in tags:
+    args.append(tag)
+
+## text output
+coverage_file = 'coverage.txt'
+fh = open(coverage_file, 'w')
+#print "args:", ' '.join(args)
+p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
+rc = p.wait()
+fh.close()
+
+## graphical output
+fh = open(coverage_file)
+coverage2_file = 'coverage2.txt'
+ofh = open(coverage2_file, 'w')
+
+for line in fh:
+    line = line.rstrip('\r\n')
+    elems = line.split('\t')
+    name = elems.pop(0)
+    values = [ elems[0] ]
+    for idx in range(1, len(elems)):
+        val = str(float(elems[idx]) - float(elems[idx-1]))
+        values.append(val)
+    print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
+
+fh.close()
+ofh.close()
+
+################################################################################
+
+prog = 'R'
+
+args = []
+args.append(prog)
+args.append('--vanilla')
+args.append('--quiet')
+
+_realpath = os.path.realpath(__file__)
+_script_dir = os.path.dirname(_realpath)
+r_script_file = os.path.join(_script_dir, 'coverage_plot.r')
+
+ifh = open(r_script_file)
+ofh = open('/dev/null', 'w')
+#print "args:", ' '.join(args)
+p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
+rc = p.wait()
+ifh.close()
+ofh.close()
+
+pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
+shutil.copy2('coverage.pdf', pdf_file)
+os.remove('coverage.pdf')
+os.remove(coverage2_file)
+
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('Coverage distributions Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
+out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_txt)
+
+
+if data_source == '0':
+    data_source_value = 'sequence coverage'
+elif data_source == '1':
+    data_source_value = 'estimated genotype'
+
+in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
+
+info_page.add_input_parameter(in_data_source)
+
+if population_list:
+    misc_populations =  gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
+    info_page.add_misc(misc_populations)
+else:
+    misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
+    info_page.add_misc(misc_individuals)
+
+
+
+
+with open (output, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+
+sys.exit(0)
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff coverage_distributions.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coverage_distributions.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,122 @@
+<tool id="gd_coverage_distributions" name="Coverage Distributions" version="1.0.0">
+  <description>: Examine sequence coverage for SNPs</description>
+
+  <command interpreter="python">
+    coverage_distributions.py "$input" "0" "$output" "$output.files_path"
+    #if $individuals.choice == '0'
+      "all_individuals"
+    #else if $individuals.choice == '1'
+      #set $arg = 'individuals:%s' % str($individuals.p1_input)
+        "$arg"
+    #else if $individuals.choice == '2'
+      #for $population in $individuals.populations
+        #set $arg = 'population:%s:%s' % (str($population.p_input), str($population.p_input.name))
+        "$arg"
+      #end for
+    #end if
+    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+      #set $individual_arg = 'individual:%s:%s' % ($individual_col, $individual)
+      "$individual_arg"
+    #end for
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+
+    <conditional name="individuals">
+      <param name="choice" type="select" label="Compute for">
+        <option value="0" selected="true">All individuals</option>
+        <option value="1">Individuals in a population</option>
+        <option value="2">Totals of populations</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+      </when>
+      <when value="2">
+        <repeat name="populations" title="Population" min="1">
+          <param name="p_input" type="data" format="gd_indivs" label="individuals" />
+        </repeat>
+      </when>
+    </conditional> 
+
+    <!--
+    <param name="data_source" type="select" label="Data source">
+      <option value="0" selected="true">Sequence coverage</option>
+      <option value="1">Genotype quality</option>
+    </param>
+    -->
+  </inputs>
+
+  <outputs>
+    <data name="output" format="html" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="choice" value="0" />
+      <output name="output" file="test_out/coverage_distributions/coverage.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="coverage.pdf" value="test_out/coverage_distributions/coverage.pdf" compare="sim_size" delta = "1000"/>
+        <extra_files type="file" name="coverage.txt" value="test_out/coverage_distributions/coverage.txt" />
+      </output>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_snp_ format.
+The output is a composite dataset, containing both a text table and a PDF plot.
+(`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool reports distributions of a SNP reliability indicator, in this case
+sequence coverage, for individuals or populations.  
+The coverage can be computed for all individuals, a subset of individuals,
+or totals for populations defined by the Specify Individuals tool.
+The results are reported as a text table giving the cumulative distributions,
+and as a plot.
+
+-----
+
+**Examples**
+
+- input::
+
+    chr1  14929  A  G  999    21  30  1  127   7  11   1  28   7  29   0   5   2  5   1  17  10  14  1  81   17  74  1   42  15  22  1  125   29  84  1   88   6  10  1  11  30  23  1  79  19  1  2  71  24  0   2   99  41  10   2    2
+    chr1  17451  C  T  6.88  119   1  2  255  12   0   2  63  35   0   2  59  14  0   2  72  19   1  2  57  101   1  2  255  38   8  1   20  125   0  2  255  13   0  2  62  42   0  2  51  44  0  2  64  26  0   2  108  59   0   2  194
+    chr1  30922  G  T  999     0  23  0   66   0   0  -1   0   0   0  -1   0   0  0  -1   0   0   2  0   3    0  14  0   39  14  16  1  153    0  45  0  132   6   0  2  48  19   0  2  87   3  0  2  32   0  0  -1   0    0   0  -1    0
+    etc.
+
+- text output::
+
+                0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19
+     John West  0  0  0  0  0  0  0  0  1  1  1  1  2  2  3  3  4  4  5  6
+       NA12892  0  2  5 11 20 31 43 55 67 77 84 90 93 96 97 98 99 99 99 99
+       NA12891  0  0  0  0  0  1  1  2  3  5  6  9 11 15 19 23 29 35 41 47
+       NA12249  1  4 11 23 38 54 68 79 88 93 96 98 99 99 99 99 99 99 99 99
+       NA12342  0  0  1  1  2  4  6  9 13 18 23 29 36 43 50 58 65 71 77 82
+           KB1  0  0  0  0  0  0  0  0  0  0  0  0  1  1  1  1  1  1  2  2
+           ABT  0  0  0  0  0  0  1  1  1  2  3  4  5  6  8 10 12 14 18 21
+       NA18507  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  1
+       NA19238  0  0  0  1  2  4  6 10 14 19 25 32 39 47 55 62 69 76 81 86
+       NA19239  0  0  0  0  1  1  2  4  5  8 11 15 19 24 31 37 44 51 58 65
+            YH  2  4  6  7  8  8  9 10 11 12 14 17 19 22 25 29 32 36 40 45
+        KOREAN  0  0  1  1  3  4  5  7 10 12 15 19 22 27 31 37 42 48 54 60
+           JPT  0  0  0  0  0  0  0  0  1  1  1  2  2  3  4  5  7  8 10 12
+           etc.
+
+graphical output:
+
+.. image:: ${static_path}/images/gd_coverage.png
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff coverage_plot.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coverage_plot.r	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,31 @@
+x <- read.table('coverage2.txt', skip=1, sep='\t')
+# skip=1 drops the first line; x has one row per individual, label in column 1
+individuals <- dim(x)[1]
+max_cov <- dim(x)[2] - 2
+max_val <- max(x[-1]) / 100
+colors <- rainbow(individuals)
+
+line_width = 3
+xt = t(x)
+
+xvals <- c(0:max_cov)
+values <- as.numeric(as.vector(xt[,1][-1]))/100
+# values are divided by 100 before plotting (the y axis is labelled "Proportion")
+pdf(file='coverage.pdf', onefile=TRUE, width=10, height=6);
+
+plot(xvals, values, type='l', ylim=c(0, max_val), xlim=c(0, max_cov), col=colors[1], lwd=line_width, xlab="Coverage", ylab="Proportion")
+# the first row was drawn by plot(); add one line per remaining individual
+if (individuals > 1) {
+    for (i in 2:individuals) {
+        values <- as.numeric(as.vector(xt[,i][-1]))/100;
+        lines(xvals, values, col=colors[i], lwd=line_width);
+    }
+}
+
+# legend entries come from column 1, coloured to match the lines
+names <- as.vector(t(x[1]))
+legend(x='topright', legend=names, fill=colors, bty='n')
+
+dev.off()
+
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff datatypes_conf.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<datatypes>
+  <datatype_files>
+    <datatype_file name="wsf.py"/>
+  </datatype_files>
+  <registration>
+    <datatype extension="gd_indivs" type="galaxy.datatypes.wsf:Individuals" display_in_upload="true"/>
+    <datatype extension="gd_ped" type="galaxy.datatypes.wsf:Wped" display_in_upload="true"/>
+    <datatype extension="gd_snp" type="galaxy.datatypes.wsf:GDSnp" display_in_upload="true"/>
+    <datatype extension="gd_sap" type="galaxy.datatypes.wsf:GDSap" display_in_upload="true"/>
+  </registration>
+  <sniffers/>
+</datatypes>
diff -r fdb4240fb565 -r 8ae67e9fb6ff dpmix.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dpmix.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+
+import errno
+import sys
+import os
+import subprocess
+from Population import Population
+import gd_composite
+from dpmix_plot import make_dpmix_plot
+from LocationFile import LocationFile
+
+################################################################################
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+def run_program(prog, args, stdout_file=None, space_to_tab=False):
+    '''Run the command args and exit(1) if it fails; optionally save its stdout.'''
+    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdoutdata, stderrdata) = p.communicate()
+    rc = p.returncode
+    # captured stdout is saved (lines stripped, empty ones dropped) even when rc != 0
+    if stdout_file is not None:
+        with open(stdout_file, 'w') as ofh:
+            lines = stdoutdata.split('\n')
+            for line in lines:
+                line = line.strip()
+                if line:
+                    if space_to_tab:
+                        line = line.replace(' ', '\t')
+                    print >> ofh, line
+    # on failure, report the command line and its captured stderr, then abort
+    if rc != 0:
+        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
+        print >> sys.stderr, stderrdata
+        sys.exit(1)
+
+################################################################################
+
+if len(sys.argv) < 15:
+    print "usage"
+    sys.exit(1)
+
+input, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:14]
+individual_metadata = sys.argv[14:]
+
+chrom = 'all'
+add_logs = '0'
+
+loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
+location_file = LocationFile(loc_path)
+heterochrom_path = location_file.get_values_if_exists(dbkey)
+if heterochrom_path is None:
+    heterochrom_path = '/dev/null'
+
+population_list = []
+
+p_total = Population()
+p_total.from_tag_list(individual_metadata)
+
+ap1 = Population(name='Ancestral population 1')
+ap1.from_population_file(ap1_input)
+population_list.append(ap1)
+if not p_total.is_superset(ap1):
+    print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table'
+    sys.exit(1)
+
+ap2 = Population(name='Ancestral population 2')
+ap2.from_population_file(ap2_input)
+population_list.append(ap2)
+if not p_total.is_superset(ap2):
+    print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table'
+    sys.exit(1)
+
+p = Population(name='Potentially admixed')
+p.from_population_file(p_input)
+population_list.append(p)
+if not p_total.is_superset(p):
+    print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
+    sys.exit(1)
+
+mkdir_p(output2_dir)
+
+################################################################################
+# Create tabular file
+################################################################################
+
+misc_file = os.path.join(output2_dir, 'misc.txt')
+
+prog = 'dpmix'
+args = [ prog ]
+args.append(input)
+args.append(ref_column)
+args.append(chrom)
+args.append(data_source)
+args.append(add_logs)
+args.append(switch_penalty)
+args.append(heterochrom_path)
+args.append(misc_file)
+
+columns = ap1.column_list()
+for column in columns:
+    args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name))
+
+columns = ap2.column_list()
+for column in columns:
+    args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name))
+
+columns = p.column_list()
+for column in columns:
+    args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name))
+
+run_program(None, args, stdout_file=output, space_to_tab=True)
+
+################################################################################
+# Create pdf file
+################################################################################
+
+pdf_file = os.path.join(output2_dir, 'dpmix.pdf')
+make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir)
+
+################################################################################
+# Create html
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('dpmix Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file)
+out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_misc)
+
+if data_source == '0':
+    data_source_value = 'sequence coverage'
+elif data_source == '1':
+    data_source_value = 'estimated genotype'
+
+in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
+in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value)
+
+info_page.add_input_parameter(in_data_source)
+info_page.add_input_parameter(in_switch_penalty)
+
+misc_populations =  gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
+
+info_page.add_misc(misc_populations)
+
+with open(output2, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+sys.exit(0)
+
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff dpmix.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dpmix.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,93 @@
+<tool id="gd_dpmix" name="Admixture" version="1.0.0">
+  <description>: Map genomic intervals resembling specified ancestral populations</description>
+
+  <command interpreter="python">
+    dpmix.py "$input" "$data_source" "$switch_penalty" "$ap1_input" "$ap2_input" "$p_input" "$output" "$output2" "$output2.files_path" "$input.dataset.metadata.dbkey" "$input.dataset.metadata.ref" "$GALAXY_DATA_INDEX_DIR" "gd.heterochromatic.loc"
+    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+      #set $arg = '%s:%s' % ($individual_col, $individual)
+      "$arg"
+    #end for
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="Dataset">
+      <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
+    </param>
+    <param name="ap1_input" type="data" format="gd_indivs" label="Ancestral population 1 individuals" />
+    <param name="ap2_input" type="data" format="gd_indivs" label="Ancestral population 2 individuals" />
+    <param name="p_input" type="data" format="gd_indivs" label="Potentially admixed individuals" />
+
+    <param name="data_source" type="select" format="integer" label="Data source">
+      <option value="0" selected="true">sequence coverage</option>
+      <option value="1">estimated genotype</option>
+    </param>
+
+    <param name="switch_penalty" type="integer" min="0" value="10" label="Switch penalty" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+    <data name="output2" format="html" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="ap1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
+      <param name="ap2_input" value="test_in/b.gd_indivs" ftype="gd_indivs" />
+      <param name="p_input" value="test_in/c.gd_indivs" ftype="gd_indivs" />
+      <param name="data_source" value="0" />
+      <param name="switch_penalty" value="10" />
+
+      <output name="output" file="test_out/dpmix/dpmix.tabular" />
+
+      <output name="output2" file="test_out/dpmix/dpmix.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="dpmix.pdf" value="test_out/dpmix/dpmix.pdf" compare="sim_size" delta = "10000" />
+        <extra_files type="file" name="misc.txt" value="test_out/dpmix/misc.txt" />
+      </output>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_ and gd_indivs_ formats.  It is important for
+the Individuals datasets to have unique names and for there to be no overlap
+between the two populations.  Rename these datasets if
+needed to make them unique.  
+There are two output datasets, one tabular_ and one composite. (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user specifies two "ancestral" populations (i.e., sources for
+chromosomes) and a set of potentially admixed individuals, and chooses
+between the sequence coverage or the estimated genotypes to measure
+the similarity of genomic intervals in admixed individuals to the two
+classes of ancestral chromosomes.  The user also picks a "switch penalty",
+typically between 10 and 100.  For each potentially admixed individual,
+the program divides the genome into three "genotypes": (0) homozygous
+for the first ancestral population (i.e., both chromosomes from that
+population), (1) heterozygous, or (2) homozygous for the second ancestral
+population.  Parts of a chromosome that are labeled as "heterochromatic"
+are given the non-genotype, 3.  Smaller values of the switch penalty
+(corresponding to more ancient admixture events) generally lead to the
+reconstruction of more frequent changes between genotypes.
+
+There are two output datasets generated.  The first is a tabular dataset with
+chromosome, start, stop, and pairs of columns containing the "genotypes" from
+above and the label of the admixed individual.  The second is a composite
+dataset with general information from the run and two links: one to a PDF which
+graphically shows the ancestral population along each of the chromosomes,
+and one to a text file with summary information about the
+"genotypes" over the whole genome.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff dpmix_plot.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dpmix_plot.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,297 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import math
+import matplotlib as mpl
+mpl.use('PDF')
+import matplotlib.pyplot as plt
+from matplotlib.path import Path
+import matplotlib.patches as patches
+
+################################################################################
+
+def build_chrom_len_dict(dbkey, galaxy_data_index_dir):
+    """Return {chrom: length} read from <index_dir>/shared/ucsc/chrom/<dbkey>.len.
+
+    Best effort: an unreadable file or a non-integer length yields whatever
+    entries were parsed before the problem (possibly none) instead of raising.
+    """
+    chrom_len_root = os.path.join(galaxy_data_index_dir, 'shared/ucsc/chrom')
+    chrom_len_path = os.path.join(chrom_len_root, '{0}.len'.format(dbkey))
+    chrom_len = {}
+    try:
+        with open(chrom_len_path) as fh:
+            for line in fh:
+                # split() with no argument discards the line terminator too
+                elems = line.split()
+                if len(elems) == 2:
+                    chrom_len[elems[0]] = int(elems[1])
+    except (IOError, ValueError):
+        # Deliberate fallback, but no longer hides KeyboardInterrupt or bugs.
+        pass
+    return chrom_len
+
+def parse_input_file(input_file):  # -> (chroms, individuals, data, chrom_len)
+    chroms = []         # chromosome names, in order of first appearance
+    individuals = []    # individual ids, in order of first appearance
+    data = {}           # data[chrom][id] -> list of (p1, p2, state) tuples
+    chrom_len = {}      # largest p2 seen so far for each chromosome
+    # Non-blank lines are whitespace-separated: chrom p1 p2 state id
+    with open(input_file) as fh:
+        for line in fh:
+            line = line.strip()
+            if line:
+                elems = line.split()
+                chrom = elems[0]
+                p1, p2, state = map(int, elems[1:4])
+                id = elems[4]
+
+                if chrom not in chroms:
+                    chroms.append(chrom)
+
+                if id not in individuals:
+                    individuals.append(id)
+
+                data.setdefault(chrom, {})
+                data[chrom].setdefault(id, [])
+                data[chrom][id].append((p1, p2, state))
+                # setdefault seeds the chromosome with 0 the first time it is seen
+                if p2 > chrom_len.setdefault(chrom, 0):
+                    chrom_len[chrom] = p2
+
+    return chroms, individuals, data, chrom_len
+
+def check_chroms(chroms, chrom_len, dbkey):  # exits 1 if any chrom has no entry in chrom_len
+    error = 0
+    for chrom in chroms:
+        if chrom not in chrom_len:
+            print >> sys.stderr, "Can't find length for {0} chromosome {1}".format(dbkey, chrom)
+            error = 1   # keep scanning so every missing chromosome is reported
+    if error:
+        sys.exit(1)
+
+def check_data(data, chrom_len, dbkey):
+    """Exit 1 if any interval has begin >= end or lies outside [0, chrom_len[chrom]]."""
+    error = 0
+    for chrom in data:
+        chrom_beg, chrom_end = 0, chrom_len[chrom]
+        for individual in data[chrom]:
+            for p1, p2, state in data[chrom][individual]:
+                if p1 >= p2:
+                    print >> sys.stderr, "Bad data line: begin >= end: {0} {1} {2} {3} {4}".format(chrom, p1, p2, state, individual)
+                    error = 1   # keep going so every bad line is reported
+                if p1 < chrom_beg or p2 > chrom_end:
+                    print >> sys.stderr, "Bad data line: outside {0} boundaries[{1} - {2}]: {3} {4} {5} {6} {7}".format(dbkey, chrom_beg, chrom_end, chrom, p1, p2, state, individual)
+                    error = 1
+    if error:
+        sys.exit(1)
+
+def make_rectangle(p1, p2, color, bottom=0.0, top=1.0):  # -> PathPatch over x in [p1, p2], y in [bottom, top]
+    verts = [
+        (p1, bottom),   # left, bottom
+        (p1, top),      # left, top
+        (p2, top),      # right, top
+        (p2, bottom),   # right, bottom
+        (0.0, 0.0)      # ignored
+    ]
+    # one path code per vertex above, in the same order
+    codes = [
+        Path.MOVETO,
+        Path.LINETO,
+        Path.LINETO,
+        Path.LINETO,
+        Path.CLOSEPOLY
+    ]
+    # lw=0 sets the outline width to zero, leaving only the fill colour
+    path = Path(verts, codes)
+    return patches.PathPatch(path, facecolor=color, lw=0)
+
+def make_split_rectangle(p1, p2, top_color, bottom_color):  # -> [lower-half patch, upper-half patch]
+    patch1 = make_rectangle(p1, p2, bottom_color, top=0.5)  # y from 0.0 to 0.5
+    patch2 = make_rectangle(p1, p2, top_color, bottom=0.5)  # y from 0.5 to 1.0
+    return [patch1, patch2]
+
+def make_state_rectangle(p1, p2, state, chrom, individual):
+    """Return the list of patches for one interval; exit 1 on a state outside 0-3."""
+    if state == 0:
+        return [ make_rectangle(p1, p2, 'r') ]
+    elif state == 1:
+        return make_split_rectangle(p1, p2, 'r', 'g')
+    elif state == 2:
+        return [ make_rectangle(p1, p2, 'g') ]
+    elif state == 3:
+        return [ make_rectangle(p1, p2, '#c7c7c7') ]
+    print >> sys.stderr, "Unknown state: {0}: {1} {2} {3} {4} {5}".format(state, chrom, p1, p2, state, individual)
+    sys.exit(1)
+
+def nicenum(num, round=False):  # snap num to 1, 2, 5 or 10 times a power of ten
+    if num == 0:
+        return 0.0
+    # split num into f * 10**exp; math.log10 raises ValueError for num < 0
+    exp = int(math.floor(math.log10(num)))
+    f = num / math.pow(10, exp)
+    # round=True snaps f at the thresholds 1.5 / 3 / 7; otherwise f is rounded up
+    if round:
+        if f < 1.5:
+            nf = 1.0
+        elif f < 3.0:
+            nf = 2.0
+        elif f < 7.0:
+            nf = 5.0
+        else:
+            nf = 10.0
+    else:
+        if f <= 1.0:
+            nf = 1.0
+        elif f <= 2.0:
+            nf = 2.0
+        elif f <= 5.0:
+            nf = 5.0
+        else:
+            nf = 10.0
+
+    return nf * pow(10, exp)
+
+def tick_foo(beg, end, loose=False):  # -> (tick positions, tick label strings)
+    ntick = 10
+    # d is the tick spacing: a nice number near (nice span) / (ntick - 1)
+    range = nicenum(end - beg, round=False)
+    d = nicenum(range/(ntick - 1), round=True)
+    digits = int(math.floor(math.log10(d)))
+    # loose widens the span outward to multiples of d; otherwise beg/end are used as given
+    if loose:
+        graph_min = math.floor(beg/d) * d
+        graph_max = math.ceil(end/d) * d
+    else:
+        graph_min = beg
+        graph_max = end
+
+    nfrac = max([-1 * digits, 0])  # not used anywhere below
+    vals = []
+
+    stop = graph_max
+    if loose:
+        stop = graph_max + (0.5 * d)
+
+    x = graph_min
+    while x <= stop:
+        vals.append(int(x))
+        x += d
+    # drop the first tick, i.e. the one sitting at graph_min
+    vals = vals[1:]
+
+#    if not loose:
+#        if vals[-1] < graph_max:
+#            vals.append(int(graph_max))
+    # each label is its tick value in units of 10**digits, truncated to an integer
+    labels = []
+    for val in vals:
+        labels.append('{0}'.format(int(val/math.pow(10, digits))))
+
+#   labels.append('{0:.1f}'.format(vals[-1]/math.pow(10, digits)))
+
+    return vals, labels
+
+################################################################################
+
+def make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir):  # draw one bar per individual per chromosome, save to output_file
+    fs_chrom_len = build_chrom_len_dict(input_dbkey, galaxy_data_index_dir)
+    chroms, individuals, data, chrom_len = parse_input_file(input_file)
+    # a length from the dbkey's .len file replaces the length derived from the input
+    for chrom in chrom_len.keys():
+        if chrom in fs_chrom_len:
+            chrom_len[chrom] = fs_chrom_len[chrom]
+
+    #check_chroms(chroms, chrom_len, input_dbkey)
+    check_data(data, chrom_len, input_dbkey)
+
+    ## units below are inches
+    top_space = 0.10
+    chrom_space = 0.25
+    chrom_height = 0.25
+    ind_space = 0.10
+    ind_height = 0.25
+    # first pass: add up the vertical space needed so the figure height is known
+    total_height = 0.0
+    at_top = True
+    for chrom in chroms:
+        if at_top:
+            total_height += (top_space + chrom_height)
+            at_top = False
+        else:
+            total_height += (top_space + chrom_space + chrom_height)
+        # plus one row for every individual that has data on this chromosome
+        individual_count = 0
+        for individual in individuals:
+            if individual in data[chrom]:
+                individual_count += 1
+        total_height += individual_count * (ind_space + ind_height)
+
+    width = 7.5
+    height = math.ceil(total_height)
+    # bottom is a fraction of the figure height; start at the top edge and work down
+    bottom = 1.0
+
+    fig = plt.figure(figsize=(width, height))
+
+    at_top = True
+    for_webb = False  # when True, chromosome titles, individual labels and ticks are omitted
+
+    for chrom in chroms:
+        length = chrom_len[chrom]
+        vals, labels = tick_foo(0, length)
+
+        if at_top:
+            bottom -= (top_space + chrom_height)/height
+            at_top = False
+        else:
+            bottom -= (top_space + chrom_space + chrom_height)/height
+
+        if not for_webb:
+            ax = fig.add_axes([0.0, bottom, 1.0, chrom_height/height])
+            plt.axis('off')
+            plt.text(0.5, 0.5, chrom, fontsize=14, ha='center')
+
+        individual_count = 0
+        for individual in individuals:
+            if individual in data[chrom]:
+                individual_count += 1
+        # i counts the rows drawn so far; only the last row of a chromosome keeps its x axis
+        i = 0
+        for individual in individuals:
+            if individual in data[chrom]:
+                i += 1
+
+                bottom -= (ind_space + ind_height)/height
+                if not for_webb:
+                    # [left, bottom, width, height]
+                    ax1 = fig.add_axes([0.0, bottom, 0.09, ind_height/height])
+                    plt.axis('off')
+                    plt.text(1.0, 0.5, individual, fontsize=10, ha='right', va='center')
+                # [left, bottom, width, height]
+                ax2 = fig.add_axes([0.10, bottom, 0.88, ind_height/height], frame_on=False)
+                ax2.set_xlim(0, length)
+                ax2.set_ylim(0, 1)
+                if i != individual_count:
+                    plt.axis('off')
+                else:
+                    if not for_webb:
+                        ax2.tick_params(top=False, left=False, right=False, labelleft=False)
+                        ax2.set_xticks(vals)
+                        ax2.set_xticklabels(labels)
+                    else:
+                        plt.axis('off')
+                for p1, p2, state in sorted(data[chrom][individual]):
+                    for patch in make_state_rectangle(p1, p2, state, chrom, individual):
+                        ax2.add_patch(patch)
+
+    plt.savefig(output_file)
+
+################################################################################
+
+if __name__ == '__main__':
+    input_dbkey, input_file, output_file, galaxy_data_index_dir = sys.argv[1:5]  # fewer than four arguments makes this unpack raise ValueError
+    make_dpmix_plot(input_dbkey, input_file, output_file, galaxy_data_index_dir)
+    sys.exit(0)
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff echo.bash
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/echo.bash	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Append every argument after the first two to the output file, one per line.
+if [ $# -lt 3 ]; then
+    echo "usage"
+    exit 1
+fi
+# $1 is stored in "input" but not used below; $2 is the file appended to.
+input="$1"
+output="$2"
+shift 2
+
+for individual in "$@"; do
+    echo "$individual" >> "$output"
+done
+
+exit 0
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff evaluate_population_numbers.bash
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/evaluate_population_numbers.bash	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Run "admixture --cv" once for each K from 1 to max_populations and collect the CV lines.
+if [ $# -ne 3 ]; then
+    echo "usage"
+    exit 1
+fi
+
+input_ped_file="$1"
+output_file="$2"
+max_populations="$3"
+# the executable is referenced by bare name, so it is looked up on PATH
+ADMIXTURE=admixture
+# keep only lines containing "CV", rewriting "CV error" as "CVE"; results are appended
+for (( i=1; $i <= $max_populations; i++ )); do
+    $ADMIXTURE --cv "$input_ped_file" $i 2>&1 | grep CV | perl -ne 's/CV error/CVE/; print;' >> "$output_file"
+done
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff evaluate_population_numbers.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/evaluate_population_numbers.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,83 @@
+<tool id="gd_evaluate_population_numbers" name="Population Complexity" version="1.0.0">
+  <description>: Evaluate possible numbers of ancestral populations</description>
+
+  <command interpreter="bash">
+    evaluate_population_numbers.bash "${input.extra_files_path}/admix.ped" "$output" "$max_populations"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_ped" label="Dataset" />
+    <param name="max_populations" type="integer" min="1" value="5" label="Maximum number of populations" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="fake" ftype="gd_ped" >
+        <metadata name="base_name" value="admix" />
+        <composite_data value="test_out/prepare_population_structure/prepare_population_structure.html" />
+        <composite_data value="test_out/prepare_population_structure/admix.ped" />
+        <composite_data value="test_out/prepare_population_structure/admix.map" />
+        <edit_attributes type="name" value="fake" />
+      </param>
+      <param name="max_populations" value="2" />
+
+      <output name="output" file="test_out/evaluate_population_numbers/evaluate_population_numbers.txt" />
+    </test>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_ped_ format.
+The output dataset is text.  (`Dataset missing?`_)
+
+.. _gd_ped: ./static/formatHelp.html#gd_ped
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user selects a gd_ped dataset generated by the Prepare Input tool.
+For all possible numbers K of ancestral
+populations, from 1 up to a user-specified maximum, this tool produces values
+that indicate how well the data can be explained as genotypes from individuals
+derived from K ancestral populations.  These values are computed by a 5-fold
+cross-validation procedure, so that a good choice for K will exhibit a low
+cross-validation error (CVE) compared with other potential settings for K.
+
+-----
+
+**Acknowledgments**
+
+We use the program "Admixture", downloaded from
+
+http://www.genetics.ucla.edu/software/admixture/
+
+and described in the paper "Fast model-based estimation of ancestry in
+unrelated individuals" by David H. Alexander, John Novembre and Kenneth Lange,
+Genome Research 19 (2009), pp. 1655-1664. Admixture is called with the "--cv"
+flag to produce these values.
+
+----- 
+
+**Example**
+
+- output with max populations of 6::
+
+    CVE (K=1): 1.10120
+    CVE (K=2): 1.34683
+    CVE (K=3): 1.80611
+    CVE (K=4): 1.96339
+    CVE (K=5): 1.21522
+    CVE (K=6): 0.51501
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff extract_flanking_dna.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_flanking_dna.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from optparse import OptionParser
+import genome_diversity as gd
+
+def main_function( parse_arguments=None ):
+    """Decorator factory: the wrapped main parses argv, runs, reports errors, then sys.exit()s."""
+    import traceback  # function-scope import: this module has no top-level import of it
+    if parse_arguments is None:
+        parse_arguments = lambda arguments: ( None, arguments )
+    def main_decorator( to_decorate ):
+        def decorated_main( arguments=None ):
+            options, arguments = parse_arguments( sys.argv if arguments is None else arguments )
+            rc = 1  # stays 1 when to_decorate raises
+            try:
+                rc = to_decorate( options, arguments )
+            except Exception, err:
+                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
+                traceback.print_exc()
+            finally:
+                sys.exit( rc )  # rc is whatever to_decorate returned, or 1 on error
+        return decorated_main
+    return main_decorator
+
+def parse_arguments( arguments ):  # -> (options, leftover args); arguments[0] is skipped
+    parser = OptionParser()
+    parser.add_option('--input',
+                        type='string', dest='input',
+                        help='file of selected SNPs')
+    parser.add_option('--output',
+                        type='string', dest='output',
+                        help='output file')
+    parser.add_option('--snps_loc',
+                        type='string', dest='snps_loc',
+                        help='snps .loc file')
+    parser.add_option('--scaffold_col',
+                        type="int", dest='scaffold_col',
+                        help='scaffold column in the input file')
+    parser.add_option('--pos_col',
+                        type="int", dest='pos_col',
+                        help='position column in the input file')
+    parser.add_option('--output_format',
+                        type="string", dest='output_format',
+                        help='output format, fasta or primer3')
+    parser.add_option('--species',
+                        type="string", dest='species',
+                        help='species')
+    return parser.parse_args( arguments[1:] )
+
+
+@main_function( parse_arguments )
+def main( options, arguments ):  # every option is required; a missing one raises RuntimeError
+    if not options.input:
+        raise RuntimeError( 'missing --input option' )
+    if not options.output:
+        raise RuntimeError( 'missing --output option' )
+    if not options.snps_loc:
+        raise RuntimeError( 'missing --snps_loc option' )
+    if not options.scaffold_col:
+        raise RuntimeError( 'missing --scaffold_col option' )
+    if not options.pos_col:
+        raise RuntimeError( 'missing --pos_col option' )
+    if not options.output_format:
+        raise RuntimeError( 'missing --output_format option' )
+    if not options.species:
+        raise RuntimeError( 'missing --species option' )
+    # NOTE: the truthiness tests above also reject a column number of 0.
+    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
+
+    out_fh = gd._openfile( options.output, 'w' )
+    # the index path is the data file path with its extension replaced by .cdb
+    snpcalls_file = gd.get_filename_from_loc( options.species, options.snps_loc )
+    file_root, file_ext = os.path.splitext( snpcalls_file )
+    snpcalls_index_file = file_root + ".cdb"
+    snpcalls = gd.SnpcallsFile( data_file=snpcalls_file, index_file=snpcalls_index_file )
+    # one lookup per SNP; a falsy lookup result writes nothing
+    while snps.next():
+        seq, pos = snps.get_seq_pos()
+        flanking_dna = snpcalls.get_flanking_dna( sequence=seq, position=pos, format=options.output_format )
+        if flanking_dna:
+            out_fh.write( flanking_dna )
+
+    out_fh.close()
+
+if __name__ == "__main__":
+    main()  # the decorated main reads sys.argv itself and always finishes with sys.exit
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff extract_flanking_dna.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_flanking_dna.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,95 @@
+<tool id="gd_extract_flanking_dna" name="Flanking Sequence" version="1.0.0">
+  <description>: Fetch DNA sequence for intervals surrounding the given SNPs</description>
+
+  <command interpreter="python">
+    extract_flanking_dna.py "--input=$input" "--output=$output" "--snps_loc=${GALAXY_DATA_INDEX_DIR}/gd.snps.loc"
+    #if $override_metadata.choice == "0":
+      "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}"
+    #else
+      "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species"
+    #end if
+    "--output_format=$output_format"
+  </command>
+
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/>
+    <param name="output_format" type="select" format="integer" label="output format">
+        <option value="fasta" selected="true">FastA format</option>
+        <option value="primer3">Primer3 input</option>
+    </param>
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="choose columns">
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1" >Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/>
+        <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
+        <param name="species" type="select" label="Choose species">
+          <options from_file="gd.species.txt">
+            <column name="name" index="1"/>
+            <column name="value" index="0"/>
+          </options>
+        </param>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="txt" name="output"/>
+  </outputs>
+
+  <!-- Need snpcalls files from Webb before uncommenting
+  <tests>
+    <test>
+      <param name="input" value="test_out/select_snps/select_snps.gd_snp" ftype="gd_snp" />
+      <param name="output_format" value="primer3" />
+      <param name="choice" value="0" />
+      <output name="output" file="test_out/extract_flanking_dna/extract_flanking_dna.txt" />
+    </test>
+  </tests>
+  -->
+
+  <help>
+
+**What it does**
+
+  This tool reports a DNA segment containing each SNP, with up to 200 nucleotides on
+  either side of the SNP position, which is indicated by "n". Fewer nucleotides
+  are reported if the SNP is near an end of the assembled genome fragment.
+
+-----
+
+**Example**
+
+- input file::
+
+    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
+    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
+    chr18_50154905_50155664  304   A  G  Y  C  chr18  50155208  A  Y  4   2  17   5   1  22   Y  8    0.022  0.996  0.128  0
+    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
+    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
+    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
+    etc.
+
+- output file::
+
+    > chr2_75111355_75112576 314 A C
+    TATCTTCATTTTTATTATAGACTCTCTGAACCAATTTGCCCTGAGGCAGACTTTTTAAAGTACTGTGTAATGTATGAAGTCCTTCTGCTCAAGCAAATCATTGGCATGAAAACAGTTGCAAACTTATTGTGAGAGAAGAGTCCAAGAGTTTTAACAGTCTGTAAGTATATAGCCTGTGAGTTTGATTTCCTTCTTGTTTTTnTTCCAGAAACATGATCAGGGGCAAGTTCTATTGGATATAGTCTTCAAGCATCTTGATTTGACTGAGCGTGACTATTTTGGTTTGCAGTTGACTGACGATTCCACTGATAACCCAGTAAGTTTAAGCTGTTGTCTTTCATTGTCATTGCAATTTTTCTGTCTTTATACTAGGTCCTTTCTGATTTACATTGTTCACTGATT
+    > chr8_93901796_93905612 2471 A C
+    GCTGCCGCTGGATTTACTTCTGCTTGGGTCGAGAGCGGGCTGGATGGGTGAAGAGTGGGCTCCCCGGCCCCTGACCAGGCAGGTGCAGACAAGTCGGAAGAAGGCCCGCCGCATCTCCTTGCTGGCCAGCGTGTAGATGACGGGGTTCATGGCAGAGTTGAGCACGGCCAGCACGATGAACCACTGGGCCTTGAACAGGATnGCGCACTCCTTCACCTTGCAGGCCACATCCACAAGGAAAAGGATGAAGAGTGGGGACCAGCAGGCGATGAACACGCTCACCACGATCACCACGGTCCGCAGCAGGGCCATGGACCGCTCTGAGTTGTGCGGGCTGGCCACCCTGCGGCTGCTGGACTTCACCAGGAAGTAGATGCGTGCGTACAGGATCACGATGGTCAC
+    > chr10_7434473_7435447 524 T C
+    ATTATTAACAGAAACATTTCTTTTTCATTACCCAGGGGTTACACTGGTCGTTGATGTTAATCAGTTTTTGGAGAAGGAGAAGCAAAGTGATATTTTGTCTGTTCTGAAGCCTGCCGTTGGTAATACAAATGACGTAATCCCTGAATGTGCTGACAGGTACCATGACGCCCTGGCAAAAGCAAAAGAGCAAAAATCTAGAAGnGGTAAGCATCTTCACTGTTTAGCACAAATTAAATAGCACTTTGAATATGATGATTTCTGTGGTATTGTGTTATCTTACTTTTGAGACAAATAATCGCTTTCAAATGAATATTTCTGAATGTTTGTCATCTCTGGCAAGGAAATTTTTTAGTGTTTCTTTTCCTTTTTTGTCTTTTGGAAATCTGTGATTAACTTGGTGGC
+    > chr14_80021455_80022064 138 G A
+    ACCCAGGGATCAAACCCAGGTCTCCCGCATTGCAGGCGGATTCTTTACTGTCTGAGCCTCCAGGGAAGCCCTCGGGGCTGAAGGGATGGTTATGAAGGTGAGAAACAGGGGCCACCTGTCCCCAAGGTACCTTGCGACnTGCCATCTGCGCTCCACCAGTAAATGGACGTCTTCGATCCTTCTGTTGTTGGCGTAGTGCAAACGTTTGGGAAGGTGCTGTTTCAAGTAAGGCTTAAAGTGCTGGTCTGGTTTTTTACACTGAAATATAAATGGACATTGGATTTTGCAATGGAGAGTCTTCTAGAAGAGTCCAAGACATTCTCTCCAGAAAGCTGAAGG
+    > chr15_64470252_64471048 89 G A
+    TGTGTGTGTGTGTGTGTGTGTGTGCCTGTGTCTGTACATGCACACCACGTGGCCTCACCCAGTGCCCTCAGCTCCATGGTGATGTCCACnTAGCCGTGCTCCGCGCTGTAGTACATGGCCTCCTGGAGGGCCTTGGTGCGCGTCCGGCTCAGGCGCATGGGCCCCTCGCTGCCGCTGCCCTGGCTGGATGCATCGCTCTCTTCCACGCCCTCAGCCAGGATCTCCTCCAGGGACAGCACATCTGCTTTGGCCTGCTGTGGCTGAGTCAGGAGCTTCCTCAGGACGTTCCT
+    etc.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff extract_primers.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_primers.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from optparse import OptionParser
+import genome_diversity as gd
+
+def main_function( parse_arguments=None ):
+    """Decorator factory: the wrapped main parses argv, runs, reports errors, then sys.exit()s."""
+    import traceback  # function-scope import: this module has no top-level import of it
+    if parse_arguments is None:
+        parse_arguments = lambda arguments: ( None, arguments )
+    def main_decorator( to_decorate ):
+        def decorated_main( arguments=None ):
+            options, arguments = parse_arguments( sys.argv if arguments is None else arguments )
+            rc = 1  # stays 1 when to_decorate raises
+            try:
+                rc = to_decorate( options, arguments )
+            except Exception, err:
+                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
+                traceback.print_exc()
+            finally:
+                sys.exit( rc )  # rc is whatever to_decorate returned, or 1 on error
+        return decorated_main
+    return main_decorator
+
+def parse_arguments( arguments ):  # -> (options, leftover args); arguments[0] is skipped
+    parser = OptionParser()
+    parser.add_option('--input',
+                        type='string', dest='input',
+                        help='file of selected SNPs')
+    parser.add_option('--output',
+                        type='string', dest='output',
+                        help='output file')
+    parser.add_option('--primers_loc',
+                        type='string', dest='primers_loc',
+                        help='primers .loc file')
+    parser.add_option('--scaffold_col',
+                        type="int", dest='scaffold_col',
+                        help='scaffold column in the input file')
+    parser.add_option('--pos_col',
+                        type="int", dest='pos_col',
+                        help='position column in the input file')
+    parser.add_option('--species',
+                        type="string", dest='species',
+                        help='species')
+    return parser.parse_args( arguments[1:] )
+
+
+@main_function( parse_arguments )
+def main( options, arguments ):  # every option is required; a missing one raises RuntimeError
+    if not options.input:
+        raise RuntimeError( 'missing --input option' )
+    if not options.output:
+        raise RuntimeError( 'missing --output option' )
+    if not options.primers_loc:
+        raise RuntimeError( 'missing --primers_loc option' )
+    if not options.scaffold_col:
+        raise RuntimeError( 'missing --scaffold_col option' )
+    if not options.pos_col:
+        raise RuntimeError( 'missing --pos_col option' )
+    if not options.species:
+        raise RuntimeError( 'missing --species option' )
+    # NOTE: the truthiness tests above also reject a column number of 0.
+    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
+
+    out_fh = gd._openfile( options.output, 'w' )
+
+    primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
+    # the index path is the data file path with its extension replaced by .cdb
+    file_root, file_ext = os.path.splitext( primer_data_file )
+    primer_index_file = file_root + ".cdb"
+    primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )
+    # one lookup per SNP; a falsy lookup result writes nothing
+    while snps.next():
+        seq, pos = snps.get_seq_pos()
+        primer = primers.get_entry( seq, pos )
+        if primer:
+            out_fh.write( primer )
+
+    out_fh.close()
+
+if __name__ == "__main__":
+    main()  # the decorated main reads sys.argv itself and always finishes with sys.exit
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff extract_primers.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_primers.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,90 @@
+<tool id="gd_extract_primers" name="Pick Primers" version="1.0.0">
+  <description>: Find suitable PCR primers for SNPs</description>
+
+  <command interpreter="python">
+    extract_primers.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc"
+    #if $override_metadata.choice == "0":
+      "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}"
+    #else
+      "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species"
+    #end if
+  </command>
+
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Selected SNPS dataset"/>
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="choose columns">
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1" >Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/>
+        <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
+        <param name="species" type="select" label="Choose species">
+          <options from_file="gd.species.txt">
+            <column name="name" index="1"/>
+            <column name="value" index="0"/>
+          </options>
+        </param>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="txt" name="output"/>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_out/select_snps/select_snps.gd_snp" ftype="gd_snp" />
+      <param name="choice" value="0"/>
+      <output name="output" file="test_out/extract_primers/extract_primers.txt" />
+    </test>
+  </tests>
+
+
+  <help>
+
+**What it does**
+
+  This tool extracts primers for SNPs in the dataset using the Primer3 program.
+  The first line of output for a given SNP reports the name of the assembled
+  contig, the SNP's position in the contig, the two variant nucleotides, and
+  Primer3's "pair penalty".  The next line, if not blank, names restriction
+  enzymes (from the user-adjustable list) that differentially cut at that
+  site, but do not cut at any other position between and including the
+  primer positions.  The next lines show the SNP's flanking regions, with
+  the SNP position indicated by "n", including the primer positions and an
+  additional 3 nucleotides.
+
+-----
+
+**Example**
+
+- input file::
+
+    chr5_30800874_30802049    734   G  A  chr5   30801606   A  24  0  99   4  11  97   Y  496  0.502  0.033  0.215  6
+    chr8_55117827_55119487    994   A  G  chr8   55118815   G  25  0  102  4  11  96   Y  22   0.502  0.025  2.365  1
+    chr9_100484836_100485311  355   C  T  chr9   100485200  T  27  0  108  6  17  100  Y  190  0.512  0.880  2.733  4
+    chr12_3635530_3637738     2101  T  C  chr12  3637630    T  25  0  102  4  13  93   Y  169  0.554  0.024  0.366  4
+
+- output file::
+
+    chr5_30800874_30802049 734 G A 0.352964
+     BglII,MboI,Sau3AI,Tru9I,XhoII
+      1 CTGAAGGTGAGCAGGATTCAGGAGACAGAAAACAAAGCCCAGGCCTGCCCAAGGTGGAAA
+           >>>>>>>>>>>>>>>>>>>>
+     
+     61 AGTCTAACAACTCGCCCTCTGCTTAnATCTGAGACTCACAGGGATAATAACACACTTGGT
+     
+     
+    121 CAAGGAATAAACTAGATATTATTCACTCCTCTAGAAGGCTGCCAGGAAAATTGCCTGACT
+                                                             &lt;&lt;&lt;&lt;&lt;&lt;&lt;
+     
+    181 TGAACCTTGGCTCTGA
+        &lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;&lt;
+    etc.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff filter_gd_snp.xml
--- a/filter_gd_snp.xml	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-<tool id="gd_filter_gd_snp" name="Filter SNPs" version="1.0.0">
-  <description>: Discard some SNPs based on coverage or quality</description>
-
-  <command interpreter="python">
-    modify_snp_table.py "$input" "$p1_input" "$output" "$lo_coverage" "$hi_coverage" "$low_ind_cov" "$lo_quality"
-    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
-        #set $arg = '%s:%s' % ($individual_col, $individual)
-        "$arg"
-    #end for
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="gd_snp" label="SNP dataset" />
-    <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
-    <param name="lo_coverage" type="integer" min="0" value="0" label="Lower bound on total coverage" />
-    <param name="hi_coverage" type="integer" min="0" value="1000" label="Upper bound on total coverage" />
-    <param name="low_ind_cov" type="integer" min="0" value="0" label="Lower bound on individual coverage" />
-    <param name="lo_quality" type="integer" min="0" value="0" label="Lower bound on individual quality values" />
-  </inputs>
-
-  <outputs>
-    <data name="output" format="gd_snp" metadata_source="input" />
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
-      <param name="p1_input" value="test_in/a.gd_indivs" ftype="gd_indivs" />
-      <param name="choice" value="1" />
-      <param name="lo_coverage" value="0" />
-      <param name="hi_coverage" value="1000" />
-      <param name="low_ind_cov" value="3" />
-      <param name="lo_quality" value="30" />
-      <output name="output" file="test_out/modify_snp_table/modify.gd_snp" />
-    </test>
-  </tests>
-
-  <help>
-
-**Dataset formats**
-
-The input datasets are in gd_snp_ and gd_indivs_ formats.
-The output dataset is in gd_snp_ format.  (`Dataset missing?`_)
-
-.. _gd_snp: ./static/formatHelp.html#gd_snp
-.. _gd_indivs: ./static/formatHelp.html#gd_indivs
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-The user specifies that some of the individuals in a gd_snp dataset form a
-"population", by supplying a list that has been previously created using the
-Specify Individuals tool.  SNPs are then discarded if their total coverage
-for the population is too low or too high, or if their coverage or quality
-score for any individual in the population is too low.
-
------
-
-**Example**
-
-- input gd_snp::
-
-    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0
-    Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30    1  0  2  30   Y  22  +99.   0
-    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0
-    etc.
-
-- input individuals::
-
-    9   PB1
-    13  PB2
-    17  PB3
-
-- output when the lower bound on individual coverage is "3"::
-
-    Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45   10  0  2  57   Y  54  0.323  0
-    Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39    5  0  2  42   N   1  +99.   0
-    etc.
-
-  </help>
-</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff find_intervals.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/find_intervals.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+
+import errno
+import os
+import subprocess
+import sys
+
+################################################################################
+
+def mkdir_p(path):  # like `mkdir -p`: create path together with any missing parents
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno != errno.EEXIST:  # an already-existing path is not treated as an error
+            raise
+
+def run_program(prog, args, stdout_file=None):
+    # Run args (an argv list) to completion; prog, when not None, is handed to Popen as the executable to launch.
+    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdoutdata, stderrdata) = p.communicate()  # waits for exit, holding all captured output in memory
+    rc = p.returncode
+
+    if stdout_file is not None:
+        with open(stdout_file, 'w') as ofh:
+            print >> ofh, stdoutdata.rstrip('\r\n')  # written before rc is checked, i.e. even when the program failed
+
+    if rc != 0:
+        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
+        print >> sys.stderr, stderrdata
+        sys.exit(1)  # any non-zero return code aborts the whole script
+
+################################################################################
+
+if len(sys.argv) != 11:  # script name plus the ten positional arguments unpacked below
+    print >> sys.stderr, "usage: {0} input dbkey output output_files_path chrom_col pos_col score_col shuffles cutoff report_snps".format(sys.argv[0])
+    sys.exit(1)
+
+input, dbkey, output, output_files_path, chrom_col, pos_col, score_col, shuffles, cutoff, report_snps = sys.argv[1:11]
+
+# Locate the intervals with the external `sweep` program.  Its argument
+# order differs from this script's: the cutoff comes before the shuffle
+# count.  The program's stdout becomes the output dataset.
+prog = 'sweep'
+
+args = [
+    prog, input, chrom_col, pos_col, score_col,
+    cutoff, shuffles, report_snps,
+]
+
+run_program(None, args, stdout_file=output)
+
+# With per-position reporting turned off, the interval text is the final result.
+if report_snps == "0":
+    sys.exit(0)
+
+################################################################################
+
+mkdir_p(output_files_path)
+
+bedgraph_filename = 'bedgraph.txt'
+links_filename = os.path.join(output_files_path, 'links.txt')
+
+data = []
+links_data = []
+    
+with open(output) as fh:  # re-read the text that the sweep run stored in the output dataset
+    chrom = None
+    for line in fh:
+        line = line.rstrip('\r\n')
+        if not line:
+            continue
+        if line[0] != ' ':
+            # interval line (no leading space): four tab-separated fields; its chrom applies to the data lines that follow
+            chrom, interval_begin, interval_end, interval_value = line.split('\t')
+            links_data.append((chrom, int(interval_begin), int(interval_end)))
+        else:
+            # data line (leading space): "position value", attributed to the most recent interval's chrom
+            begin, value = line.split()
+            data.append((chrom, int(begin), value))
+
+with open(bedgraph_filename, 'w') as ofh:
+    print >> ofh, 'track type=bedGraph'
+    for chrom, begin, value in sorted(data):
+        print >> ofh, chrom, begin, begin+1, value  # each position becomes a one-base-wide record
+
+with open(links_filename, 'w') as ofh:
+    for chrom, begin, end in sorted(links_data):
+        print >> ofh, chrom, begin, end  # space-separated, one interval per line
+
+################################################################################
+
+# Save the chromosome sizes for this build: whatever fetchChromSizes writes
+# to stdout is stored by run_program in <dbkey>.chrom.sizes, a file in the
+# current working directory.
+chrom_sizes_filename = '{0}.chrom.sizes'.format(dbkey)
+
+prog = 'fetchChromSizes'
+args = [ prog, dbkey ]
+
+run_program(None, args, stdout_file=chrom_sizes_filename)
+
+################################################################################
+
+# Convert the bedGraph to bigWig.  The output dataset is passed as the last
+# argument, so the interval text stored there earlier is presumably
+# overwritten by the converter; its stdout is not saved to any file.
+prog = 'bedGraphToBigWig'
+args = [ prog, bedgraph_filename, chrom_sizes_filename, output ]
+
+run_program(None, args)
+
+################################################################################
+
+sys.exit(0)
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff find_intervals.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/find_intervals.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,142 @@
+<tool id="gd_find_intervals" name="Remarkable Intervals" version="1.0.0">
+  <description>: Find high-scoring runs of SNPs</description>
+
+  <command interpreter="python">
+    find_intervals.py "$input" "$input.metadata.dbkey" "$output" "$output.files_path"
+
+    #if $override_metadata.choice == "0"
+      "$input.metadata.ref" "$input.metadata.rPos"
+    #else
+      "$override_metadata.ref_col" "$override_metadata.rpos_col"
+    #end if
+
+    "$score_col" "$shuffles"
+
+    #if $cutoff.type == 'percentage'
+      "$cutoff.cutoff_pct"
+    #else
+      "=$cutoff.cutoff_val"
+    #end if
+
+    "$out_format"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Input">
+      <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
+    </param>
+
+    <param name="score_col" type="data_column" data_ref="input" numerical="true" label="Column with score"/>
+
+    <conditional name="cutoff">
+      <param name="type" type="select" label="Cutoff type">
+        <option value="percentage">percentage</option>
+        <option value="value">value</option>
+      </param>
+      <when value="percentage">
+        <param name="cutoff_pct" type="float" value="95" min="0" max="100" label="Percentage cutoff"/>
+      </when>
+      <when value="value">
+        <param name="cutoff_val" type="float" value="0.0" label="Value cutoff"/>
+      </when>
+    </conditional>
+
+    <param name="shuffles" type="integer" min="0" value="0" label="Number of randomizations"/>
+
+    <param name="out_format" type="select" format="integer" label="Report individual positions">
+      <option value="0" selected="true">No</option>
+      <option value="1">Yes</option>
+    </param>
+
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="Choose columns" help="Note: you need to choose the columns if the input dataset is not gd_snp">
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1" >Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome" help="Note: be sure the build in the metadata is the same as the one used here."/>
+        <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position" help="Note: either zero or one based positions will work"/>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="interval">
+        <change_format>
+            <when input="out_format" value="1" format="bigwigpos" />
+        </change_format>
+    </data>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="score_col" value="5" />
+      <param name="type" value="value" />
+      <param name="cutoff_val" value="700.0" />
+      <param name="shuffles" value="10" />
+      <param name="out_format" value="0" />
+      <param name="choice" value="0" />
+
+      <output name="output" file="test_out/find_intervals/find_intervals.interval" />
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is tabular_, with required columns of chromosome, position,
+and score (in any column).
+The output dataset is interval_.  (`Dataset missing?`_)
+
+.. _interval: ./static/formatHelp.html#interval
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user selects a tabular dataset (such as a gd_snp dataset) and 
+if the dataset is not also gd_snp format, specifies 
+the columns containing chromosome, position, and scores (such as an Fst-value for the SNP). 
+For gd_snp format the metadata can be used to specify the chromosome and 
+position.
+Other inputs include
+a percentage or raw score for the "cutoff" which should be greater than the 
+average value for the scores column.  A higher value will give smaller intervals
+in the output.
+If a percentage (e.g. 95%) is specified
+then that percentile of the scores is used as the cutoff; 
+percentile may not work well if many rows or SNPs have the same score
+(in that case use a raw score).  The program subtracts the
+cutoff from every score, then finds genomic intervals (i.e., consecutive runs
+of SNPs) whose total score cannot be increased by adding or subtracting one
+or more adjusted scores at the ends of the interval.
+Another input is the number of times the
+data should be randomized (only intervals with score exceeding the maximum for
+the randomized data are reported).  
+If 100 shuffles are requested, then any interval reported by the tool has a 
+score with probability less than 0.01 of being equaled or exceeded by chance.
+
+-----
+
+**Example**
+
+- input (gd_snp)::
+
+    Contig222_chr2_9817738_9818143   220     C       T       888.0   chr2    9817960         C       17      0       2       78      12      0       2       63      20      0       2       87      8       0       2       51      11      0       2       60      12      0       2       63      Y       76      0.093   1
+    Contig47_chr2_25470778_25471576  126     G       A       888.0   chr2    25470896        G       12      0       2       63      14      0       2       69      14      0       2       69      10      0       2       57      18      0       2       81      13      0       2       66      N       11      0.289   1
+    ...
+    Contig115_chr2_61631913_61632510 310     G       T       999.3   chr2    61632216        G       7       0       2       48      9       0       2       54      7       0       2       48      11      0       2       60      10      0       2       57      10      0       2       57      N       13      0.184   0
+    Contig31_chr2_67331584_67331785  39      C       T       999.0   chr2    67331623        C       11      0       2       60      10      0       2       57      7       0       2       48      9       0       2       54      2       0       2       33      4       0       2       39      N       110     0.647   1
+    etc.
+
+- output not reporting individual positions::
+
+    chr2    9817960 67331624        1272.2000
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff gd_composite.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gd_composite.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+
+from galaxy import eggs
+import pkg_resources
+pkg_resources.require( "Cheetah" )
+from Cheetah.Template import Template
+
+import errno
+import os
+import sys
+from datetime import datetime
+################################################################################
+
+def die(message):  # report a fatal error and terminate the process
+    print >> sys.stderr, message
+    sys.exit(1)  # exit status 1 after the message has gone to stderr
+
+def mkdir_p(path):  # like `mkdir -p`: create path together with any missing parents
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno != errno.EEXIST:  # an already-existing path is not treated as an error
+            raise
+
+################################################################################
+
+class Display(object):  # base renderer; Parameter falls back to it when no display_type is given
+    def display(self, parameter):
+        print parameter  # NOTE(review): prints to stdout and returns None, whereas every subclass returns markup -- confirm intended
+
+class DisplayFile(Display):  # renders a parameter as an HTML hyperlink
+    def display(self, parameter):
+        return '<a href="{0}">{1}</a>'.format(parameter.value, parameter.name)  # value is the link target, name the link text; neither is HTML-escaped
+
+class DisplayValue(Display):  # renders "description: value", or the description alone when value is None
+    def display(self, parameter):
+        if parameter.value is None:
+            return '{0}'.format(parameter.description)
+        # a value is present (including falsy ones such as 0): show it after the description
+        return '{0}: {1}'.format(parameter.description, parameter.value)
+
+class DisplayTagList(Display):  # renders "column:individual" tags as a numbered list of the individual names
+    def display(self, parameter):
+        rv = []
+        if parameter.name:  # optional heading placed above the list
+            rv.append(parameter.name)
+        rv.append('<ol>')
+        for tag in parameter.value:
+            col, individual_name = tag.split(':', 1)  # split only once so a name that itself contains ':' is kept whole
+            rv.append('<li>{0}</li>'.format(individual_name))
+        rv.append('</ol>')
+        return '\n'.join(rv)
+
+class DisplayPopulationList(Display):  # renders populations as a bulleted list, each with a numbered list of its individuals
+    def display(self, parameter):
+        rv = []
+        rv.append('Populations')
+        rv.append('<ul>')
+        for population in parameter.value:  # each item must provide .name and .individual_names()
+            rv.append('<li>')
+            if population.name is not None:
+                rv.append(population.name)
+            rv.append('<ol>')
+            for name in population.individual_names():
+                rv.append('<li>{0}</li>'.format(name))  # names are inserted without HTML escaping
+            rv.append('</ol>')
+            rv.append('</li>')
+        rv.append('</ul>')
+        return '\n'.join(rv)
+
+#    def display(self, parameter, name=''):
+#        print '<ul> {0}'.format(name)
+#        for individual_name in parameter.individual_names():
+#            print '<li>{0}>/li>'.format(individual_name)
+#        print '</ul>'
+        
+        
+class Parameter(object):
+    """Bundle a name, value and description with the object used to render them."""
+    def __init__(self, name=None, value=None, description=None, display_type=None):
+        if display_type is None:  # no renderer supplied: use the base Display
+            display_type = Display()
+        self.display_type = display_type
+        self.name = name
+        self.value = value
+        self.description = description
+
+    def display(self):
+        return self.display_type.display(self)
+
+class InfoPage(object):  # gathers the title, inputs, outputs and misc data rendered through the HTML template
+    _realpath = os.path.realpath(__file__)
+    _script_dir = os.path.dirname(_realpath)
+    template_file = os.path.join(_script_dir, 'gd_composite_template.html')  # template sits beside this script (symlinks resolved)
+    def __init__(self):
+        self.timestamp = datetime.now().strftime('%Y-%m-%d %I:%M:%S %p')  # local time, 12-hour clock with AM/PM
+        self.title = 'Genome Diversity Composite Dataset'
+        self.inputs = []
+        self.outputs = []
+        self.misc = ''
+        self.template = self.load_template()  # template text is read once, at construction
+
+    def load_template(self):
+        with open(self.template_file) as f:
+            return f.read().rstrip('\r\n')
+
+    def set_title(self, title):
+        self.title = title
+
+    def add_input_parameter(self, parameter):
+        self.inputs.append(parameter)
+
+    def add_output_parameter(self, parameter):
+        self.outputs.append(parameter)
+
+    def add_misc(self, misc):
+        self.misc = misc  # replaces (does not append to) any earlier misc value
+
+    def render(self):
+        return Template(self.template, searchList=[{'tool': self}])  # a Cheetah Template exposing this page as $tool; not yet a string
+        
+
+            
+
+
+
+
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff gd_composite_template.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gd_composite_template.html	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,40 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>${tool.title}</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: $tool.timestamp
+      <p/>
+      #if $tool.outputs
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+          #for output in $tool.outputs
+            <li>${output.display()}</li>
+          #end for
+        </ul>
+      </div>
+      #end if
+      #if $tool.inputs
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+          #for input in $tool.inputs
+            <li>${input.display()}</li>
+          #end for
+        </ul>
+      </div>
+      #end if
+      #if $tool.misc
+      <div id="gd_misc">
+        $tool.misc.display()
+      </div>
+      #end if
+    </div>
+  </body>
+</html>
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,266 @@
+#!/usr/bin/env python
+
+import sys
+import cdblib
+
+def _openfile( filename=None, mode='r' ):
+    # Open filename in the given mode; an IOError is re-raised as RuntimeError.
+    try:
+        return open( filename, mode )
+    except IOError, err:
+        raise RuntimeError( "can't open file: %s\n" % str( err ) )
+
+def get_filename_from_loc( species=None, filename=None ):
+    # Return column 2 of the first tab-separated row whose column 1 equals species; RuntimeError if none matches.
+    with _openfile( filename ) as fh:  # the handle is now closed on both the return and the raise path
+        for line in fh:
+            line = line.rstrip( '\r\n' )
+            if line and not line.startswith( '#' ):  # skip blank lines and '#' comments
+                elems = line.split( '\t' )
+                if len( elems ) >= 2 and elems[0] == species:
+                    return elems[1]
+
+    raise RuntimeError( "can't find '%s' in location file: %s\n" % ( species, filename ) )
+
+
+class SnpFile( object ):  # sequential reader for a tab-separated SNP table; column numbers are 1-based
+    def __init__( self, filename=None, seq_col=1, pos_col=2, ref_seq_col=7, ref_pos_col=8 ):
+        self.filename = filename
+        self.fh = _openfile( filename )
+        self.seq_col = seq_col
+        self.pos_col = pos_col
+        self.ref_seq_col = ref_seq_col
+        self.ref_pos_col = ref_pos_col
+        self.elems = None  # fields of the current data line; None before the first read and after end of file
+        self.line = None
+        self.comments = []  # every '#' line met so far, in file order
+
+    def next( self ):  # advance to the next data line: returns 1 on success, None at end of file
+        while self.fh:
+            try:
+                self.line = self.fh.next()
+            except StopIteration:
+                self.line = None
+                self.elems = None
+                return None
+            if self.line:
+                self.line = self.line.rstrip( '\r\n' )
+                if self.line:  # blank lines are skipped
+                    if self.line.startswith( '#' ):
+                        self.comments.append( self.line )
+                    else:
+                        self.elems = self.line.split( '\t' )
+                        return 1
+
+    def get_seq_pos( self ):  # (sequence, position) strings of the current line, or (None, None)
+        if self.elems:
+            return self.elems[ self.seq_col - 1 ], self.elems[ self.pos_col - 1 ]
+        else:
+            return None, None
+
+    def get_ref_seq_pos( self ):  # (reference sequence, reference position) of the current line, or (None, None)
+        if self.elems:
+            return self.elems[ self.ref_seq_col - 1 ], self.elems[ self.ref_pos_col - 1 ]  # was ref_seq_seq, an attribute that is never set
+        else:
+            return None, None
+
+
+class IndexedFile( object ):  # a data file paired with a cdb index that maps keys to offsets in the data file
+
+    def __init__( self, data_file=None, index_file=None ):
+        self.data_file = data_file
+        self.index_file = index_file
+        self.data_fh = _openfile( data_file )
+        self.index_fh = _openfile( index_file )
+        self._reader = cdblib.Reader( self.index_fh.read(), hash )  # whole index is read into memory; NOTE(review): passes the builtin hash -- confirm the index was built with the same hash function
+
+    def get_indexed_line( self, key=None ):  # returns the data line stored for key, or None when key is not indexed
+        line = None
+        if key in self._reader:
+            offset = self._reader.getint( key )
+            self.data_fh.seek( offset )  # leaves data_fh positioned just after the returned line
+            try:
+                line = self.data_fh.next()
+            except StopIteration:
+                raise RuntimeError( 'index file out of sync for %s' % key )
+        return line
+
+class PrimersFile( IndexedFile ):  # indexed file of '>'-headed primer entries keyed by "sequence position"
+    def get_primer_header( self, sequence=None, position=None ):  # returns the validated '>' header line, or None if the key is not indexed
+        key = "%s %s" % ( str( sequence ), str( position ) )
+        header = self.get_indexed_line( key )
+        if header:
+            if header.startswith( '>' ):
+                elems = header.split()
+                if len( elems ) < 3:
+                    raise RuntimeError( 'short primers header for %s' % key )
+                if sequence != elems[1] or str( position ) != elems[2]:  # the header must name the entry that was asked for
+                    raise RuntimeError( 'primers index for %s finds %s %s' % ( key, elems[1], elems[2] ) )
+            else:
+                raise RuntimeError( 'primers index out of sync for %s' % key )
+        return header
+
+    def get_entry( self, sequence=None, position=None ):  # header plus every following line up to the next '>' header or end of file
+        entry = self.get_primer_header( sequence, position )
+        if entry:
+            while self.data_fh:
+                try:
+                    line = self.data_fh.next()
+                except StopIteration:
+                    break
+                if line.startswith( '>' ):
+                    break
+                entry += line
+        return entry
+
+    def get_enzymes( self, sequence=None, position=None ):  # comma-separated names on the line right after the header, as a list
+        entry = self.get_primer_header( sequence, position )
+        enzyme_list = []
+        if entry:
+            try:
+                line = self.data_fh.next()
+            except StopIteration:
+                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
+            if line.startswith( '>' ):
+                raise RuntimeError( 'primers entry for %s %s is truncated' % ( str( sequence ), str( position ) ) )
+            line = line.rstrip( '\r\n' )  # the stripped result used to be discarded, which made the emptiness test below always true
+            if line:
+                enzymes = line.split( ',' )
+                for enzyme in enzymes:
+                    enzyme = enzyme.strip()
+                    if enzyme:
+                        enzyme_list.append( enzyme )
+        return enzyme_list
+
+class SnpcallsFile( IndexedFile ):  # indexed tab-separated file: sequence, position, then a sequence string holding a bracketed marker
+    def get_snp_seq( self, sequence=None, position=None ):  # third field of the indexed line, or None if the key is not indexed
+        key = "%s %s" % ( str( sequence ), str( position ) )
+        line = self.get_indexed_line( key )
+        if line:
+            elems = line.split( '\t' )
+            if len (elems) < 3:
+                raise RuntimeError( 'short snpcalls line for %s' % key )
+            if sequence != elems[0] or str( position ) != elems[1]:  # the line must describe the entry that was asked for
+                raise RuntimeError( 'snpcalls index for %s finds %s %s' % ( key, elems[0], elems[1] ) )
+            return elems[2]  # the line is not stripped, so this keeps the line terminator when it is the last field
+        else:
+            return None
+
+    def get_flanking_dna( self, sequence=None, position=None, format='fasta' ):  # text record in 'fasta' or 'primer3' layout, or None
+        if format != 'fasta' and format != 'primer3':
+            raise RuntimeError( 'invalid format for flanking dna: %s' % str( format ) )
+        seq = self.get_snp_seq( sequence, position )
+        if seq:
+            p = seq.find('[')
+            if p == -1:
+                raise RuntimeError( 'snpcalls entry for %s %s missing left bracket: %s' % ( str( sequence ), str( position ), seq ) )
+            q = seq.find(']', p + 1)
+            if q == -1:
+                raise RuntimeError( 'snpcalls entry for %s %s missing right bracket: %s' % ( str( sequence ), str( position ), seq ) )
+            q += 1  # q now indexes the first character after ']'
+
+            if format == 'fasta':
+                flanking_seq = '> '
+            else:
+                flanking_seq = 'SEQUENCE_ID='
+
+            flanking_seq += "%s %s %s %s\n" % ( str( sequence ), str( position ), seq[p+1], seq[p+3] )  # presumably the two alleles of an "[A/B]" marker -- TODO confirm
+
+            if format == 'primer3':
+                flanking_seq += 'SEQUENCE_TEMPLATE='
+
+            flanking_seq += "%sn%s\n" % ( seq[0:p], seq[q:] )  # the whole bracketed marker is replaced by a single 'n'
+
+            if format == 'primer3':
+                flanking_seq += "SEQUENCE_TARGET=%d,11\n=\n" % ( p - 5 )  # NOTE(review): 11-long target starting at p - 5; confirm the intended centering and index base
+
+            return flanking_seq
+        else:
+            return None
+
+
+
+class LocationFile( object ):  # key -> value lookup loaded from a two-column, tab-separated file
+    def __init__(self, filename):
+        self.build_map(filename)
+
+    def build_map(self, filename):
+        self.map = {}
+        self.open_file(filename)
+        for line in self.read_lines():
+            elems = line.split('\t', 1)  # split at the first tab only; the value may itself contain tabs
+            if len(elems) == 2:  # lines without a tab are ignored
+                self.map[ elems[0].strip() ] = elems[1].strip()  # a repeated key keeps its last value
+        self.close_file()
+
+    def read_lines(self):  # yields each non-'#' line without its line terminator
+        for line in self.fh:
+            if not line.startswith('#'):
+                line = line.rstrip('\r\n')
+                yield line
+
+    def open_file(self, filename):
+        self.filename = filename
+        try:
+            self.fh = open(filename, 'r')
+        except IOError, err:
+            print >> sys.stderr, "Error opening location file '%s': %s" % (filename, str(err))
+            sys.exit(1)  # an unreadable location file terminates the process rather than raising
+
+    def close_file(self):
+        self.fh.close()
+
+    def loc_file( self, key ):  # value stored for key; an unknown key terminates the process
+        if key in self.map:
+            return self.map[key]
+        else:
+            print >> sys.stderr, "'%s' does not appear in location file '%s'" % (key, self.filename)
+            sys.exit(1)
+        
+class ChrLens( object ):  # chromosome name -> integer length, loaded from a two-column, tab-separated file
+    def __init__( self, chrlen_filename ):
+        self.chrlen_filename = chrlen_filename
+        self.build_map()
+
+    def build_map(self):
+        self.map = {}
+        self.open_file(self.chrlen_filename)
+        for line in self.read_lines():
+            elems = line.split('\t', 1)
+            if len(elems) == 2:  # lines without a tab are ignored
+                chrom = elems[0].strip()
+                chrom_len_text = elems[1].strip()
+                try:
+                    # store only inside the try: a bad length is reported and skipped, never recorded as a stale or undefined value
+                    self.map[ chrom ] = int( chrom_len_text )
+                except ValueError:
+                    print >> sys.stderr, "Bad length '%s' for chromosome '%s' in '%s'" % (chrom_len_text, chrom, self.chrlen_filename)
+        self.close_file()
+
+    def read_lines(self):  # yields each non-'#' line without its line terminator
+        for line in self.fh:
+            if not line.startswith('#'):
+                line = line.rstrip('\r\n')
+                yield line
+
+    def open_file(self, filename):
+        self.filename = filename
+        try:
+            self.fh = open(filename, 'r')
+        except IOError, err:
+            print >> sys.stderr, "Error opening chromosome length file '%s': %s" % (filename, str(err))
+            sys.exit(1)  # an unreadable length file terminates the process rather than raising
+
+    def close_file(self):
+        self.fh.close()
+
+    def length( self, key ):  # length recorded for chromosome key, or None when it is unknown
+        if key in self.map:
+            return self.map[key]
+        else:
+            return None
+
+    def __iter__( self ):  # iterates over the chromosome names
+        for chrom in self.map:
+            yield chrom
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/Makefile	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,8 @@
+all:  # every target delegates to src/; $(MAKE) passes flags and the jobserver on to the sub-make
+	cd src && $(MAKE)
+
+clean:
+	cd src && $(MAKE) clean
+
+install:
+	cd src && $(MAKE) install
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/bin/gd_ploteig
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/bin/gd_ploteig	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,172 @@
+#!/usr/bin/env perl
+
+### ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k]  [-y] [-z sep]
+use Getopt::Std ;
+use File::Basename ;
+use warnings ;
+
+## pops : separated  -x = make postscript and pdf  -z use another separator
+##  -k keep intermediate files
+## NEW if pops is a file names are read one per line
+
+getopts('i:o:p:c:s:d:z:t:xky',\%opts) ;
+$postscmode = $opts{"x"} ;
+$oldkeystyle =  $opts{"y"} ;
+$kflag = $opts{"k"} ;
+$keepflag = 1 if ($kflag) ;
+$keepflag = 1 unless ($postscmode) ;
+
+$zsep = ":" ;  ## separator for the -p population list; used as a split pattern in setpops
+if (defined $opts{"z"}) {
+ $zsep = $opts{"z"} ;
+ $zsep = '\+' if ($zsep eq "+") ;  ## single quotes keep the backslash; "\+" is just "+", an invalid split pattern
+}
+
+$title = "" ;
+if (defined $opts{"t"}) {
+ $title = $opts{"t"} ;
+}
+if (defined $opts{"i"}) {
+ $infile = $opts{"i"} ;
+}
+else {
+ usage() ;
+ exit 0 ;
+}
+open (FF, $infile) || die "can't open $infile\n" ;
+@L = (<FF>) ;
+chomp @L ;
+$nf = 0 ;
+foreach $line (@L) { 
+ next if ($line =~ /^\s+#/) ;
+ @Z = split " ", $line ;
+ $x = @Z ;
+ $nf = $x if ($nf < $x) ;
+}
+printf "## number of fields: %d\n", $nf ;
+$popcol = $nf-1 ;
+
+
+if (defined $opts{"p"}) {
+ $pops = $opts{"p"} ;
+}
+else {
+ die "p parameter compulsory\n" ;
+}
+
+$popsname = setpops ($pops) ;
+print "$popsname\n" ;
+
+$c1 = 1; $c2 =2 ;  ## default: plot eigenvector 1 against eigenvector 2
+if (defined $opts{"c"}) {
+ $cols = $opts{"c"} ;
+ ($c1, $c2) = split ":", $cols ;
+ die "bad c param: $cols\n" unless (defined $c2) ;  ## $cols is always defined here; a missing ":b" part leaves $c2 undefined
+}
+
+$stem = "$infile.$c1:$c2" ;
+if (defined $opts{"s"}) {
+ $stem = $opts{"s"} ;
+}
+$gnfile = "$stem.$popsname.xtxt" ;
+ 
+if (defined $opts{"o"}) {
+ $gnfile = $opts{"o"} ;
+}
+
+@T = () ; ## trash 
+open (GG, ">$gnfile") || die "can't open $gnfile\n" ;
+print GG "## " unless ($postscmode) ;
+print GG "set terminal postscript color\n" ;
+print GG "set style line  2 lc rgbcolor \"#376600\"\n";
+print GG "set style line 11 lc rgbcolor \"#376600\"\n";
+print GG "set style line 20 lc rgbcolor \"#376600\"\n";
+print GG "set style line 29 lc rgbcolor \"#376600\"\n";
+print GG "set style line  6 lc rgbcolor \"#FFCC00\"\n";
+print GG "set style line 15 lc rgbcolor \"#FFCC00\"\n";
+print GG "set style line 24 lc rgbcolor \"#FFCC00\"\n";
+print GG "set style increment user\n";
+print GG "set title  \"$title\" \n" ; 
+print GG "set key outside\n" unless ($oldkeystyle) ; 
+print GG "set xlabel  \"eigenvector $c1\" \n" ; 
+print GG "set ylabel  \"eigenvector $c2\" \n" ; 
+print GG "plot " ;
+$np = @P ;
+$lastpop = $P[$np-1] ;
+$d1 = $c1+1 ;
+$d2 = $c2+1 ;
+foreach $pop (@P)  { 
+ $dfile = "$stem:$pop" ;
+ push @T, $dfile ;
+ print GG " \"$dfile\" using $d1:$d2 title \"$pop\" " ;
+ print GG ", \\\n" unless ($pop eq $lastpop) ;
+ open (YY, ">$dfile") || die "can't open $dfile\n" ;
+ foreach $line (@L) {
+  next if ($line =~ /^\s+#/) ;
+  @Z = split " ", $line ;
+  next unless (defined $Z[$popcol]) ;
+  next unless ($Z[$popcol] eq $pop) ;
+  print YY "$line\n" ;
+ }
+ close YY ;
+}
+print GG "\n" ;
+print GG "## "  if ($postscmode) ;
+print GG "pause 9999\n"  ;
+close GG ;
+
+if ($postscmode) { 
+$psfile = "$stem.ps" ;
+
+ if ($gnfile =~ /xtxt/) { 
+  $psfile = $gnfile ;
+  $psfile  =~ s/xtxt/ps/ ;
+ }
+system "gnuplot < $gnfile > $psfile" ;
+#system "fixgreen  $psfile" ;
+system "ps2pdf  $psfile " ;
+}
+unlink (@T) unless $keepflag ;
+
+sub usage { 
+ ## Print the command-line synopsis and a description of every option to stdout.
+print "ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k] [-y] [-z sep]\n" ;  
+print "-i eigfile     input file first col indiv-id last col population\n" ;
+print "## as output by smartpca in outputvecs \n" ;
+print "-c a:b         a, b columns to plot.  1:2 would be common and leading 2 eigenvectors\n" ;
+print "-p pops        Populations to plot.  : delimited.   eg  -p Bantu:San:French\n" ;
+print "## pops can also be a filename.  List populations 1 per line\n" ;
+print "[-s stem]      stem will start various output files\n"  ;
+print "[-o ofile]     ofile will be gnuplot control file.  Should have xtxt suffix\n"; 
+print "[-x]           make ps and pdf files\n" ; 
+print "[-k]           keep various intermediate files although  -x set\n" ;
+print "## necessary if .xtxt file is to be hand edited\n" ;
+print "[-y]           put key at top right inside box (old mode)\n" ;
+print "[-t]           title (legend)\n" ;
+print "[-z sep]       use sep instead of : to delimit the -p population list\n" ;
+print "The xtxt file is a gnuplot file and can be easily hand edited.  Intermediate files
+needed if you want to make your own plot\n" ;
+
+}
+sub setpops {      ## fill the global @P with the populations to plot; returns a name used in output file names
+ my ($pops) = @_  ; 
+ local (@a, $d, $b, $e) ; 
+
+ if (-e $pops) {  ## an existing file: read population names from it, one per line
+  open (FF1, $pops) || die "can't open $pops\n" ;
+  @P = () ;
+  foreach $line (<FF1>) { 
+  ($a) = split " ", $line ;  ## first whitespace-separated token of the line
+  next unless (defined $a) ;
+  next if ($a =~ /\#/) ;  ## skips any token containing '#', not only tokens that start with it
+  push  @P, $a ;
+  }
+  $out = join ":", @P ; 
+  print "## pops: $out\n" ;
+  ($b, $d , $e) = fileparse($pops) ;
+  return $b ;  ## base name of the populations file
+ }
+ @P = split $zsep, $pops ;  ## otherwise $pops is itself the delimited list
+ return $pops ;
+
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/Fst_ave.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/Fst_ave.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,228 @@
+/* Fst_ave -- determine the average Fst values between two specified populations
+*  and between two random populations
+*
+*    argv[1] = a Galaxy SNP table. For each of several individuals, the table
+*              has four columns (#A, #B, genotype, quality).
+*    argv[2] = 1 if Fst is estimated from SAMtools genotypes; 0 means use
+*	        read-coverage data.
+*    argv[3] = lower bound, for individual quality value if argv[2] = 1
+*	       or for total number of reads per population if argv[2] = 0.
+*	       SNPs not satisfying these lower bounds are ignored.
+*    argv[4] = 1 to discard SNPs that appear fixed in the two populations
+*    argv[5] = 1 for unbiased estimator, else 0 for the original Wright form.
+*    argv[6] = k => 0 says report the average Fst and the largest average over k
+*              randomly chosen splits into two populations of those sizes
+*    argv[7], argv[8], ...,  have the form "13:1", "13:2" or "13:0", meaning
+*             that the 13th and 14th columns (base 1) give the allele counts
+*             for an individual that is in population 1, in population 2,
+*             or in neither population.
+
+What it does on Galaxy
+
+The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to select individuals from a SNP table. No individual can be in both populations. Other choices are as follows.
+
+Data source. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele, or by adding the frequencies inferred from genotypes of individuals in the populations.
+
+After specifying the data source, the user sets lower bounds on the amount of data required at a SNP. For estimating the Fst using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual. SNPs not meeting these lower bounds are ignored.
+
+The user specifies whether SNPs where both populations appear to be fixed for the same allele should be retained or discarded.
+
+The user chooses which definition of Fst to use: Wright's original definition or Weir's unbiased estimator.
+
+Finally, the user decides whether to use randomizations. If so, then the user specifies how many randomly generated population pairs (retaining the numbers of individuals of the originals) to generate, as well as the "population" of additional individuals (not in the first two populations) that can be used in the randomization process.
+
+The program prints the average Fst for the original populations and the number of SNPs used to compute it. If randomizations were requested, it prints the average Fst for each randomly generated population pair, ending with a summary that includes the maximum and average value, and the highest-scoring population pair.
+*/
+
+#include "lib.h"
+#include "Fst_lib.h"
+
+// maximum length of a line from the table
+#define MOST 5000
+
+// information about the specified individuals
+// x is an array of nI values 0, 1, or 2;
+// shuffling x creates random "populations"
+int col[MOST], x[MOST], best_x[MOST];
+int nI, lower_bound, unbiased, discard, genotypes, nsnp;
+
+// each SNP has an array of counts
+struct count {
+	int A, B;
+};
+
+// linked list summarizes the Galaxy table
+struct snp {
+	struct count *c;
+	struct snp *next;
+} *start, *last;
+
+// Given the two populations specified by x[], return the average Fst over the
+// SNPs with enough data in both (0.0 if there is none); sets the global nsnp.
+double ave_Fst() {
+	double tot_Fst;
+	struct snp *s;
+	int i, A1, B1, A2, B2, too_few;
+
+	tot_Fst = 0.0;
+	nsnp = 0;
+	for (s = start; s != NULL; s = s->next) {
+		// get counts for the two populations at this SNP
+		for (A1 = B1 = A2 = B2 = i = 0; i < nI; ++i) {
+			if (s->c[i].A < 0) // no genotypes
+				continue;
+			if (x[i] == 1) {
+				A1 += s->c[i].A;
+				B1 += s->c[i].B;
+			} else if (x[i] == 2) {
+				A2 += s->c[i].A;
+				B2 += s->c[i].B;
+			}
+		}
+		if (discard && ((A1 == 0 && A2 == 0) || (B1 == 0 && B2 == 0)))
+			continue;	// fixed in these two populations
+		too_few = (genotypes ? 1 : lower_bound);
+		if (A1+B1 >= too_few && A2+B2 >= too_few) {
+			++nsnp;
+			tot_Fst += Fst(A1, B1, A2, B2, unbiased);
+		}
+	}
+	// no usable SNP: avoid 0.0/0, whose NaN would poison the caller's totals
+	return (nsnp > 0 ? tot_Fst/nsnp : 0.0);
+}
+
+/* Randomly permute x[0], x[1], ..., x[nI-1] in place, drawing from random().
+*  This is Algorithm P, page 125 of "The Art of Computer Programming (Vol II)
+*  Seminumerical Algorithms", by Donald Knuth, Addison-Wesley, 1971.
+*/
+void shuffle() {
+	int hi = nI, pick, held;
+
+	while (--hi > 0) {
+		// exchange slot hi with a slot pick chosen from 0 <= pick <= hi
+		pick = random() % (hi+1);
+		held = x[hi];
+		x[hi] = x[pick];
+		x[pick] = held;
+	}
+}
+
+int main(int argc, char **argv) {
+	FILE *fp;
+	char *p, *z = "\t\n", buf[MOST];
+	int X[MOST], nshuff, n, i, j, k, saw[3], larger, all = 1;
+	struct snp *new;
+	double F, F1, largest_F, tot_F;
+
+	if (argc < 7)
+		fatal("args: table data-source lower_bound discard? unbiased? #shuffles n:1 m:2 ...");
+
+	// handle command-line arguments
+	genotypes = atoi(argv[2]);
+	lower_bound = atoi(argv[3]);
+	if (!genotypes && lower_bound <= 0)
+		fatal("minimum coverage should exceed 0");
+	discard = atoi(argv[4]);
+	unbiased = atoi(argv[5]);
+	nshuff = atoi(argv[6]);
+	saw[0] = saw[1] = saw[2] = 0;
+	// populations 1 and 2 must be disjoint; 0 can be replaced by 1 or 2
+	for (i = 7; i < argc; ++i) {
+		if (sscanf(argv[i], "%d:%d", &j, &k) != 2)
+			fatalf("not like 13:2 : %s", argv[i]);
+		if (j < 1 || j+3 >= MOST)	// X[j], ..., X[j+3] are read below
+			fatalf("column out of range: %s", argv[i]);
+		if (k < 0 || k > 2)
+			fatalf("not population 0, 1 or 2: %s", argv[i]);
+		saw[k] = 1;
+		for (n = 0; n < nI && col[n] != j; ++n)	// column seen before?
+			;
+		if (n < nI) { // OK if one of the populations is 0
+			if (k > 0) {
+				if (x[n] > 0 && x[n] != k)
+				  fatalf("column %d is in both populations", j);
+				x[n] = k;
+			}
+		} else {
+			col[nI] = j;	// distinct in-range columns keep nI below MOST
+			x[nI] = k;
+			++nI;
+		}
+	}
+	if (saw[1] == 0)
+		fatal("population 1 is empty");
+	if (saw[2] == 0)
+		fatal("population 2 is empty");
+
+	// read the table of SNPs and store the essential allele counts
+	fp = ckopen(argv[1], "r");
+	while (fgets(buf, MOST, fp)) {
+		if (buf[0] == '#')
+			continue;
+		new = ckalloc(sizeof(*new));
+		new->next = NULL;
+		new->c = ckalloc(nI*sizeof(struct count));
+		// set X[i] = atoi(i-th word of buf), i is base 1
+		for (i = 1, p = strtok(buf, z); p != NULL;
+		  ++i, p = strtok(NULL, z))
+			X[i] = atoi(p);
+		for (i = 0; i < nI; ++i) {
+			n = col[i];
+			if (genotypes) {
+				k = X[n+2];
+				if (k == -1 || X[n+3] < lower_bound)
+					new->c[i].A = new->c[i].B = -1;	// marks "no genotype" for ave_Fst()
+				else {
+					new->c[i].A = k;
+					new->c[i].B = 2 - k;
+				}
+			} else {
+				new->c[i].A = X[n];
+				new->c[i].B = X[n+1];
+			}
+		}
+		if (start == NULL)
+			start = new;
+		else
+			last->next = new;
+		last = new;
+	}
+	fclose(fp);
+
+	F1 = ave_Fst();
+	printf("average Fst is %5.5f, using %d SNPs\n", F1, nsnp);
+	for (j = 0; j < nI; ++j)
+		best_x[j] = x[j];
+	for (tot_F = largest_F = 0.0, larger = i = 0; i < nshuff; ++i) {
+		shuffle();
+		if ((F = ave_Fst()) > F1)
+			++larger;
+		if (F > largest_F) {
+			largest_F = F;
+			for (j = 0; j < nI; ++j)
+				best_x[j] = x[j];
+		}
+		tot_F += F;
+		if (all)	// make this optional?
+			printf("%d: %f\n", i+1, F);
+	}
+	if (nshuff > 0) {
+		printf("%d of %d random groupings had a larger average Fst\n",
+		  larger, nshuff);
+		printf("largest = %5.5f, mean = %5.5f\n", largest_F,
+		  tot_F/nshuff);
+		if (largest_F > F1) {
+			printf("first columns for the best two populations:\n");
+			for (i = 0; i < nI; ++i)
+				if (best_x[i] == 1)
+					printf("%d ", col[i]);
+			printf("and\n");
+			for (i = 0; i < nI; ++i)
+				if (best_x[i] == 2)
+					printf("%d ", col[i]);
+			putchar('\n');
+		}
+	}
+
+	return 0;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/Fst_column.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/Fst_column.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,132 @@
+/* Fst_column -- add an Fst column to a Galaxy table
+*
+*    argv[1] = a Galaxy SNP table. For each of several individuals, the table
+*              has four columns (#A, #B, genotype, quality).
+*    argv[2] = 1 if Fst is estimated from SAMtools genotypes; 0 means use
+*	        read-coverage data.
+*    argv[3] = lower bound for total number of reads per population
+*    argv[4] = lower bound for individual quality value
+*    argv[5] = 1 to retain SNPs that fail to satisfy the lower bound and set
+*	       Fst = -1; delete them if argv[5] = 0.
+*    argv[6] = 1 to discard SNPs that appear fixed in the two populations
+*    argv[7] = 1 for unbiased estimator, else 0 for the original Wright form.
+*    argv[8], argv[9], ...,  have the form "13:1" or "13:2", meaning that
+*             the 13th, 14th, and 15th columns (base 1) give the allele counts
+*             and genotype for an individual that is in population 1 or
+*	      population 2, respectively.
+
+What It Does on Galaxy
+
+The user specifies a SNP table and two "populations" of individuals, both previously defined using the Galaxy tool to select individuals from a SNP table. No individual can be in both populations. Other choices are as follows.
+
+Data source. The allele frequencies of a SNP in the two populations can be estimated either by the total number of reads of each allele, or by adding the frequencies inferred from genotypes of individuals in the populations.
+
+After specifying the data source, the user sets lower bounds on amount of data required at a SNP. For estimating the Fst using read counts, the bound is the minimum count of reads of the two alleles in a population. For estimations based on genotype, the bound is the minimum reported genotype quality per individual.
+
+The user specifies whether the SNPs that violate the lower bound should be ignored or the Fst set to -1.
+
+The user specifies whether SNPs where both populations appear to be fixed for the same allele should be retained or discarded.
+
+Finally, the user chooses which definition of Fst to use: Wright's original definition or Weir's unbiased estimator.
+
+A column is appended to the SNP table giving the Fst for each retained SNP.
+
+*/
+
+#include "lib.h"
+#include "Fst_lib.h"
+
+// most characters allowed in a row of the table
+#define MOST 5000
+
+// column and population for the relevant individuals/groups
+int col[MOST], pop[MOST];
+int nI;
+
+int main(int argc, char **argv) {
+	FILE *fp;
+	char *p, *z = "\t\n", buf[MOST], trash[MOST];
+	int X[MOST], min_cov, min_qual, retain, discard, unbiased, genotypes,
+	  n, i, g, A1, B1, A2, B2, saw[3], x1, y1, x2, y2;
+	double F;
+
+	if (argc < 8)	// argv[1], ..., argv[7] are all read below
+		fatal("args: table data-source min-cov min-qual retain? discard? unbiased? n:1 m:2 ...");
+	genotypes = atoi(argv[2]);
+	min_cov = atoi(argv[3]);
+	min_qual = atoi(argv[4]);
+	retain = atoi(argv[5]);
+	discard = atoi(argv[6]);
+	unbiased = atoi(argv[7]);
+	saw[1] = saw[2] = 0;
+	for (i = 8; i < argc; ++i, ++nI) {
+		if (sscanf(argv[i], "%d:%d", &(col[nI]), &(pop[nI])) != 2)
+			fatalf("not like 13:2 : %s", argv[i]);
+		if (col[nI] < 1 || col[nI]+3 >= MOST)	// X[col], ..., X[col+3] are read
+			fatalf("column out of range: %s", argv[i]);
+		if (pop[nI] < 1 || pop[nI] > 2)
+			fatalf("not population 1 or 2: %s", argv[i]);
+		saw[pop[nI]] = 1;
+		for (n = 0; n < nI && col[n] != col[nI]; ++n)	// seen before?
+			;
+		if (n < nI)
+			fatalf("individual at column %d is mentioned twice", col[n]);
+	}
+	if (saw[1] == 0)
+		fatal("population 1 is empty");
+	if (saw[2] == 0)
+		fatal("population 2 is empty");
+
+	fp = ckopen(argv[1], "r");
+	while (fgets(buf, MOST, fp)) {
+		if (buf[0] == '#')
+			continue;
+		strcpy(trash, buf);	// strtok() cuts up trash; buf is printed intact
+		// set X[i] = atoi(i-th word of trash), i is base 1
+		for (i = 1, p = strtok(trash, z); p != NULL;
+		  ++i, p = strtok(NULL, z))
+			X[i] = atoi(p);
+		for (i = A1 = B1 = A2 = B2 = x1 = y1 = x2 = y2 = 0;
+		     i < nI; ++i) {
+			n = col[i];
+			g = X[n+2];	// save genotype
+			if ((genotypes && g == -1) || X[n+3] < min_qual)
+				continue;
+			if (pop[i] == 1) {
+				// column n (base 1) corresponds to entry X[n]
+				x1 += X[n];
+				y1 += X[n+1];
+				if (genotypes) {
+					A1 += g;
+					B1 += (2 - g);
+				} else {
+					A1 += X[n];
+					B1 += X[n+1];
+				}
+			} else if (pop[i] == 2) {
+				x2 += X[n];
+				y2 += X[n+1];
+				if (genotypes) {
+					A2 += g;
+					B2 += (2 - g);
+				} else {
+					A2 += X[n];
+					B2 += X[n+1];
+				}
+			}
+		}
+		if (discard && ((A1 == 0 && A2 == 0) || (B1 == 0 && B2 == 0)))
+			continue; // not variable in the two populations
+		if (x1+y1 < min_cov || x2+y2 < min_cov || A1+B1 == 0 || A2+B2 == 0)
+			F = -1.0;	// too little data (an empty population would give 0/0)
+		else
+			F = Fst(A1, B1, A2, B2, unbiased);
+		if (F == -1.0 && !retain)
+			continue;
+		if ((p = strchr(buf, '\n')) != NULL)
+			*p = '\0';
+		printf("%s\t%5.4f\n", buf, F);
+	}
+	fclose(fp);
+	return 0;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/Fst_lib.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/Fst_lib.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,49 @@
+// procedure to compute either Wright's Fst or an unbiased estimator of it
+
+#include "lib.h"
+// Wright's Fst for allele frequencies f1 and f2 in the two populations;
+// returns 0 when the pooled population is fixed for either allele
+static double Wright(double f1, double f2) {
+	// frequency in the pooled population
+	const double pooled = (f1 + f2)/2.0;
+	double within, total;
+
+	if (pooled == 0.0 || pooled == 1.0)
+		return 0.0;
+	// HWE heterozygosity: averaged over the two populations, then pooled
+	within = f1*(1.0 - f1) + f2*(1.0 - f2);
+	total = 2.0*pooled*(1.0 - pooled);
+	return (total - within) / total;
+}
+
+/* unbiased estimator of Fst from:
+  Weir, B.S. and Cockerham, C.C. 1984. Estimating F-statistics for the
+  analysis of population structure. Evolution 38: 1358–1370.
+as interpreted by:
+  Akey, J.M., Zhang, G., Zhang, K., Jin, L., and Shriver, M.D. 2002.
+  Interrogating a high-density SNP map for signatures of natural
+  selection. Genome Res. 12: 1805–1814.
+  n1, n2 are the population sizes passed by Fst(); p1, p2 the frequencies.
+*/
+static double Weir(int n1, double p1, int n2, double p2) {
+	double F, p_bar, nc, MSP, MSG, N = (double)n1 + n2;
+
+	if (p1 == p2)
+		return 0.0;
+	MSG = (n1*p1*(1.0-p1) + n2*p2*(1.0-p2))/(N-1.0);
+	p_bar = (n1*p1 + n2*p2)/N;
+	MSP = n1*(p1-p_bar)*(p1-p_bar) + n2*(p2-p_bar)*(p2-p_bar);
+	// square in double: as int, n1*n1 + n2*n2 overflows for counts in the tens of thousands
+	nc = N - ((double)n1*n1 + (double)n2*n2)/N;
+	F = (MSP - MSG) / (MSP + (nc-1)*MSG);
+	return (F < 0.0 ? 0.0 : F);	// negative estimates are reported as 0
+}
+
+double Fst(int nA1, int na1, int nA2, int na2, int unbiased) {
+	const int tot1 = nA1 + na1, tot2 = nA2 + na2;	// alleles counted per population
+	const double freq1 = (double)nA1 / (double)tot1;
+	const double freq2 = (double)nA2 / (double)tot2;
+	if (unbiased)
+		return Weir(tot1, freq1, tot2, freq2);
+	return Wright(freq1, freq2);
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/Fst_lib.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/Fst_lib.h	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,8 @@
+/* return either Sewall Wright's Fst or its Weir unbiased estimator
+*  parameters are as follows
+*  1, 2 : counts of the two alleles in population 1
+*  3, 4 : counts of the two alleles in population 2
+*  5 : 0 = return Wright's formulation, 1 = return unbiased estimator
+*/
+
+double Fst(int, int, int, int, int);
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/Huang.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/Huang.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,44 @@
+// Find highest scoring intervals, as discussed in Huang.h.
+
+#include "lib.h"
+#include "Huang.h"
+
+void Huang(double x[], int n) {
+	double Score, oldScore;
+	int v, L, i;
+	// Fill R[1..top] from the scores x[0..n-1]; Score is the running total of x.
+	top = 0;	// don't use location 0, so as to follow Fig. 6
+	for (Score = 0.0, v = 0; v < n; ++v) {
+		oldScore = Score;
+		Score += x[v];
+		if (x[v] < 0)	// a negative score neither opens nor extends a region
+			continue;
+		if (top > 0 && R[top].Rpos == v-1) {
+			// add edge to top subpath
+			R[top].Rpos = v;
+			R[top].Rscore = Score;
+		} else {
+			// create a one-edge subpath
+			++top;
+			if (top >= MAX_R)
+				fatal("In Huang(), top is too big");
+			R[top].Lpos = v-1;
+			R[top].Lscore = oldScore;
+			R[top].Rpos = v;
+			R[top].Rscore = Score;
+			R[top].Lower = top-1;
+			while ((L = R[top].Lower) > 0 &&	// skip regions whose left total is higher
+			  R[L].Lscore > R[top].Lscore)
+				R[top].Lower = R[L].Lower;
+		}
+		// merge subpaths: region L absorbs everything through top
+		while (top > 1 && (L = R[top].Lower) > 0 &&
+		    R[L].Rscore <= R[top].Rscore) {
+			R[L].Rpos = R[top].Rpos;
+			R[L].Rscore = R[top].Rscore;
+			top = L;
+		}
+	}
+	for (i = 1; i <= top; ++i)	// total score of each surviving region
+		R[i].Score = R[i].Rscore - R[i].Lscore;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/Huang.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/Huang.h	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,20 @@
+/* Find intervals of highest total score, i.e., such that adding positions to
+*  either end will decrease the total. We use the method of Fig. 6 of the paper:
+*  Xiaoqiu Huang, Pavel Pevzner, Webb Miller (1994) Parametric recomputing in
+*  alignment graphs. Combinatorial Pattern Matching (Springer Lecture Notes in
+*  Computer Science, 807), 87-101.
+*
+*  The input scores are in x[0], x[1], ..., x[n-1], but the output regions
+*  are in R[1], R[2], ..., R[top]. R[i].Score is the total score of the i-th
+*  (in order of position) positive-scoring interval of x, which consists of
+*  x[R[i].Lpos + 1] to x[R[i].Rpos].
+*/
+#define MAX_R 5000000
+
+struct region {	// a consecutive (relative to the reference) run of SNPs
+	double Lscore, Rscore, Score;	// running totals of x at the left and right ends; Score = Rscore - Lscore
+	int Lpos, Rpos, Lower;	// the run is x[Lpos+1], ..., x[Rpos]; Lower is the index of an earlier region
+} R[MAX_R];	// Huang() fills R[1], ..., R[top]; R[0] is not used
+int top;	// NOTE(review): R and top are defined (not just declared) in this header -- confirm the link can merge the copies from each including .c file
+
+void Huang(double *x, int n);
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/Makefile	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,49 @@
+CC = gcc
+COPT = -O2
+CWARN = -W -Wall
+CFLAGS = $(COPT) $(CWARN) -fcommon # Huang.h defines R[] and top; GCC >= 10 defaults to -fno-common and rejects such duplicates at link time
+INSTALL_DIR = ../bin
+
+TARGETS = admix_prep coords2admix coverage dist_mat dpmix eval2pct \
+          Fst_ave Fst_column pop sweep
+
+all: $(TARGETS)
+
+install: $(TARGETS)
+	if [ ! -d "$(INSTALL_DIR)" ]; then mkdir -p "$(INSTALL_DIR)"; fi
+	cp $(TARGETS) $(INSTALL_DIR)
+
+admix_prep: admix_prep.c lib.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+coords2admix: coords2admix.c lib.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+coverage: coverage.c lib.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+dist_mat: dist_mat.c lib.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+dpmix: dpmix.c lib.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+eval2pct: eval2pct.c lib.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+Fst_ave: Fst_ave.c Fst_lib.c lib.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+Fst_column: Fst_column.c Fst_lib.c lib.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+pop: pop.c lib.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+sweep: sweep.c lib.c Huang.c
+	$(CC) $(CFLAGS) $^ -o $@
+
+.PHONY: all install clean
+# all and install name no file either: a stray file called "all" or "install" must not stop them
+clean:
+	rm -f $(TARGETS)
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/admix_prep.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/admix_prep.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,119 @@
+/* admix_prep -- prepare the ".ped" and ".map" files (PLINK format) for input to
+*  the "admixture" program.
+*
+*  argv[1] -- a Galaxy SNP table
+*  argv[2] -- required number of reads for each individual to use a SNP
+*  argv[3] -- required genotype quality for each individual to use a SNP
+*  argv[4] -- minimum spacing between SNPs on the same scaffold
+*  argv[k] for k > 4 have the form "13:fred", meaning that the 13th and 14th
+*    columns (base 0) give the allele counts for the individual or group named
+*    "fred".
+
+What it does on Galaxy
+The tool converts a SNP table into two tables, called "admix.map" and "admix.ped", needed for estimating the population structure. The user can read or download those files, or simply pass this tool's output on to other programs. The user imposes conditions on which SNPs to consider, such as the minimum coverage and/or quality value for every individual, or the distance to the closest SNP in the same contig (as named in the first column of the SNP table). A useful piece of information produced by the tool is the number of SNPs meeting those conditions, which can be found by clicking on the "eye" after the program runs.
+
+*/
+
+#include "lib.h"
+
+// bounds line length for a line of the Galaxy table
+#define MOST 5000
+struct individual {
+	int column;
+	char *name;
+} I[MOST/8]; // each individual has 4 columns and 4 tab characters
+int nI;	// number of individuals
+int X[MOST];	// integer values in a row of the SNP table
+
+// bounds the number of SNPs that can be kept
+#define MAX_KEEP 10000000
+char *S[MAX_KEEP];	// S[i] is a row of 2*nI alleles
+int nK;
+
+int main(int argc, char **argv) {
+	FILE *fp, *ped, *map;
+	char *p, *z = " \t\n", buf[MOST], trash[MOST], name[100], *s,
+	  scaf[100], prev_scaf[100];
+	int i, j, m, min_coverage, min_quality, min_space, nsnp, genotype,
+	   pos, prev_pos;
+
+	if (argc < 5)
+		fatal("args: Galaxy-table min-cov min-qual min-space 13:fred 16:mary ...");
+	min_coverage = atoi(argv[2]);
+	min_quality = atoi(argv[3]);
+	min_space = atoi(argv[4]);
+
+	for (i = 5; i < argc; ++i, ++nI) {
+		if (nI >= MOST/8)
+			fatal("Too many individuals");
+		if (sscanf(argv[i], "%d:%99s", &(I[nI].column), name) != 2)	// name[] holds 100 bytes
+			fatalf("bad arg: %s", argv[i]);
+		if (I[nI].column < 1 || I[nI].column+3 >= MOST)	// X[column..column+3] are read
+			fatalf("bad column: %s", argv[i]);
+		I[nI].name = copy_string(name);
+	}
+	map = ckopen("admix.map", "w");
+
+	fp = ckopen(argv[1], "r");
+	prev_scaf[0] = '\0';
+	prev_pos = 0;
+	for (nsnp = 0; fgets(buf, MOST, fp); ) {
+		if (buf[0] == '#')
+			continue;
+		++nsnp;
+		if (sscanf(buf, "%99s %d", scaf, &pos) != 2)	// scaf[] holds 100 bytes
+			fatalf("choke: %s", buf);
+		if (same_string(scaf, prev_scaf)) {
+			if (pos < prev_pos + min_space)
+				continue;
+		} else {
+			strcpy(prev_scaf, scaf);
+			prev_pos = -min_space;	// the first SNP of a scaffold always passes the spacing test
+		}
+
+		// X[i] = atoi(i-th word base-1)
+		strcpy(trash, buf);
+		for (i = 1, p = strtok(trash, z); p != NULL;
+		     ++i, p = strtok(NULL, z))
+			X[i] = atoi(p);
+		for (i = 0; i < nI; ++i) {
+			m = I[i].column;
+			if (X[m] + X[m+1] < min_coverage || X[m+3] < min_quality)
+				break;
+		}
+		if (i < nI)	// some individual fails the coverage or quality bound
+			continue;
+		prev_pos = pos;
+		if (nK >= MAX_KEEP)
+			fatal("Too many SNPs");
+		fprintf(map, "1 snp%d 0 %d\n", nsnp, nsnp+1);
+		s = S[nK++] = ckalloc(2*nI*sizeof(char));
+		for (i = j = 0; i < nI; ++i, j += 2) {
+			genotype = X[I[i].column+2];
+			if (genotype == 2)
+				s[j] = s[j+1] = '1';
+			else if (genotype == 0)
+				s[j] = s[j+1] = '2';
+			else if (genotype == 1) {
+				s[j] = '1';
+				s[j+1] = '2';
+			} else	// undefined genotype
+				s[j] = s[j+1] = '0';
+		}
+	}
+	fclose(fp);
+	fclose(map);
+
+	ped = ckopen("admix.ped", "w");
+	for (i = 0; i < nI; ++i) {
+		fprintf(ped, "%s 1 0 0 1 1", I[i].name);
+		for (j = 0; j < nK; ++j)
+			fprintf(ped, " %c %c", S[j][2*i], S[j][2*i+1]);
+		putc('\n', ped);
+	}
+
+	printf("Using %d of %d SNPs\n", nK, nsnp);
+	fclose(ped);
+
+	return 0;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/coords2admix.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/coords2admix.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,91 @@
+// coords2admix -- add projections onto chords to information about
+// coordinates in PCA plots
+
+#include "lib.h"
+
+#define MAX_POP 1000
+struct pop {
+	char *name;
+	float x, y;
+} P[MAX_POP];
+int nP;
+
+int main(int argc, char **argv) {
+	FILE *fp;
+	char buf[500], x[100], y[100], z[100], cur_pop[100];
+	int ncur, i, j, k;
+	float eig1, eig2, tot_x = 0.0, tot_y = 0.0, x1, y1, x2, y2, a, b, c, d;
+
+	if (argc == 1)
+		fp = stdin;
+	else if (argc == 2)
+		fp = ckopen(argv[1], "r");
+	else
+		fatal("optional arg: smartpca coordinates");
+
+	if (!fgets(buf, 500, fp))
+		fatal("empty set of coordinates");
+	if (sscanf(buf, "%99s %99s %99s", x, y, z) != 3 ||	// each buffer holds 100 bytes
+	    !same_string(x, "#eigvals:"))
+		fatalf("cannot find eigenvalues: %s", buf);
+	printf("%s", buf);
+	eig1 = atof(y);
+	eig2 = atof(z);
+
+	strcpy(cur_pop, "");
+	ncur = 0;
+	while (fgets(buf, 500, fp)) {
+		if (sscanf(buf, "%*s %99s %99s %99s", x, y, z) != 3)
+			fatalf("gag: %s", buf);
+		printf("%s", buf);
+		if (!same_string(cur_pop, z)) {
+			if (ncur > 0) {	// close the previous population: store its mean
+				if (nP >= MAX_POP)
+					fatal("Too many populations");
+				P[nP].name = copy_string(cur_pop);
+				P[nP].x = tot_x/ncur;
+				P[nP].y = tot_y/ncur;
+				++nP;
+			}
+			ncur = 1;
+			strcpy(cur_pop, z);
+			tot_x = atof(x);
+			tot_y = atof(y);
+		} else {
+			++ncur;
+			tot_x += atof(x);
+			tot_y += atof(y);
+		}
+	}
+	if (ncur > 0) {	// with no data rows there is no last population (and no 0/0 mean)
+		if (nP >= MAX_POP)
+			fatal("Too many populations");
+		P[nP].name = copy_string(cur_pop);
+		P[nP].x = tot_x/ncur;
+		P[nP].y = tot_y/ncur;
+		++nP;
+	}
+
+	// loop over pairs of populations
+	for (i = 0; i < nP; ++i) {
+		x1 = eig1*P[i].x;
+		y1 = eig2*P[i].y;
+		for (j = i+1; j < nP; ++j) {
+			printf("\nprojection along chord %s -> %s\n",
+			  P[i].name, P[j].name);
+			x2 = eig1*P[j].x;
+			y2 = eig2*P[j].y;
+			c = (x1-x2)*(x1-x2) + (y1-y2)*(y1-y2);	// squared chord length
+			for (k = 0; k < nP; ++k)
+				if (k != i && k != j) {
+					a = eig1*P[k].x;
+					b = eig2*P[k].y;
+					d = (x2-x1)*(a-x1) + (y2-y1)*(b-y1);
+					printf("  %s: %f\n", P[k].name, d/c);
+				}
+		}
+	}
+
+	return 0;
+}
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/coverage.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/coverage.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,155 @@
+/* coverage -- report distributions of SNP coverage or quality for individuals,
+*  or coverage for populations
+*
+*    argv[1] -- a Galaxy SNP table. For each individual, the table has four
+*		columns (count of each allele, genotype, quality).
+*    argv[2] -- 0 = sequence coverage, 1 = genotype quality
+*    argv[3] -- file name for the text version of output (input for producing
+*		the graphical summary goes to stdout)
+*    argv[4], argv[5], ...,  have the form "13:fred",  meaning that the 13th
+*		14th, and 16th columns (base 1) give the two allele counts
+*		and the quality for "fred", where "fred" can be the name of
+*		a population with several individuals (all named "fred")
+What it does on Galaxy
+The tool reports distributions of SNP reliability indicators for individuals or populations. The reliability can be measured by either the sequence coverage or the SAMtools quality value, though the notion of a population-level quality is not supported. Textual and graphical reports are generated, where the text output gives the cumulative distributions.
+*/
+
+#include "lib.h"
+
+// maximum length of a line from the table
+#define MOST 5000
+
+// the largest coverage or quality value being considered
+#define MAX_VAL 1000
+
+FILE *gp;	// for text output
+
+// a population is the set of all individuals with the same name
+// (perhaps just a single individual)
+struct pop {
+	int cov, n[MAX_VAL+1];
+	long long sum, tot;
+	char *name;
+} P[MOST/4];
+int nP;	// number of populations
+
+// maps column to population
+struct individual {
+	int col, pop;
+} I[MOST/4];
+int nI;
+
+/* Report the distribution for each population. P[i].n[k] is the number of SNPs
+*  of value (coverage or quality) k in population i, for k < MAX_VAL;
+*  P[i].n[MAX_VAL] is the number of SNPs of value k >= MAX_VAL.
+*  We print the percentages, p, of SNPs with value <= k, ending when all
+*  populations have reached a p >= 98% (0 is printed if there are no SNPs).
+*/
+void print_cov() {
+	int i, j, k, last_j;
+	long long sum, pct;
+
+	// find where to stop printing
+	for (last_j = i = 0; i < nP; ++i) {
+		for (sum = j = 0; j <= MAX_VAL; ++j)
+			sum += P[i].n[j];
+		P[i].tot = sum;
+		for (sum = j = 0; j <= MAX_VAL; ++j) {
+			sum += P[i].n[j];
+			if (sum >= 0.98*P[i].tot)
+				break;
+		}
+		last_j = MAX(last_j, j);
+	}
+
+	++last_j;
+	// print to stdout the output for graphing; not broken into short lines
+	for (j = 0; j < last_j; ++j)
+		printf("\t%3d", j);
+	putchar('\n');
+	for (i = 0; i < nP; ++i) {
+		printf("%s", P[i].name);
+		for (sum = j = 0; j < last_j; ++j) {
+			sum += P[i].n[j];
+			printf("\t%4.2f", P[i].tot > 0 ?
+			  100.0*(float)sum/(float)P[i].tot : 0.0);
+		}
+		putchar('\n');
+	}
+
+	// print a user-friendly version to the named file
+	// <= 20 numbers per row
+	for (j = 0; j < last_j; j += 20) {
+		fprintf(gp, "\n          ");
+		for (k = j; k < MIN(j+20, last_j); ++k)
+			fprintf(gp, "%3d", k);
+		for (i = 0; i < nP; ++i) {
+			fprintf(gp, "\n%10s", P[i].name);
+			for (k = j; k < MIN(j+20, last_j); ++k) {
+				P[i].sum += P[i].n[k];
+				pct = (P[i].tot > 0 ? 100*P[i].sum/P[i].tot : 0);	// tot is 0 for an empty table
+				fprintf(gp, "%3lld", MIN(99, pct));
+			}
+		}
+		fprintf(gp,"\n\n");
+	}
+}
+
+int main(int argc, char **argv) {
+	FILE *fp;
+	char buf[MOST], *z = " \t\n", *p;
+	int X[MOST], i, j, cov, m, quality, is_pop = 0;
+
+	if (argc < 5)
+		fatal("args: SNP-file quality-value? out-name 13:fred ... ");
+	quality = atoi(argv[2]);
+	gp = ckopen(argv[3], "w");
+	// record the individuals and populations
+	for (nI = 0, i = 4; i < argc; ++i, ++nI) {
+		if (nI >= MOST/4)	// I[] and P[] have MOST/4 entries
+			fatal("Too many individuals");
+		// allow spaces in names
+		if ((p = strchr(argv[i], ':')) == NULL)
+			fatalf("no colon: %s", argv[i]);
+		I[nI].col = atoi(argv[i]);
+		if (I[nI].col < 1 || I[nI].col+3 >= MOST)	// X[col..col+3] are read
+			fatalf("bad column: %s", argv[i]);
+		for (j = 0; j < nP && !same_string(p+1, P[j].name); ++j)
+			;
+		if (j == nP)	// new population
+			P[nP++].name = copy_string(p+1);
+		else	// name seen before, so it labels several individuals
+			is_pop = 1;
+		I[nI].pop = j;
+	}
+	if (is_pop && quality)
+		fatal("quality values for a population are not supported.");
+
+	// Count the SNPs of value 0, 1, ..., MAX_VAL-1, or >= MAX_VAL per population.
+	fp = ckopen(argv[1], "r");
+	while (fgets(buf, MOST, fp)) {
+		if (buf[0] == '#')
+			continue;
+		// P[i].cov is the total coverage for all individuals in pop i
+		for (i = 0; i < nP; ++i)
+			P[i].cov = 0;
+		// X[i] = atoi(i-th word base-1)
+		for (i = 1, p = strtok(buf, z); p != NULL;
+		     ++i, p = strtok(NULL, z))
+			X[i] = atoi(p);
+		for (i = 0; i < nI; ++i) {
+			m = I[i].col;
+			if (quality)
+				cov = X[m+3];
+			else
+				cov = X[m] + X[m+1];
+			P[I[i].pop].cov += cov;
+		}
+		for (i = 0; i < nP; ++i)	// a negative total is counted in bin 0
+			P[i].n[MAX(0, MIN(P[i].cov, MAX_VAL))]++;
+	}
+	// Print the distributions.
+	print_cov();
+
+	return 0;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/dist_mat.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/dist_mat.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,196 @@
+/* dist_mat -- create a distance matrix in PHYLIP format for pairs of
+*  specified individuals, including by default the reference sequence
+*
+*  argv[1] -- a Galaxy SNP table
+*  argv[2] -- min coverage
+*  argv[3] -- min quality
+*  argv[4] -- name of reference species (or "none")
+*  argv[5] -- 0 = distance from coverage; 1 = distance from genotype
+*  argv[6] -- name of file for the numbers of informative SNPs
+*  argv[7] -- name of file to write the Mega-format distance matrix
+*  argv[k] for k > 7 have the form "13:fred", meaning that the 13th and 14th
+*    columns (base 1) give the allele counts for the individual or group named
+*    "fred".
+
+What it does on Galaxy
+This tool uses the selected SNP table to determine a "genetic distance" between each pair of selected individuals; the table of pairwise distances can be used by the Neighbor-Joining methods to construct a tree that depicts how the individuals are related. For a given pair of individuals, we find all SNP positions where both individuals have at least a minimum number of sequence "reads"; the individuals' distance at that SNP is defined as the absolute value of the difference in the frequency of the first allele (equivalently: the second allele). For instance, if the first individual has 5 reads of each allele and the second individual has respectively 3 and 6 reads, then the frequencies are 1/2 and 1/3, giving a distance 1/6 at that SNP (provided that the minimum read total is at most 9). The output includes a report of the numbers of SNPs passing that threshold for each pair of individuals.
+
+*/
+
+#include "lib.h"
+
+// bounds line length for a line of the Galaxy table
+
+#define MOST 5000
+#define MIN_SNPS 3
+
+struct argument {
+	int column;	// base-1 index of the individual's first column
+	char *name;	// label used in the output matrices
+} A[MOST];
+int nA;	// entries used in A[]; slot 0 is the reference species when one is named
+
+#define MOST_INDIVIDUALS 100
+#define SIZ (1+MOST_INDIVIDUALS) // includes the reference; parenthesized so it is safe in any expression
+
+double tot_diff[SIZ][SIZ];	// [i][j], i < j: summed per-SNP differences
+int ndiff[SIZ][SIZ], X[MOST];	// ndiff: SNPs counted per pair; X: fields of the current row
+
+int main(int argc, char **argv) {
+	FILE *fp, *gp, *mega;
+	char *p, *z = "\t\n", buf[MOST], name[100], B[100], C[100], D[100],
+	  *nucs = "ACGT";
+	int i, j, m, n, min_coverage, too_few, ref_allele = -1, has_ref,
+	  min_quality, genotype;
+	double fi, fj, dist;
+
+	if (argc < 8)
+		fatal("args: Galaxy-table min-cov min-qual ref-name genotype dist-out mega-out 13:fred 16:mary ...");
+	min_coverage = atoi(argv[2]);
+	min_quality = atoi(argv[3]);
+	if (min_coverage <= 0 && min_quality <= 0)
+		fatal("coverage and/or quality of SNPs should be constrained");
+
+	if (same_string(argv[4], "none"))
+		has_ref = 0;
+	else {
+		has_ref = 1;
+		A[0].name = copy_string(argv[4]);
+	}
+	genotype = atoi(argv[5]);
+	gp = ckopen(argv[6], "w");
+	mega = ckopen(argv[7], "w");
+	fprintf(mega, "#mega\n!Title: Galaxy;\n");
+	for (nA = has_ref, i = 8; i < argc; ++i, ++nA) {
+		if (nA >= SIZ)
+			fatal("Too many individuals");
+		// %99s bounds the name; columns m..m+3 must be valid X[] indices
+		if (sscanf(argv[i], "%d:%99s", &(A[nA].column), name) != 2 ||
+		    A[nA].column < 1 || A[nA].column + 3 >= MOST)
+			fatalf("bad arg: %s", argv[i]);
+		A[nA].name = copy_string(name);
+	}
+	fprintf(mega,
+	  "!Format DataType=Distance DataFormat=LowerLeft NTaxa=%d;\n\n",
+	  nA);
+	for (i = 0; i < nA; ++i)
+		fprintf(mega, "[%d] #%s\n", i+1, A[i].name);
+	fprintf(mega, "\n\n\n[");
+	for (i = 1; i <= nA; ++i)
+		fprintf(mega, "%4d", i);
+	fprintf(mega, " ]\n");
+	fp = ckopen(argv[1], "r");
+	while (fgets(buf, MOST, fp)) {
+		if (buf[0] == '#')
+			continue;
+		if (has_ref) {
+			// get the reference allele (fields 3, 4 and 8 of the row)
+			if (sscanf(buf, "%*s %*s %99s %99s %*s %*s %*s %99s", B, C, D)
+			    != 3)
+				fatalf("3 fields: %s", buf);
+			if (strchr(nucs, B[0]) == NULL ||
+			    strchr(nucs, C[0]) == NULL)
+				fatalf("not nucs : %s %s", B, C);
+			if (D[0] == B[0])
+				ref_allele = 1;
+			else if (D[0] == C[0])
+				ref_allele = 2;
+			else if (strchr(nucs, D[0]) != NULL)
+				ref_allele = 3;
+			else {
+				if (D[0] != '-' && D[0] != 'N')
+					fatalf("what is this: %s", D);
+				ref_allele = -1;
+			}
+		}
+
+		// X[i] = atoi(i-th word base-1)
+		for (i = 1, p = strtok(buf, z); p != NULL;
+		     ++i, p = strtok(NULL, z))
+			X[i] = atoi(p);
+		for (i = has_ref; i < nA; ++i) {
+			m = A[i].column;
+			if (X[m] + X[m+1] < min_coverage ||
+			    X[m+3] < min_quality ||
+			    (!genotype && X[m] + X[m+1] <= 0))
+				continue;	// too little data, or a 0/0 frequency
+			// frequency of the second allele
+			if (genotype) {
+				if (X[m+2] == -1)
+					continue;	// no genotype
+				fi = (double)X[m+2];
+			} else
+				fi = (double)X[m+1] / (double)(X[m]+X[m+1]);
+			if (has_ref && ref_allele > 0) {
+				ndiff[0][i]++;
+				// reference allele might be different from both
+				if (ref_allele == 1)
+					tot_diff[0][i] += fi;
+				else if (ref_allele == 2)
+					tot_diff[0][i] += (1.0 - fi);
+				else
+					tot_diff[0][i] += 1.0;
+			}
+			for (j = i+1; j < nA; ++j) {
+				n = A[j].column;
+				if (X[n] + X[n+1] < min_coverage ||
+				   X[n+3] < min_quality)
+					continue;
+				if (genotype ? X[n+2] == -1 : X[n] + X[n+1] <= 0)
+					continue;	// no genotype / no reads
+				ndiff[i][j]++;
+				if (genotype)
+					fj = (double)X[n+2];
+				else
+					fj = (double)X[n+1] /
+					     (double)(X[n] + X[n+1]);
+				fj -= fi;
+				// add abs. value of difference in frequencies
+				tot_diff[i][j] += (fj >= 0.0 ? fj : -fj);
+			}
+
+		}
+	}
+	for (i = too_few = 0; i < nA; ++i)
+		for (j = i+1; j < nA; ++j)
+			if (ndiff[i][j] < MIN_SNPS) {
+				too_few = 1;
+				fprintf(stderr,
+				  "%s and %s have only %d informative SNPs\n",
+				  A[i].name, A[j].name, ndiff[i][j]);
+			}
+	if (too_few)
+		fatal("remove individuals or relax constraints");
+	fclose(fp);	// done with the SNP table
+	// print distances (every ndiff is >= MIN_SNPS here, so no division by 0)
+	printf("%d\n", nA);
+	for (i = 0; i < nA; ++i) {
+		printf("%9s", A[i].name);
+		fprintf(mega, "[%d] ", i+1);
+		for (j = 0; j < i; ++j) {
+			dist = tot_diff[j][i]/(double)ndiff[j][i];
+			printf(" %6.4f", dist);
+			fprintf(mega, " %6.4f", dist);
+		}
+		fprintf(mega, "  \n");
+		printf(" 0.0000");
+		for (j = i+1; j < nA; ++j)
+			printf(" %6.4f",
+			  tot_diff[i][j]/(double)ndiff[i][j]);
+		putchar('\n');
+	}
+	fprintf(mega, "\n\n\n\n\n");
+	fclose(mega);
+
+	// print numbers of SNPs
+	for (i = 0; i < nA; ++i) {
+		fprintf(gp, "%9s", A[i].name);
+		for (j = 0; j < i; ++j)
+			fprintf(gp, " %8d", ndiff[j][i]);
+		fprintf(gp, "        0");
+		for (j = i+1; j < nA; ++j)
+			fprintf(gp," %8d", ndiff[i][j]);
+		putc('\n', gp);
+	}
+	fclose(gp);
+	return 0;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/dpmix.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/dpmix.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,510 @@
+/* dpmix -- admixture using dynamic programming
+*
+*    argv[1] = a Galaxy SNP table. For each of several individuals, the table
+*              has four columns (#A, #B, genotype, quality) -- SNPs on the same
+*	       chromosome must appear together, and in order of position
+*    argv[2] = column with the chromosome name (position is the next column)
+*    argv[3] = "all" or e.g., "chr20"
+*    argv[4] = 1 if ancestral allele frequencies are estimated from SAMtools
+*		genotypes; 0 means use read-coverage data.
+*    argv[5] = 1 to add logarithms of probabilities, allowing unobserved alleles,
+*	       0 to simply add probabilities
+*    argv[6] = switch penalty (>= 0)
+*    argv[7] = file giving heterochromatic intervals ('-' means that no file is
+*	       given)
+*    argv[8] = file name for additional output
+*    argv[9], argv[10], ...,  have the form "13:1:Peter", "13:2:Paul" or
+*	      "13:0:Mary", meaning that the 13th and 14th columns (base 1)
+*	      give the allele counts for an individual that is in ancestral
+*	      population 1, ancestral population 2, or is a potentially admixed
+*	      individual, resp.
+
+What it does on Galaxy
+The user specifies two "ancestral" populations (i.e., sources for chromosomes) and a set of potentially admixed individuals, and chooses between the sequence coverage or the estimated genotypes to measure the similarity of genomic intervals in admixed individuals to the two classes of ancestral chromosomes. The user also picks a "switch penalty", typically between 10 and 100. For each potentially admixed individual, the program divides the genome into three "genotypes": (0) homozygous for the second ancestral population (i.e., both chromosomes from that population), (1) heterozygous, or (2) homozygous for the first ancestral population. Parts of a reference chromosome that are labeled as "heterochromatic" are given the non-genotype, 3. Smaller values of the switch penalty (corresponding to more ancient admixture events) generally lead to the reconstruction of more frequent changes between genotypes.
+*/
+
+#include "lib.h"
+//#include <math.h>
+
+// maximum length of a line from the table
+#define MOST 5000
+
+// we create a linked list of "events" on a chromosome -- mostly SNPs, but
+// also ends of heterochromatic intervals
+struct snp {
+	double F1, F2;	// reference allele frequencies in the two populations
+	int pos, *g,	// position and an array of admixed genotypes (nG entries)
+	  type;		// 0 = SNP, 1 = start of het. interval, 2 = end
+	struct snp *prev;	// we keep the list in order of decreasing pos
+} *last;	// most recently added event, i.e. the head of the list
+
+// array of potentially admixed individuals
+struct admixed {
+	char *name;
+	int gcol, ge20, gt02;	// genotype column; SNP counts: score(state 2) >= / < score(state 0)
+	long long x[4];		// number of reference bp in each state
+} A[MOST];
+
+// information about "ancestral" individuals, namely column and population
+struct ances {
+	int col, pop;	// first column of the individual; population 1 or 2
+	char *name;
+} C[MOST];
+
+// heterochromatic intervals
+struct het {
+	char *chr;	// chromosome name
+	int b, e;	// the two positions read from the interval file, in file order
+} H[MOST];
+
+// global variables
+int *B[4],	// backpointer to state at the previous SNP (or event)
+    *P;		// chromosome position
+int nH, nI, nG, genotypes, nsnp, debug, chr_col, logs;	// nH/nI/nG: entries used in H[]/C[]/A[]
+char this_chr[100];	// name of the chromosome being processed
+double switch_penalty;
+char buf[MOST], *status;	// current table line; result of the last fgets on the table
+FILE *fp, *out;
+
+// probability of producing genotype g in admixture state s
+// given reference allele frequencies f1 and f2 in the ancestral populations
+double score (double f1, double f2, int g, int s) {
+	double p;
+
+	// Each state fixes which population supplies each of the two
+	// chromosomes; g is compared with 2 and 0, and any other value is
+	// treated as the heterozygous genotype.
+	switch (s) {
+	case 2:	// homozygous for the first ancestral population
+		p = (g == 2) ? f1*f1 :
+		    (g == 0) ? (1.0-f1)*(1.0-f1) :
+		    2.0*f1*(1.0-f1);
+		break;
+	case 0:	// homozygous for the second ancestral population
+		p = (g == 2) ? f2*f2 :
+		    (g == 0) ? (1.0-f2)*(1.0-f2) :
+		    2.0*f2*(1.0-f2);
+		break;
+	case 1:	// one chromosome from each ancestral population
+		p = (g == 2) ? f1*f2 :
+		    (g == 0) ? (1.0-f1)*(1.0-f2) :
+		    f1*(1.0-f2)  + (1.0-f1)*f2;
+		break;
+	default:
+		fatalf("bad state %d", s);
+	}
+
+	// guards against frequencies outside [0,1]
+	if (p < 0.0)
+		fatalf("%f %f %d %d => %f", f1, f2, g, s, p);
+	if (!logs)
+		return p;
+	// the log-space variant below is compiled out unless NEVER is defined
+#ifdef NEVER
+	if (p == 0.0)
+		return -5.0;
+	p = log(p);
+	if (p < -5.0)
+		p = -5.0;
+	return p;
+#endif
+	fatal("dpmix: cannot happen");
+}
+
+char *get_chr_name() {
+	static char tmp[MOST];	// scratch copy, so that buf itself is not cut up
+	char *field, *delims = "\t\n";
+	int remaining;
+
+	strcpy(tmp, buf);
+	field = strtok(tmp, delims);
+	// step forward to the chr_col-th field (base 1)
+	for (remaining = chr_col - 1; remaining > 0; --remaining)
+		field = strtok(NULL, delims);
+	return field;
+}
+
+/* Process the a-th potentially admixed individual.
+*  We think of a graph with nodes (event, state) for each event (SNP or
+*  end-point of a heterochromatic interval on the current chromosome) and state
+*  = 0, 1, 2, 3 (corresponding to genotypes 0, 1, and 2, plus 3 =
+*  heterochromatin); for events other than the last one, there are edges from
+*  each (event, state) to (event+1, k) for 0 <= k <= 3. An edge (event, j) to
+*  (event+1, k) has penalty 0 if j = k and penalty switch_penalty otherwise.
+*  The bonus at SNP node (event, state) for 0 <= state <= 2 is the probability
+*  of generating the genotype observed in the a-th potentially admixed
+*  individual given the allele frequencies in the two ancestral populations and
+*  the assumed admixture state in this region of the chromosome. The score of a
+*  path is the sum of the node bonuses minus the sum of the edge penalties.
+*
+*  Working backwards through the events, we compute the maximum path score,
+*  from[state], from (event,state) back to the closest admixed interval.
+*  To force paths to reach state 3 at an event signalling the start of a
+*  heterochromatic interval (type = 1), but to avoid state 3 at other events,
+*  we assign huge but arbitrary negative scores (see "avoid", below).
+*  At (event,state), B[state][event] is the backpointer to the state at
+*  event+1 on an optimal path. Finally, we follow backpointers to partition
+*  the chromosome into admixture states.
+*/
+void one_admix(int a) {
+	int i, j, m, state, prev_pos, b;
+	double from[4], f[4], ff[4], avoid = -1000000.0;
+	struct snp *p;
+
+	// from[i] = highest score of a path from the current event
+	// (usually a SNP) to the next (to the right) heterochromatic interval
+	// or the end of the chromosome. The score of the path is the sum of
+	// SNP scores minus (switch_penalty times number of state switches).
+	// We assume that the last two events on the chromosome are the start
+	// and end of a heterochromatic interval (possibly of length 0).
+	for (i = 0; i < 4; ++i)
+		from[i] = 0;
+	for (i = nsnp-1, p = last; i >= 0 && p != NULL; --i, p = p->prev) {
+		for (state = 0; state < 4; ++state) {
+			// find highest path-score from this event onward
+			for (m = j = 0; j < 4; ++j) {
+				f[j] = from[j];
+				if (j != state)
+					f[j] -= switch_penalty;
+				//if (abs(j-state) == 2)
+					//from[j] -= switch_penalty;
+				if (f[j] > f[m])
+					m = j;
+			}
+			B[state][i] = m;	// best state to be in at event i+1
+			ff[state] = f[m];
+			if (state < 3 && p->type == 0)
+				ff[state] +=
+				    score(p->F1, p->F2, p->g[a], state);
+		}
+		if (p->type == 1) {
+			// start of heterochromatic interval. Force paths
+			// reaching this point to go through state 3
+			from[3] = 0;
+			from[0] = from[1] = from[2] = avoid;
+		} else {
+			for (j = 0; j < 3; ++j)
+				from[j] = ff[j];
+			from[3] = avoid;
+		}
+		if (debug)
+			fprintf(stderr, "%d: %f(%d) %f(%d) %f(%d) %f(%d)\n",
+			  i, from[0], B[0][i], from[1], B[1][i], from[2],
+			  B[2][i], from[3], B[3][i]);
+	}
+
+	// find the best initial state
+	for (state = 0, j = 1; j < 4; ++j)
+		if (from[j] > from[state])
+			state = j;
+
+	// trace back to find the switch points
+	// A[a].x[state] records the total length of intervals in each state
+	// Stop before the last event: P[] has nsnp entries, so P[i+1] there
+	for (prev_pos = i = 0; i+1 < nsnp; ++i) {	// would be out of bounds
+		if ((b = B[state][i]) != state) {
+			if (prev_pos < P[i+1]-1)
+				printf("%s\t%d\t%d\t%d\t%s\n",
+				  this_chr, prev_pos, P[i+1], state, A[a].name);
+			A[a].x[state] += (P[i+1]-prev_pos);
+			prev_pos = P[i+1];
+			state = b;
+		}
+	}
+
+// Add a heterochromatic interval to the SNP list, where type = 1 signifies
+// the start of the interval, 2 signifies the end.
+void add_het(int b, int type) {
+	struct snp *event = ckalloc(sizeof(struct snp));
+	int k = 0;
+
+	event->g = ckalloc(nG*sizeof(int));
+	while (k < nG)		// pseudo-SNPs carry genotype 0 for everyone
+		event->g[k++] = 0;
+	event->type = type;
+	event->pos = b;
+	event->F2 = event->F1 = 0.0;
+	event->prev = last;	// push onto the head of the event list
+	last = event;
+}
+
+/* Process one chromosome. Read the SNPs on the chromosome (the first one is
+*  already in the buf). Boil each SNP down to the contents of a SNP entry
+*  (pos, F1, F2, g[]) and put it in the linked list. Also, intersperse the
+*  "events" corresponding to the start and end of a heterochromatic interval.
+*  Then call the dynamic-programming routine for each potentially admixed
+*  individual.
+*/
+void one_chr() {
+	char *s, *z = "\t\n";
+	int X[MOST], n, i, g, A1, B1, A2, B2, a, do_read, p, pos, het;
+	struct snp *new;
+	double F1, F2;
+
+	strcpy(this_chr, get_chr_name());	// chromosome of the line now in buf
+	nsnp = 0;
+	last = NULL;
+	// advance to this chromosome in the list of heterochromatic intervals
+	for (het = 0; het < nH && !same_string(this_chr, H[het].chr); ++het)
+		;
+	// loop over the SNPs on the current chromosome; the first pass uses
+	for (do_read = 0; ; do_read = 1) {	// the line that is already in buf
+		if (do_read && (status = fgets(buf, MOST, fp)) == NULL)
+			break; 
+		if (!same_string(get_chr_name(), this_chr))
+			break;	// buf keeps the first line of the next chromosome
+		
+		// set X[i] = atoi(i-th word of buf), i is base 1
+		for (i = 1, s = strtok(buf, z); s != NULL;
+		  ++i, s = strtok(NULL, z))
+			X[i] = atoi(s);
+
+		// insert events (pseudo-SNPs) for heterochromatin intervals
+		// coming before the SNP
+		pos = X[chr_col+1];
+		while (het < nH && same_string(this_chr, H[het].chr) &&
+		   H[het].b < pos) {
+			add_het(H[het].b, 1);
+			add_het(H[het].e, 2);
+			nsnp+= 2;
+			++het;
+		}
+			
+		// should we discard this SNP?
+		if (pos == -1)	// SNP not mapped to the reference
+			continue;
+		for (i = 0; i < nG && X[A[i].gcol] >= 0; ++i)
+			;
+		if (i < nG)	// genotype of admixed individual not called
+			continue;
+
+		// add SNP to a "backward pointing" linked list, recording the
+		// major allele frequencies in the two reference populations
+		// and genotypes in the potential admixed individuals
+		for (i = A1 = B1 = A2 = B2 = 0; i < nI; ++i) {
+			n = C[i].col;
+			p = C[i].pop;
+			if (genotypes) {
+				g = X[n+2];
+				if (g == -1)
+					continue;
+				if (g < 0 || g > 2)
+					fatalf("invalid genotype %d", g);
+				if (p == 1) {
+					A1 += g;
+					B1 += (2 - g);
+				} else if (p == 2) {
+					A2 += g;
+					B2 += (2 - g);
+				}
+			} else {	// use read counts
+				if (p == 1) {
+					A1 += X[n];
+					B1 += X[n+1];
+				} else if (p == 2) {
+					A2 += X[n];
+					B2 += X[n+1];
+				}
+			}
+		}
+		if (A1+B1 == 0 || A2+B2 == 0)	// no data for one population
+			continue;
+		++nsnp;
+		new = ckalloc(sizeof(struct snp));
+		new->pos = X[chr_col+1];
+		new->F1 = F1 = (double)A1/(double)(A1+B1);
+		new->F2 = F2 = (double)A2/(double)(A2+B2);
+		new->type = 0;
+		new->g = ckalloc(nG*sizeof(int));
+		for (i = 0; i < nG; ++i) {
+			g = new->g[i] = X[A[i].gcol];
+			if (score(F1, F2, g, 2) >= score(F1, F2, g, 0))
+				A[i].ge20++;
+			else 
+				A[i].gt02++;
+		}
+		if (F1 < 0.0 || F1 > 1.0)
+			fatalf("F1 = %f (A1 = %d, B1 = %d) at snp %d",
+			  F1, A1, B1, nsnp);
+		if (F2 < 0.0 || F2 > 1.0)
+			fatalf("F2 = %f (A2 = %d, B2 = %d) at snp %d",
+			  F2, A2, B2, nsnp);
+		new->prev = last;
+		last = new;
+	}
+	// insert heterochromatin intervals that follow all SNPs
+	while (het < nH && same_string(this_chr, H[het].chr)) {
+		add_het(H[het].b, 1);
+		add_het(H[het].e, 2);
+		nsnp += 2;
+		++het;
+	}
+/*
+printf("nsnp = %d\n", nsnp);
+for (i = nsnp-1, new = last; i >= 0 && new != NULL; --i, new = new->prev) {
+printf("%d %d ", new->pos, new->type);
+printf("%g %g ", new->F1, new->F2);
+for (a = 0; a < nG; ++a)
+printf("%d", new->g[a]);
+putchar('\n');
+}
+//exit(0);
+printf("\nbacktrace\n");
+*/
+
+	// allocate arrays for the DP analysis
+	P = ckalloc(nsnp*sizeof(int));	// position of each event
+	for (i = nsnp-1, new = last; i >= 0 && new != NULL;
+	     --i, new = new->prev)
+		P[i] = new->pos;	// the list runs backward, so fill from the end
+
+	for (i = 0; i < 4; ++i) {	// space for back-pointers
+		B[i] = ckalloc((nsnp+1)*sizeof(int));
+		B[i][nsnp] = 0;
+	}
+	
+	// loop over possibly admixed individuals
+	for (a = 0; a < nG; ++a)
+		one_admix(a);
+
+	// free the allocated storage
+	while (last != NULL) {
+		new = last;
+		last = last->prev;
+		free(new->g);
+		free(new);
+	}
+	free(P);
+	for (i = 0; i < 4; ++i)
+		free(B[i]);
+}
+
+int main(int argc, char **argv) {
+	int n, i, j, k, saw[3];
+	long long het_len, ref_len;
+	float N;
+	char nam[100], *chr;
+
+	if (argc < 9)
+		fatal("args: table chr-col chr data-source logs switch heterochrom outfile n:1:name1 m:2:name2 ...");
+	if (same_string(argv[argc-1], "debug")) {
+		debug = 1;
+		--argc;
+	}
+
+	// handle command-line arguments
+	chr_col = atoi(argv[2]);
+	chr = argv[3];
+	genotypes = atoi(argv[4]);
+
+	logs = atoi(argv[5]);
+	if (logs)
+		fatal("logarithms of probabilities -- under development");
+	//if (logs) switch_penalty = log(switch_penalty);
+
+	switch_penalty = atof(argv[6]);
+	if (switch_penalty < 0.0)
+		fatal("negative switch penalty");
+	out = ckopen(argv[8], "w");
+
+	het_len = ref_len = 0;
+	if (!same_string(argv[7], "-")) {
+		fp = ckopen(argv[7], "r");
+		while (fgets(buf, MOST, fp)) {
+			if (nH >= MOST)
+				fatal("Too many heterochromatic intervals");
+			if (sscanf(buf, "%99s %d %d", nam, &i, &j) != 3)
+				fatalf("gagging: %s", buf);
+			H[nH].chr = copy_string(nam);
+			H[nH].b = i;
+			H[nH].e = j;
+			// assumes last event per chrom. is a het. interval, so
+			if (nH > 0 && !same_string(nam, H[nH-1].chr))
+				ref_len += H[nH-1].e;	// the previous chrom. ends here
+			het_len += (j - i);
+			++nH;
+		}
+		fclose(fp);
+	}
+	if (nH > 0)	// no intervals at all when argv[7] is "-"
+		ref_len += H[nH-1].e;
+	// populations must be disjoint
+	saw[0] = saw[1] = saw[2] = 0;
+	for (i = 9; i < argc; ++i) {
+		if (sscanf(argv[i], "%d:%d:%99s", &j, &k, nam) != 3)
+			fatalf("not like 13:2:fred : %s", argv[i]);
+		if (k < 0 || k > 2)
+			fatalf("not population 0, 1 or 2: %s", argv[i]);
+		saw[k] = 1;
+
+		// seen this individual (i.e., column) before??
+		for (n = 0; n < nI && C[n].col != j; ++n)
+			;
+		if (n < nI)
+			fatal("populations are not disjoint");
+		if (k == 0) {	// admixed individual
+			if (nG >= MOST)
+				fatal("Too many admixed individuals");
+			A[nG].name = copy_string(nam);
+			A[nG++].gcol = j+2;	// genotype is the third column
+		} else {	// in an ancestral population
+			if (nI >= MOST)
+				fatal("Too many ancestral individuals");
+			C[nI].col = j;
+			C[nI].pop = k;
+			C[nI++].name = copy_string(nam);
+		}
+	}
+	if (saw[0] == 0)
+		fatal("no admixed individual is specified");
+	if (saw[1] == 0)
+		fatal("first reference population is empty");
+	if (saw[2] == 0)
+		fatal("second reference population is empty");
+
+	// start the output file of text
+	for (k = 1; k <= 2; ++k) {
+		fprintf(out, "state %d agrees with:", k == 1 ? 2 : 0);
+		for (i = 0; i < nI; ++i)
+			if (C[i].pop == k)
+				fprintf(out, " %s", C[i].name);
+		putc('\n', out);
+	}
+	putc('\n', out);
+
+	fp = ckopen(argv[1], "r");
+	while ((status = fgets(buf, MOST, fp)) != NULL && buf[0] == '#')
+		;
+	if (same_string(chr, "all"))
+		while (status != NULL)
+			one_chr();
+	else {	// skip to the specified chromosome
+		// test status first: buf holds no data line once fgets has failed
+		while (status != NULL && !same_string(chr, get_chr_name()))
+			status = fgets(buf, MOST, fp);
+		if (status != NULL)
+			one_chr();
+	}
+	for (i = 0; i < nG; ++i) {
+		fprintf(out,
+		  "%s: %d SNPs where state 2 is at least as likely as state 0\n",
+		  A[i].name, A[i].ge20);
+		fprintf(out,
+		  "%s: %d SNPs where state 0 is more likely than state 2\n\n",
+		  A[i].name, A[i].gt02);
+	}
+	fclose(fp);	// done with the SNP table
+	// write fractions in each state to the output text file
+	if (ref_len)
+		fprintf(out,
+		  "%lld of %lld reference bp (%1.1f%%) are heterochromatin\n\n",
+		  het_len, ref_len, 100.0*(float)het_len/(float)ref_len);
+
+	for (i = 0; i < nG; ++i) {
+		N = (float)(A[i].x[0] + A[i].x[1] + A[i].x[2])/100.0;
+		fprintf(out, "%s: 0 = %1.1f%%, 1 = %1.1f%%, 2 = %1.1f%%\n",
+		  A[i].name, (float)A[i].x[0]/N, (float)A[i].x[1]/N,
+		  (float)A[i].x[2]/N);
+	}
+	fclose(out);
+	return 0;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/eval2pct.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/eval2pct.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,26 @@
+#include "lib.h"
+
+#define MAX_EVAL 1000	// capacity of E[]
+
+float E[MAX_EVAL];	// values read from the input, one per line
+int nE;			// number of entries of E[] in use
+
+int main (int argc, char **argv) {
+	FILE *in = (argc == 1) ? stdin : ckopen(argv[1], "r");
+	char line[500];
+	float sum = 0.0;
+	int k;
+
+	// one eigenvalue per input line
+	while (fgets(line, 500, in)) {
+		if (nE >= MAX_EVAL)
+			fatal("Too many eigenvalues");
+		E[nE++] = atof(line);
+	}
+	for (k = 0; k < nE; ++k)
+		sum += E[k];
+	printf("Percentage explained by eigenvectors:\n");
+	for (k = 0; k < nE && E[k] > 0.0; ++k)	// stop at the first value <= 0
+		printf("%d: %1.1f%%\n", k+1, 100.0*(float)E[k]/sum);
+	return 0;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/lib.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/lib.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,71 @@
+// lib.c -- a little library of C procudures
+
+#include "lib.h"
+
+char *argv0;
+
+/* print_argv0 ------------- write "<program>: " to stderr when argv0 is set */
+void print_argv0(void)
+{
+	const char *slash = argv0 ? strrchr(argv0, '/') : NULL;
+
+	if (argv0 != NULL)	/* show only the part after the last '/' */
+		(void)fprintf(stderr, "%s: ", slash ? slash + 1 : argv0);
+}
+
+/* fatal ------------------------- print msg through fatalf, which then exits */
+void fatal(const char *msg)
+{
+	fatalf("%s", msg);	/* "%s" keeps any '%' in msg from acting as a format */
+}
+
+/* fatalf -------------- printf-style message to stderr, newline, then exit(1) */
+void fatalf(const char *fmt, ...)
+{
+	va_list args;
+	fflush(stdout);		/* pending normal output goes out first */
+	print_argv0();
+	va_start(args, fmt);
+	(void)vfprintf(stderr, fmt, args);
+	va_end(args);
+	(void)fputc('\n', stderr);
+	exit(1);
+}
+
+/* ckopen ----------------------------- fopen that dies instead of returning NULL */
+FILE *ckopen(const char *name, const char *mode)
+{
+	FILE *stream = fopen(name, mode);
+
+	if (stream == NULL)
+		fatalf("Cannot open %s.", name);
+	return stream;
+}
+
+/* ckalloc ---------------------------- malloc that dies instead of returning NULL */
+void *ckalloc(size_t amount)
+{
+	void *block;
+	size_t want = (amount == 0) ? 1 : amount;	/* ANSI portability hack */
+
+	if ((long)amount < 0)                                  /* was "<= 0" -CR */
+		fatal("ckalloc: request for negative space.");
+	block = malloc(want);
+	if (block == NULL)
+		fatalf("Ran out of memory trying to allocate %lu.",
+			(unsigned long)want);
+	return block;
+}
+
+/* same_string ------------------------- 1 if s and t compare equal, otherwise 0 */
+bool same_string(const char *s, const char *t)
+{
+	return !strcmp(s, t);
+}
+
+/* copy_string ------------------ return a freshly allocated duplicate of s */
+char *copy_string(const char *s)
+{
+	size_t bytes = strlen(s) + 1;      /* +1 to hold '\0' */
+	return memcpy(ckalloc(bytes), s, bytes);
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/lib.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/lib.h	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,33 @@
+// lib.h -- header file for some useful procedures
+#ifndef GENOME_DIVERSITY_LIB_H	/* guard: the typedefs below must be seen once */
+#define GENOME_DIVERSITY_LIB_H
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>   /* INT_MAX, INT_MIN, LONG_MAX, LONG_MIN, etc. */
+#include <stdarg.h>
+
+typedef unsigned char uchar;
+typedef int bool;
+
+extern char *argv0;	/* program name printed by print_argv0 when non-NULL */
+
+void print_argv0(void);
+#ifdef __GNUC__     /* avoid some "foo might be used uninitialized" warnings */
+	void fatal(const char *msg) __attribute__ ((noreturn));
+	void fatalf(const char *fmt, ...) __attribute__ ((noreturn));
+	void fatalfr(const char *fmt, ...) __attribute__ ((noreturn));
+#else
+	void fatal(const char *msg);
+	void fatalf(const char *fmt, ...);
+	void fatalfr(const char *fmt, ...);
+#endif
+FILE *ckopen(const char *name, const char *mode);
+void *ckalloc(size_t amount);
+bool same_string(const char *s, const char *t);
+char *copy_string(const char *s);
+#undef MAX
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#undef MIN
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#endif /* GENOME_DIVERSITY_LIB_H */
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/pop.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/pop.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,76 @@
+/* pop -- add four columns (allele counts, genotype, maximum quality) for a
+*  specified population to a Galaxy SNP table, or enforce bounds
+*
+*  argv[1] = file containing a Galaxy table
+*  argv[2] = lower bound on total coverage (-1 = no lower bound)
+*  argv[3] = upper bound on total coverage (-1 if no bound)
+*  argv[4] = lower bound on individual coverage (-1 = no bound)
+*  argv[5] = lower bound on individual quality value (-1 = no bound)
+*  argv[6] ... are the starting columns (base-1) for the chosen individuals
+
+What it does on Galaxy
+The user specifies that some of the individuals in the selected SNP table form a "population" that has been previously defined using the Galaxy tool to select individuals from a SNP table. One option is for the program to append four columns to the table, giving the total counts for the two alleles, the "genotype" for the population and the maximum quality value, taken over all individuals in the population. If all defined genotypes in the population are 2 (agree with the reference), the population's genotype is 2; similarly for 0; otherwise the genotype is 1 (unless all individuals have undefined genotype, in which case it is -1). The other option is to remove rows from the table for which the total coverage for the population is either too low or too high, and/or if the individual coverage or quality value is too low.
+*/
+
+#include "lib.h"
+
+// most characters allowed in a row of the table
+#define MOST 50000
+
+// column for the relevant individuals/groups (taken from argv[6], argv[7], ...)
+int col[MOST];
+int nI;	// number of entries of col[] in use
+
+int main(int argc, char **argv) {
+	FILE *fp;
+	char *p, *z = "\t\n", buf[MOST], trash[MOST];
+	int X[MOST], m, i, A, B, G, Q, lo, hi, indiv, qual, g, q;
+
+	if (argc < 6)	// argv[1] through argv[5] are all read below
+		fatalf("args: SNP-table low high indiv-cov indiv-qual col1 col2 ...");
+
+	lo = atoi(argv[2]);
+	hi = atoi(argv[3]);
+	indiv = atoi(argv[4]);
+	qual = atoi(argv[5]);
+	for (i = 6, nI = 0; i < argc; ++i, ++nI)
+		col[nI] = atoi(argv[i]);
+
+	fp = ckopen(argv[1], "r");
+	while (fgets(buf, MOST, fp)) {
+		if (buf[0] == '#')
+			continue;
+		strcpy(trash, buf);	// strtok cuts up the copy; buf is printed intact
+		// set X[i] = atoi(i-th word of the row), i is base 1
+		for (i = 1, p = strtok(trash, z); p != NULL;
+		  ++i, p = strtok(NULL, z))
+			X[i] = atoi(p);
+		for (i = A = B = Q = 0, G = -1; i < nI; ++i) {
+			m = col[i];
+			if (X[m]+X[m+1] < indiv || (q = X[m+3]) < qual)
+				break;
+			A += X[m];
+			B += X[m+1];
+			g = X[m+2];
+			if (g != -1) {
+				if (G == -1)	// first time
+					G = g;
+				else if (G != g)
+					G = 1;
+			}
+			Q = MAX(Q, q);
+		}
+		if (i < nI)	// check bounds on the population's individuals
+			continue;
+		if (lo == -1 && hi == -1 && indiv == -1 && qual == -1) {
+			// add columns
+			if ((p = strchr(buf, '\n')) != NULL)
+				*p = '\0';
+			printf("%s\t%d\t%d\t%d\t%d\n", buf, A, B, G, Q);
+		} else if (A+B >= lo && (hi == -1 || A+B <= hi))
+			// coverage meets the population-level restrictions
+			printf("%s", buf);
+	}
+	fclose(fp);
+	return 0;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff genome_diversity/src/sweep.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/genome_diversity/src/sweep.c	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,279 @@
+/* sweep -- find regions of the genome with high scores (e.g., Fst scores).
+*
+*  argv[1] -- file containing a Galaxy table
+*  argv[2] -- column number (base-1) for the chromosome name
+*  argv[3] -- column number for the (base-0) chromosomal position
+*  argv[4] -- column number for a score for the position
+*  argv[5] -- a percentage, such as "95", or a raw score, such as "=0.9".
+*  argv[6] -- the number of randomizations (shuffles) of the scores
+*  argv[7] -- [optional] if present and non-zero, report SNPs
+*
+*  The program first determines a threshold such that the stated percentage
+*  of the scores are below that threshold (or uses the provided number if
+*  argv[5] starts with "=").  The program subtracts the threshold
+*  from each score, then looks for maximal-scoring runs of SNPs, i.e., where
+*  adding or subtracting SNPs from an end of the run always decreases the
+*  total score. These regions are printed in order of decreasing total score.
+*  To determine a cutoff for the printed regions, the program takes the maximum
+*  score over all regions observed in a specified number of shuffles of the
+*  list of scores. If argv[6] = 0, then all maximal-scoring runs of at least
+*  4 table entries are printed.
+
+What it does on Galaxy
+The user selects a SNP table and specifies the columns containing (1) chromosome, (2) position, (3) scores (such as an Fst-value for the SNP), (4) a percentage or raw score for the "cutoff" and (5) the number of times the data should be randomized (only intervals with score exceeding the maximum for the randomized data are reported). If a percentage (e.g. 95%) is specified for #4, then that percentile of the scores is used as the cutoff; this may not work well if many SNPs have the same score. The program subtracts the cutoff from every score, then finds genomic intervals (i.e., consecutive runs of SNPs) whose total score cannot be increased by adding or subtracting one or more SNPs at the ends of the interval.
+*/
+
+#include "lib.h"
+#include "Huang.h"
+
+// maximum number of rows in any processed table
+#define MANY 20000000
+#define BUF_SIZE 5000	// size of the line buffers used when reading the table
+#define MAX_WINDOW 1000000	// capacity of W[], the array of recorded intervals
+
+double X[MANY];	// holds all scores
+int nX;	// number of entries of X[] in use
+
+// position-score pairs for a single chromosome
+struct score {
+	int pos;
+	double x; // original score, then shifted score
+} S[MANY];
+int nS;	// number of entries of S[] filled by get_chr()
+
+struct snp {	// node of the linked list built by add_snps()
+	int pos;
+	double x;
+	struct snp *next;
+};
+
+// structure to hold the maximum-scoring chromosomal intervals
+struct sweep {
+	float score;
+	char *chr;	// chromosome name, stored via copy_string() in main()
+	int b, e;	// interval positions; main() stores them half-open
+	struct snp *snps;	// SNP list, assigned only when SNP reporting is requested
+} W[MAX_WINDOW];
+int nW;	// number of entries of W[] in use
+
+// Build a singly linked list holding a copy of every S[i], b <= i <= e, whose
+// position is non-negative, in table order; returns NULL if there are none.
+struct snp *add_snps(int b, int e) {
+	struct snp *head = NULL, **link = &head, *node;
+	int k;
+
+	for (k = b; k <= e; ++k) {
+		if (S[k].pos < 0)
+			continue;	// skip entries with a negative position
+		node = ckalloc(sizeof(*node));
+		node->pos = S[k].pos;
+		node->x = S[k].x;
+		node->next = NULL;
+		*link = node;		// append at the tail
+		link = &node->next;
+	}
+	return head;
+}
+
+// Given a table row, return a pointer to the item in column `col` (base 1); the result
+// points into a static copy of the row, which the next call overwrites.
+char *get_col(char *buf, int col) {
+	static char temp[BUF_SIZE];
+	char *p, *z = " \t\n";
+	int i;
+	strcpy(temp, buf);
+	// test p itself, not *p: strtok() returns NULL once the row runs out of items
+	for (p = strtok(temp, z), i = 1; p != NULL && i < col; ++i)
+		p = strtok(NULL, z);
+	if (p == NULL)
+		fatalf("no column %d in %s", col, buf);
+	return p;
+}
+
+// Fill S[0..nS-1] with the position-score pairs of the next chromosome and copy its
+// name to chr; return 0 for EOF. buf keeps the first row of the following chromosome.
+int get_chr(FILE *fp, int chr_col, int pos_col, int score_col, char *chr) {
+	static char buf[BUF_SIZE];
+	static int init = 1;
+	char *status;
+	if (init) {	// first call: skip the leading comment lines
+		while ((status = fgets(buf, BUF_SIZE, fp)) != NULL && buf[0] == '#')
+			;
+		if (status == NULL)
+			fatal("empty table");
+		init = 0;
+	}
+	if (buf[0] == '\0')	// EOF was reached by the previous call
+		return 0;
+	strcpy(chr, get_col(buf, chr_col));
+	S[0].pos = atoi(get_col(buf, pos_col));
+	S[0].x = atof(get_col(buf, score_col));
+	for (nS = 1; ; ) {
+		if (!fgets(buf, BUF_SIZE, fp)) {
+			buf[0] = '\0';	// remember EOF for the next call
+			return 1;
+		}
+		if (buf[0] == '#')	// skip comment lines, as the reading loops in main() do
+			continue;
+		if (!same_string(chr, get_col(buf, chr_col)))
+			break;
+		if (nS >= MANY)	// S[] is full
+			fatal("Too many rows");
+		S[nS].pos = atoi(get_col(buf, pos_col));
+		S[nS].x = atof(get_col(buf, score_col));
+		++nS;
+	}
+	return 1;
+}
+
+// qsort comparator ordering genomic intervals by *decreasing* score:
+// -1 when a scores higher than b, 1 when it scores lower, otherwise 0
+int Wcompar(struct sweep *a, struct sweep *b) {
+	float sa = a->score;
+	float sb = b->score;
+
+	if (sa > sb)
+		return -1;
+	return (sa < sb) ? 1 : 0;
+}
+
+// qsort comparator for sorting an array of scores into increasing order:
+// -1 if *a < *b, 1 if *a > *b, otherwise 0
+int fcompar(double *a, double *b) {
+	double u = *a;
+	double v = *b;
+
+	return (u > v) - (u < v);
+}
+
+/* shuffle the values X[0], X[1], ... , X[nX-1] in place;
+*  Uses Algorithm P in page 125 of "The Art of Computer Programming (Vol II)
+*  Seminumerical Algorithms", by Donald Knuth, Addison-Wesley, 1971.
+*/
+void shuffle_scores() {
+	int i, j;
+	double temp;
+
+	for (i = nX-1; i > 0; --i) {
+		// swap what's in location i with location j, where 0 <= j <= i
+		j = random() % (i+1);	// NOTE(review): no srandom() call is visible in this file -- confirm seeding
+		temp = X[i];
+		X[i] = X[j];
+		X[j] = temp;
+	}
+}
+
+// return the best interval score (R[i] is the struct operated by Huang())
+double best() {
+	int i;
+	double bestScore;
+
+	Huang(X, nX);	// run Huang() over the current contents of X[0..nX-1]
+
+	for (bestScore = 0.0, i = 1; i <= top; ++i) 	// NOTE(review): main() scans R[] from index 0 -- confirm the intended bound
+		bestScore = MAX(R[i].Score, bestScore);
+	return bestScore;	// never below 0.0, the starting value
+}
+
+// Entry point; see the header comment for the meaning of argv[1..7].
+int main(int argc, char **argv) {
+	FILE *fp;
+	char buf[BUF_SIZE], chr[100], *a;
+	double shift = 0.0, cutoff;
+	int i, b, e, chr_col, pos_col, score_col, nshuffle, snps = 0;
+	struct snp *s;
+
+	if (argc != 7 && argc != 8)
+		fatal("args: table chr_col pos_col score_col threhold randomizations [SNPs]");
+
+	// process command-line arguments
+	chr_col = atoi(argv[2]);
+	pos_col = atoi(argv[3]);
+	score_col = atoi(argv[4]);
+	a = argv[5];
+	fp = ckopen(argv[1], "r");
+	if (argc == 8)
+		snps = atoi(argv[7]);
+	if (isdigit(a[0])) {	// a percentage: the threshold is that percentile of the sorted scores
+		for (nX = 0; nX < MANY && fgets(buf, BUF_SIZE, fp); ) {
+			if (buf[0] == '#') continue;
+			X[nX++] = atof(get_col(buf, score_col));
+		}
+		if (nX == MANY)
+			fatal("Too many rows");
+		qsort((void *)X, (size_t)nX, sizeof(double), (const void *)fcompar);
+		// clamp the percentile index: e.g. "100" would otherwise read X[nX]
+		i = atoi(a);
+		if (i > 100) i = 100;	// also keeps i*nX within int range (nX < MANY)
+		i = i*nX/100;
+		if (i >= nX && nX > 0) i = nX - 1;
+		shift = X[i];
+		rewind(fp);
+	} else if (a[0] == '=')
+		shift = atof(a+1);	// a raw threshold given as "=value"
+
+	nshuffle = atoi(argv[6]);
+	if (nshuffle == 0)
+		cutoff = 0;
+	else {
+		for (nX = 0; nX < MANY && fgets(buf, BUF_SIZE, fp); ) {
+			if (buf[0] == '#') continue;
+			X[nX++] = atof(get_col(buf, score_col)) - shift;
+		}
+		if (nX == MANY)
+			fatal("Too many rows");
+		for (cutoff = 0.0, i = 0; i < nshuffle; ++i) {
+			shuffle_scores();
+			cutoff = MAX(cutoff, best());
+		}
+		rewind(fp);
+	}
+
+	// loop over chromosomes;
+	// start by getting the chromosome's scores
+	while (get_chr(fp, chr_col, pos_col, score_col, chr)) {
+		// subtract shift from the scores
+		for (i = 0; i < nS; ++i)
+			X[i] = S[i].x - shift;
+
+		// find the maximum-scoring regions
+		Huang(X, nS);
+
+		// save any regions with >= 4 points and score >= cutoff
+		for (i = 0; i <= top; ++i) {
+			if (nW >= MAX_WINDOW)
+				fatalf("too many windows");
+
+			// get indices of the first and last SNP in the interval
+			b = R[i].Lpos + 1;
+			e = R[i].Rpos;
+
+			// remove unmapped SNP position from intervals' ends
+			while (b < e && S[b].pos == -1)
+				++b;
+			while (e > b && S[e].pos == -1)
+				--e;
+
+			// record intervals
+			if (e - b < 3 || R[i].Score < cutoff)
+				continue;
+			W[nW].score = R[i].Score;
+			W[nW].chr = copy_string(chr);
+			W[nW].b = S[b].pos;
+			W[nW].e = S[e].pos+1;	// Ws are half-open
+			if (snps)
+				W[nW].snps = add_snps(b, e);
+			++nW;
+		}
+	}
+
+	// sort by decreasing score
+	qsort((void *)W, (size_t)nW, sizeof(W[0]), (const void *)Wcompar);
+
+	for (i = 0; i < nW; ++i) {
+		printf("%s\t%d\t%d\t%4.4f\n", W[i].chr, W[i].b, W[i].e, W[i].score);
+		for (s = W[i].snps; s; s = s->next)
+			printf(" %d %3.2f\n", s->pos, s->x);
+	}
+	return 0;
+}
diff -r fdb4240fb565 -r 8ae67e9fb6ff lib/galaxy/datatypes/wsf.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/wsf.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,185 @@
+"""
+SnpFile datatype
+"""
+
+import galaxy.datatypes.data
+import tempfile
+import os
+import simplejson
+from galaxy import util
+from galaxy.datatypes.sniff import *
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.images import Html
+from galaxy.datatypes import metadata
+from galaxy.datatypes.metadata import MetadataElement
+
+class Wped( Html ):  # composite 'gd_ped' datatype: an Html dataset plus a .ped and a .map file
+    allow_datatype_change = False
+    composite_type = 'basic'
+    file_ext = 'gd_ped'
+
+    MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default='WpedData', readonly=True, set_in_upload=True )
+
+    def __init__( self, **kwd ):  # registers the two composite member files
+        Html.__init__( self, **kwd )
+        self.add_composite_file( '%s.ped', description = 'Pedigree File', substitute_name_with_metadata = 'base_name', is_binary = False )  # presumably '%s' is replaced by the base_name metadata value -- confirm
+        self.add_composite_file( '%s.map', description = 'Map File', substitute_name_with_metadata = 'base_name', is_binary = False )
+
+class Individuals( Tabular ):  # tabular 'gd_indivs' datatype with fixed column headings
+    file_ext = 'gd_indivs'
+    def __init__(self, **kwd):
+        Tabular.__init__( self, **kwd )
+        self.column_names = [ 'Column', 'Name', 'Alias' ]  # headings handed to make_html_table() in display_peek
+
+    def display_peek( self, dataset ):  # returns Tabular.make_html_table()'s result for the fixed headings
+        return Tabular.make_html_table( self, dataset, column_names=self.column_names )
+
+class DatasetComments( object ):
+    def __init__( self, dataset, comment_string='#' ):
+        self.dataset = dataset
+        self.comment_string = comment_string
+        self.comment_string_len = len(comment_string)
+        self._comments = []
+        self._read_comments()
+
+    def _read_comments( self ):
+        if self.dataset.has_data():
+            try:
+                for line in open(self.dataset.file_name, 'rU'):
+                    if line.startswith(self.comment_string):
+                        comment = line[self.comment_string_len:]
+                        self._comments.append(comment)
+                    else:
+                        break
+            except:
+                pass
+
+    def __str__( self ):
+        return "".join(self._comments)
+
+    @property
+    def comments( self ):
+        return self._comments
+
+class DatasetCommentMetadata( object ):
+    def __init__( self, dataset, comment_string='#' ):
+        self.dataset_comments = DatasetComments( dataset, comment_string )
+        self._comment_metadata = {}
+        self._decode_dataset_comments()
+
+    def _decode_dataset_comments( self ):
+        dataset_comment_string = str( self.dataset_comments )
+        try:
+            self._comment_metadata = simplejson.loads( dataset_comment_string )
+        except simplejson.JSONDecodeError as e:
+            pass
+
+    @property
+    def comment_metadata( self ):
+        return self._comment_metadata
+
+class AnnotatedTabular( Tabular ):
+    """ Tabular file with optional comment block containing JSON to be imported into metadata """
+    MetadataElement( name="comment_metadata", desc="comment metadata", param=metadata.DictParameter, visible=False, readonly=True )
+
+    def set_meta( self, dataset, overwrite = True, **kwd ):
+        Tabular.set_meta( self, dataset, overwrite=overwrite, max_data_lines=None, max_guess_type_data_lines=1000, **kwd )
+        if dataset.metadata.comment_metadata is None:  # decode the comment block only when nothing is stored yet
+            dataset_comment_metadata = DatasetCommentMetadata( dataset )
+            dataset.metadata.comment_metadata = dataset_comment_metadata.comment_metadata.copy()
+            self.set_dataset_metadata_from_comments( dataset )
+
+    def set_dataset_metadata_from_comments( self, dataset ):  # hook for subclasses; does nothing here
+        pass
+
+    def set_peek( self, dataset, line_count=None, is_multi_byte=False ):
+        super(Tabular, self).set_peek( dataset, line_count=line_count, is_multi_byte=is_multi_byte, WIDTH='unlimited', skipchars=['#'] )  # NOTE(review): super(Tabular, ...) bypasses Tabular.set_peek -- presumably deliberate; confirm
+
+    def display_peek( self, dataset ):
+        """Returns formatted html of peek"""
+        return Tabular.make_html_table( self, dataset, skipchars=['#'] )
+
+class Fake( AnnotatedTabular ):
+    MetadataElement( name="scaffold", desc="scaffold column", param=metadata.ColumnParameter, default=0 )
+    MetadataElement( name="pos", desc="pos column", param=metadata.ColumnParameter, default=0 )
+    MetadataElement( name="ref", desc="ref column", param=metadata.ColumnParameter, default=0 )
+    MetadataElement( name="rPos", desc="rPos column", param=metadata.ColumnParameter, default=0 )
+    MetadataElement( name="species", desc="species", default='', no_value='', visible=False, readonly=True )
+
+    def set_dataset_metadata_from_comments( self, dataset ):
+        self.set_dataset_column_names_metadata( dataset )
+        self.set_dataset_columnParameter_metadata( dataset )
+        self.set_dataset_species_metadata( dataset )
+        self.set_dataset_dbkey_metadata( dataset )
+
+    def set_dataset_column_names_metadata( self, dataset ):
+        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'column_names', None )
+        if isinstance( value_from_comment_metadata, list ):
+            dataset.metadata.column_names = value_from_comment_metadata[:]
+
+    def set_dataset_columnParameter_metadata( self, dataset ):
+        for name, spec in dataset.metadata.spec.items():
+            if isinstance( spec.param, metadata.ColumnParameter ):
+                value_from_comment_metadata = dataset.metadata.comment_metadata.get( name, None )
+                if value_from_comment_metadata is not None:
+                    try:
+                        i = int( value_from_comment_metadata )
+                    except:
+                        i = 0
+                    if 0 <= i <= dataset.metadata.columns:
+                        setattr( dataset.metadata, name, i )
+
+    def set_dataset_species_metadata( self, dataset ):
+        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'species', None )
+        if isinstance( value_from_comment_metadata, basestring ):
+            dataset.metadata.species = value_from_comment_metadata
+
+    def set_dataset_dbkey_metadata( self, dataset ):
+        value_from_comment_metadata = dataset.metadata.comment_metadata.get( 'dbkey', '?' )
+        if isinstance( value_from_comment_metadata, basestring ):
+            dataset.metadata.dbkey = value_from_comment_metadata
+
+class GDSnp( Fake ):
+    """ Webb's SNP file format """
+    file_ext = 'gd_snp'
+
+    MetadataElement( name="individual_names", desc="individual names", visible=False, readonly=True )
+    MetadataElement( name="individual_columns", desc="individual columns", visible=False, readonly=True )
+
+    def set_dataset_metadata_from_comments( self, dataset ):
+        Fake.set_dataset_metadata_from_comments( self, dataset )
+        self.set_dataset_individual_metadata( dataset )
+
+    def set_dataset_individual_metadata( self, dataset ):
+        individual_list = dataset.metadata.comment_metadata.get( 'individuals', None )
+        if not isinstance( individual_list, list ):
+            individual_list = []
+
+        individual_names = []
+        individual_columns = []
+
+        for individual in individual_list:
+            if not isinstance( individual, list ) or len( individual ) != 2:
+                continue
+            name, col = individual
+            if not isinstance( name, basestring ):
+                name = ''
+            try:
+                c = int( col )
+            except:
+                c = 0
+            if 0 < c <= dataset.metadata.columns:
+                individual_names.append( name )
+                individual_columns.append( c )
+
+        if individual_names:
+            dataset.metadata.individual_names = individual_names[:]
+            dataset.metadata.individual_columns = individual_columns[:]
+
+class GDSap( Fake ):  # adds two KEGG column parameters to the metadata inherited from Fake
+    """ Webb's SAP file format """
+    file_ext = 'gd_sap'
+
+    MetadataElement( name="kegg_gene", desc="KEGG gene code column", param=metadata.ColumnParameter, default=0 )  # default 0, like Fake's column parameters
+    MetadataElement( name="kegg_path", desc="KEGG pathway code/name column", param=metadata.ColumnParameter, default=0 )
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff map_ensembl_transcripts.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/map_ensembl_transcripts.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,42 @@
+<tool id="gd_new_oscar" name="Get Pathways" version="1.0.0">
+  <description>: Look up KEGG pathways for given Ensembl transcripts</description>
+
+  <command interpreter="python">
+    rtrnKEGGpthwfENSEMBLTc.py
+      "--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.oscar.loc"
+      "--species=${input.metadata.dbkey}"
+      "--input=${input}"
+      "--posENSEMBLclmn=${ensembl_col}"
+      "--output=${output}"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="tabular" label="Table" />
+    <param name="ensembl_col" type="data_column" data_ref="input" label="Column with ENSEMBL transcript code" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="test_in/ensembl.tabular" ftype="tabular">
+        <metadata name="dbkey" value="canFam2" />
+      </param>
+      <param name="ensembl_col" value="1" />
+
+      <output name="output" file="test_out/map_ensembl_transcripts/map_ensembl_transcripts.tabular" />
+    </test>
+  </tests>
+  -->
+
+  <help>
+
+**What it does**
+
+Adds the fields KEGG gene codes and KEGG pathways to an input table of ENSEMBL transcript codes.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff mkpthwpng.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mkpthwpng.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       mkpthwpng.py
+#       
+#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
+#       
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#       
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#       
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse,mechanize,os,sys
+
+#this return an image made up from a list of genes and pathway code
+def rtnHTMLformat(tmpddGenrcgenPresent,sppPrefx,pthwcod,ouPthwpng):
+	inpx='\n'.join(tmpddGenrcgenPresent)#inpx="ALDH2 color \nALDH3A1	color"
+	request=mechanize.Request("http://www.genome.jp/kegg/tool/map_pathway2.html")
+	response = mechanize.urlopen(request)
+	forms = mechanize.ParseResponse(response, backwards_compat=False)
+	form=forms[0]
+	form["unclassified"]=inpx
+	form["org_name"]=[sppPrefx]
+	request2 = form.click()
+	response2 = mechanize.urlopen(request2)
+	a=str(response2.read()).split('href="/kegg-bin/show_pathway?')[1]
+	code=a.split('/')[0]#response2.read()
+	request=mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args"%(code,pthwcod))#request=mechanize.Request("http://www.genome.jp/kegg-bin/show_pathway?%s/%s.args"%('13171478854246','hsa00410'))
+	response = mechanize.urlopen(request)
+	forms = mechanize.ParseResponse(response, backwards_compat=False)
+	form=forms[1]
+	status=' NOT '
+	try:
+		imgf=str(forms[1]).split('/mark_pathway')[1].split('/')[0]
+		os.system("wget --quiet http://www.genome.jp/tmp/mark_pathway%s/%s.png -O %s"%(imgf,pthwcod,ouPthwpng))
+		status=' '
+	except:
+		pass
+	return 'A pathway image was%ssuccefully produced...'%status
+
+
+def main():
+	parser = argparse.ArgumentParser(description='Obtain KEGG images from a list of genes.')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
+	parser.add_argument('--output',metavar='output PNG image',type=str,help='the output image file in png format')
+	parser.add_argument('--KEGGpath',metavar='KEGG pathway code (i.e. cfa00230)',type=str,help='the code of the pathway of interest')
+	parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name')
+	parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code')
+	#~Open arguments 
+	class C(object):
+		pass
+	fulargs=C()
+	parser.parse_args(sys.argv[1:],namespace=fulargs)
+	#test input vars
+	inputf,outputf,KEGGpathw,posKEGGclmn,Kgeneposcolmn=fulargs.input,fulargs.output,fulargs.KEGGpath,fulargs.posKEGGclmn,fulargs.KEGGgeneposcolmn
+    # make posKEGGclmn, Kgeneposcolmn 0-based
+	sppPrefx= KEGGpathw[:3]
+	posKEGGclmn -= 1
+	Kgeneposcolmn -= 1
+	#make a dictionary of valid genes
+	dKEGGcPthws=dict([(x.split('\t')[Kgeneposcolmn],set([y.split('=')[0] for y in x.split('\t')[posKEGGclmn].split('.')])) for x in open(inputf).read().splitlines()[1:] if x.strip()])
+	for mt1gene in [x for x in dKEGGcPthws.keys() if x.find('.')>-1]:#to crrect names with more than one gene
+		pthwsAssotd=dKEGGcPthws.pop(mt1gene)
+		for eachg in mt1gene.split('.'):
+			dKEGGcPthws[eachg]=pthwsAssotd
+	tmpddGenrcgenPresent=set()
+	sKEGGc=dKEGGcPthws.keys()
+	lsKEGGc=len(sKEGGc)
+	ctPthw=0
+	while ctPthw < lsKEGGc:#to save memory
+		eachK=sKEGGc.pop()
+		alPthws=dKEGGcPthws[eachK]
+		if KEGGpathw in alPthws:
+			tmpddGenrcgenPresent.add('\t'.join([eachK,'red']))
+		ctPthw+=1
+	#run the program
+	rtnHTMLformat(tmpddGenrcgenPresent,sppPrefx,KEGGpathw,outputf)
+	return 0
+	
+
+if __name__ == '__main__':
+	main()
diff -r fdb4240fb565 -r 8ae67e9fb6ff modify_snp_table.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/modify_snp_table.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+import sys
+import subprocess
+from Population import Population
+
+################################################################################
+
+if len(sys.argv) < 9:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+input, p1_input, output, lo, hi, lo_ind, lo_ind_qual = sys.argv[1:8]
+individual_metadata = sys.argv[8:]
+
+p_total = Population()
+p_total.from_tag_list(individual_metadata)
+
+p1 = Population()
+p1.from_population_file(p1_input)
+
+if not p_total.is_superset(p1):
+    print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
+    sys.exit(1)
+
+################################################################################
+
+prog = 'pop'
+
+args = []
+args.append(prog)
+args.append(input)
+args.append(lo)
+args.append(hi)
+args.append(lo_ind)
+args.append(lo_ind_qual)
+
+columns = p1.column_list()
+
+for column in sorted(columns):
+    args.append(column)
+
+fh = open(output, 'w')
+
+#print "args:", ' '.join(args)
+p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
+rc = p.wait()
+fh.close()
+
+sys.exit(0)
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff pathway_image.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pathway_image.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,48 @@
+<tool id="gd_pathway_image" name="Pathway Image" version="1.0.0">
+  <description>: Draw a KEGG pathway, highlighting specified gene modules</description>
+
+  <command interpreter="python">
+    mkpthwpng.py
+      "--input=${input}"
+      "--output=${output}"
+      "--KEGGpath=${pathway}"
+      "--posKEGGclmn=${input.metadata.kegg_path}"
+      "--KEGGgeneposcolmn=${input.metadata.kegg_gene}"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_sap" label="Table">
+      <validator type="metadata" check="kegg_gene,kegg_path" message="Missing KEGG gene code column and/or KEGG pathway code/name column metadata.  Click the pencil icon in the history item to edit/save the metadata attributes" />
+    </param>
+    <param name="pathway" type="select">
+      <options from_file="gd.pathways.txt">
+        <column name="value" index="1"/>
+        <column name="name" index="2"/>
+        <filter type="data_meta" ref="input" key="dbkey" column="0" separator="\t" />
+      </options>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="png" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_sap" ftype="gd_sap" />
+      <param name="pathway" value="cfa05214" />
+      <output name="output" file="test_out/pathway_image/pathway_image.png" compare="sim_size" delta = "10000" />
+    </test>
+  </tests>
+
+  <help>
+
+**What it does**
+
+This tool produces an image of an input KEGG pathway, highlighting the
+modules representing genes in an input list.  NOTE:  a given gene can
+be assigned to multiple modules, and different genes can be assigned to
+the same module.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff pca.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pca.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,258 @@
+#!/usr/bin/env python
+
+import errno
+import os
+import shutil
+import subprocess
+import sys
+from BeautifulSoup import BeautifulSoup
+import gd_composite
+
+################################################################################
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+################################################################################
+
+def run_program(prog, args, stdout_file=None):
+    #print "args: ", ' '.join(args)
+    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdoutdata, stderrdata) = p.communicate()
+    rc = p.returncode
+
+    if stdout_file is not None:
+        with open(stdout_file, 'w') as ofh:
+            print >> ofh, stdoutdata
+
+    if rc != 0:
+        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
+        print >> sys.stderr, stderrdata
+        sys.exit(1)
+
+################################################################################
+
+def do_ped2geno(input, output):
+    lines = []
+    with open(input) as fh:
+        for line in fh:
+            line = line.rstrip('\r\n')
+            lines.append(line.split())
+
+    pair_map = {
+        '0':{ '0':'9', '1':'9', '2':'9' },
+        '1':{ '0':'1', '1':'2', '2':'1' },
+        '2':{ '0':'1', '1':'1', '2':'0' }
+    }
+    with open(output, 'w') as ofh:
+        for a_idx in xrange(6, len(lines[0]), 2):
+            b_idx = a_idx + 1
+            print >> ofh, ''.join(map(lambda line: pair_map[line[a_idx]][line[b_idx]], lines))
+
+def do_map2snp(input, output):
+    with open(output, 'w') as ofh:
+        with open(input) as fh:
+            for line in fh:
+                elems = line.split()
+                print >> ofh, '  {0} 11 0.002 2000 A T'.format(elems[1])
+
+def make_ind_file(ind_file, input):
+    pops = []
+
+    ofh = open(ind_file, 'w')
+
+    with open(input) as fh:
+        soup = BeautifulSoup(fh)
+        misc = soup.find('div', {'id': 'gd_misc'})
+        populations = misc('ul')[0]
+
+        i = 0
+        for entry in populations:
+            if i % 2 == 1:
+                population_name = entry.contents[0].encode('utf8').strip().replace(' ', '_')
+                pops.append(population_name)
+                individuals = entry.ol('li')
+                for individual in individuals:
+                    individual_name = individual.string.encode('utf8').strip()
+                    print >> ofh, individual_name, 'M', population_name
+            i += 1
+
+    ofh.close()
+    return pops
+
+def make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file):
+    with open(par_file, 'w') as fh:
+        print >> fh, 'genotypename: {0}'.format(geno_file)
+        print >> fh, 'snpname: {0}'.format(snp_file)
+        print >> fh, 'indivname: {0}'.format(ind_file)
+        print >> fh, 'evecoutname: {0}'.format(evec_file)
+        print >> fh, 'evaloutname: {0}'.format(eval_file)
+        print >> fh, 'altnormstyle: NO'
+        print >> fh, 'numoutevec: 2'
+
+def do_smartpca(par_file):
+    prog = 'smartpca'
+
+    args = [ prog ]
+    args.append('-p')
+    args.append(par_file)
+
+    #print "args: ", ' '.join(args)
+    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
+    (stdoutdata, stderrdata) = p.communicate()
+    rc = p.returncode
+
+    if rc != 0:
+        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
+        print >> sys.stderr, stderrdata
+        sys.exit(1)
+
+    stats = []
+
+    save_line = False
+    for line in stdoutdata.split('\n'):
+        if line.startswith(('## Average divergence', '## Anova statistics', '## Statistical significance')):
+            stats.append('')
+            save_line = True
+        if line.strip() == '':
+            save_line = False
+        if save_line:
+            stats.append(line)
+
+    return '\n'.join(stats[1:])
+
+def do_ploteig(evec_file, population_names):
+    prog = 'gd_ploteig'
+
+    args = [ prog ]
+    args.append('-i')
+    args.append(evec_file)
+    args.append('-c')
+    args.append('1:2')
+    args.append('-p')
+    args.append(':'.join(population_names))
+    args.append('-x')
+
+    run_program(None, args)
+
+def do_eval2pct(eval_file, explained_file):
+    prog = 'eval2pct'
+
+    args = [ prog ]
+    args.append(eval_file)
+
+    with open(explained_file, 'w') as ofh:
+        #print "args:", ' '.join(args)
+        p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=subprocess.PIPE)
+        (stdoutdata, stderrdata) = p.communicate()
+        rc = p.returncode
+
+        if rc != 0:
+            print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
+            print >> sys.stderr, stderrdata
+            sys.exit(1)
+
+def do_coords2admix(coords_file):
+    prog = 'coords2admix'
+
+    args = [ prog ]
+    args.append(coords_file)
+
+    with open('fake', 'w') as ofh:
+        #print "args:", ' '.join(args)
+        p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=subprocess.PIPE)
+        (stdoutdata, stderrdata) = p.communicate()
+        rc = p.returncode
+
+        if rc != 0:
+            print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
+            print >> sys.stderr, stderrdata
+            sys.exit(1)
+
+    shutil.copy2('fake', coords_file)
+
+################################################################################
+
+if len(sys.argv) != 5:  # expects: input input_files_path output output_files_path
+    print "usage"
+    sys.exit(1)
+
+input, input_files_path, output, output_files_path = sys.argv[1:5]
+
+mkdir_p(output_files_path)
+
+ped_file = os.path.join(input_files_path, 'admix.ped')
+geno_file = os.path.join(output_files_path, 'admix.geno')
+do_ped2geno(ped_file, geno_file)  # admix.ped -> admix.geno
+
+map_file = os.path.join(input_files_path, 'admix.map')
+snp_file = os.path.join(output_files_path, 'admix.snp')
+do_map2snp(map_file, snp_file)  # admix.map -> admix.snp
+
+ind_file = os.path.join(output_files_path, 'admix.ind')
+population_names = make_ind_file(ind_file, input)  # also writes admix.ind
+
+par_file = os.path.join(output_files_path, 'par.admix')
+evec_file = os.path.join(output_files_path, 'coordinates.txt')
+eval_file = os.path.join(output_files_path, 'admix.eval')
+make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file)
+
+smartpca_stats = do_smartpca(par_file)  # text shown later on the info page
+
+do_ploteig(evec_file, population_names)
+plot_file = 'coordinates.txt.1:2.{0}.pdf'.format(':'.join(population_names))  # NOTE(review): relative path -- assumes gd_ploteig leaves its PDF in the current directory; confirm
+output_plot_file = os.path.join(output_files_path, 'PCA.pdf')
+shutil.copy2(plot_file, output_plot_file)
+os.unlink(plot_file)  # the copy in output_files_path is the one kept
+
+do_eval2pct(eval_file, os.path.join(output_files_path, 'explained.txt'))
+os.unlink(eval_file)  # admix.eval is removed once explained.txt has been written
+
+do_coords2admix(evec_file)  # rewrites coordinates.txt in place
+
+################################################################################
+
+info_page = gd_composite.InfoPage()  # page rendered into the output file at the end
+info_page.set_title('PCA Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='PCA.pdf', value='PCA.pdf', display_type=display_file)
+out_evec = gd_composite.Parameter(name='coordinates.txt', value='coordinates.txt', display_type=display_file)
+out_explained = gd_composite.Parameter(name='explained.txt', value='explained.txt', display_type=display_file)
+
+evec_prefix = 'coordinates.txt.1:2.{0}'.format(':'.join(population_names))
+ps_file = '{0}.ps'.format(evec_prefix)
+xtxt_file = '{0}.xtxt'.format(evec_prefix)
+
+os.unlink(os.path.join(output_files_path, ps_file))  # NOTE(review): the PDF above was taken from the current directory, these from output_files_path -- confirm where gd_ploteig writes
+os.unlink(os.path.join(output_files_path, xtxt_file))
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_evec)
+info_page.add_output_parameter(out_explained)
+
+in_admix = gd_composite.Parameter(name='par.admix', value='par.admix', display_type=display_file)
+in_geno = gd_composite.Parameter(name='admix.geno', value='admix.geno', display_type=display_file)
+in_snp = gd_composite.Parameter(name='admix.snp', value='admix.snp', display_type=display_file)
+in_ind = gd_composite.Parameter(name='admix.ind', value='admix.ind', display_type=display_file)
+
+info_page.add_input_parameter(in_admix)
+info_page.add_input_parameter(in_geno)
+info_page.add_input_parameter(in_snp)
+info_page.add_input_parameter(in_ind)
+
+misc_stats = gd_composite.Parameter(description='Stats<p/><pre>\n{0}\n</pre>'.format(smartpca_stats), display_type=display_value)  # smartpca statistics embedded as preformatted text
+
+info_page.add_misc(misc_stats)
+
+with open (output, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+sys.exit(0)
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff pca.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pca.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,116 @@
+<tool id="gd_pca" name="PCA" version="1.0.0">
+  <description>: Principal Component Analysis of genotype data</description>
+
+  <command interpreter="python">
+    pca.py "$input" "$input.extra_files_path" "$output" "$output.files_path"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_ped" label="Dataset" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="html" />
+  </outputs>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="fake" ftype="gd_ped" >
+        <metadata name="base_name" value="admix" />
+        <composite_data value="test_out/prepare_population_structure/prepare_population_structure.html" />
+        <composite_data value="test_out/prepare_population_structure/admix.ped" />
+        <composite_data value="test_out/prepare_population_structure/admix.map" />
+        <edit_attributes type="name" value="fake" />
+      </param>
+
+      <output name="output" file="test_out/pca/pca.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="admix.geno" value="test_out/pca/admix.geno" />
+        <extra_files type="file" name="admix.gd_indivs" value="test_out/pca/admix.gd_indivs" />
+        <extra_files type="file" name="admix.gd_snp" value="test_out/pca/admix.gd_snp" />
+        <extra_files type="file" name="coordinates.txt" value="test_out/pca/coordinates.txt" />
+        <extra_files type="file" name="explained.txt" value="test_out/pca/explained.txt" />
+        <extra_files type="file" name="par.admix" value="test_out/pca/par.admix" compare="diff" lines_diff="10" />
+        <extra_files type="file" name="PCA.pdf" value="test_out/pca/PCA.pdf" compare="sim_size" delta = "1000" />
+      </output>
+      
+    </test>
+  </tests>
+  -->
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_ped_ format.
+The output dataset is html_ with links to a pdf for a graphical output and
+text files.  (`Dataset missing?`_)
+
+.. _gd_ped: ./static/formatHelp.html#gd_ped
+.. _html: ./static/formatHelp.html#html
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user selects a gd_ped dataset generated by the Prepare Input tool.
+The PCA tool runs a
+Principal Component Analysis on the input genotype data and constructs
+a plot of the top two principal components. It also reports the
+following estimates of the statistical significance of the analysis.
+
+1. Average divergence between each pair of populations.  Specifically,
+from the covariance matrix X whose eigenvectors were computed, we can
+compute a "distance", d, for each pair of individuals (i,j): d(i,j) =
+X(i,i) + X(j,j) - 2X(i,j).  For each pair of populations (a,b) now
+define an average distance: D(a,b) = \sum d(i,j) (in pop a, in pop b)
+/ (\|pop a\| * \|pop b\|).  We then normalize D so that the diagonal
+has mean 1 and report it.
+
+2. Anova statistics for population differences along each
+eigenvector. For each eigenvector, a P-value for statistical
+significance of differences between each pair of populations along
+that eigenvector is printed.  +++ is used to highlight P-values less
+than 1e-06.  \*\*\* is used to highlight P-values between 1e-06 and
+1e-03.  If there are more than 2 populations, then an overall P-value
+is also printed for that eigenvector, as are the populations with
+minimum (minv) and maximum (maxv) eigenvector coordinate. [If there is
+only 1 population, no Anova statistics are printed.]
+
+3. Statistical significance of differences between populations. For
+each pair of populations, the above Anova statistics are summed across
+eigenvectors. The result is approximately chisq with d.o.f. equal to
+the number of eigenvectors. The chisq statistic and its p-value are
+printed. [If there is only 1 population, no statistics are printed.]
+
+We post-process the output of the PCA tool to estimate "admixture
+fractions".  For this, we take three populations at a time and
+determine each one's average point in the PCA plot (by separately
+averaging first and second coordinates).  For each combination of two
+center points, modeling two ancestral populations, we try to model the
+third central point as having a certain fraction, r, of its SNP
+genotypes from the second ancestral population and the remainder from
+the first ancestral population, where we estimate r.  The output file
+"coordinates.txt" then contains pairs of lines like
+
+projection along chord Population1 -> Population2
+  Population3: 0.12345
+
+where the number (in this case 0.12345) is the estimate of r.
+Computations with simulated data suggest that the true r is
+systematically underestimated, perhaps giving roughly 0.6 times r.
+
+-----
+
+**Acknowledgments**
+
+We use the programs "smartpca" and "ploteig" downloaded from
+
+http://genepath.med.harvard.edu/~reich/Software.htm
+
+and described in the paper "Population structure and eigenanalysis"
+by Nick Patterson, Alkes L. Price, and David Reich, PLoS Genetics, 2 (2006), e190.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff phylogenetic_tree.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phylogenetic_tree.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,219 @@
+#!/usr/bin/env python
+
+import os
+import errno
+import sys
+import subprocess
+import shutil
+from Population import Population
+import gd_composite
+
+################################################################################
+
+def mkdir_p(path):  # like "mkdir -p": create path together with any missing parents
+  try:
+    os.makedirs(path)
+  except OSError as e:
+    if e.errno != errno.EEXIST:  # EEXIST is tolerated; any other error propagates
+      raise
+
+################################################################################
+
+if len(sys.argv) < 11:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+input, p1_input, output, extra_files_path, minimum_coverage, minimum_quality, dbkey, data_source, draw_tree_options = sys.argv[1:10]
+
+individual_metadata = sys.argv[10:]
+
+# note: TEST THIS
+if dbkey in ['', '?', 'None']:
+    dbkey = 'none'
+
+p_total = Population()
+p_total.from_tag_list(individual_metadata)
+
+
+################################################################################
+
+mkdir_p(extra_files_path)
+
+################################################################################
+
+def run_program(prog, args, ofh):
+    """Run args (with executable=prog), sending stdout to ofh, then close ofh.
+    On a nonzero exit status, echo the child's stderr and exit with status 1."""
+    child = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=ofh, stderr=subprocess.PIPE)
+    _, err_text = child.communicate()
+    ofh.close()
+
+    if child.returncode == 0:
+        return
+    print >> sys.stderr, err_text
+    sys.exit(1)
+
+################################################################################
+
+phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip')
+newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick')
+ps_outfile = 'tree.ps'
+pdf_outfile = os.path.join(extra_files_path, 'tree.pdf')
+
+################################################################################
+
+informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt')
+mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt')
+
+prog = 'dist_mat'
+
+args = []
+args.append(prog)
+args.append(input)
+args.append(minimum_coverage)
+args.append(minimum_quality)
+args.append(dbkey)
+args.append(data_source)
+args.append(informative_snp_file)
+args.append(mega_distance_matrix_file)
+
+if p1_input == "all_individuals":
+    tags = p_total.tag_list()
+else:
+    p1 = Population()
+    p1.from_population_file(p1_input)
+    if not p_total.is_superset(p1):
+        print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
+        sys.exit(1)
+    tags = p1.tag_list()
+
+for tag in tags:
+    args.append(tag)
+
+fh = open(phylip_outfile, 'w')
+run_program(None, args, fh)
+
+################################################################################
+
+prog = 'quicktree'
+
+args = []
+args.append(prog)
+args.append('-in')
+args.append('m')
+args.append('-out')
+args.append('t')
+args.append(phylip_outfile)
+
+fh = open(newick_outfile, 'w')
+run_program(None, args, fh)
+
+################################################################################
+
+prog = 'draw_tree'
+
+args = []
+args.append(prog)
+if draw_tree_options:
+    args.append(draw_tree_options)
+args.append(newick_outfile)
+
+fh = open(ps_outfile, 'w')
+run_program(None, args, fh)
+
+################################################################################
+
+prog = 'ps2pdf'
+
+args = []
+args.append(prog)
+args.append('-dPDFSETTINGS=/prepress')
+args.append(ps_outfile)
+args.append('-')
+
+fh = open(pdf_outfile, 'w')
+run_program(None, args, fh)
+
+shutil.copyfile(pdf_outfile, output)
+
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('Phylogenetic tree Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='tree.pdf', value='tree.pdf', display_type=display_file)
+out_newick = gd_composite.Parameter(value='phylogenetic_tree.newick', name='phylogenetic tree (newick)', display_type=display_file)
+out_phylip = gd_composite.Parameter(value='distance_matrix.phylip', name='Phylip distance matrix', display_type=display_file)
+out_mega = gd_composite.Parameter(value='mega_distance_matrix.txt', name='Mega distance matrix', display_type=display_file)
+out_snps = gd_composite.Parameter(value='informative_snps.txt', name='informative SNPs', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_newick)
+info_page.add_output_parameter(out_phylip)
+info_page.add_output_parameter(out_mega)
+info_page.add_output_parameter(out_snps)
+
+in_min_cov = gd_composite.Parameter(description='Minimum coverage', value=minimum_coverage, display_type=display_value)
+in_min_qual = gd_composite.Parameter(description='Minimum quality', value=minimum_quality, display_type=display_value)
+
+include_ref_value = 'no'
+if dbkey != 'none':
+    include_ref_value = 'yes'
+
+in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value)
+
+if data_source == '0':
+    data_source_value = 'sequence coverage'
+elif data_source == '1':
+    data_source_value = 'estimated genotype'
+
+in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
+
+branch_type_value = 'square'
+if 'd' in draw_tree_options:
+    branch_type_value = 'diagonal'
+
+in_branch_type = gd_composite.Parameter(description='Branch type', value=branch_type_value, display_type=display_value)
+
+branch_scale_value = 'yes'
+if 's' in draw_tree_options:
+    branch_scale_value = 'no'
+
+in_branch_scale = gd_composite.Parameter(description='Draw branches to scale', value=branch_scale_value, display_type=display_value)
+
+branch_length_value = 'yes'
+if 'b' in draw_tree_options:
+    branch_length_value = 'no'
+
+in_branch_length = gd_composite.Parameter(description='Show branch lengths', value=branch_length_value, display_type=display_value)
+
+tree_layout_value = 'horizontal'
+if 'v' in draw_tree_options:
+    tree_layout_value = 'vertical'
+
+in_tree_layout = gd_composite.Parameter(description='Tree layout', value=tree_layout_value, display_type=display_value)
+
+info_page.add_input_parameter(in_min_cov)
+info_page.add_input_parameter(in_min_qual)
+info_page.add_input_parameter(in_include_ref)
+info_page.add_input_parameter(in_data_source)
+info_page.add_input_parameter(in_branch_type)
+info_page.add_input_parameter(in_branch_scale)
+info_page.add_input_parameter(in_branch_length)
+info_page.add_input_parameter(in_tree_layout)
+
+misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
+
+info_page.add_misc(misc_individuals)
+
+
+with open(output, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+################################################################################
+
+sys.exit(0)
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff phylogenetic_tree.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phylogenetic_tree.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,192 @@
+<tool id="gd_phylogenetic_tree" name="Phylogenetic Tree" version="1.0.0">
+  <description>: Show genetic relationships among individuals</description>
+
+  <command interpreter="python">
+    phylogenetic_tree.py "$input"
+    #if $individuals.choice == '0'
+      "all_individuals"
+    #else if $individuals.choice == '1'
+      "$p1_input"
+    #end if
+    "$output" "$output.files_path" "$minimum_coverage" "$minimum_quality"
+	#if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0')
+        "none"
+    #else
+        "$input.metadata.dbkey"
+    #end if
+    "$data_source"
+    #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style])
+    #if $draw_tree_options == ''
+        ""
+    #else
+        "-$draw_tree_options"
+    #end if
+    #for $individual_name, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+        #set $arg = '%s:%s' % ($individual_col, $individual_name)
+        "$arg"
+    #end for
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+
+    <conditional name="individuals">
+      <param name="choice" type="select" label="Individuals">
+        <option value="0" selected="true">All individuals</option>
+        <option value="1">Individuals in a population</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="p1_input" type="data" format="gd_indivs" label="Population individuals" />
+      </when>
+    </conditional>
+
+    <param name="minimum_coverage" type="integer" min="0" value="0" label="Minimum coverage" />
+
+    <param name="minimum_quality" type="integer" min="0" value="0" label="Minimum quality" help="Note: minimum coverage and minimum quality cannot both be 0" />
+
+    <param name="include_reference" type="select" format="integer" label="Include reference sequence">
+      <option value="1" selected="true">Yes</option>
+      <option value="0">No</option>
+    </param>
+
+    <param name="data_source" type="select" format="integer" label="Data source">
+      <option value="0" selected="true">sequence coverage</option>
+      <option value="1">estimated genotype</option>
+    </param>
+
+    <param name="branch_style" type="select" display="radio">
+      <label>Branch type</label>
+      <option value="" selected="true">square</option>
+      <option value="d">diagonal</option>
+    </param>
+     
+    <param name="scale_style" type="select" display="radio">
+      <label>Draw branches to scale</label>
+      <option value="" selected="true">yes</option>
+      <option value="s">no</option>
+    </param>
+     
+    <param name="length_style" type="select" display="radio">
+      <label>Show branch lengths</label>
+      <option value="" selected="true">yes</option>
+      <option value="b">no</option>
+    </param>
+     
+    <param name="layout_style" type="select" display="radio">
+      <label>Tree layout</label>
+      <option value="" selected="true">horizontal</option>
+      <option value="v">vertical</option>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="html" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="choice" value="0" />
+      <param name="minimum_coverage" value="3" />
+      <param name="minimum_quality" value="30" />
+      <param name="data_source" value="0" />
+      <param name="branch_style" value="" />
+      <param name="scale_style" value="" />
+      <param name="length_style" value="" />
+      <param name="layout_style" value="" />
+      <output name="output" file="test_out/phylogenetic_tree/phylogenetic_tree.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="distance_matrix.phylip" value="test_out/phylogenetic_tree/distance_matrix.phylip" />
+        <extra_files type="file" name="informative_snps.txt" value="test_out/phylogenetic_tree/informative_snps.txt" />
+        <extra_files type="file" name="mega_distance_matrix.txt" value="test_out/phylogenetic_tree/mega_distance_matrix.txt" />
+        <extra_files type="file" name="phylogenetic_tree.newick" value="test_out/phylogenetic_tree/phylogenetic_tree.newick" />
+        <extra_files type="file" name="tree.pdf" value="test_out/phylogenetic_tree/tree.pdf" compare="sim_size" delta = "1000"/>
+      </output>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_snp_ format.
+The output is a composite dataset, containing the tree in both text (Newick_)
+and PDF formats, as well as supplemental text information.
+(`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _Newick: http://evolution.genetics.washington.edu/phylip/newicktree.html
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+This tool uses a gd_snp dataset to determine a kind of "genetic distance"
+between each pair of individuals.  That information is used to
+produce a tree-shaped figure that depicts how the individuals are related,
+both as text files and as a diagram.
+The text files include a common tree format, Newick, as well as distance
+matrices and counts of informative SNPs for each pairwise comparison.
+The informative SNPs can be used as a guide to how reliable the tree is.
+
+The input parameters are:
+
+SNP dataset
+  A table of SNPs for various individuals, in gd_snp format.
+
+Individuals
+  By default all individuals are included in the analysis, but this can
+  optionally be restricted to a subset that has been defined using the
+  Specify Individuals tool.
+
+Minimum coverage
+  For each pair of individuals, the tool looks for informative SNPs, i.e.,
+  where the sequence data for both individuals is adequate according to
+  some criterion.  Specifying, say, 7 for this option instructs the tool
+  to consider only SNPs with coverage at least 7 in both individuals
+  when estimating their "genetic distance".
+
+Minimum quality
+  Specifying, say, 37 for this option instructs the tool to consider
+  only SNPs with SAMtools quality value at least 37 in both individuals
+  when estimating their "genetic distance".
+
+Include reference sequence
+  For gd_snp datasets containing columns for a reference sequence, the
+  user can ask that the reference be indicated in the tree, to help with
+  rooting it.  If the dataset has no reference columns, this option has
+  no effect.
+
+Data source
+  The genetic distance between two individuals at a given SNP can
+  be estimated two ways.  One method is to use the absolute value of the
+  difference in the frequency of the first allele (or equivalently, the
+  second allele).  For instance, if the first individual has 5 reads of
+  each allele and the second individual has respectively 3 and 6 reads,
+  then the frequencies are 1/2 and 1/3, giving a distance 1/6 at that
+  SNP.  The other approach is to use the SAMtools genotypes to estimate
+  the difference in the number of occurrences of the first allele.
+  For instance, if the two genotypes are 2 and 1, i.e., the individuals
+  are estimated to have respectively 2 and 1 occurrences of the first
+  allele at this location, then the distance is 1 (the absolute value
+  of the difference of the two numbers).
+
+Output options
+  The final four options apply mostly to the graphical drawing of the
+  tree, except that the branch lengths are also added to the Newick text
+  file.
+
+-----
+
+**Acknowledgments**
+
+To convert the distance matrix to a Newick-formatted tree, we use the
+QuickTree program from
+http://www.sanger.ac.uk/resources/software/quicktree/ .
+
+To make the diagram we use draw_tree, available at
+http://compgen.bscb.cornell.edu/phast/ .
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff population_structure.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/population_structure.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+
+import errno
+import os
+import shutil
+import subprocess
+import sys
+from BeautifulSoup import BeautifulSoup
+import gd_composite
+
+################################################################################
+
+def run_admixture(ped_file, populations):
+    """Run the external 'admixture' program on ped_file.
+
+    populations is the number of populations, passed through as a string.
+    The program's stdout is discarded and its stderr is inherited.  The
+    exit status is not checked.
+    """
+    # Use the ped_file parameter (the original read the input_ped_file global).
+    args = ['admixture', ped_file, populations]
+    ofh = open('/dev/null', 'w')
+    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=sys.stderr)
+    p.wait()
+    ofh.close()
+
+def run_r(input_file, output_file, populations):
+    """Run population_structure.r (located next to this script) under R.
+
+    The R script is fed to R on stdin; input_file, output_file and
+    populations are passed after --args.  R's stdout is discarded and its
+    stderr is inherited.  The exit status is not checked.
+    """
+    here = os.path.dirname(os.path.realpath(__file__))
+    r_script_file = os.path.join(here, 'population_structure.r')
+
+    command = [
+        'R', '--vanilla', '--quiet', '--args',
+        input_file, output_file, populations,
+    ]
+
+    script_fh = open(r_script_file)
+    sink_fh = open('/dev/null', 'w')
+    # stdin carries the R program itself.
+    child = subprocess.Popen(command, bufsize=-1, stdin=script_fh, stdout=sink_fh, stderr=None)
+    child.wait()
+    script_fh.close()
+    sink_fh.close()
+
+def mkdir_p(path):  # like "mkdir -p": create path together with any missing parents
+    try:
+        os.makedirs(path)
+    except OSError as e:
+        if e.errno != errno.EEXIST:  # EEXIST is tolerated; any other error propagates
+            raise
+
+def get_populations(input):
+    """Return 'Populations' and a newline followed by the first <ul> element
+    found under the <div id="gd_misc"> element of the HTML file input.
+    """
+    with open(input) as fh:
+        soup = BeautifulSoup(fh)
+        misc = soup.find('div', {'id': 'gd_misc'})
+
+        return 'Populations\n{0}'.format(misc('ul')[0])
+
+################################################################################
+
+if len(sys.argv) != 6:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+input_html_file, input_ped_file, output_file, extra_files_path, populations = sys.argv[1:6]
+populations_html = get_populations(input_html_file)
+
+run_admixture(input_ped_file, populations)
+
+ped_base = os.path.basename(input_ped_file)
+if ped_base.endswith('.ped'):
+    ped_base = ped_base[:-4]
+
+p_file = '%s.%s.P' % (ped_base, populations)
+q_file = '%s.%s.Q' % (ped_base, populations)
+
+mkdir_p(extra_files_path)
+numeric_output_file = os.path.join(extra_files_path, 'numeric.txt')
+shutil.copy2(q_file, numeric_output_file)
+os.remove(p_file)
+os.remove(q_file)
+
+graphical_output_file = os.path.join(extra_files_path, 'graphical.pdf')
+run_r(numeric_output_file, graphical_output_file, populations)
+
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('Population structure Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='graphical.pdf', value='graphical.pdf', display_type=display_file)
+out_txt = gd_composite.Parameter(name='numeric.txt', value='numeric.txt', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_txt)
+
+in_pops = gd_composite.Parameter(description='Number of populations', value=populations, display_type=display_value)
+
+info_page.add_input_parameter(in_pops)
+
+misc_pops = gd_composite.Parameter(description=populations_html, display_type=display_value)
+
+info_page.add_misc(misc_pops)
+
+
+with open (output_file, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+
+sys.exit(0)
diff -r fdb4240fb565 -r 8ae67e9fb6ff population_structure.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/population_structure.r	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,19 @@
+library(RColorBrewer)
+
+# Arguments after --args: table to plot, output PDF path, number of populations.
+args = commandArgs(trailingOnly=TRUE)
+q_file = args[[1]]
+output_file = args[[2]]
+# Convert to integer: compared as text, the range test below was never true.
+populations = as.integer(args[[3]])
+tbl <- read.table(q_file)
+
+if ( populations >= 3 && populations <= 12 ) {
+    colors = brewer.pal(populations, 'Paired')
+} else {
+    colors = rainbow(populations)
+}
+
+pdf(file=output_file, onefile=TRUE, width=7, height=3)
+barplot(t(as.matrix(tbl)), col=colors, xlab="Individual #", ylab="Ancestry", border=NA)
+dev.off()
diff -r fdb4240fb565 -r 8ae67e9fb6ff population_structure.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/population_structure.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,72 @@
+<tool id="gd_population_structure" name="Ancestry" version="1.0.0">
+  <description>: Characterize ancestries w.r.t. inferred ancestral populations</description>
+
+  <command interpreter="python">
+    population_structure.py "$input" "${input.extra_files_path}/admix.ped" "$output" "$output.files_path" "$populations"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_ped" label="Dataset" />
+    <param name="populations" type="integer" min="1" value="2" label="Number of populations" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="html" />
+  </outputs>
+
+  <!--
+  <tests>
+    <test>
+      <param name="input" value="fake" ftype="gd_ped" >
+        <metadata name="base_name" value="admix" />
+        <composite_data value="test_out/prepare_population_structure/prepare_population_structure.html" />
+        <composite_data value="test_out/prepare_population_structure/admix.ped" />
+        <composite_data value="test_out/prepare_population_structure/admix.map" />
+        <edit_attributes type="name" value="fake" />
+      </param>
+      <param name="populations" value="2" />
+
+      <output name="output" file="test_out/population_structure/population_structure.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="numeric.txt" value="test_out/population_structure/numeric.txt" />
+        <extra_files type="file" name="graphical.pdf" value="test_out/population_structure/graphical.pdf" compare="sim_size" delta="1000" />
+      </output>
+    </test>
+  </tests>
+  -->
+
+
+  <help>
+
+**Dataset formats**
+
+The input dataset is in gd_ped_ format.
+The output dataset is a composite dataset containing a graph and text.
+(`Dataset missing?`_)
+
+.. _gd_ped: ./static/formatHelp.html#gd_ped
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The user selects a gd_ped dataset generated by the Prepare Input tool,
+and specifies a number, K, of ancestral
+populations.  The tool estimates the proportion of each individual's ancestry
+coming from each ancestral population.  The proportions are shown both as
+numbers and graphically.
+
+-----
+
+**Acknowledgments**
+
+We use the program "Admixture", downloaded from
+
+http://www.genetics.ucla.edu/software/admixture/
+
+and described in the paper "Fast model-based estimation of ancestry in
+unrelated individuals" by David H. Alexander, John Novembre and Kenneth Lange,
+Genome Research 19 (2009), pp. 1655-1664.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff prepare_population_structure.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_population_structure.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,144 @@
+#!/usr/bin/env python
+
+import errno
+import os
+import shutil
+import subprocess
+import sys
+from Population import Population
+import gd_composite
+
+################################################################################
+
+def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list):
+    """Build the info page for the composite dataset and write it to filename.
+
+    files_path and tags are accepted but are not used by this function.
+    """
+    page = gd_composite.InfoPage()
+    page.set_title('Prepare to look for population structure Galaxy Composite Dataset')
+
+    as_file = gd_composite.DisplayFile()
+    as_value = gd_composite.DisplayValue()
+
+    # Outputs: the two admix files, then the text passed in as using_info.
+    for admix_name in ('admix.ped', 'admix.map'):
+        page.add_output_parameter(gd_composite.Parameter(name=admix_name, value=admix_name, display_type=as_file))
+    page.add_output_parameter(gd_composite.Parameter(description=using_info, display_type=as_value))
+
+    # Inputs: the three filtering thresholds, added in this order.
+    thresholds = (
+        ('Minimum reads covering a SNP, per individual', min_reads),
+        ('Minimum quality value, per individual', min_qual),
+        ('Minimum spacing between SNPs on the same scaffold', min_spacing),
+    )
+    for label, setting in thresholds:
+        page.add_input_parameter(gd_composite.Parameter(description=label, value=setting, display_type=as_value))
+
+    page.add_misc(gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()))
+    with open(filename, 'w') as ofh:
+        print >> ofh, page.render()
+
+def mkdir_p(path):  # like "mkdir -p": create path together with any missing parents
+    try:
+        os.makedirs(path)
+    except OSError as e:
+        if e.errno != errno.EEXIST:  # EEXIST is tolerated; any other error propagates
+            raise
+
+def die(message, exit=True):
+    print >> sys.stderr, message
+    if exit:
+        sys.exit(1)
+
+################################################################################
+
+if len(sys.argv) < 9:
+    die("Usage")
+
+# parse command line
+input_snp_filename, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:7]
+args = sys.argv[7:]
+
+individual_metadata = []
+population_files = []
+population_names = []
+all_individuals = False
+
+for arg in args:
+    if arg == 'all_individuals':
+        all_individuals = True
+    elif len(arg) > 11:
+        tag = arg[:11]
+        value = arg[11:]
+        if tag == 'individual:':
+            individual_metadata.append(value)
+        elif tag == 'population:':
+            filename, name = value.split(':', 1)
+            population_files.append(filename)
+            population_names.append(name)
+
+p_total = Population()
+p_total.from_tag_list(individual_metadata)
+
+individual_population = {}
+
+population_list = []
+
+if all_individuals:
+    p1 = p_total
+    p1.name = 'All Individuals'
+    population_list.append(p1)
+else:
+    p1 = Population()
+    for idx in range(len(population_files)):
+        population_file = population_files[idx]
+        population_name = population_names[idx]
+        this_pop = Population(population_name)
+        this_pop.from_population_file(population_file)
+        population_list.append(this_pop)
+        p1.from_population_file(population_file)
+        tags = p1.tag_list()
+        for tag in tags:
+            if tag not in individual_population:
+                individual_population[tag] = population_name
+
+if not p_total.is_superset(p1):
+    print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
+    sys.exit(1)
+
+# run tool
+prog = 'admix_prep'
+
+args = []
+args.append(prog)
+args.append(input_snp_filename)
+args.append(min_reads)
+args.append(min_qual)
+args.append(min_spacing)
+
+tags = p1.tag_list()
+for tag in tags:
+    args.append(tag)
+
+#print "args:", ' '.join(args)
+p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
+(stdoutdata, stderrdata) = p.communicate()
+rc = p.returncode
+
+if rc != 0:
+    die('admix_prep failed: rc={0}'.format(rc))
+
+using_info = stdoutdata.rstrip('\r\n')
+mkdir_p(output_files_path)
+output_ped_filename = os.path.join(output_files_path, 'admix.ped')
+output_map_filename = os.path.join(output_files_path, 'admix.map')
+shutil.copy2('admix.ped', output_ped_filename)
+shutil.copy2('admix.map', output_map_filename)
+do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list)
+
+os.unlink('admix.ped')
+os.unlink('admix.map')
+
+sys.exit(0)
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff prepare_population_structure.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_population_structure.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,118 @@
+<tool id="gd_prepare_population_structure" name="Prepare Input" version="1.0.0">
+  <description>: Filter and convert to the format needed for these tools</description>
+
+  <command interpreter="python">
+    prepare_population_structure.py "$input" "$min_reads" "$min_qual" "$min_spacing" "$output" "$output.files_path"
+    #if $individuals.choice == '0'
+        "all_individuals"
+    #else if $individuals.choice == '1'
+        #for $population in $individuals.populations
+          #set $pop_arg = 'population:%s:%s' % (str($population.p_input), str($population.p_input.name))
+          "$pop_arg"
+        #end for
+    #end if
+    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
+        #set $arg = 'individual:%s:%s' % ($individual_col, $individual)
+        "$arg"
+    #end for
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_snp" label="SNP dataset" />
+    <param name="min_reads" type="integer" min="0" value="0" label="Minimum reads covering a SNP, per individual" />
+    <param name="min_qual" type="integer" min="0" value="0" label="Minimum quality value, per individual" />
+    <param name="min_spacing" type="integer" min="0" value="0" label="Minimum spacing between SNPs on the same scaffold" />
+    <conditional name="individuals">
+      <param name="choice" type="select" label="Individuals">
+        <option value="0" selected="true">All</option>
+        <option value="1">Choose</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <repeat name="populations" title="Population" min="1">
+          <param name="p_input" type="data" format="gd_indivs" label="Individuals" />
+        </repeat>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="gd_ped">
+      <actions>
+        <action type="metadata" name="base_name" default="admix" />
+      </actions>
+    </data>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
+      <param name="min_reads" value="3" />
+      <param name="min_qual" value="30" />
+      <param name="min_spacing" value="0" />
+      <param name="choice" value="0" />
+      <output name="output" file="test_out/prepare_population_structure/prepare_population_structure.html" ftype="html" compare="diff" lines_diff="2">
+        <extra_files type="file" name="admix.map" value="test_out/prepare_population_structure/admix.map" />
+        <extra_files type="file" name="admix.ped" value="test_out/prepare_population_structure/admix.ped" />
+      </output>
+    </test>
+  </tests>
+
+  <help>
+
+**Dataset formats**
+
+The input datasets are in gd_snp_ and gd_indivs_ formats.  It is important
+for the Individuals datasets to have unique names; rename them if
+necessary to make them unique.  These names are used by the later tools in
+the graphical displays.
+The output dataset is gd_ped_.  (`Dataset missing?`_)
+
+.. _gd_snp: ./static/formatHelp.html#gd_snp
+.. _gd_indivs: ./static/formatHelp.html#gd_indivs
+.. _gd_ped: ./static/formatHelp.html#gd_ped
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+The tool converts a gd_snp dataset into two tables, called "admix.map" and
+"admix.ped", needed for estimating the population structure.  The user
+can read or download those files, or simply pass this tool's output on to
+other programs.  The user imposes conditions on which SNPs to consider,
+such as the minimum coverage and/or quality value for every individual,
+or the distance to the closest SNP in the same contig (as named in the
+first column of the SNP table).  A useful piece of information produced
+by the tool is the number of SNPs meeting those conditions, which can
+be found by clicking on the eye icon in the history panel after the program 
+runs.
+
+-----
+
+**Example**
+
+- input::
+
+    Contig161_chr1_4641264_4641879  115     C       T       73.5    chr1    4641382 C       6       0       2       45      8       0       2       51      15      0       2       72      5       0       2       42      6       0       2       45      10      0       2       57      Y       54      0.323   0
+    Contig48_chr1_10150253_10151311 11      A       G       94.3    chr1    10150264        A       1       0       2       30      1       0       2       30      1       0       2       30      3       0       2       36      1       0       2       30      1       0       2       30      Y       22      +99.    0
+    Contig20_chr1_21313469_21313570 66      C       T       54.0    chr1    21313534        C       4       0       2       39      4       0       2       39      5       0       2       42      4       0       2       39      4       0       2       39      5       0       2       42      N       1       +99.    0
+    etc.
+
+- output map file::
+
+    1 snp1 0 2
+    1 snp3 0 4
+    1 snp4 0 5
+    1 snp5 0 6
+    1 snp6 0 7
+    1 snp7 0 8
+    1 snp8 0 9
+    1 snp9 0 10
+
+- output ped file::
+
+    PB1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff rank_pathways.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rank_pathways.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,76 @@
+<tool id="gd_calc_freq" name="Rank Pathways" version="1.0.0">
+  <description>: Assess the impact of gene sets on pathways</description>
+
+  <command interpreter="python">
+    #if str($output_format) == 'a'
+      calctfreq.py
+    #else if str($output_format) == 'b'
+      calclenchange.py
+    #end if
+        "--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.rank.loc"
+        "--species=${input.metadata.dbkey}"
+        "--input=${input}"
+        "--output=${output}"
+        "--posKEGGclmn=${input.metadata.kegg_path}"
+        "--KEGGgeneposcolmn=${input.metadata.kegg_gene}"
+  </command>
+
+  <inputs>
+    <param name="input" type="data" format="gd_sap" label="Table">
+        <validator type="metadata" check="kegg_gene,kegg_path" message="Missing KEGG gene code column and/or KEGG pathway code/name column metadata.  Click the pencil icon in the history item to edit/save the metadata attributes" />
+    </param>
+    <param name="output_format" type="select" label="Output format">
+      <option value="a" selected="true">ranked by percentage of genes affected</option>
+      <option value="b">ranked by change in length and number of paths</option>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_sap" ftype="gd_sap" />
+      <param name="output_format" value="a" />
+      <output name="output" file="test_out/rank_pathways/rank_pathways.tabular" />
+    </test>
+  </tests>
+
+  <help>
+
+**What it does**
+
+This tool produces a table ranking the pathways based on the percentage
+of genes in an input dataset, out of the total in each pathway.
+Alternatively, the tool ranks the pathways based on the change in
+length and number of paths connecting sources and sinks.  This change is
+calculated between graphs representing pathways with and without excluding
+the nodes that represent the genes in an input list.  Sources are all
+the nodes representing the initial reactants/products in the pathway.
+Sinks are all the nodes representing the final reactants/products in
+the pathway.
+
+If pathways are ranked by percentage of genes affected, the output is
+a tabular dataset with the following columns:
+
+   1. number of genes in the pathway present in the input dataset
+   2. percentage of the total genes in the pathway included in the input dataset
+   3. rank of the frequency (from high freq to low freq)
+   4. name of the pathway
+
+If pathways are ranked by change in length and number of paths, the
+output is a tabular dataset with the following columns:
+
+   1. change in the mean length of paths between sources and sinks
+   2. mean length of paths between sources and sinks in the pathway including the genes in the input dataset.  If the pathway does not have sources/sinks, the length is assumed to be infinite (I)
+   3. mean length of paths between sources and sinks in the pathway excluding the genes in the input dataset.  If the pathway does not have sources/sinks, the length is assumed to be infinite (I)
+   4. rank of the change in the mean length of paths between sources and sinks (from high change to low change)
+   5. change in the number of paths between sources and sinks
+   6. number of paths between sources and sinks in the pathway including the genes in the input dataset.  If the pathway does not have sources/sinks, it is assumed to be a circuit (C)
+   7. number of paths between sources and sinks in the pathway excluding the genes in the input dataset.  If the pathway does not have sources/sinks, it is assumed to be a circuit (C)
+   8. rank of the change in the number of paths between sources and sinks (from high change to low change)
+   9. name of the pathway
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff rtrnKEGGpthwfENSEMBLTc.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rtrnKEGGpthwfENSEMBLTc.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#       rtrnKEGGpthwfENSEMBLTc.py
+#       
+#       Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
+#       
+#       This program is free software; you can redistribute it and/or modify
+#       it under the terms of the GNU General Public License as published by
+#       the Free Software Foundation; either version 2 of the License, or
+#       (at your option) any later version.
+#       
+#       This program is distributed in the hope that it will be useful,
+#       but WITHOUT ANY WARRANTY; without even the implied warranty of
+#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#       GNU General Public License for more details.
+#       
+#       You should have received a copy of the GNU General Public License
+#       along with this program; if not, write to the Free Software
+#       Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+#       MA 02110-1301, USA.
+
+import argparse,os,sys
+
+
+def main():
+	"""Append KEGG gene codes and KEGG pathways to a table of ENSEMBL transcript codes.
+
+	The conversion dictionary file (CDF) is the second field of the --loc_file
+	line whose first field is --species.  Each non-comment row of --input is
+	written to --output followed by the CDF fields for the code in column
+	--posENSEMBLclmn (1-based), or by 'U' and 'N' when the code is not in the
+	CDF.  Returns 0; exits with an error message if the species is not listed.
+	"""
+	parser = argparse.ArgumentParser(description='Adds the fields KEGG gene codes and KEGG pathways to an input table of ENSEMBL transcript codes.')
+	parser.add_argument('--loc_file',metavar='correlational database',type=str,help='correlational database')
+	parser.add_argument('--species',metavar='species name',type=str,help='the species of interest in loc_file')
+	parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format. The output will have two more fields: KEGG gene codes and KEGG pathways of each ENSEMBL code' )
+	parser.add_argument('--posENSEMBLclmn',metavar='column number',type=int,help='the column with the ENSEMBLE transcript code')
+	parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
+	fulargs=parser.parse_args(sys.argv[1:])
+	inputf,loc_file,species,output=fulargs.input,fulargs.loc_file,fulargs.species,fulargs.output
+	posENSEMBLclmn=fulargs.posENSEMBLclmn-1#convert the 1-based column number to a 0-based index
+	#~ Find the CDF registered for the species.  Blank loc_file lines are skipped:
+	#~ indexing the empty result of split() used to raise IndexError
+	dinput=None
+	with open(loc_file) as fh:
+		for crDB in (x.split() for x in fh.read().splitlines()):
+			if len(crDB)>1 and crDB[0]==species:
+				dinput=crDB[1]
+				break
+	if dinput is None:
+		sys.exit('ERROR: species "%s" not found in %s'%(species,loc_file))
+	#~ Map the first field of every CDF line (the ENSEMBL code) to the rest of the line
+	dKEGGcPthws={}
+	with open(dinput) as fh:
+		for x in fh.read().splitlines():
+			if x.strip():
+				code,_,rest=x.partition('\t')
+				dKEGGcPthws[code]=rest
+	#~ Stream the table instead of list.pop(0), which made the old loop quadratic
+	sall=[]#the output rows with the additional fields
+	unknown='\t'.join(['U','N'])
+	with open(inputf) as fh:
+		for line in fh:
+			if line.startswith('#'):
+				continue#comment lines are not copied to the output
+			row=line.rstrip('\r\n')
+			ENSEMBLTc=row.split('\t')[posENSEMBLclmn]
+			sall.append('\t'.join([row,dKEGGcPthws.get(ENSEMBLTc,unknown)]))
+	with open(output,'w') as salef:
+		salef.write('\n'.join(sall))
+	return 0
+	
+
+if __name__ == '__main__':
+	main()
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff select_snps.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/select_snps.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import math
+from optparse import OptionParser
+import genome_diversity as gd
+
+def main_function(parse_arguments=None):
+    """Return a decorator whose result parses argv, runs the wrapped main and exits with its value."""
+    # Without a parser the options are None and the argument list passes through untouched.
+    parser = (lambda argv: (None, argv)) if parse_arguments is None else parse_arguments
+    def main_decorator(to_decorate):
+        def decorated_main(arguments=None):
+            argv = sys.argv if arguments is None else arguments
+            options, remaining = parser(argv)
+            sys.exit(to_decorate(options, remaining))
+        return decorated_main
+    return main_decorator
+
+def parse_arguments(arguments):
+    """Parse an argv-style list (arguments[0] is skipped) into optparse's (options, arguments) pair."""
+    option_names = ('input', 'output', 'index_dir', 'num_snps',
+                    'ref_chrom_col', 'ref_pos_col', 'ref_species')
+    parser = OptionParser()
+    # Every option is a plain string stored under an attribute of the same name.
+    for option_name in option_names:
+        parser.add_option('--' + option_name, dest=option_name)
+    values, positionals = parser.parse_args(arguments[1:])
+    return values, positionals
+
+@main_function(parse_arguments)
+def main(options, arguments):
+    """Write to options.output an approximately uniformly spaced sample of options.input's SNPs.
+
+    The reference chromosome/position columns are given 1-based; chromosome lengths
+    come from gd.ChrLens on <index_dir>/shared/ucsc/chrom/<ref_species>.len.
+    """
+    ref_chrom_idx = to_int( options.ref_chrom_col ) - 1
+    ref_pos_idx = to_int( options.ref_pos_col ) - 1
+
+    # to_int() maps an unusable column to 0, i.e. index -1 here; index 0 is
+    # column 1 and perfectly valid, so only negative indices are rejected.
+    if (ref_chrom_idx < 0) or (ref_pos_idx < 0) or (ref_chrom_idx == ref_pos_idx):
+        sys.stderr.write( "Cannot locate reference genome sequence (ref) or reference genome position (rPos) column for this dataset.\n" )
+        sys.exit(1)
+
+    chrom_len_path = os.path.join( options.index_dir, 'shared/ucsc/chrom', '%s.len' % options.ref_species )
+    chrlens = gd.ChrLens( chrom_len_path )
+    total_len = sum( chrlens.length(chrom) for chrom in chrlens )
+
+    total_requested = int( options.num_snps )
+    lines, data, comments = get_snp_lines_data_and_comments( options.input, ref_chrom_idx, ref_pos_idx )
+    selected = select_snps( data, total_len, total_requested )
+    out_data = fix_selection_and_order_like_input(data, selected, total_requested)
+    write_selected_snps( options.output, out_data, lines, comments )
+
+def to_int( value ):
+    """Return value converted to int, or 0 when it is missing (None) or not an integer."""
+    try:
+        return int( value )
+    except (TypeError, ValueError):
+        return 0
+
+def get_snp_lines_data_and_comments( filename, chrom_idx, pos_idx ):
+    """Read a tab-separated SNP table; return (lines, data, comments).
+
+    lines are the kept rows in file order, comments the '#' lines, and data one
+    [chrom_sort, chrom, pos, line_num, line_idx] record per kept row, sorted by
+    chromosome and position.  Blank/short rows are skipped; a bad position exits 1.
+    """
+    needed = max( chrom_idx, pos_idx ) + 1
+    lines, data, comments = [], [], []
+    with open( filename, 'r' ) as fh:
+        for line_num, line in enumerate( fh, 1 ):
+            line = line.rstrip('\r\n')
+            if not line:
+                continue
+            if line.startswith('#'):
+                comments.append(line)
+                continue
+            elems = line.split('\t')
+            if len(elems) < needed:
+                continue
+            try:
+                pos = int(elems[pos_idx])
+            except ValueError:
+                sys.stderr.write( "bad reference position in line %d column %d: %s\n" % ( line_num, pos_idx+1, elems[pos_idx] ) )
+                sys.exit(1)
+            chrom = elems[chrom_idx]
+            # Remove only a literal 'chr' prefix; lstrip('chr') strips a character set.
+            chrom_sort = chrom[3:] if chrom.startswith('chr') else chrom
+            data.append( [chrom_sort, chrom, pos, line_num, len(lines)] )
+            lines.append(line)
+    # chrom breaks ties so the rows of one chromosome always stay contiguous.
+    data.sort( key=lambda x: (x[0], x[1], x[2]) )
+    return lines, data, comments
+
+def select_snps( data, total_len, requested ):
+    """Return indices into data (sorted by chromosome, position) of SNPs spaced total_len/requested apart."""
+    spacing = total_len / requested
+    picked = []
+    current_chrom, threshold = None, 0
+    for idx in range( len( data ) ):
+        chrom, pos = data[idx][1], data[idx][2]
+        # A new chromosome restarts the spacing grid at position 0.
+        if chrom != current_chrom:
+            current_chrom, threshold = chrom, 0
+        # Take the first SNP at or past the threshold, then move the threshold on.
+        if pos >= threshold:
+            picked.append( idx )
+            threshold += spacing
+    return picked
+
+def fix_selection_and_order_like_input(data, selected, requested):
+    """Thin selected down to at most requested records and return them in input order.
+
+    selected holds indices into data.  With more than requested entries, requested
+    evenly spread ones are kept; otherwise all are kept.  The result is sorted by
+    original line number (record field [3]).
+    """
+    total_selected = len( selected )
+
+    if total_selected > requested:
+        # Centre one pick in each of `requested` equal slices of selected.  A
+        # set makes the membership test O(1) instead of scanning a list.
+        a = float( total_selected ) / requested
+        b = a / 2
+        keep = set( int( math.ceil( i * a + b ) - 1 ) for i in range( requested ) )
+        out_data = [ data[data_idx] for i, data_idx in enumerate( selected ) if i in keep ]
+    else:
+        out_data = [ data[data_idx] for data_idx in selected ]
+
+    out_data.sort( key=lambda x: x[3] )
+
+    return out_data
+
+def write_selected_snps( filename, data, lines, comments ):
+    """Write the comment lines, then the input line of each record in data, to filename.
+
+    A record's field [4] is its index into lines; every line written ends with a newline.
+    """
+    # The with-block closes the file even when a write fails part-way.
+    with open( filename, 'w' ) as fh:
+        for comment in comments:
+            fh.write("%s\n" % comment )
+        for datum in data:
+            fh.write("%s\n" % lines[datum[4]])
+
+if __name__ == "__main__":
+    main()
+
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff select_snps.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/select_snps.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,89 @@
+<tool id="gd_select_snps" name="Sample SNPs" version="1.0.0">
+  <description>: Select a specified number of SNPs, uniformly spaced</description>
+
+  <command interpreter="python">
+    select_snps.py "--input=$input" "--output=$output" "--index_dir=$GALAXY_DATA_INDEX_DIR" "--num_snps=$num_snps"
+    #if $override_metadata.choice == "0":
+      "--ref_chrom_col=${input.metadata.ref}" "--ref_pos_col=${input.metadata.rPos}" "--ref_species=${input.metadata.dbkey}"
+    #else
+      "--ref_chrom_col=${override_metadata.ref_col}" "--ref_pos_col=${override_metadata.rpos_col}" "--ref_species=${override_metadata.ref_species}"
+    #end if
+  </command>
+
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Selected SNPS dataset">
+      <validator type="unspecified_build" message="This dataset does not have a reference species and cannot be used with this tool" />
+    </param>
+    <param name="num_snps" type="integer" value="10" optional="false" min="1" label="Number of SNPs"/>
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="choose columns">
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1" >Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="ref_col" type="data_column" data_ref="input" numerical="false" label="Column with reference chromosome"/>
+        <param name="rpos_col" type="data_column" data_ref="input" numerical="true" label="Column with reference position"/>
+        <param name="ref_species" type="select" label="Choose reference species">
+          <options from_file="gd.ref_species.txt">
+            <column name="name" index="1"/>
+            <column name="value" index="0"/>
+          </options>
+        </param>
+      </when>
+    </conditional>
+  </inputs>
+
+  <outputs>
+    <data format="gd_snp" name="output" metadata_source="input"/>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp"/>
+      <param name="num_snps" value="100"/>
+      <param name="choice" value="0"/>
+      <output name="output" file="test_out/select_snps/select_snps.gd_snp" />
+    </test>
+  </tests>
+
+
+  <help>
+
+**What it does**
+
+  This tool attempts to select a specified number of SNPs from the dataset, making them
+  approximately uniformly spaced relative to the reference genome. The number
+  actually selected may be slightly more than the specified number.
+
+-----
+
+**Example**
+
+- input file::
+
+    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
+    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
+    chr18_50154905_50155664  304   A  G  Y  C  chr18  50155208  A  Y  4   2  17   5   1  22   Y  8    0.022  0.996  0.128  0
+    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
+    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
+    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
+    etc.
+
+- output file::
+
+    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
+    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
+    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
+    etc.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff specify.xml
--- a/specify.xml	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-<tool id="gd_specify" name="Specify Individuals" version="1.0.0">
-  <description>: Define a collection of individuals from a gd_snp dataset</description>
-
-  <command interpreter="bash">
-    echo.bash "$input" "$output"
-    #for $individual in str($individuals).split(',')
-        #set $individual_idx = $input.dataset.metadata.individual_names.index($individual)
-        #set $individual_col = str( $input.dataset.metadata.individual_columns[$individual_idx] )
-        #set $arg = '\t'.join([$individual_col, $individual, ''])
-        "$arg"
-    #end for
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="gd_snp" label="SNP dataset"/>
-    <param name="individuals" type="select" display="checkboxes" multiple="true" label="Individuals to include">
-      <options>
-        <filter type="data_meta" ref="input" key="individual_names" />
-      </options>
-      <validator type="no_options" message="You must select at least one individual."/>
-    </param>
-    <param name="outname" type="text" size="20" label="Label for this collection">
-      <validator type="empty_field" message="You must enter a label."/>
-      #used to be "Individuals from ${input.hid}"
-    </param>
-  </inputs>
-
-  <outputs>
-    <data name="output" format="gd_indivs" label="${outname}" />
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="test_in/sample.gd_snp" ftype="gd_snp" />
-      <param name="individuals" value="PB1,PB2" />
-      <output name="output" file="test_in/a.gd_indivs" />
-    </test>
-  </tests>
-
-  <help>
-
-**Dataset formats**
-
-The input dataset is in gd_snp_ format;
-the output is in gd_indivs_ format.  (`Dataset missing?`_)
-
-.. _gd_snp: ./static/formatHelp.html#gd_snp
-.. _gd_indivs: ./static/formatHelp.html#gd_indivs
-.. _Dataset missing?: ./static/formatHelp.html
-
------
-
-**What it does**
-
-This tool makes a list of selected entities (the sets of four columns
-representing individuals or groups) from a gd_snp dataset.  It does not copy
-the SNP data; it just records which entities should be considered as belonging
-to some collection or population.  The label you specify is used to name the
-output dataset in your history.  This list can then be used to instruct other
-tools to work on just part of the original gd_snp dataset.
-
------
-
-**Example**
-
-- input::
-
-   Contig161_chr1_4641264_4641879   115  C  T  73.5   chr1   4641382  C   6  0  2  45   8  0  2  51   15  0  2  72   5  0  2  42   6  0  2  45  10  0  2  57   Y  54  0.323  0
-   Contig48_chr1_10150253_10151311   11  A  G  94.3   chr1  10150264  A   1  0  2  30   1  0  2  30    1  0  2  30   3  0  2  36   1  0  2  30   1  0  2  30   Y  22  +99.   0
-   Contig20_chr1_21313469_21313570   66  C  T  54.0   chr1  21313534  C   4  0  2  39   4  0  2  39    5  0  2  42   4  0  2  39   4  0  2  39   5  0  2  42   N   1  +99.   0
-   etc.
-
-- input metadata::
-
-   #{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc",
-   #"1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q",
-   #"pair","dist","prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],
-   #"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
-
-- output when individuals PB1, PB2, and PB3 are selected::
-
-   9   PB1
-   13  PB2
-   17  PB3
-
-  </help>
-</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff specify_restriction_enzymes.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/specify_restriction_enzymes.py	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+import os
+import sys
+from optparse import OptionParser
+import genome_diversity as gd
+
+def main_function( parse_arguments=None ):
+    """Return a decorator that makes main(options, arguments) a command-line entry point.
+
+    The wrapper exits with main's return value, or reports the exception and exits 1.
+    """
+    import traceback  # local import: this file's import header does not provide it
+    parse = parse_arguments if parse_arguments is not None else ( lambda arguments: ( None, arguments ) )
+    def main_decorator( to_decorate ):
+        def decorated_main( arguments=None ):
+            options, arguments = parse( sys.argv if arguments is None else arguments )
+            try:
+                sys.exit( to_decorate( options, arguments ) )
+            except Exception as err:
+                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
+                traceback.print_exc()
+                sys.exit( 1 )
+        return decorated_main
+    return main_decorator
+
+def parse_arguments( arguments ):
+    """Parse the tool's command line and return optparse's (options, arguments) pair.
+
+    arguments is a full argv-style list: its first element (the program name) is
+    ignored.  The integer options (--scaffold_col, --pos_col) are converted by
+    optparse; options that are not given are left as None.
+    """
+    # One row per option: (flag, optparse type, destination attribute, help text).
+    option_specs = (
+        ( '--input', 'string', 'input', 'file of selected SNPs' ),
+        ( '--output', 'string', 'output', 'output file' ),
+        ( '--primers_loc', 'string', 'primers_loc', 'primers .loc file' ),
+        ( '--scaffold_col', 'int', 'scaffold_col', 'scaffold column in the input file' ),
+        ( '--pos_col', 'int', 'pos_col', 'position column in the input file' ),
+        ( '--enzyme_list', 'string', 'enzyme_list_string', 'comma separated list of enzymes' ),
+        ( '--species', 'string', 'species', 'species' ),
+    )
+
+    parser = OptionParser()
+    for flag, kind, dest, help_text in option_specs:
+        parser.add_option( flag, type=kind, dest=dest, help=help_text )
+
+    # Skip the program name before handing the list to optparse.
+    return parser.parse_args( arguments[1:] )
+
+
+@main_function( parse_arguments )
+def main( options, arguments ):
+    """Copy to --output the SNPs of --input that at least one listed enzyme is recorded for.
+
+    For every SNP, the enzymes the primers file returns for its scaffold and
+    position are compared with --enzyme_list; on a match the SNP line is
+    written once.  The input's comment lines are written a single time, just
+    before the first matching SNP.
+
+    Raises RuntimeError when a required option is missing, empty or 0.
+    """
+    # Checked in this order so the first missing option is the one reported.
+    required = (
+        ( options.input, '--input' ),
+        ( options.output, '--output' ),
+        ( options.primers_loc, '--primers_loc' ),
+        ( options.scaffold_col, '--scaffold_col' ),
+        ( options.pos_col, '--pos_col' ),
+        ( options.enzyme_list_string, '--enzyme_list' ),
+        ( options.species, '--species' ),
+    )
+    for value, flag in required:
+        if not value:
+            raise RuntimeError( 'missing %s option' % flag )
+
+    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
+    out_fh = gd._openfile( options.output, 'w' )
+    try:
+        # A set gives the same membership test as the former dict of 1s.
+        wanted = set( name.strip() for name in options.enzyme_list_string.split( ',' ) if name.strip() )
+
+        primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
+        primer_index_file = os.path.splitext( primer_data_file )[0] + ".cdb"
+        primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )
+
+        comments_printed = False
+        while snps.next():
+            seq, pos = snps.get_seq_pos()
+            if any( enzyme in wanted for enzyme in primers.get_enzymes( seq, pos ) ):
+                if not comments_printed:
+                    for comment in snps.comments:
+                        out_fh.write( "%s\n" % comment )
+                    comments_printed = True
+                out_fh.write( "%s\n" % snps.line )
+    finally:
+        # Close the output even when a lookup or write fails part-way.
+        out_fh.close()
+
+if __name__ == "__main__":
+    main()
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff specify_restriction_enzymes.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/specify_restriction_enzymes.xml	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,90 @@
+<tool id="gd_specify_restriction_enzymes" name="Differential Cleavage" version="1.0.0">
+  <description>: Select SNPs differentially cut by specified restriction enzymes</description>
+
+  <command interpreter="python">
+    specify_restriction_enzymes.py "--input=$input" "--output=$output" "--primers_loc=${GALAXY_DATA_INDEX_DIR}/gd.primers.loc"
+    #if $override_metadata.choice == "0":
+      "--scaffold_col=${input.metadata.scaffold}" "--pos_col=${input.metadata.pos}" "--species=${input.metadata.species}"
+    #else
+      "--scaffold_col=$scaf_col" "--pos_col=$pos_col" "--species=$species"
+    #end if
+    "--enzyme_list=$enzymes"
+  </command>
+
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Selected SNPs dataset"/>
+    <conditional name="override_metadata">
+      <param name="choice" type="select" format="integer" label="Choose columns">
+        <option value="0" selected="true">No, get columns from metadata</option>
+        <option value="1" >Yes, choose columns</option>
+      </param>
+      <when value="0" />
+      <when value="1">
+        <param name="scaf_col" type="data_column" data_ref="input" numerical="false" label="Column with scaffold"/>
+        <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
+        <param name="species" type="select" label="Choose species">
+          <options from_file="gd.species.txt">
+            <column name="name" index="1"/>
+            <column name="value" index="0"/>
+          </options>
+        </param>
+      </when>
+    </conditional>
+
+    <param name="enzymes" type="select" display="checkboxes" multiple="true" label="Choose enzymes">
+        <options from_file="gd.restriction_enzymes.txt">
+            <column name="name" index="0"/>
+            <column name="value" index="1"/>
+        </options>
+    </param>
+  </inputs>
+
+  <outputs>
+    <data format="gd_snp" name="output" metadata_source="input"/>
+  </outputs>
+
+  <tests>
+    <test>
+      <param name="input" value="test_out/select_snps/select_snps.gd_snp" ftype="gd_snp" />
+      <param name="choice" value="0" />
+      <param name="enzymes" value="Bsp1286I,HaeII,RsaI" />
+      <output name="output" file="test_out/specify_restriction_enzymes/specify_restriction_enzymes.gd_snp" />
+    </test>
+  </tests>
+
+  <help>
+
+**What it does**
+
+  It selects the SNPs that are differentially cut by at least one of the
+  specified restriction enzymes. The enzymes are required to cut the amplified
+  segment (for the specified PCR primers) only at the SNP.
+
+-----
+
+**Example**
+
+- input file::
+
+    chr2_75111355_75112576    314  A  C  L  F  chr2   75111676  C  F  15  4  53   2   9  48   Y  96   0.369  0.355  0.396  0
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr10_7434473_7435447    524   T  C  S  S  chr10  7435005   T  S  11  5  90   14  0  69   Y  626  0.066  0.406  0.727  0
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr15_64470252_64471048  89    G  A  Y  Y  chr15  64470341  G  Y  5   6  109  14  0  69   Y  312  0.247  0.998  0.393  0
+    chr18_48070585_48071386  514   C  T  E  K  chr18  48071100  T  K  7   7  46   14  0  69   Y  2    0.200  0.032  0.163  0
+    chr18_50154905_50155664  304   A  G  Y  C  chr18  50155208  A  Y  4   2  17   5   1  22   Y  8    0.022  0.996  0.128  0
+    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
+    chr19_14240610_14242055  232   C  T  A  V  chr19  14240840  C  A  18  8  56   15  5  42   Y  73   0.003  0.153  0.835  0
+    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
+    etc.
+
+- output file::
+
+    chr8_93901796_93905612   2471  A  C  A  A  chr8   93904264  A  A  8   0  51   10  2  14   Y  961  0.016  0.534  0.114  2
+    chr14_80021455_80022064  138   G  A  H  H  chr14  80021593  G  H  14  0  69   9   6  124  Y  377  0.118  0.997  0.195  1
+    chr18_57379354_57380496  315   C  T  V  V  chr18  57379669  G  V  11  0  60   9   6  62   Y  726  0.118  0.048  0.014  1
+    chr19_39866997_39874915  3117  C  T  P  P  chr19  39870110  C  P  3   7  65   14  2  32   Y  6    0.321  0.911  0.462  4
+    etc.
+
+  </help>
+</tool>
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_in/a.gd_indivs
--- a/test-data/test_in/a.gd_indivs	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-9	PB1	
-13	PB2	
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_in/b.gd_indivs
--- a/test-data/test_in/b.gd_indivs	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-17	PB3	
-21	PB4	
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_in/c.gd_indivs
--- a/test-data/test_in/c.gd_indivs	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-25	PB6	
-29	PB8	
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_in/ensembl.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_in/ensembl.tabular	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,150 @@
+ENSCAFT00000000001
+ENSCAFT00000000144
+ENSCAFT00000000160
+ENSCAFT00000000215
+ENSCAFT00000000233
+ENSCAFT00000000365
+ENSCAFT00000000507
+ENSCAFT00000000517
+ENSCAFT00000000674
+ENSCAFT00000000724
+ENSCAFT00000000760
+ENSCAFT00000000762
+ENSCAFT00000001047
+ENSCAFT00000001052
+ENSCAFT00000001063
+ENSCAFT00000001076
+ENSCAFT00000001104
+ENSCAFT00000001141
+ENSCAFT00000001146
+ENSCAFT00000001204
+ENSCAFT00000001219
+ENSCAFT00000001250
+ENSCAFT00000001352
+ENSCAFT00000001363
+ENSCAFT00000001421
+ENSCAFT00000001523
+ENSCAFT00000001575
+ENSCAFT00000001587
+ENSCAFT00000001597
+ENSCAFT00000002056
+ENSCAFT00000002100
+ENSCAFT00000002110
+ENSCAFT00000002175
+ENSCAFT00000002259
+ENSCAFT00000002460
+ENSCAFT00000002537
+ENSCAFT00000002577
+ENSCAFT00000002578
+ENSCAFT00000002660
+ENSCAFT00000002792
+ENSCAFT00000002849
+ENSCAFT00000002999
+ENSCAFT00000003163
+ENSCAFT00000003223
+ENSCAFT00000003307
+ENSCAFT00000003515
+ENSCAFT00000003560
+ENSCAFT00000003644
+ENSCAFT00000003824
+ENSCAFT00000003840
+ENSCAFT00000004092
+ENSCAFT00000004103
+ENSCAFT00000004208
+ENSCAFT00000004253
+ENSCAFT00000004311
+ENSCAFT00000004464
+ENSCAFT00000004511
+ENSCAFT00000004609
+ENSCAFT00000004673
+ENSCAFT00000004726
+ENSCAFT00000004799
+ENSCAFT00000004933
+ENSCAFT00000004993
+ENSCAFT00000005126
+ENSCAFT00000005142
+ENSCAFT00000005225
+ENSCAFT00000005323
+ENSCAFT00000005467
+ENSCAFT00000005496
+ENSCAFT00000005518
+ENSCAFT00000005653
+ENSCAFT00000005746
+ENSCAFT00000005749
+ENSCAFT00000005832
+ENSCAFT00000005972
+ENSCAFT00000006025
+ENSCAFT00000006114
+ENSCAFT00000006157
+ENSCAFT00000006219
+ENSCAFT00000006272
+ENSCAFT00000006453
+ENSCAFT00000006479
+ENSCAFT00000006507
+ENSCAFT00000006669
+ENSCAFT00000006689
+ENSCAFT00000006827
+ENSCAFT00000006891
+ENSCAFT00000007130
+ENSCAFT00000007145
+ENSCAFT00000007244
+ENSCAFT00000007375
+ENSCAFT00000007440
+ENSCAFT00000007467
+ENSCAFT00000007484
+ENSCAFT00000007527
+ENSCAFT00000007553
+ENSCAFT00000007697
+ENSCAFT00000007703
+ENSCAFT00000007747
+ENSCAFT00000007774
+ENSCAFT00000007776
+ENSCAFT00000007779
+ENSCAFT00000007859
+ENSCAFT00000007951
+ENSCAFT00000007959
+ENSCAFT00000008012
+ENSCAFT00000008063
+ENSCAFT00000008142
+ENSCAFT00000008198
+ENSCAFT00000008413
+ENSCAFT00000008540
+ENSCAFT00000008586
+ENSCAFT00000008588
+ENSCAFT00000008673
+ENSCAFT00000008678
+ENSCAFT00000008728
+ENSCAFT00000008769
+ENSCAFT00000008831
+ENSCAFT00000009074
+ENSCAFT00000009114
+ENSCAFT00000009614
+ENSCAFT00000009698
+ENSCAFT00000009710
+ENSCAFT00000010094
+ENSCAFT00000010141
+ENSCAFT00000010439
+ENSCAFT00000010496
+ENSCAFT00000010516
+ENSCAFT00000010531
+ENSCAFT00000010559
+ENSCAFT00000010593
+ENSCAFT00000010616
+ENSCAFT00000010630
+ENSCAFT00000010829
+ENSCAFT00000010865
+ENSCAFT00000010931
+ENSCAFT00000010977
+ENSCAFT00000010988
+ENSCAFT00000011187
+ENSCAFT00000011380
+ENSCAFT00000011397
+ENSCAFT00000011721
+ENSCAFT00000011730
+ENSCAFT00000011771
+ENSCAFT00000011789
+ENSCAFT00000011968
+ENSCAFT00000012081
+ENSCAFT00000012133
+ENSCAFT00000012159
+ENSCAFT00000012254
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_in/sample.gd_sap
--- a/test-data/test_in/sample.gd_sap	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,401 +0,0 @@
-#{"column_names":["contig","pos","ref","rPos","trns","pep","AA1","loc","AA2","KEGG","pred","path"],"pos":2,"rPos":4,"ref":3,"dbkey":"canFam2","scaffold":1,"species":"bear","kegg_gene":10,"kegg_path":12}
-Contig39_chr1_3261104_3261850	414	chr1	3261546	ENSCAFT00000000001	ENSCAFP00000000001	S	667	F	476153	probably damaging	cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
-Contig62_chr1_19011969_19012646	265	chr1	19012240	ENSCAFT00000000144	ENSCAFP00000000125	*	161	R	483960	probably damaging	N
-Contig36_chr1_20102654_20103213	365	chr1	20103029	ENSCAFT00000000160	ENSCAFP00000000140	R	407	Q	610160	possibly damaging	N
-Contig136_chr10_3710404_3714591	3079	chr10	3713499	ENSCAFT00000000215	ENSCAFP00000000194	T	103	P	U	benign	N
-Contig36_chr1_23682012_23682647	374	chr1	23682388	ENSCAFT00000000233	ENSCAFP00000000210	N	234	S	483973	benign	N
-Contig163_chr10_4573526_4574494	487	chr10	4574010	ENSCAFT00000000365	ENSCAFP00000000332	R	186	K	474414	benign	cfa00450=Selenocompound metabolism.cfa00970=Aminoacyl-tRNA biosynthesis
-Contig55_chr1_40056604_40059808	2081	chr1	40058686	ENSCAFT00000000507	ENSCAFP00000000458	I	247	K	484023	possibly damaging	N
-Contig17_chr1_40203628_40205630	1417	chr1	40205044	ENSCAFT00000000517	ENSCAFP00000000468	N	109	S	476233	benign	N
-Contig97_chr1_44847984_44848380	285	chr1	44848272	ENSCAFT00000000674	ENSCAFP00000000618	Q	27	R	611986	benign	N
-Contig214_chr10_16106753_16106969	121	chr10	16106873	ENSCAFT00000000724	ENSCAFP00000000668	A	301	T	609478	benign	N
-Contig75_chr1_45731970_45732932	436	chr1	45732397	ENSCAFT00000000760	ENSCAFP00000000701	I	490	V	U	benign	N
-Contig33_chr1_45614845_45617413	1835	chr1	45616685	ENSCAFT00000000760	ENSCAFP00000000701	A	4390	V	U	benign	N
-Contig95_chr10_18829724_18831056	914	chr10	18830645	ENSCAFT00000000762	ENSCAFP00000000703	A	512	V	U	possibly damaging	N
-Contig197_chr13_8622062_8623071	606	chr13	8622665	ENSCAFT00000001047	ENSCAFP00000000959	T	406	I	475067	possibly damaging	cfa00240=Pyrimidine metabolism.cfa00410=beta-Alanine metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa00983=Drug metabolism - other enzymes.cfa01100=Metabolic pathways
-Contig243_chr10_19959210_19960069	701	chr10	19959858	ENSCAFT00000001052	ENSCAFP00000000964	E	1345	K	U	benign	N
-Contig137_chr13_10622950_10624043	1039	chr13	10623979	ENSCAFT00000001063	ENSCAFP00000000975	E	10	K	481999	benign	N
-Contig137_chr13_10622950_10624043	1006	chr13	10623946	ENSCAFT00000001063	ENSCAFP00000000975	R	21	C	481999	probably damaging	N
-Contig115_chr12_4411478_4412322	124	chr12	4411614	ENSCAFT00000001076	ENSCAFP00000000986	R	177	H	U	benign	N
-Contig150_chr12_4438230_4439944	385	chr12	4438614	ENSCAFT00000001104	ENSCAFP00000001014	Y	277	D	607591	benign	N
-Contig84_chr1_52076858_52077103	80	chr1	52076943	ENSCAFT00000001141	ENSCAFP00000001046	C	147	Y	484064	benign	N
-Contig29_chr13_13215547_13217183	793	chr13	13216352	ENSCAFT00000001146	ENSCAFP00000001050	P	1	R	475076	probably damaging	N
-Contig251_chr10_22876556_22877097	152	chr10	22876714	ENSCAFT00000001204	ENSCAFP00000001103	E	1162	D	481203	benign	N
-Contig21_chr10_22964856_22965302	202	chr10	22965058	ENSCAFT00000001219	ENSCAFP00000001115	P	6	Q	474465	benign	N
-Contig199_chr12_5083018_5084534	453	chr12	5083472	ENSCAFT00000001250	ENSCAFP00000001144	I	185	T	481729.481731	benign	N.cfa04145=Phagosome.cfa04514=Cell adhesion molecules (CAMs).cfa04612=Antigen processing and presentation.cfa04672=Intestinal immune network for IgA production.cfa04940=Type I diabetes mellitus.cfa05140=Leishmaniasis.cfa05145=Toxoplasmosis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05164=Influenza A.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05332=Graft-versus-host disease.cfa05416=Viral myocarditis
-Contig41_chr13_21629998_21630487	161	chr13	21630157	ENSCAFT00000001352	ENSCAFP00000001239	P	729	S	482026	possibly damaging	cfa00565=Ether lipid metabolism
-Contig16_chr13_21786766_21788016	169	chr13	21786927	ENSCAFT00000001363	ENSCAFP00000001249	V	1142	A	475084	benign	cfa03022=Basal transcription factors
-Contig60_chr1_60333035_60333884	731	chr1	60333755	ENSCAFT00000001421	ENSCAFP00000001307	V	400	I	484096	benign	N
-Contig44_chr13_24555640_24556298	499	chr13	24556139	ENSCAFT00000001523	ENSCAFP00000001400	N	660	S	475088	benign	N
-Contig153_chr12_5955114_5958935	2950	chr12	5958094	ENSCAFT00000001575	ENSCAFP00000001449	E	13	D	481744	benign	cfa04141=Protein processing in endoplasmic reticulum
-Contig146_chr13_25076435_25077249	723	chr13	25077165	ENSCAFT00000001587	ENSCAFP00000001461	T	9	S	482035	benign	N
-Contig81_chr13_25579918_25582207	874	chr13	25580772	ENSCAFT00000001597	ENSCAFP00000001469	E	62	G	609411	benign	N
-Contig159_chr10_28604683_28606028	753	chr10	28605433	ENSCAFT00000002056	ENSCAFP00000001903	S	79	P	610014	benign	N
-Contig30_chr11_29945215_29949829	3973	chr11	29949181	ENSCAFT00000002100	ENSCAFP00000001944	M	282	T	U	benign	N
-Contig102_chr10_29039231_29041280	829	chr10	29040065	ENSCAFT00000002110	ENSCAFP00000001953	R	311	Q	481249	unknown	N
-Contig187_chr1_78583588_78584279	250	chr1	78583839	ENSCAFT00000002175	ENSCAFP00000002014	K	176	R	476310	benign	N
-Contig199_chr1_79234891_79237527	384	chr1	79235278	ENSCAFT00000002259	ENSCAFP00000002095	V	403	A	484151	benign	N
-Contig119_chr12_12212738_12214663	1005	chr12	12213720	ENSCAFT00000002460	ENSCAFP00000002280	R	749	Q	481785	possibly damaging	N
-Contig119_chr12_12212738_12214663	918	chr12	12213633	ENSCAFT00000002460	ENSCAFP00000002280	R	778	Q	481785	benign	N
-Contig39_chr14_10730123_10732539	335	chr14	10730462	ENSCAFT00000002537	ENSCAFP00000002356	V	1179	E	U	benign	N
-Contig41_chr1_84886710_84894794	3494	chr1	84890207	ENSCAFT00000002577	ENSCAFP00000002394	E	1089	K	484157	possibly damaging	N
-Contig182_chr12_13881114_13883427	1690	chr12	13882828	ENSCAFT00000002578	ENSCAFP00000002395	S	99	G	608906	benign	N
-Contig34_chr11_48151988_48152712	198	chr11	48152205	ENSCAFT00000002660	ENSCAFP00000002468	C	587	R	U	possibly damaging	N
-Contig37_chr10_34118256_34119269	437	chr10	34118687	ENSCAFT00000002792	ENSCAFP00000002588	A	377	T	474523	benign	N
-Contig21_chr14_16091274_16093278	716	chr14	16091997	ENSCAFT00000002849	ENSCAFP00000002642	R	126	C	475216	probably damaging	N
-Contig57_chr1_90983602_90984717	559	chr1	90984158	ENSCAFT00000002999	ENSCAFP00000002781	A	226	V	U	benign	N
-Contig45_chr12_15798569_15798849	141	chr12	15798709	ENSCAFT00000003163	ENSCAFP00000002938	N	342	S	474921	benign	cfa03040=Spliceosome
-Contig83_chr12_17852905_17859596	2392	chr12	17855305	ENSCAFT00000003223	ENSCAFP00000002995	E	770	Q	474925	benign	N
-Contig41_chr12_18725392_18725889	169	chr12	18725560	ENSCAFT00000003307	ENSCAFP00000003070	R	80	Q	609995	benign	N
-Contig9_chr14_26125779_26127414	486	chr14	26126264	ENSCAFT00000003515	ENSCAFP00000003259	P	123	T	482316	benign	N
-Contig132_chr1_101565951_101566612	255	chr1	101566210	ENSCAFT00000003560	ENSCAFP00000003298	L	588	F	U	unknown	N
-Contig142_chr1_102093954_102094392	121	chr1	102094072	ENSCAFT00000003644	ENSCAFP00000003373	K	120	E	484216	benign	cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00970=Aminoacyl-tRNA biosynthesis
-Contig129_chr14_34071666_34074617	2313	chr14	34073957	ENSCAFT00000003824	ENSCAFP00000003537	T	282	I	475249	probably damaging	N
-Contig147_chr14_34262125_34262938	340	chr14	34262468	ENSCAFT00000003840	ENSCAFP00000003553	I	70	V	482333	benign	N
-Contig52_chr12_36031985_36035244	1237	chr12	36033208	ENSCAFT00000004092	ENSCAFP00000003784	Y	564	H	474960	benign	N
-Contig176_chr1_105494865_105495258	119	chr1	105494995	ENSCAFT00000004103	ENSCAFP00000003793	A	406	V	484298	benign	N
-Contig60_chr11_63130652_63131816	702	chr11	63131349	ENSCAFT00000004208	ENSCAFP00000003892	V	260	I	481637	benign	N
-Contig9_chr10_53579958_53582510	688	chr10	53580646	ENSCAFT00000004253	ENSCAFP00000003937	S	191	G	100534006.100534007.474588	benign	N
-Contig93_chr14_38451661_38452163	221	chr14	38451882	ENSCAFT00000004311	ENSCAFP00000003990	A	420	V	482346	benign	N
-Contig70_chr12_42859511_42860010	180	chr12	42859693	ENSCAFT00000004464	ENSCAFP00000004126	P	7	S	481892	possibly damaging	N
-Contig28_chr12_43447144_43449156	1136	chr12	43448279	ENSCAFT00000004511	ENSCAFP00000004169	V	582	M	481893	benign	N
-Contig18_chr13_62535238_62535697	227	chr13	62535471	ENSCAFT00000004609	ENSCAFP00000004263	E	277	D	611755	benign	N
-Contig282_chr1_108960925_108962235	205	chr1	108961141	ENSCAFT00000004673	ENSCAFP00000004325	A	149	V	611817	benign	N
-Contig110_chr1_109196028_109197290	987	chr1	109197021	ENSCAFT00000004726	ENSCAFP00000004374	E	330	D	610047	benign	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa03430=Mismatch repair.cfa03440=Homologous recombination.cfa05166=HTLV-I infection
-Contig89_chr11_69097905_69099099	568	chr11	69098443	ENSCAFT00000004799	ENSCAFP00000004445	E	1317	G	U	benign	N
-Contig118_chr14_46155051_46155557	173	chr14	46155218	ENSCAFT00000004933	ENSCAFP00000004572	S	110	L	482382	benign	cfa04621=NOD-like receptor signaling pathway.cfa05133=Pertussis
-Contig54_chr12_51910786_51912716	682	chr12	51911460	ENSCAFT00000004993	ENSCAFP00000004630	H	2889	Y	474995	benign	cfa03008=Ribosome biogenesis in eukaryotes
-Contig95_chr10_67698730_67699605	267	chr10	67698997	ENSCAFT00000005126	ENSCAFP00000004751	P	45	L	U	benign	N
-Contig265_chr17_3177908_3178389	332	chr17	3178241	ENSCAFT00000005142	ENSCAFP00000004763	A	306	P	606804	benign	N
-Contig322_chr17_4977962_4979371	1122	chr17	4979079	ENSCAFT00000005225	ENSCAFP00000004836	T	319	I	475647	possibly damaging	N
-Contig48_chr11_71453437_71456331	1725	chr11	71455160	ENSCAFT00000005323	ENSCAFP00000004927	A	226	V	U	benign	N
-Contig51_chr16_4789440_4790118	484	chr16	4789915	ENSCAFT00000005467	ENSCAFP00000005065	Q	318	H	U	benign	N
-Contig32_chr12_57224809_57225619	146	chr12	57224960	ENSCAFT00000005496	ENSCAFP00000005093	A	273	T	481925	benign	N
-Contig6_chr14_59310933_59312532	615	chr14	59311551	ENSCAFT00000005518	ENSCAFP00000005112	Y	304	H	492302	probably damaging	cfa02010=ABC transporters.cfa04971=Gastric acid secretion.cfa04972=Pancreatic secretion.cfa04976=Bile secretion
-Contig89_chr11_74391566_74395656	2856	chr11	74394408	ENSCAFT00000005653	ENSCAFP00000031395	R	450	H	403417	benign	cfa04145=Phagosome.cfa04620=Toll-like receptor signaling pathway.cfa05132=Salmonella infection.cfa05133=Pertussis.cfa05134=Legionellosis.cfa05140=Leishmaniasis.cfa05142=Chagas disease (American trypanosomiasis).cfa05144=Malaria.cfa05145=Toxoplasmosis.cfa05146=Amoebiasis.cfa05152=Tuberculosis.cfa05162=Measles.cfa05164=Influenza A.cfa05323=Rheumatoid arthritis
-Contig15_chr1_109713951_109714808	645	chr1	109714594	ENSCAFT00000005746	ENSCAFP00000005319	R	783	K	476410	benign	cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway
-Contig47_chr17_11258085_11259619	360	chr17	11258455	ENSCAFT00000005749	ENSCAFP00000005322	V	778	L	610007	benign	N
-Contig1_chr19_4352123_4352541	311	chr19	4352427	ENSCAFT00000005832	ENSCAFP00000005401	H	7	Y	403584	benign	cfa04060=Cytokine-cytokine receptor interaction.cfa04630=Jak-STAT signaling pathway.cfa04672=Intestinal immune network for IgA production.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05323=Rheumatoid arthritis
-Contig57_chr12_66915864_66916357	337	chr12	66916199	ENSCAFT00000005972	ENSCAFP00000005534	F	1242	L	475012	benign	N
-Contig36_chr17_16182220_16182772	282	chr17	16182494	ENSCAFT00000006025	ENSCAFP00000005583	V	13	I	482980	possibly damaging	N
-Contig64_chr19_15052202_15053292	240	chr19	15052443	ENSCAFT00000006114	ENSCAFP00000005658	I	175	V	483829	benign	N
-Contig169_chr12_69415779_69417261	1136	chr12	69416908	ENSCAFT00000006157	ENSCAFP00000005701	D	85	N	475021	possibly damaging	N
-Contig200_chr18_15803806_15804082	169	chr18	15803976	ENSCAFT00000006219	ENSCAFP00000005760	A	66	V	483261	benign	cfa04972=Pancreatic secretion.cfa04978=Mineral absorption
-Contig6_chr18_15814044_15814404	97	chr18	15814150	ENSCAFT00000006219	ENSCAFP00000005760	A	413	S	483261	benign	cfa04972=Pancreatic secretion.cfa04978=Mineral absorption
-Contig104_chr1_110433641_110434230	183	chr1	110433810	ENSCAFT00000006272	ENSCAFP00000005811	A	315	T	484394	benign	cfa00280=Valine, leucine and isoleucine degradation.cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
-Contig52_chr18_17851226_17851871	284	chr18	17851509	ENSCAFT00000006453	ENSCAFP00000005976	T	311	M	475893	probably damaging	N
-Contig63_chr16_12167721_12168304	388	chr16	12168099	ENSCAFT00000006479	ENSCAFP00000006000	M	634	V	U	benign	N
-Contig101_chr20_4702659_4703738	441	chr20	4703092	ENSCAFT00000006507	ENSCAFP00000006027	G	635	D	484622	probably damaging	cfa03030=DNA replication.cfa04110=Cell cycle
-Contig53_chr19_21456428_21457881	408	chr19	21456840	ENSCAFT00000006669	ENSCAFP00000006174	R	247	L	476094	possibly damaging	N
-Contig58_chr18_19883250_19884312	250	chr18	19883498	ENSCAFT00000006689	ENSCAFP00000006194	*	503	Y	475897	benign	N
-Contig122_chr15_17034758_17035049	142	chr15	17034893	ENSCAFT00000006827	ENSCAFP00000006320	R	117	P	U	benign	N
-Contig131_chr18_20356930_20357227	113	chr18	20357041	ENSCAFT00000006891	ENSCAFP00000006378	V	55	L	610021	benign	N
-Contig117_chr22_5859195_5860740	654	chr22	5859850	ENSCAFT00000007130	ENSCAFP00000006603	S	139	N	485445	benign	cfa04020=Calcium signaling pathway.cfa04080=Neuroactive ligand-receptor interaction
-Contig91_chr17_23506302_23507213	322	chr17	23506624	ENSCAFT00000007145	ENSCAFP00000006614	V	1644	I	607961	benign	N
-Contig3_chr21_16586556_16586852	105	chr21	16586661	ENSCAFT00000007244	ENSCAFP00000006709	C	33	Y	476781	possibly damaging	N
-Contig62_chr2_22645987_22646907	357	chr2	22646352	ENSCAFT00000007375	ENSCAFP00000006833	V	657	F	403767	probably damaging	cfa04977=Vitamin digestion and absorption
-Contig52_chr15_18032498_18034281	880	chr15	18033373	ENSCAFT00000007440	ENSCAFP00000006895	P	227	A	482516	benign	N
-Contig131_chr23_6679385_6679850	198	chr23	6679592	ENSCAFT00000007467	ENSCAFP00000006915	R	136	G	485576	possibly damaging	N
-Contig157_chr22_10584088_10586765	232	chr22	10584326	ENSCAFT00000007484	ENSCAFP00000006926	M	610	T	609336	benign	N
-Contig164_chr2_24336024_24340161	2420	chr2	24338436	ENSCAFT00000007527	ENSCAFP00000006969	S	824	C	607108	probably damaging	N
-Contig109_chr2_24557417_24558710	808	chr2	24558229	ENSCAFT00000007553	ENSCAFP00000006994	L	606	V	487123	benign	cfa03450=Non-homologous end-joining.cfa05340=Primary immunodeficiency
-Contig194_chr15_18573761_18574204	142	chr15	18573904	ENSCAFT00000007697	ENSCAFP00000007130	V	381	I	475382	benign	N
-Contig133_chr23_9924894_9925887	125	chr23	9925016	ENSCAFT00000007703	ENSCAFP00000007136	P	355	S	477019	benign	cfa03430=Mismatch repair.cfa03460=Fanconi anemia pathway.cfa05200=Pathways in cancer.cfa05210=Colorectal cancer.cfa05213=Endometrial cancer
-Contig31_chr23_10199273_10203629	4073	chr23	10203350	ENSCAFT00000007747	ENSCAFP00000007179	A	1844	V	U	benign	N
-Contig21_chr23_10308212_10309269	513	chr23	10308732	ENSCAFT00000007774	ENSCAFP00000007206	K	72	R	477021	benign	cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04514=Cell adhesion molecules (CAMs).cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy
-Contig211_chr1_114924893_114925515	171	chr1	114925067	ENSCAFT00000007776	ENSCAFP00000007208	P	1988	A	U	benign	N
-Contig35_chr2_27160577_27161526	804	chr2	27161367	ENSCAFT00000007779	ENSCAFP00000007211	G	473	R	478007.478008	probably damaging	cfa03060=Protein export.cfa04141=Protein processing in endoplasmic reticulum.cfa04145=Phagosome
-Contig79_chr17_24285444_24286769	1263	chr17	24286694	ENSCAFT00000007859	ENSCAFP00000007285	S	209	T	483010	benign	N
-Contig74_chr23_10871047_10871362	70	chr23	10871116	ENSCAFT00000007951	ENSCAFP00000007365	I	474	V	U	benign	N
-Contig34_chr16_18928689_18932806	3409	chr16	18932072	ENSCAFT00000007959	ENSCAFP00000007370	A	3754	S	482810.611087	benign	cfa00310=Lysine degradation
-Contig52_chr21_24452521_24454405	725	chr21	24453245	ENSCAFT00000008012	ENSCAFP00000007418	M	289	T	485173	possibly damaging	N
-Contig261_chr1_115563599_115564561	560	chr1	115564156	ENSCAFT00000008063	ENSCAFP00000007465	A	63	T	484489	possibly damaging	N
-Contig62_chr19_41037398_41039465	159	chr19	41037564	ENSCAFT00000008142	ENSCAFP00000007541	C	744	Y	476128	possibly damaging	N
-Contig84_chr1_115960693_115962811	1467	chr1	115962120	ENSCAFT00000008198	ENSCAFP00000007593	W	61	R	612489	benign	N
-Contig135_chr23_14160194_14160717	270	chr23	14160468	ENSCAFT00000008413	ENSCAFP00000007796	V	298	I	U	benign	N
-Contig41_chr17_26203621_26205196	1407	chr17	26205028	ENSCAFT00000008540	ENSCAFP00000007913	H	172	R	483021	benign	N
-Contig260_chr1_116076701_116078120	746	chr1	116077446	ENSCAFT00000008586	ENSCAFP00000007956	T	2486	I	484499	benign	N
-Contig19_chr23_14811332_14815323	1987	chr23	14813327	ENSCAFT00000008588	ENSCAFP00000007958	S	690	L	U	unknown	N
-Contig180_chr2_35061773_35062172	166	chr2	35061941	ENSCAFT00000008673	ENSCAFP00000008039	T	920	M	478018	probably damaging	N
-Contig106_chr21_26153874_26154496	107	chr21	26153984	ENSCAFT00000008678	ENSCAFP00000008044	A	458	T	485188	benign	N
-Contig3_chr19_45625337_45630123	2563	chr19	45627887	ENSCAFT00000008728	ENSCAFP00000008094	V	1264	I	U	benign	N
-Contig51_chr22_48760401_48761638	636	chr22	48761047	ENSCAFT00000008769	ENSCAFP00000008132	R	1071	K	485523	benign	cfa02010=ABC transporters.cfa04976=Bile secretion
-Contig10_chr15_21173640_21174011	212	chr15	21173839	ENSCAFT00000008831	ENSCAFP00000008192	V	191	I	475398	benign	N
-Contig6_chr24_14680423_14681438	782	chr24	14681208	ENSCAFT00000009074	ENSCAFP00000008417	H	562	R	485769	possibly damaging	cfa04330=Notch signaling pathway
-Contig60_chr9_4528464_4529207	262	chr9	4528727	ENSCAFT00000009114	ENSCAFP00000008453	C	24	F	483354	possibly damaging	N
-Contig54_chr15_29510545_29512205	400	chr15	29510955	ENSCAFT00000009614	ENSCAFP00000008928	H	190	R	475416	benign	N
-Contig46_chr25_5067588_5068089	39	chr25	5067627	ENSCAFT00000009698	ENSCAFP00000009003	S	17	N	486001	benign	N
-Contig126_chr25_5114359_5115799	643	chr25	5114996	ENSCAFT00000009710	ENSCAFP00000009013	R	1952	C	486002	possibly damaging	N
-Contig41_chr26_3455305_3455893	329	chr26	3455620	ENSCAFT00000010094	ENSCAFP00000009363	S	909	A	486223	benign	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection
-Contig55_chr26_3463883_3465235	1074	chr26	3464998	ENSCAFT00000010094	ENSCAFP00000009363	R	1273	S	486223	benign	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection
-Contig63_chr26_3467460_3468420	195	chr26	3467661	ENSCAFT00000010094	ENSCAFP00000009363	E	1542	Q	486223	benign	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection
-Contig13_chr16_32259141_32259752	344	chr16	32259472	ENSCAFT00000010141	ENSCAFP00000009407	I	326	T	482857	benign	cfa04360=Axon guidance
-Contig59_chr21_32994329_32995926	1195	chr21	32995538	ENSCAFT00000010439	ENSCAFP00000009680	H	230	R	610992	benign	N
-Contig39_chr20_24938452_24941620	1292	chr20	24939734	ENSCAFT00000010496	ENSCAFP00000009730	S	28	P	415126	benign	cfa04380=Osteoclast differentiation.cfa04916=Melanogenesis.cfa05200=Pathways in cancer.cfa05218=Melanoma
-Contig2_chr18_28546360_28546760	277	chr18	28546640	ENSCAFT00000010516	ENSCAFP00000009748	P	471	S	U	benign	N
-Contig23_chr20_25560598_25562858	928	chr20	25561520	ENSCAFT00000010531	ENSCAFP00000009762	T	749	I	484693	benign	N
-Contig209_chr18_28672330_28672791	376	chr18	28672689	ENSCAFT00000010559	ENSCAFP00000009790	A	33	D	483405	unknown	N
-Contig261_chr18_28694652_28696968	1808	chr18	28696427	ENSCAFT00000010559	ENSCAFP00000009790	P	1443	L	483405	possibly damaging	N
-Contig30_chr25_12008255_12009009	151	chr25	12008417	ENSCAFT00000010593	ENSCAFP00000009822	Q	151	H	U	benign	N
-Contig46_chr29_3065854_3067420	1265	chr29	3067078	ENSCAFT00000010616	ENSCAFP00000009842	V	3253	A	474176	benign	cfa03450=Non-homologous end-joining.cfa04110=Cell cycle
-Contig59_chr28_3755477_3757019	935	chr28	3756419	ENSCAFT00000010630	ENSCAFP00000009853	R	923	Q	486770	possibly damaging	N
-Contig90_chr29_6393993_6395503	951	chr29	6394948	ENSCAFT00000010829	ENSCAFP00000010033	Y	257	C	486944	benign	N
-Contig42_chr16_39015800_39016389	319	chr16	39016119	ENSCAFT00000010865	ENSCAFP00000010068	D	71	N	U	possibly damaging	N
-Contig95_chr21_34533214_34535079	1133	chr21	34534321	ENSCAFT00000010931	ENSCAFP00000010131	E	118	G	485368	benign	N
-Contig82_chr21_34524815_34525170	247	chr21	34525072	ENSCAFT00000010931	ENSCAFP00000010131	Q	499	R	485368	benign	N
-Contig32_chr24_22727492_22727986	147	chr24	22727648	ENSCAFT00000010977	ENSCAFP00000010173	P	278	L	U	possibly damaging	N
-Contig45_chr16_42405571_42406148	269	chr16	42405837	ENSCAFT00000010988	ENSCAFP00000010184	H	406	R	482891	benign	cfa04145=Phagosome
-Contig66_chr15_43321121_43321872	642	chr15	43321764	ENSCAFT00000011187	ENSCAFP00000010364	F	543	L	475441	benign	N
-Contig184_chr27_5103641_5104991	275	chr27	5103979	ENSCAFT00000011380	ENSCAFP00000010541	V	864	A	U	benign	N
-Contig88_chr17_39320200_39320765	204	chr17	39320404	ENSCAFT00000011397	ENSCAFP00000010558	S	1911	N	475750	benign	cfa04110=Cell cycle.cfa04114=Oocyte meiosis.cfa04120=Ubiquitin mediated proteolysis.cfa04914=Progesterone-mediated oocyte maturation.cfa05166=HTLV-I infection
-Contig8_chr16_47195242_47195504	193	chr16	47195429	ENSCAFT00000011721	ENSCAFP00000010862	S	4369	W	475621	benign	N
-Contig84_chr27_5882441_5882771	145	chr27	5882579	ENSCAFT00000011730	ENSCAFP00000010871	C	289	S	486534	benign	N
-Contig42_chr24_25316755_25317362	320	chr24	25317091	ENSCAFT00000011771	ENSCAFP00000010910	G	22	S	477193	benign	N
-Contig45_chr24_25318544_25319490	734	chr24	25319299	ENSCAFT00000011771	ENSCAFP00000010910	V	187	A	477193	benign	N
-Contig31_chr24_25434125_25435133	853	chr24	25434975	ENSCAFT00000011789	ENSCAFP00000010928	S	91	I	609978	benign	N
-Contig20_chr3_10579133_10580085	600	chr3	10579729	ENSCAFT00000011968	ENSCAFP00000011099	K	165	E	488881	benign	cfa00760=Nicotinate and nicotinamide metabolism.cfa04146=Peroxisome
-Contig45_chr2_54585564_54588038	1047	chr2	54586611	ENSCAFT00000012081	ENSCAFP00000011198	T	969	M	478082	benign	cfa04621=NOD-like receptor signaling pathway
-Contig156_chr1_122375741_122376035	168	chr1	122375904	ENSCAFT00000012133	ENSCAFP00000011248	R	628	K	611998	benign	N
-Contig153_chr1_124036982_124040108	1588	chr1	124038585	ENSCAFT00000012159	ENSCAFP00000011272	A	887	T	484609	benign	N
-Contig32_chr24_26900375_26900913	394	chr24	26900761	ENSCAFT00000012254	ENSCAFP00000011358	H	51	Y	U	benign	N
-Contig103_chr16_48829082_48829675	123	chr16	48829205	ENSCAFT00000012381	ENSCAFP00000011471	E	369	G	475632	possibly damaging	N
-Contig25_chr18_41490135_41493501	534	chr18	41490665	ENSCAFT00000012414	ENSCAFP00000011503	R	703	C	483489	probably damaging	cfa04520=Adherens junction.cfa04670=Leukocyte transendothelial migration
-Contig69_chr16_49314879_49317228	1810	chr16	49316689	ENSCAFT00000012456	ENSCAFP00000011541	P	431	L	475636	probably damaging	cfa00565=Ether lipid metabolism
-Contig71_chr17_42734055_42736474	2240	chr17	42736298	ENSCAFT00000012478	ENSCAFP00000011561	R	307	Q	483083	benign	cfa00830=Retinol metabolism
-Contig17_chr17_43378842_43379885	305	chr17	43379148	ENSCAFT00000012676	ENSCAFP00000011740	T	196	M	U	probably damaging	N
-Contig195_chr27_7047911_7049009	555	chr27	7048468	ENSCAFT00000012942	ENSCAFP00000011978	R	881	L	477608	benign	N
-Contig112_chr30_4254316_4256576	1478	chr30	4255785	ENSCAFT00000012974	ENSCAFP00000012007	V	2939	I	U	benign	N
-Contig43_chr20_39124486_39124798	114	chr20	39124607	ENSCAFT00000013097	ENSCAFP00000012118	G	325	R	607274	possibly damaging	N
-Contig96_chr16_55849292_55849592	194	chr16	55849494	ENSCAFT00000013360	ENSCAFP00000012363	A	41	S	482932	benign	cfa04060=Cytokine-cytokine receptor interaction.cfa04150=mTOR signaling pathway.cfa04510=Focal adhesion.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma.cfa05212=Pancreatic cancer.cfa05219=Bladder cancer
-Contig91_chr17_51684551_51689453	4154	chr17	51688687	ENSCAFT00000013395	ENSCAFP00000012395	P	306	L	475784	benign	N
-Contig192_chr26_12794366_12794712	143	chr26	12794506	ENSCAFT00000014076	ENSCAFP00000013021	V	2478	I	477486	benign	N
-Contig191_chr31_30109152_30109760	212	chr31	30109363	ENSCAFT00000014113	ENSCAFP00000013055	A	1813	T	487735	probably damaging	N
-Contig116_chr24_29683980_29684819	101	chr24	29684079	ENSCAFT00000014115	ENSCAFP00000013057	R	836	C	485868	probably damaging	N
-Contig8_chr32_9413601_9414435	74	chr32	9413675	ENSCAFT00000014257	ENSCAFP00000013183	N	236	K	478452	probably damaging	cfa00270=Cysteine and methionine metabolism
-Contig90_chr21_43253791_43254774	189	chr21	43253974	ENSCAFT00000014325	ENSCAFP00000013248	I	758	V	U	benign	N
-Contig76_chr24_30292767_30294101	552	chr24	30293321	ENSCAFT00000014346	ENSCAFP00000013267	A	349	T	U	benign	N
-Contig21_chr25_37121451_37122072	177	chr25	37121616	ENSCAFT00000014616	ENSCAFP00000013518	V	157	L	486118	benign	N
-Contig15_chr36_6357141_6362626	5226	chr36	6362346	ENSCAFT00000014702	ENSCAFP00000013598	N	138	K	607626	possibly damaging	N
-Contig64_chr17_54734453_54734993	109	chr17	54734552	ENSCAFT00000014707	ENSCAFP00000013603	S	302	L	483124	benign	N
-Contig91_chr18_46134014_46136042	330	chr18	46134347	ENSCAFT00000014736	ENSCAFP00000013630	A	214	S	483635	benign	cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways.cfa04070=Phosphatidylinositol signaling system
-Contig111_chr31_31237314_31238628	920	chr31	31238220	ENSCAFT00000014822	ENSCAFP00000013714	S	143	C	478408	benign	N
-Contig77_chr38_3502296_3503058	349	chr38	3502639	ENSCAFT00000015260	ENSCAFP00000014122	K	666	E	478932	benign	N
-Contig59_chr38_3998294_3999004	369	chr38	3998672	ENSCAFT00000015347	ENSCAFP00000014201	V	791	I	U	benign	N
-Contig123_chr31_34367825_34368648	664	chr31	34368468	ENSCAFT00000015534	ENSCAFP00000014373	H	204	Q	U	possibly damaging	N
-Contig43_chr30_11874641_11875130	198	chr30	11874850	ENSCAFT00000015654	ENSCAFP00000014488	R	3422	C	U	benign	N
-Contig9_chr20_40741488_40743247	1027	chr20	40742525	ENSCAFT00000015816	ENSCAFP00000014638	M	183	V	484744	benign	N
-Contig137_chr5_7048977_7051042	863	chr5	7049840	ENSCAFT00000015844	ENSCAFP00000014662	A	311	V	479391	benign	N
-Contig9_chr28_17675067_17680985	1564	chr28	17676618	ENSCAFT00000015971	ENSCAFP00000014772	R	515	P	477805	unknown	N
-Contig126_chr30_12286682_12287475	407	chr30	12287101	ENSCAFT00000016062	ENSCAFP00000014854	V	450	I	487517	benign	cfa00052=Galactose metabolism.cfa00500=Starch and sucrose metabolism.cfa01100=Metabolic pathways
-Contig127_chr30_12287497_12288447	608	chr30	12288095	ENSCAFT00000016062	ENSCAFP00000014854	T	495	M	487517	benign	cfa00052=Galactose metabolism.cfa00500=Starch and sucrose metabolism.cfa01100=Metabolic pathways
-Contig13_chr38_5058391_5058630	66	chr38	5058458	ENSCAFT00000016099	ENSCAFP00000014887	F	412	L	478943	benign	N
-Contig169_chr35_19985467_19986000	455	chr35	19985921	ENSCAFT00000016165	ENSCAFP00000014950	T	175	I	478733	benign	N
-Contig2_chr35_21794536_21795092	291	chr35	21794865	ENSCAFT00000016208	ENSCAFP00000014992	V	84	A	488238	benign	cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways
-Contig141_chr26_19278751_19279229	364	chr26	19279128	ENSCAFT00000016284	ENSCAFP00000015064	N	29	S	404011	benign	cfa00564=Glycerophospholipid metabolism.cfa00565=Ether lipid metabolism.cfa00590=Arachidonic acid metabolism.cfa00591=Linoleic acid metabolism.cfa00592=alpha-Linolenic acid metabolism.cfa01100=Metabolic pathways.cfa04010=MAPK signaling pathway.cfa04270=Vascular smooth muscle contraction.cfa04370=VEGF signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04724=Glutamatergic synapse.cfa04730=Long-term depression.cfa04912=GnRH signaling pathway.cfa04972=Pancreatic secretion.cfa04975=Fat digestion and absorption.cfa05145=Toxoplasmosis
-Contig179_chr3_40781459_40782026	285	chr3	40781763	ENSCAFT00000016410	ENSCAFP00000015182	D	1174	N	488699	benign	N
-Contig237_chr21_53631024_53632458	203	chr21	53631227	ENSCAFT00000016459	ENSCAFP00000015227	C	47	W	403799	probably damaging	cfa04664=Fc epsilon RI signaling pathway.cfa05310=Asthma
-Contig186_chr2_71203100_71204111	202	chr2	71203303	ENSCAFT00000016485	ENSCAFP00000015250	S	188	T	478144	benign	cfa00330=Arginine and proline metabolism.cfa01100=Metabolic pathways
-Contig1_chr28_18779291_18780149	325	chr28	18779619	ENSCAFT00000016578	ENSCAFP00000015340	N	245	Y	U	probably damaging	N
-Contig166_chr4_77425871_77426835	797	chr4	77426667	ENSCAFT00000016670	ENSCAFP00000015429	D	115	G	479370	benign	cfa00970=Aminoacyl-tRNA biosynthesis
-Contig35_chr24_36806524_36807086	367	chr24	36806891	ENSCAFT00000016727	ENSCAFP00000015478	F	345	L	485910	benign	N
-Contig45_chr37_8610877_8611425	194	chr37	8611078	ENSCAFT00000016761	ENSCAFP00000015511	D	2849	N	488452	possibly damaging	N
-Contig39_chr28_19446540_19447838	1068	chr28	19447566	ENSCAFT00000016791	ENSCAFP00000015537	A	1596	E	U	benign	N
-Contig161_chr18_51013230_51015381	1494	chr18	51014735	ENSCAFT00000016827	ENSCAFP00000015571	L	977	V	475999	benign	N
-Contig25_chr28_19619108_19621267	1728	chr28	19620832	ENSCAFT00000016848	ENSCAFP00000034237	I	108	V	609723	benign	N
-Contig33_chr20_42063173_42064259	623	chr20	42063789	ENSCAFT00000017070	ENSCAFP00000015794	V	179	M	U	probably damaging	N
-Contig39_chr38_14681397_14682234	384	chr38	14681781	ENSCAFT00000017072	ENSCAFP00000015796	H	282	N	488593	unknown	N
-Contig6_chr32_27303975_27304541	425	chr32	27304407	ENSCAFT00000017178	ENSCAFP00000015896	S	354	T	610098	benign	N
-Contig173_chr38_17709765_17711029	179	chr38	17709941	ENSCAFT00000017240	ENSCAFP00000015955	G	464	R	U	benign	N
-Contig52_chr32_27452924_27453332	91	chr32	27452999	ENSCAFT00000017249	ENSCAFP00000015964	A	22	S	U	benign	N
-Contig319_chr34_14684259_14684663	353	chr34	14684613	ENSCAFT00000017314	ENSCAFP00000016025	R	5	Q	478632	benign	N
-Contig32_chr2_72269353_72269814	349	chr2	72269708	ENSCAFT00000017327	ENSCAFP00000016037	P	853	L	487317	possibly damaging	N
-Contig206_chr9_18720001_18720613	155	chr9	18720160	ENSCAFT00000017373	ENSCAFP00000016082	D	1621	E	480456	benign	cfa02010=ABC transporters
-Contig35_chr37_10562149_10562621	74	chr37	10562222	ENSCAFT00000017444	ENSCAFP00000016153	I	975	V	478858	benign	cfa04727=GABAergic synapse
-Contig1_chr30_12655575_12656916	370	chr30	12655947	ENSCAFT00000017777	ENSCAFP00000016457	L	639	M	608886	probably damaging	N
-Contig63_chr27_23738716_23739879	1131	chr27	23739850	ENSCAFT00000017892	ENSCAFP00000016566	P	642	L	486627	benign	N
-Contig44_chr28_28123120_28124627	1348	chr28	28124495	ENSCAFT00000017967	ENSCAFP00000016639	V	261	A	477827	benign	N
-Contig23_chrX_6416128_6417014	455	chrX	6416585	ENSCAFT00000018017	ENSCAFP00000016684	H	111	R	491733	possibly damaging	N
-Contig31_chr7_8282189_8286932	3631	chr7	8285875	ENSCAFT00000018057	ENSCAFP00000016724	L	655	P	490260	benign	N
-Contig318_chr6_8706066_8706350	76	chr6	8706142	ENSCAFT00000018106	ENSCAFP00000016769	K	318	N	607700	possibly damaging	cfa04062=Chemokine signaling pathway.cfa04145=Phagosome.cfa04380=Osteoclast differentiation.cfa04666=Fc gamma R-mediated phagocytosis.cfa04670=Leukocyte transendothelial migration.cfa05140=Leishmaniasis
-Contig36_chr32_33046881_33048369	1118	chr32	33047990	ENSCAFT00000018307	ENSCAFP00000016954	E	555	A	403657	benign	cfa04010=MAPK signaling pathway.cfa04012=ErbB signaling pathway.cfa04060=Cytokine-cytokine receptor interaction.cfa04144=Endocytosis.cfa04510=Focal adhesion.cfa04540=Gap junction.cfa04810=Regulation of actin cytoskeleton.cfa05160=Hepatitis C.cfa05200=Pathways in cancer.cfa05212=Pancreatic cancer.cfa05213=Endometrial cancer.cfa05214=Glioma.cfa05215=Prostate cancer.cfa05218=Melanoma.cfa05219=Bladder cancer.cfa05223=Non-small cell lung cancer
-Contig32_chr5_14476595_14477214	347	chr5	14476948	ENSCAFT00000018571	ENSCAFP00000017201	V	209	A	610296	benign	N
-Contig88_chr34_19031138_19031937	343	chr34	19031477	ENSCAFT00000018684	ENSCAFP00000017309	K	670	R	478645	benign	cfa00280=Valine, leucine and isoleucine degradation.cfa01100=Metabolic pathways
-Contig188_chr25_47927372_47928085	557	chr25	47927941	ENSCAFT00000018758	ENSCAFP00000017379	K	228	R	486167	benign	cfa00561=Glycerolipid metabolism.cfa00564=Glycerophospholipid metabolism.cfa01100=Metabolic pathways.cfa04070=Phosphatidylinositol signaling system
-Contig36_chr37_12924359_12924740	86	chr37	12924449	ENSCAFT00000018786	ENSCAFP00000017406	D	187	Y	608849	probably damaging	cfa00280=Valine, leucine and isoleucine degradation.cfa00350=Tyrosine metabolism.cfa00380=Tryptophan metabolism.cfa00750=Vitamin B6 metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00982=Drug metabolism - cytochrome P450.cfa01100=Metabolic pathways
-Contig3_chr34_19471626_19472377	337	chr34	19471956	ENSCAFT00000018788	ENSCAFP00000017408	R	239	Q	488096	possibly damaging	N
-Contig80_chr4_11155760_11156827	952	chr4	11156735	ENSCAFT00000018796	ENSCAFP00000017416	S	661	N	479204	benign	cfa00564=Glycerophospholipid metabolism.cfa04146=Peroxisome
-Contig56_chr8_7093747_7095987	683	chr8	7094428	ENSCAFT00000018813	ENSCAFP00000017431	P	126	R	490620	unknown	N
-Contig82_chr8_7111986_7114065	1351	chr8	7113329	ENSCAFT00000018871	ENSCAFP00000017488	R	608	H	480255	probably damaging	cfa00010=Glycolysis / Gluconeogenesis.cfa00020=Citrate cycle (TCA cycle).cfa00620=Pyruvate metabolism.cfa01100=Metabolic pathways.cfa03320=PPAR signaling pathway.cfa04910=Insulin signaling pathway.cfa04920=Adipocytokine signaling pathway.cfa04964=Proximal tubule bicarbonate reclamation
-Contig172_chr33_28585454_28586084	228	chr33	28585687	ENSCAFT00000018884	ENSCAFP00000017500	R	36	K	478584	benign	N
-Contig113_chr5_16682954_16684491	688	chr5	16683641	ENSCAFT00000018997	ENSCAFP00000017606	F	41	L	489360	benign	N
-Contig36_chr28_31449413_31452160	2111	chr28	31451506	ENSCAFT00000019041	ENSCAFP00000017650	P	252	H	477834	benign	cfa04144=Endocytosis
-Contig80_chr3_55628026_55628800	392	chr3	55628403	ENSCAFT00000019070	ENSCAFP00000017677	R	805	K	403913	benign	cfa00480=Glutathione metabolism.cfa01100=Metabolic pathways.cfa04614=Renin-angiotensin system.cfa04640=Hematopoietic cell lineage
-Contig99_chr7_11816365_11819255	806	chr7	11817201	ENSCAFT00000019101	ENSCAFP00000017707	C	305	G	490276	benign	N
-Contig114_chr4_12744102_12745318	148	chr4	12744256	ENSCAFT00000019279	ENSCAFP00000017880	I	700	V	U	benign	N
-Contig82_chr7_13056757_13058281	974	chr7	13057742	ENSCAFT00000019316	ENSCAFP00000017915	S	283	N	609933	benign	cfa00564=Glycerophospholipid metabolism
-Contig280_chr25_51367477_51367885	70	chr25	51367542	ENSCAFT00000019610	ENSCAFP00000018191	S	97	L	U	benign	N
-Contig35_chr20_43508791_43509352	460	chr20	43509254	ENSCAFT00000019627	ENSCAFP00000018204	V	77	A	608455	benign	cfa00190=Oxidative phosphorylation.cfa01100=Metabolic pathways.cfa04260=Cardiac muscle contraction.cfa05010=Alzheimer's disease.cfa05012=Parkinson's disease.cfa05016=Huntington's disease
-Contig36_chr20_43509362_43510980	1484	chr20	43510860	ENSCAFT00000019627	ENSCAFP00000018204	D	181	N	608455	benign	cfa00190=Oxidative phosphorylation.cfa01100=Metabolic pathways.cfa04260=Cardiac muscle contraction.cfa05010=Alzheimer's disease.cfa05012=Parkinson's disease.cfa05016=Huntington's disease
-Contig59_chr25_51807653_51809044	1064	chr25	51808739	ENSCAFT00000019760	ENSCAFP00000018330	R	235	K	U	benign	N
-Contig96_chr36_17712997_17714068	556	chr36	17713559	ENSCAFT00000019807	ENSCAFP00000018374	T	423	I	478789	benign	N
-Contig163_chr28_34927368_34929275	1128	chr28	34928486	ENSCAFT00000019866	ENSCAFP00000018425	A	2659	T	477850	benign	N
-Contig74_chr33_31230250_31230874	246	chr33	31230493	ENSCAFT00000019938	ENSCAFP00000018492	G	113	S	488016	probably damaging	N
-Contig130_chr7_15553315_15558308	3186	chr7	15556497	ENSCAFT00000020009	ENSCAFP00000018561	K	1513	N	U	benign	N
-Contig160_chr2_76816412_76817166	354	chr2	76816779	ENSCAFT00000020143	ENSCAFP00000018683	I	190	V	478173	benign	N
-Contig219_chr33_31871568_31871771	81	chr33	31871646	ENSCAFT00000020195	ENSCAFP00000018733	N	346	H	U	probably damaging	N
-Contig254_chr24_50001599_50001992	151	chr24	50001767	ENSCAFT00000020266	ENSCAFP00000018803	R	239	Q	U	benign	N
-Contig40_chr37_15283702_15285945	1908	chr37	15285621	ENSCAFT00000020408	ENSCAFP00000018937	A	809	G	U	unknown	N
-Contig59_chr20_43702094_43703358	450	chr20	43702540	ENSCAFT00000020438	ENSCAFP00000018965	S	217	A	U	benign	N
-Contig75_chr3_57465650_57466327	377	chr3	57466017	ENSCAFT00000020863	ENSCAFP00000019371	L	205	F	609716	probably damaging	N
-Contig155_chr2_79195879_79199423	2014	chr2	79197892	ENSCAFT00000021154	ENSCAFP00000019645	G	549	S	U	benign	N
-Contig155_chr2_79195879_79199423	3136	chr2	79199014	ENSCAFT00000021154	ENSCAFP00000019645	R	923	C	U	probably damaging	N
-Contig59_chr5_19784971_19787384	1310	chr5	19786293	ENSCAFT00000021222	ENSCAFP00000019707	V	171	I	479428.489393	benign	cfa03320=PPAR signaling pathway
-Contig41_chr30_14304605_14305465	206	chr30	14304816	ENSCAFT00000021612	ENSCAFP00000020069	A	157	G	U	benign	N
-Contig47_chr20_45043804_45044476	317	chr20	45044117	ENSCAFT00000021659	ENSCAFP00000020114	V	281	I	609323	benign	N
-Contig46_chr4_22849549_22849829	123	chr4	22849673	ENSCAFT00000021752	ENSCAFP00000020204	V	646	M	U	probably damaging	N
-Contig141_chr7_22360980_22361690	242	chr7	22361233	ENSCAFT00000021777	ENSCAFP00000020227	K	1862	R	U	unknown	N
-Contig59_chr30_14758622_14760653	1186	chr30	14759817	ENSCAFT00000021792	ENSCAFP00000020241	S	284	R	609256	benign	N
-Contig57_chr27_39696388_39698349	1026	chr27	39697428	ENSCAFT00000021846	ENSCAFP00000020293	Q	588	R	477699	benign	cfa04610=Complement and coagulation cascades
-Contig83_chr27_40151814_40153141	738	chr27	40152551	ENSCAFT00000022064	ENSCAFP00000020490	S	191	R	477702	benign	N
-Contig105_chr6_11901733_11904968	406	chr6	11902145	ENSCAFT00000022289	ENSCAFP00000020701	Y	55	H	479732	probably damaging	cfa04621=NOD-like receptor signaling pathway
-Contig43_chr36_25298890_25299602	235	chr36	25299132	ENSCAFT00000022319	ENSCAFP00000020728	E	11731	K	610299.610339	unknown	N
-Contig3_chr36_25193150_25202641	2802	chr36	25195983	ENSCAFT00000022319	ENSCAFP00000020728	I	30137	V	610299.610339	benign	N
-Contig585_chr3_61201332_61201904	139	chr3	61201468	ENSCAFT00000022529	ENSCAFP00000020918	L	97	V	479067	benign	cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway
-Contig1_chr20_46714929_46715937	434	chr20	46715327	ENSCAFT00000022571	ENSCAFP00000020958	A	18	P	484804	unknown	N
-Contig7_chr8_29376780_29378260	158	chr8	29376937	ENSCAFT00000022576	ENSCAFP00000020962	T	852	A	490678	benign	N
-Contig74_chr8_29656170_29657212	595	chr8	29656776	ENSCAFT00000022697	ENSCAFP00000021080	E	974	K	490682	possibly damaging	cfa04010=MAPK signaling pathway.cfa04012=ErbB signaling pathway.cfa04062=Chemokine signaling pathway.cfa04320=Dorso-ventral axis formation.cfa04510=Focal adhesion.cfa04540=Gap junction.cfa04630=Jak-STAT signaling pathway.cfa04650=Natural killer cell mediated cytotoxicity.cfa04660=T cell receptor signaling pathway.cfa04662=B cell receptor signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04722=Neurotrophin signaling pathway.cfa04810=Regulation of actin cytoskeleton.cfa04910=Insulin signaling pathway.cfa04912=GnRH signaling pathway.cfa05160=Hepatitis C.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma.cfa05213=Endometrial cancer.cfa05214=Glioma.cfa05215=Prostate cancer.cfa05220=Chronic myeloid leukemia.cfa05221=Acute myeloid leukemia.cfa05223=Non-small cell lung cancer
-Contig45_chr4_25273541_25274402	608	chr4	25274121	ENSCAFT00000022760	ENSCAFP00000021140	S	30	F	479239	probably damaging	cfa04360=Axon guidance
-Contig96_chr37_26111249_26111450	108	chr37	26111364	ENSCAFT00000022884	ENSCAFP00000021256	K	260	R	478902	benign	cfa03450=Non-homologous end-joining
-Contig196_chr3_62434637_62435063	203	chr3	62434823	ENSCAFT00000022915	ENSCAFP00000021284	L	174	P	488785	benign	N
-Contig15_chr6_12238116_12239737	1287	chr6	12239420	ENSCAFT00000022961	ENSCAFP00000021328	E	165	K	479735	benign	N
-Contig175_chr5_27267391_27267870	57	chr5	27267451	ENSCAFT00000023032	ENSCAFP00000021395	N	1094	S	479450	benign	cfa04110=Cell cycle.cfa04115=p53 signaling pathway.cfa04210=Apoptosis.cfa05166=HTLV-I infection
-Contig110_chr20_47192181_47193618	93	chr20	47192262	ENSCAFT00000023054	ENSCAFP00000021407	A	308	P	484814	probably damaging	N
-Contig9_chr4_26730063_26730585	245	chr4	26730316	ENSCAFT00000023087	ENSCAFP00000021437	E	153	D	489044	benign	N
-Contig1_chr34_35420831_35421658	73	chr34	35420908	ENSCAFT00000023111	ENSCAFP00000021457	V	251	I	488144	benign	N
-Contig199_chr2_79696091_79697603	751	chr2	79696840	ENSCAFT00000023253	ENSCAFP00000021593	D	54	A	U	possibly damaging	N
-Contig146_chrX_38946913_38947473	307	chrX	38947225	ENSCAFT00000023268	ENSCAFP00000021608	L	160	V	612457	possibly damaging	N
-Contig63_chr9_23532151_23533554	1297	chr9	23533421	ENSCAFT00000023438	ENSCAFP00000021767	Q	279	R	490958	benign	N
-Contig89_chr5_32060784_32061151	293	chr5	32061079	ENSCAFT00000023913	ENSCAFP00000022199	W	106	*	489430	probably damaging	N
-Contig15_chr3_65640843_65642155	1100	chr3	65641942	ENSCAFT00000023933	ENSCAFP00000022218	V	383	A	479080	benign	N
-Contig49_chr26_33571748_33572620	689	chr26	33572452	ENSCAFT00000024062	ENSCAFP00000022339	R	478	W	486440	benign	N
-Contig96_chr20_48055741_48057197	524	chr20	48056259	ENSCAFT00000024100	ENSCAFP00000022374	R	172	Q	U	benign	N
-Contig104_chr20_48062263_48062546	210	chr20	48062492	ENSCAFT00000024100	ENSCAFP00000022374	V	775	G	U	probably damaging	N
-Contig33_chr37_28794567_28796956	2144	chr37	28796718	ENSCAFT00000024137	ENSCAFP00000022408	E	279	Q	488536	benign	N
-Contig24_chr7_32005266_32005660	212	chr7	32005479	ENSCAFT00000024154	ENSCAFP00000022424	T	92	M	U	probably damaging	N
-Contig174_chr18_56896461_56897594	274	chr18	56896734	ENSCAFT00000024637	ENSCAFP00000022858	V	157	L	483779	benign	cfa04130=SNARE interactions in vesicular transport
-Contig55_chr20_48811642_48812027	299	chr20	48811941	ENSCAFT00000024761	ENSCAFP00000022970	H	993	R	476678	benign	N
-Contig220_chr18_56925351_56927006	920	chr18	56926246	ENSCAFT00000024787	ENSCAFP00000022995	P	420	Q	476051	possibly damaging	cfa03022=Basal transcription factors.cfa05168=Herpes simplex infection
-Contig12_chr8_39044824_39045409	359	chr8	39045181	ENSCAFT00000024804	ENSCAFP00000023011	I	280	T	612894	possibly damaging	N
-Contig23_chr3_72567678_72570858	1313	chr3	72568976	ENSCAFT00000024846	ENSCAFP00000023051	L	298	P	488826	benign	N
-Contig190_chr7_35896301_35896811	232	chr7	35896528	ENSCAFT00000024892	ENSCAFP00000023095	R	3	L	480092	unknown	cfa00020=Citrate cycle (TCA cycle).cfa01100=Metabolic pathways.cfa05200=Pathways in cancer.cfa05211=Renal cell carcinoma
-Contig119_chr20_49114009_49114654	266	chr20	49114270	ENSCAFT00000024934	ENSCAFP00000023135	F	339	L	484849	benign	N
-Contig47_chr8_41487304_41487682	210	chr8	41487515	ENSCAFT00000025088	ENSCAFP00000023286	S	1743	L	490729	possibly damaging	N
-Contig67_chr2_84099157_84100880	345	chr2	84099493	ENSCAFT00000025109	ENSCAFP00000023307	I	60	L	U	benign	N
-Contig33_chr20_49727730_49730958	2192	chr20	49729935	ENSCAFT00000025308	ENSCAFP00000023495	T	448	R	U	probably damaging	N
-Contig33_chr20_49727730_49730958	2907	chr20	49730606	ENSCAFT00000025308	ENSCAFP00000023495	W	493	L	U	benign	N
-Contig93_chr8_42181027_42183022	694	chr8	42181716	ENSCAFT00000025462	ENSCAFP00000023641	L	782	P	U	benign	N
-Contig131_chrX_44937490_44940040	950	chrX	44938456	ENSCAFT00000025663	ENSCAFP00000023835	V	120	M	491894	benign	N
-Contig100_chrX_44915404_44918232	1832	chrX	44917224	ENSCAFT00000025663	ENSCAFP00000023835	R	1212	Q	491894	benign	N
-Contig100_chrX_44915404_44918232	920	chrX	44916331	ENSCAFT00000025663	ENSCAFP00000023835	L	1377	V	491894	benign	N
-Contig123_chr9_26132942_26133532	310	chr9	26133253	ENSCAFT00000025948	ENSCAFP00000024090	I	232	V	491022	benign	N
-Contig34_chr6_17772839_17773548	489	chr6	17773329	ENSCAFT00000026008	ENSCAFP00000024146	E	377	Q	U	benign	N
-Contig382_chr7_43383655_43383893	190	chr7	43383854	ENSCAFT00000026053	ENSCAFP00000024188	R	123	C	U	possibly damaging	N
-Contig163_chr2_87404548_87404792	132	chr2	87404673	ENSCAFT00000026251	ENSCAFP00000024378	D	239	N	U	benign	N
-Contig15_chr3_91850893_91851323	75	chr3	91850967	ENSCAFT00000026343	ENSCAFP00000024465	S	722	N	595148	benign	cfa04360=Axon guidance
-Contig141_chr7_44385686_44386047	166	chr7	44385857	ENSCAFT00000026393	ENSCAFP00000024510	L	166	P	490412	benign	cfa04810=Regulation of actin cytoskeleton
-Contig161_chr2_87840986_87841705	540	chr2	87841516	ENSCAFT00000026485	ENSCAFP00000024598	F	678	C	478233	probably damaging	cfa03018=RNA degradation
-Contig177_chr9_27497479_27498192	354	chr9	27497831	ENSCAFT00000026613	ENSCAFP00000024719	A	175	V	491046	possibly damaging	N
-Contig162_chr6_20156115_20157725	81	chr6	20156197	ENSCAFT00000026687	ENSCAFP00000024793	T	702	M	489923.489924.607168	benign	N
-Contig8_chr9_28287278_28288276	469	chr9	28287755	ENSCAFT00000026707	ENSCAFP00000024813	A	75	P	491060	benign	N
-Contig166_chr7_45276673_45277595	235	chr7	45276916	ENSCAFT00000026881	ENSCAFP00000024984	V	525	I	490428	benign	N
-Contig16_chr8_51223078_51223662	481	chr8	51223563	ENSCAFT00000026967	ENSCAFP00000025070	R	869	Q	490790	benign	N
-Contig65_chr9_29792446_29793465	893	chr9	29793341	ENSCAFT00000027073	ENSCAFP00000025173	S	81	A	491082	benign	N
-Contig175_chr6_30926774_30927470	446	chr6	30927229	ENSCAFT00000027269	ENSCAFP00000025361	S	663	T	403453	benign	cfa02010=ABC transporters.cfa04977=Vitamin digestion and absorption
-Contig45_chr30_33024389_33025619	471	chr30	33024857	ENSCAFT00000027320	ENSCAFP00000025407	G	986	A	487608	benign	N
-Contig60_chr20_53087461_53088013	184	chr20	53087649	ENSCAFT00000027519	ENSCAFP00000025591	S	556	L	611163	benign	N
-Contig98_chr5_37073086_37073674	378	chr5	37073467	ENSCAFT00000027596	ENSCAFP00000025664	V	38	M	479499	probably damaging	cfa04130=SNARE interactions in vesicular transport
-Contig64_chr9_36235086_36235751	475	chr9	36235563	ENSCAFT00000027673	ENSCAFP00000025737	D	260	E	491111	benign	cfa04970=Salivary secretion
-Contig72_chr30_35330469_35330831	236	chr30	35330709	ENSCAFT00000027712	ENSCAFP00000025770	G	386	C	478353	probably damaging	cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy
-Contig12_chr8_66066327_66066629	89	chr8	66066402	ENSCAFT00000027927	ENSCAFP00000025970	K	158	R	490836	benign	N
-Contig212_chr8_66173086_66174259	622	chr8	66173712	ENSCAFT00000027950	ENSCAFP00000025993	K	114	Q	480421	benign	N
-Contig176_chr7_48083671_48084458	311	chr7	48083983	ENSCAFT00000027972	ENSCAFP00000026015	R	128	H	480148	probably damaging	N
-Contig3_chr4_58820541_58821952	265	chr4	58820806	ENSCAFT00000027979	ENSCAFP00000026022	A	31	T	489166	benign	N
-Contig24_chr7_48238665_48239174	383	chr7	48239049	ENSCAFT00000028007	ENSCAFP00000026049	T	227	M	480151	probably damaging	N
-Contig25_chr6_26340448_26341519	657	chr6	26341104	ENSCAFT00000028115	ENSCAFP00000026155	S	128	L	479811	possibly damaging	N
-Contig212_chr5_38871122_38871621	302	chr5	38871429	ENSCAFT00000028231	ENSCAFP00000026253	P	2265	L	489507	benign	N
-Contig147_chr6_27310627_27310983	100	chr6	27310719	ENSCAFT00000028327	ENSCAFP00000026344	V	154	A	U	benign	N
-Contig160_chr6_27318582_27318861	67	chr6	27318647	ENSCAFT00000028327	ENSCAFP00000026344	K	325	R	U	benign	N
-Contig18_chr4_61023435_61026038	385	chr4	61023825	ENSCAFT00000028363	ENSCAFP00000026377	P	4110	L	479323	benign	N
-Contig162_chr30_40685605_40687049	343	chr30	40685956	ENSCAFT00000028463	ENSCAFP00000026472	A	416	P	487646	benign	N
-Contig68_chr20_54017481_54018354	221	chr20	54017705	ENSCAFT00000028500	ENSCAFP00000026509	W	539	R	U	benign	N
-Contig50_chr7_59076761_59079381	2353	chr7	59079104	ENSCAFT00000028551	ENSCAFP00000026557	V	1487	I	490492	benign	N
-Contig51_chr7_59079274_59084588	2611	chr7	59081905	ENSCAFT00000028551	ENSCAFP00000026557	A	575	V	490492	benign	N
-Contig3_chr20_54855789_54856135	37	chr20	54855833	ENSCAFT00000028813	ENSCAFP00000026796	F	6015	S	U	unknown	N
-Contig157_chr5_43472186_43472528	168	chr5	43472353	ENSCAFT00000028826	ENSCAFP00000026807	R	355	Q	489526	benign	cfa00010=Glycolysis / Gluconeogenesis.cfa00340=Histidine metabolism.cfa00350=Tyrosine metabolism.cfa00360=Phenylalanine metabolism.cfa00410=beta-Alanine metabolism.cfa00980=Metabolism of xenobiotics by cytochrome P450.cfa00982=Drug metabolism - cytochrome P450.cfa01100=Metabolic pathways
-Contig80_chr20_55281094_55281971	129	chr20	55281228	ENSCAFT00000028936	ENSCAFP00000026914	T	931	A	U	benign	N
-Contig214_chr8_74493164_74493474	188	chr8	74493346	ENSCAFT00000029054	ENSCAFP00000027017	R	94	C	U	probably damaging	N
-Contig259_chr20_55571618_55572503	186	chr20	55571803	ENSCAFT00000029100	ENSCAFP00000027059	K	526	Q	485001	benign	N
-Contig180_chr9_41668066_41668716	357	chr9	41668451	ENSCAFT00000029122	ENSCAFP00000027081	E	990	D	491145	benign	cfa03410=Base excision repair
-Contig61_chr4_63087183_63089623	491	chr4	63087672	ENSCAFT00000029130	ENSCAFP00000027089	A	20	S	U	benign	N
-Contig261_chrX_94412915_94414298	488	chrX	94413396	ENSCAFT00000029188	ENSCAFP00000027142	D	329	E	U	unknown	N
-Contig58_chr4_70221679_70223505	1749	chr4	70223432	ENSCAFT00000029501	ENSCAFP00000027423	T	324	S	403721	benign	cfa04060=Cytokine-cytokine receptor interaction.cfa04080=Neuroactive ligand-receptor interaction.cfa04630=Jak-STAT signaling pathway
-Contig21_chr7_77985141_77986170	827	chr7	77985962	ENSCAFT00000029651	ENSCAFP00000027557	A	855	S	490545	benign	N
-Contig93_chrX_104176429_104177974	811	chrX	104177246	ENSCAFT00000029709	ENSCAFP00000027610	T	719	M	492128	benign	cfa03008=Ribosome biogenesis in eukaryotes
-Contig175_chr9_46116277_46118268	1090	chr9	46117366	ENSCAFT00000029722	ENSCAFP00000027622	Q	693	H	U	benign	N
-Contig134_chr4_76495667_76496825	860	chr4	76496507	ENSCAFT00000029827	ENSCAFP00000027720	I	113	V	612589	benign	cfa00250=Alanine, aspartate and glutamate metabolism.cfa00260=Glycine, serine and threonine metabolism.cfa01100=Metabolic pathways
-Contig247_chr6_31967574_31967796	158	chr6	31967732	ENSCAFT00000029875	ENSCAFP00000027765	P	750	T	489999	benign	N
-Contig6_chr7_81650872_81657348	3786	chr7	81654636	ENSCAFT00000030050	ENSCAFP00000027927	S	501	C	480218	benign	N
-Contig122_chr5_57147596_57148457	360	chr5	57147964	ENSCAFT00000030140	ENSCAFP00000028007	T	713	I	479558	benign	N
-Contig83_chr20_58039274_58039724	380	chr20	58039649	ENSCAFT00000030192	ENSCAFP00000028056	E	142	K	611866	benign	N
-Contig42_chr5_58023274_58024296	585	chr5	58023845	ENSCAFT00000030282	ENSCAFP00000028135	V	415	A	489580	benign	N
-Contig248_chr20_58217741_58219717	751	chr20	58218495	ENSCAFT00000030285	ENSCAFP00000028138	G	278	S	485038	unknown	N
-Contig127_chr6_39501489_39501966	83	chr6	39501576	ENSCAFT00000030381	ENSCAFP00000028228	N	155	S	490020	benign	N
-Contig123_chr6_39499974_39501056	816	chr6	39500798	ENSCAFT00000030381	ENSCAFP00000028228	A	195	P	490020	benign	N
-Contig247_chr6_39576694_39577607	493	chr6	39577171	ENSCAFT00000030386	ENSCAFP00000028233	S	745	N	490021	benign	N
-Contig6_chr9_50725202_50725646	143	chr9	50725344	ENSCAFT00000030726	ENSCAFP00000028560	M	12	T	491218	benign	N
-Contig221_chr6_41879771_41881379	766	chr6	41880519	ENSCAFT00000030883	ENSCAFP00000028717	A	184	T	606755	benign	N
-Contig231_chr5_60474911_60475630	279	chr5	60475186	ENSCAFT00000030960	ENSCAFP00000028794	C	505	Y	489618	possibly damaging	N
-Contig99_chr5_63306202_63308496	2063	chr5	63308224	ENSCAFT00000031146	ENSCAFP00000028978	A	421	V	U	unknown	N
-Contig245_chr5_66149146_66149848	349	chr5	66149499	ENSCAFT00000031407	ENSCAFP00000029234	R	207	Q	479601	benign	cfa00760=Nicotinate and nicotinamide metabolism.cfa01100=Metabolic pathways
-Contig305_chr5_67253589_67254394	375	chr5	67253954	ENSCAFT00000031570	ENSCAFP00000029391	R	203	Q	U	possibly damaging	N
-Contig94_chr9_56873843_56875505	1578	chr9	56875408	ENSCAFT00000031743	ENSCAFP00000029555	P	2937	S	U	benign	N
-Contig107_chr5_71317862_71318113	71	chr5	71317944	ENSCAFT00000031781	ENSCAFP00000029590	M	281	V	U	benign	N
-Contig134_chr9_57426140_57427208	236	chr9	57426380	ENSCAFT00000031798	ENSCAFP00000029606	V	89	I	480698	benign	cfa00590=Arachidonic acid metabolism.cfa01100=Metabolic pathways
-Contig60_chr12_5631507_5632392	818	chr12	5632313	ENSCAFT00000031814	ENSCAFP00000029621	Y	1697	C	481734	unknown	cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04974=Protein digestion and absorption.cfa05146=Amoebiasis
-Contig132_chr5_73710776_73711271	149	chr5	73710927	ENSCAFT00000031848	ENSCAFP00000029653	T	1323	M	489696	probably damaging	N
-Contig39_chr9_59278364_59279024	398	chr9	59278757	ENSCAFT00000032068	ENSCAFP00000029863	A	957	T	480718	benign	N
-Contig177_chr9_61212763_61213621	700	chr9	61213430	ENSCAFT00000032171	ENSCAFP00000029958	D	79	N	U	benign	N
-Contig67_chr6_62507717_62510152	1055	chr6	62508787	ENSCAFT00000032186	ENSCAFP00000029972	I	212	M	479959	probably damaging	cfa00380=Tryptophan metabolism.cfa00450=Selenocompound metabolism.cfa01100=Metabolic pathways
-Contig66_chr6_64570039_64570630	325	chr6	64570365	ENSCAFT00000032239	ENSCAFP00000030024	A	862	G	479964	benign	cfa04740=Olfactory transduction.cfa04972=Pancreatic secretion
-Contig22_chr6_64809414_64810661	626	chr6	64810027	ENSCAFT00000032269	ENSCAFP00000030052	Q	559	K	490179	benign	N
-Contig50_chr5_85052459_85052865	55	chr5	85052515	ENSCAFT00000032431	ENSCAFP00000030201	S	32	G	479688	benign	N
-Contig25_chr5_85095840_85098495	1627	chr5	85097474	ENSCAFT00000032433	ENSCAFP00000030203	F	681	S	U	benign	N
-Contig25_chr5_85480673_85480982	186	chr5	85480860	ENSCAFT00000032493	ENSCAFP00000030260	A	180	T	610026	possibly damaging	cfa05010=Alzheimer's disease
-Contig19_chr5_24601128_24602241	685	chr5	24601813	ENSCAFT00000035141	ENSCAFP00000030364	T	695	S	U	benign	N
-Contig59_chr26_11519273_11520242	659	chr26	11519937	ENSCAFT00000035276	ENSCAFP00000030520	P	160	L	403557	probably damaging	cfa03015=mRNA surveillance pathway.cfa04114=Oocyte meiosis.cfa04270=Vascular smooth muscle contraction.cfa04510=Focal adhesion.cfa04720=Long-term potentiation.cfa04728=Dopaminergic synapse.cfa04810=Regulation of actin cytoskeleton.cfa04910=Insulin signaling pathway.cfa05168=Herpes simplex infection
-Contig27_chr12_23130802_23131771	353	chr12	23131154	ENSCAFT00000035307	ENSCAFP00000030552	V	565	M	474935	probably damaging	N
-Contig31_chr1_8052327_8053606	234	chr1	8052570	ENSCAFT00000035442	ENSCAFP00000030703	C	153	S	U	possibly damaging	N
-Contig59_chr20_40539078_40540678	1223	chr20	40540302	ENSCAFT00000035532	ENSCAFP00000030804	H	285	R	403502	benign	cfa04620=Toll-like receptor signaling pathway.cfa05142=Chagas disease (American trypanosomiasis).cfa05143=African trypanosomiasis.cfa05144=Malaria.cfa05152=Tuberculosis.cfa05162=Measles.cfa05168=Herpes simplex infection
-Contig152_chr6_25356961_25358151	701	chr6	25357665	ENSCAFT00000035750	ENSCAFP00000031044	P	479	S	608555	benign	cfa04142=Lysosome
-Contig18_chr9_58576258_58576773	215	chr9	58576474	ENSCAFT00000035914	ENSCAFP00000031224	K	118	E	480706	benign	N
-Contig8_chr15_38734005_38734403	242	chr15	38734244	ENSCAFT00000035916	ENSCAFP00000031226	A	237	V	611996	possibly damaging	N
-Contig76_chr3_30625909_30626247	159	chr3	30626069	ENSCAFT00000036198	ENSCAFP00000031549	T	135	S	479171	benign	cfa00260=Glycine, serine and threonine metabolism.cfa00270=Cysteine and methionine metabolism.cfa01100=Metabolic pathways
-Contig86_chr37_14528768_14530343	873	chr37	14529628	ENSCAFT00000036570	ENSCAFP00000031969	V	738	D	478875.609202	possibly damaging	cfa04060=Cytokine-cytokine receptor interaction.cfa04350=TGF-beta signaling pathway
-Contig9_chr5_54124181_54125739	1134	chr5	54125291	ENSCAFT00000036640	ENSCAFP00000032043	A	187	T	610286	benign	N
-Contig107_chr9_8990420_8991676	1178	chr9	8991591	ENSCAFT00000036774	ENSCAFP00000032186	T	55	M	483288	benign	N
-Contig47_chr12_20319418_20320775	1212	chr12	20320622	ENSCAFT00000036825	ENSCAFP00000032241	K	606	T	474930	benign	cfa00280=Valine, leucine and isoleucine degradation.cfa00630=Glyoxylate and dicarboxylate metabolism.cfa00640=Propanoate metabolism.cfa01100=Metabolic pathways
-Contig4_chr2_45195542_45196115	233	chr2	45195785	ENSCAFT00000037022	ENSCAFP00000032463	D	833	N	478055	possibly damaging	N
-Contig8_chr8_77227029_77227651	339	chr8	77227366	ENSCAFT00000037096	ENSCAFP00000032544	T	61	A	490895.612602	benign	cfa04020=Calcium signaling pathway.cfa04145=Phagosome.cfa04640=Hematopoietic cell lineage.cfa04650=Natural killer cell mediated cytotoxicity.cfa04662=B cell receptor signaling pathway.cfa04664=Fc epsilon RI signaling pathway.cfa04666=Fc gamma R-mediated phagocytosis.cfa04672=Intestinal immune network for IgA production.cfa05140=Leishmaniasis.cfa05143=African trypanosomiasis.cfa05146=Amoebiasis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05162=Measles.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05340=Primary immunodeficiency.cfa05414=Dilated cardiomyopathy.cfa05416=Viral myocarditis
-Contig2_chr7_60049092_60051693	266	chr7	60049361	ENSCAFT00000038176	ENSCAFP00000033857	T	195	M	U	probably damaging	N
-Contig31_chr30_24179816_24187402	4867	chr30	24184686	ENSCAFT00000038211	ENSCAFP00000033897	G	103	S	U	benign	N
-Contig9_chr27_48250956_48251793	192	chr27	48251161	ENSCAFT00000038256	ENSCAFP00000033944	T	166	M	477739	probably damaging	N
-Contig45_chr27_43537046_43537944	568	chr27	43537599	ENSCAFT00000038301	ENSCAFP00000033996	M	69	I	611773	benign	cfa04010=MAPK signaling pathway.cfa04810=Regulation of actin cytoskeleton.cfa05200=Pathways in cancer.cfa05218=Melanoma
-Contig133_chr18_28371600_28372547	83	chr18	28371695	ENSCAFT00000038383	ENSCAFP00000034090	L	102	Q	475933	probably damaging	N
-Contig11_chr28_8532951_8533892	511	chr28	8533462	ENSCAFT00000038937	ENSCAFP00000034728	R	19	C	477763	probably damaging	cfa03008=Ribosome biogenesis in eukaryotes.cfa03013=RNA transport
-Contig1_chr14_5733966_5735336	783	chr14	5734754	ENSCAFT00000039094	ENSCAFP00000034905	A	166	T	U	benign	N
-Contig48_chr27_6001075_6001818	392	chr27	6001478	ENSCAFT00000039109	ENSCAFP00000034919	R	103	H	U	probably damaging	N
-Contig40_chr11_43589173_43590288	973	chr11	43590138	ENSCAFT00000039148	ENSCAFP00000034962	R	1617	P	481557	benign	N
-Contig1_chr14_30424688_30425258	179	chr14	30424861	ENSCAFT00000039390	ENSCAFP00000035239	T	648	I	475245	benign	cfa04666=Fc gamma R-mediated phagocytosis.cfa04810=Regulation of actin cytoskeleton
-Contig58_chr8_7461111_7462065	323	chr8	7461423	ENSCAFT00000039451	ENSCAFP00000035309	L	112	F	U	benign	N
-Contig1_chr25_43094809_43095852	908	chr25	43095708	ENSCAFT00000039609	ENSCAFP00000035483	W	18	G	U	unknown	N
-Contig114_chr25_43076436_43076800	141	chr25	43076581	ENSCAFT00000039609	ENSCAFP00000035483	S	45	C	U	unknown	N
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_in/sample.gd_snp
--- a/test-data/test_in/sample.gd_snp	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,402 +0,0 @@
-#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist",
-#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
-Contig161_chr1_4641264_4641879	115	C	T	73.5	chr1	4641382	C	6	0	2	45	8	0	2	51	15	0	2	72	5	0	2	42	6	0	2	45	10	0	2	57	Y	54	0.323	0
-Contig48_chr1_10150253_10151311	11	A	G	94.3	chr1	10150264	A	1	0	2	30	1	0	2	30	1	0	2	30	3	0	2	36	1	0	2	30	1	0	2	30	Y	22	+99.	0
-Contig20_chr1_21313469_21313570	66	C	T	54.0	chr1	21313534	C	4	0	2	39	4	0	2	39	5	0	2	42	4	0	2	39	4	0	2	39	5	0	2	42	N	1	+99.	0
-Contig86_chr1_30984450_30985684	670	C	T	365.0	chr1	30985133	C	9	0	2	54	10	0	2	57	13	0	2	66	3	0	2	36	9	0	2	54	7	0	2	48	Y	145	0.031	0
-Contig5_chr1_32562160_32563940	1215	G	T	163.0	chr1	32563356	G	17	0	2	78	19	0	2	84	20	0	2	87	14	0	2	69	12	0	2	63	10	0	2	57	Y	17	0.251	0
-Contig110_chr1_33385093_33386888	510	C	T	270.0	chr1	33385587	A	14	0	2	69	11	0	2	60	19	0	2	84	11	0	2	60	10	0	2	57	13	0	2	66	Y	13	0.126	0
-Contig100_chr1_33562920_33564288	743	C	T	178.0	chr1	33563655	C	6	0	2	45	10	0	2	57	8	0	2	51	5	0	2	42	13	0	2	66	7	0	2	48	Y	13	0.090	3
-Contig7_chr1_37302355_37302489	97	A	G	59.2	chr1	37302452	G	3	0	2	36	8	0	2	51	5	0	2	42	8	0	2	51	7	0	2	48	6	0	2	45	N	56	2.812	0
-Contig62_chr1_41880715_41882180	1078	T	G	57.6	chr1	41881785	T	14	0	2	69	15	0	2	72	16	0	2	75	13	0	2	66	8	0	2	51	10	0	2	57	Y	21	0.477	0
-Contig47_chr1_48409178_48409384	37	C	T	134.0	chr1	48409215	T	5	0	2	42	6	0	2	45	8	0	2	51	9	0	2	54	4	0	2	39	6	0	2	45	N	66	+99.	0
-Contig119_chr1_49647683_49650077	1618	C	A	99.7	chr1	49649276	A	8	0	2	51	11	0	2	60	10	0	2	57	9	0	2	54	10	0	2	57	14	0	2	69	Y	16	0.166	0
-Contig21_chr1_60697952_60699446	307	G	A	51.9	chr1	60698265	G	12	0	2	63	9	0	2	54	4	0	2	39	6	0	2	45	9	0	2	54	4	0	2	39	Y	98	0.507	0
-Contig131_chr1_62319542_62320564	169	C	G	103.0	chr1	62319709	C	12	0	2	63	12	0	2	66	14	0	2	69	12	0	2	63	9	0	2	54	9	0	2	54	Y	73	0.307	1
-Contig14_chr1_63450425_63450680	101	T	A	102.0	chr1	63450530	T	8	0	2	51	10	0	2	57	18	0	2	81	8	0	2	51	8	0	2	34	8	0	2	51	N	99	1.085	0
-Contig83_chr1_63869778_63869942	40	T	C	23.7	chr1	63869819	C	5	0	2	42	7	0	2	48	2	0	2	33	4	0	2	39	6	0	2	48	4	0	2	39	N	654	1.364	0
-Contig30_chr1_64702572_64703138	178	A	T	117.0	chr1	64702750	T	10	0	2	57	10	0	2	57	20	0	2	87	21	0	2	90	6	0	2	45	12	0	2	63	Y	50	3.872	0
-Contig101_chr1_69868406_69868872	287	G	A	14.6	chr1	69868689	G	13	0	2	66	17	0	2	78	10	0	2	57	8	0	2	51	7	0	2	48	8	0	2	51	N	137	0.305	0
-Contig35_chr1_74482577_74482791	170	G	A	45.4	chr1	74482751	A	3	0	2	36	4	0	2	39	13	0	2	66	2	0	2	33	5	0	2	42	2	0	2	33	N	20	+99.	3
-Contig49_chr1_83865731_83865944	85	G	A	34.1	chr1	-1	N	4	0	2	39	4	0	2	39	8	0	2	51	2	0	2	33	5	0	2	42	4	0	2	39	N	-1	1.485	0
-Contig64_chr1_87343284_87345672	163	T	A	3.76	chr1	87343443	C	0	2	2	1	0	0	-1	0	5	0	2	42	2	0	2	33	0	1	2	14	0	0	-1	0	N	3	0.039	2
-Contig20_chr1_110679280_110679687	181	C	T	87.4	chr1	110679454	-	1	0	2	30	7	0	2	48	4	0	2	39	2	0	2	33	2	0	2	33	0	0	-1	0	N	31	0.660	2
-Contig129_chr1_117547123_117548666	926	G	A	126.0	chr1	117548059	G	19	0	2	84	9	0	2	54	11	0	2	60	10	0	2	57	12	0	2	63	11	0	2	60	Y	64	0.049	0
-Contig7_chr1_125154638_125154844	190	G	T	130.0	chr1	125154818	A	5	0	2	42	4	0	2	39	7	0	2	48	2	0	2	33	7	0	2	48	4	0	2	39	N	33	+99.	0
-Contig222_chr2_9817738_9818143	220	C	T	888.0	chr2	9817960	C	17	0	2	78	12	0	2	63	20	0	2	87	8	0	2	51	11	0	2	60	12	0	2	63	Y	76	0.093	1
-Contig47_chr2_25470778_25471576	126	G	A	888.0	chr2	25470896	G	12	0	2	63	14	0	2	69	14	0	2	69	10	0	2	57	18	0	2	81	13	0	2	66	N	11	0.289	1
-Contig10_chr2_40859744_40860534	637	G	A	888.0	chr2	40860397	A	3	0	2	36	3	0	2	36	2	0	2	33	7	0	2	48	6	0	2	45	8	0	2	51	Y	42	1.435	0
-Contig52_chr2_41421981_41422725	604	C	A	888.0	chr2	41422583	A	17	0	2	78	18	0	2	81	14	0	2	69	17	0	2	78	12	0	2	63	14	0	2	69	Y	44	0.882	0
-Contig94_chr2_43869105_43870358	220	G	A	888.0	chr2	43869333	G	12	0	2	63	18	0	2	81	11	0	2	60	15	0	2	72	12	0	2	63	13	0	2	66	Y	1	0.156	0
-Contig34_chr2_48444129_48444939	695	C	T	134.0	chr2	48444828	C	14	0	2	69	8	0	2	51	16	0	2	75	17	0	2	78	9	0	2	54	15	0	2	72	Y	161	0.375	0
-Contig6_chr2_56859179_56859956	671	T	C	999.9	chr2	56859851	T	15	0	2	72	18	0	2	81	20	0	2	90	19	0	2	84	19	0	2	84	24	0	2	99	N	28	5.308	1
-Contig115_chr2_61631913_61632510	310	G	T	999.3	chr2	61632216	G	7	0	2	48	9	0	2	54	7	0	2	48	11	0	2	60	10	0	2	57	10	0	2	57	N	13	0.184	0
-Contig31_chr2_67331584_67331785	39	C	T	999.0	chr2	67331623	C	11	0	2	60	10	0	2	57	7	0	2	48	9	0	2	54	2	0	2	33	4	0	2	39	N	110	0.647	1
-Contig92_chr2_75906683_75907774	773	T	C	85.4	chr2	75907438	C	12	0	2	63	12	0	2	63	17	0	2	78	8	0	2	51	8	0	2	51	13	0	2	66	Y	93	0.166	0
-Contig163_chr2_76402959_76404830	221	C	T	127.0	chr2	76403181	C	4	0	2	42	10	0	2	57	9	0	2	54	11	0	2	60	7	0	2	48	9	0	2	54	Y	54	0.178	1
-Contig177_chr2_79559305_79560033	168	C	T	5.67	chr2	79559476	A	2	0	2	33	3	0	2	36	1	0	2	30	2	0	2	33	0	0	-1	0	1	0	2	30	N	56	0.257	0
-Contig8_chr2_82945728_82945839	61	T	C	223.0	chr2	-1	N	2	0	2	33	4	0	2	39	9	0	2	54	3	0	2	36	5	0	2	42	0	0	-1	0	N	-1	+99.	1
-Contig59_chr2_85243022_85243758	506	G	A	96.3	chr2	85243509	T	9	0	2	54	11	0	2	60	12	0	2	63	14	0	2	69	10	0	2	57	7	0	2	48	Y	6	0.459	0
-Contig56_chr3_17326225_17327548	387	G	C	91.2	chr3	17326591	G	14	0	2	69	13	0	2	66	15	0	2	72	15	0	2	72	13	0	2	66	12	0	2	63	Y	20	0.225	3
-Contig108_chr3_46210055_46210874	367	A	G	21.0	chr3	46210423	A	19	0	2	84	10	0	2	57	16	0	2	75	14	0	2	69	20	0	2	87	11	0	2	60	N	236	0.028	1
-Contig16_chr3_47113407_47114449	322	G	A	105.0	chr3	47113713	G	13	0	2	66	17	0	2	78	15	0	2	72	6	0	2	45	11	0	2	60	11	0	2	60	Y	114	0.132	5
-Contig3_chr3_47564810_47565251	262	T	G	112.0	chr3	47565104	T	14	0	2	69	16	0	2	75	20	0	2	87	10	0	2	57	9	0	2	54	8	0	2	51	Y	24	0.073	1
-Contig35_chr3_49662401_49662929	270	A	T	96.1	chr3	49662652	A	14	0	2	69	11	0	2	60	23	0	2	96	13	0	2	66	12	0	2	63	11	0	2	60	Y	36	3.583	2
-Contig97_chr3_49820354_49821631	1069	G	A	44.1	chr3	49821402	G	9	0	2	54	9	0	2	54	6	0	2	45	10	0	2	57	5	0	2	42	8	0	2	51	N	6	0.201	2
-Contig1_chr3_51588422_51589409	926	A	G	51.0	chr3	51589353	G	2	0	2	33	2	0	2	33	6	0	2	45	4	0	2	39	9	0	2	54	11	0	2	60	N	21	1.147	0
-Contig25_chr3_53260697_53262560	402	G	A	211.0	chr3	53261095	G	17	0	2	78	14	0	2	69	15	0	2	75	12	0	2	63	14	0	2	69	12	0	2	63	Y	116	1.033	0
-Contig11_chr3_53992739_53995954	2392	G	A	82.4	chr3	53995143	A	12	0	2	66	11	0	2	60	14	0	2	69	6	0	2	45	11	0	2	60	17	0	2	78	Y	358	0.321	1
-Contig236_chr3_72676275_72676473	128	G	A	278.0	chr3	72676410	G	12	0	2	63	11	0	2	60	13	0	2	66	10	0	2	57	11	0	2	60	8	0	2	51	N	36	0.496	1
-Contig48_chr3_74792236_74792388	63	T	C	111.0	chr3	74792289	-	17	0	2	78	9	0	2	54	9	0	2	54	5	0	2	42	11	0	2	60	9	0	2	54	N	-1	3.528	0
-Contig65_chr3_80727952_80728283	39	T	C	71.2	chr3	80727990	T	7	0	2	48	3	0	2	36	8	0	2	51	6	0	2	45	8	0	2	51	11	0	2	60	N	22	7.078	0
-Contig53_chr3_86407941_86409349	1406	G	A	86.9	chr3	86409317	A	5	0	2	42	5	0	2	42	4	0	2	39	10	0	2	57	8	0	2	51	12	0	2	63	N	14	3.285	1
-Contig13_chr3_92409738_92412300	718	A	G	23.3	chr3	92410450	A	12	0	2	63	16	0	2	75	18	0	2	81	13	0	2	66	22	0	2	93	7	0	2	48	Y	23	0.224	2
-Contig134_chr4_12145648_12148225	1326	C	T	164.0	chr4	12146961	C	9	0	2	54	8	0	2	51	7	0	2	48	3	0	2	36	5	0	2	42	5	0	2	42	Y	4	0.080	1
-Contig88_chr4_15557471_15557833	268	A	G	145.0	chr4	15557737	A	6	0	2	45	6	0	2	45	11	0	2	60	9	0	2	54	5	0	2	42	6	0	2	45	Y	46	4.138	0
-Contig53_chr4_18823968_18824478	149	A	G	91.3	chr4	18824115	A	18	0	2	81	15	0	2	72	21	0	2	90	13	0	2	66	9	0	2	54	12	0	2	63	N	51	0.251	0
-Contig86_chr4_24953866_24956222	1985	C	T	76.4	chr4	24955841	T	8	0	2	51	1	0	2	30	3	0	2	36	7	0	2	48	2	0	2	33	6	0	2	45	Y	12	0.357	0
-Contig19_chr4_26233601_26233991	146	G	C	51.6	chr4	26233744	G	10	0	2	57	8	0	2	51	9	0	2	54	5	0	2	42	9	0	2	54	4	0	2	39	N	41	0.163	3
-Contig78_chr4_28579975_28580134	30	T	G	19.6	chr4	28579994	-	4	0	2	39	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	2	0	2	33	N	33	0.499	0
-Contig16_chr4_30177226_30179725	621	C	T	88.4	chr4	30177859	C	20	0	2	87	13	0	2	66	13	0	2	66	11	0	2	60	8	0	2	51	8	0	2	51	Y	45	0.797	1
-Contig30_chr4_46196500_46197672	1045	A	C	33.4	chr4	46197522	C	16	0	2	75	9	0	2	54	4	0	2	39	7	0	2	48	14	0	2	69	6	0	2	45	Y	43	0.306	0
-Contig2_chr4_47039007_47039323	158	G	C	35.1	chr4	47039160	-	8	0	2	51	9	0	2	54	13	0	2	66	8	0	2	51	10	0	2	60	9	0	2	54	N	0	0.131	0
-Contig17_chr4_61310346_61311158	267	C	T	49.9	chr4	61310604	T	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	14	0	2	69	7	0	2	48	Y	219	0.098	0
-Contig26_chr4_64190783_64191295	64	A	G	162.0	chr4	64190843	A	10	0	2	57	6	0	2	45	20	0	2	87	12	0	2	63	17	0	2	78	7	0	2	48	Y	306	7.428	0
-Contig11_chr4_65500960_65501654	634	T	C	107.0	chr4	65501585	T	13	0	2	66	14	0	2	69	13	0	2	66	13	0	2	66	6	0	2	45	18	0	2	81	Y	10	6.849	0
-Contig38_chr4_67768488_67768982	113	A	G	102.0	chr4	67768598	A	9	0	2	54	8	0	2	51	9	0	2	54	11	0	2	60	10	0	2	57	7	0	2	48	Y	188	3.175	0
-Contig30_chr4_70978564_70979580	596	A	G	164.0	chr4	70979151	A	15	0	2	72	12	0	2	63	20	0	2	87	14	0	2	69	15	0	2	72	15	0	2	72	Y	111	2.458	2
-Contig72_chr4_74225793_74226492	674	A	G	110.0	chr4	74226472	A	5	0	2	42	3	0	2	36	2	0	2	33	3	0	2	36	7	0	2	48	4	0	2	39	Y	115	+99.	1
-Contig32_chr4_75618955_75620254	301	T	C	333.0	chr4	75619257	C	10	0	2	57	8	0	2	51	12	0	2	63	20	0	2	87	12	0	2	63	14	0	2	69	Y	34	0.163	2
-Contig31_chr5_4734956_4736547	1166	C	T	133.0	chr5	4736132	C	14	0	2	69	8	0	2	51	17	0	2	78	4	0	2	39	9	0	2	54	12	0	2	63	Y	1	0.021	0
-Contig113_chr5_11052263_11052603	28	C	T	38.2	chr5	11052280	C	1	2	1	12	3	2	1	10	5	0	2	42	2	1	2	13	3	0	2	36	8	0	2	51	Y	161	+99.	0
-Contig30_chr5_15698241_15699076	396	G	T	76.6	chr5	15698633	T	8	0	2	51	9	0	2	54	10	0	2	57	7	0	2	48	11	0	2	60	8	0	2	54	Y	65	0.009	0
-Contig36_chr5_17709244_17710004	373	T	C	281.0	chr5	17709624	T	6	0	2	45	9	0	2	54	7	0	2	48	4	0	2	39	10	0	2	57	4	0	2	39	Y	16	0.131	0
-Contig13_chr5_21881138_21881562	227	A	G	251.0	chr5	21881356	A	11	0	2	60	20	0	2	87	22	0	2	93	10	0	2	57	10	0	2	57	21	0	2	90	Y	182	2.013	0
-Contig5_chr5_23188121_23190168	1841	C	T	141.0	chr5	23189975	C	20	0	2	87	19	0	2	84	22	0	2	93	16	0	2	75	18	0	2	81	14	0	2	69	N	45	0.355	0
-Contig6_chr5_26899813_26900498	97	A	C	88.6	chr5	26899910	A	15	0	2	72	14	0	2	69	27	0	2	108	15	0	2	72	13	0	2	69	12	0	2	63	Y	92	7.370	3
-Contig314_chr5_34019166_34019319	72	C	A	20.1	chr5	-1	N	6	0	2	45	9	0	2	54	4	0	2	39	4	0	2	39	9	0	2	54	5	0	2	42	N	-1	+99.	4
-Contig147_chr5_38980258_38980559	221	C	T	40.8	chr5	38980477	C	15	0	2	72	15	0	2	72	19	0	2	84	10	0	2	57	12	0	2	63	20	0	2	87	Y	11	4.576	0
-Contig115_chr5_48119079_48120169	151	C	T	78.3	chr5	48119234	C	17	0	2	78	10	0	2	57	14	0	2	69	16	0	2	75	8	0	2	51	12	0	2	63	Y	205	0.320	0
-Contig45_chr5_50892738_50892968	169	C	A	25.8	chr5	50892911	C	10	0	2	57	7	0	2	48	10	0	2	60	6	0	2	45	6	0	2	45	13	0	2	66	N	244	0.497	1
-Contig40_chr5_51484164_51484696	14	A	G	53.3	chr5	51484180	A	6	0	2	45	4	0	2	39	4	0	2	39	3	0	2	36	0	0	2	13	3	0	2	36	N	63	+99.	1
-Contig40_chr5_51664286_51667573	861	C	T	148.0	chr5	51665149	C	20	0	2	87	21	0	2	90	20	0	2	87	11	0	2	60	16	0	2	75	15	0	2	72	Y	207	0.080	1
-Contig15_chr5_51889708_51891244	882	A	G	149.0	chr5	51890581	G	13	0	2	66	18	0	2	81	17	0	2	78	22	0	2	93	15	0	2	72	22	0	2	93	Y	7	0.025	1
-Contig143_chr5_57231364_57232010	294	T	C	78.5	chr5	57231644	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	10	0	2	57	6	0	2	45	Y	73	0.337	2
-Contig13_chr5_57609985_57610584	496	C	T	50.5	chr5	57610476	C	17	0	2	78	9	0	2	54	6	0	2	45	8	0	2	51	10	0	2	57	12	0	2	63	N	77	2.022	1
-Contig230_chr5_58486998_58487280	227	T	C	192.0	chr5	58487232	T	3	0	2	36	4	0	2	39	9	0	2	54	6	0	2	45	4	0	2	39	7	0	2	48	N	24	0.100	2
-Contig385_chr5_60122961_60123128	15	C	G	136.0	chr5	60122976	C	0	0	-1	0	0	0	-1	0	1	0	2	30	1	0	2	30	3	0	2	36	0	0	-1	0	N	100	+99.	2
-Contig143_chr5_65121393_65122035	558	C	A	127.0	chr5	65121959	A	0	0	-1	0	5	0	2	42	3	0	2	36	4	0	2	39	0	0	-1	0	4	0	2	39	Y	285	0.391	1
-Contig32_chr5_70852360_70853289	282	G	A	114.0	chr5	70852623	G	16	0	2	75	11	0	2	60	13	0	2	66	12	0	2	63	13	0	2	66	7	0	2	48	Y	33	0.276	0
-Contig215_chr5_70946445_70947428	363	T	G	28.2	chr5	70946809	C	4	0	2	39	0	5	0	12	9	0	2	54	6	0	2	45	3	3	2	1	9	0	2	54	N	43	0.153	0
-Contig100_chr5_71189678_71190590	813	C	T	30.8	chr5	71190523	C	11	0	2	60	11	0	2	60	9	0	2	54	10	0	2	57	6	0	2	45	13	0	2	66	Y	8	0.362	1
-Contig45_chr5_76133561_76134403	388	A	G	103.0	chr5	76133941	G	3	0	2	36	8	0	2	51	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	57	0.038	0
-Contig61_chr5_90202541_90204393	909	C	T	101.0	chr5	90203461	T	7	0	2	48	5	0	2	42	14	0	2	69	3	0	2	36	5	0	2	42	8	0	2	51	Y	64	1.448	0
-Contig111_chr6_5821219_5822519	1060	A	G	68.1	chr6	5822321	T	7	0	2	48	6	0	2	45	11	0	2	60	9	0	2	54	3	0	2	36	12	0	2	63	Y	7	0.231	1
-Contig220_chr6_10671338_10672441	999	T	C	36.3	chr6	10672322	T	5	0	2	42	11	0	2	60	11	0	2	60	8	0	2	51	5	0	2	42	9	0	2	54	Y	1	1.667	0
-Contig226_chr6_17361986_17362884	418	G	C	251.0	chr6	17362406	G	6	0	2	45	8	0	2	51	7	0	2	48	9	0	2	54	7	0	2	48	7	0	2	48	Y	7	0.147	0
-Contig380_chr6_18173971_18174169	180	C	T	4.87	chr6	18174144	T	0	0	-1	0	4	0	2	39	7	0	2	48	2	0	2	33	2	0	2	33	1	0	2	30	N	56	2.589	0
-Contig51_chr6_20231207_20231785	161	A	G	70.5	chr6	20231375	G	13	0	2	66	5	0	2	42	8	0	2	51	2	0	2	36	5	0	2	42	5	0	2	42	Y	153	1.754	0
-Contig102_chr6_30271329_30271577	39	T	G	139.0	chr6	30271371	G	3	0	2	36	4	0	2	39	6	0	2	45	1	0	2	30	4	0	2	39	4	0	2	39	N	15	1.159	0
-Contig217_chr6_31393824_31394218	97	G	A	115.0	chr6	31393921	G	9	0	2	54	19	0	2	84	15	0	2	72	12	0	2	63	7	0	2	48	10	0	2	57	N	45	0.477	0
-Contig186_chr6_31928098_31928245	73	G	A	117.0	chr6	-1	N	5	0	2	42	8	0	2	51	2	0	2	33	4	0	2	39	1	0	2	30	5	0	2	42	N	-1	0.276	1
-Contig52_chr6_33188498_33188724	123	G	A	59.0	chr6	-1	N	5	0	2	42	13	0	2	66	8	0	2	51	4	0	2	39	9	0	2	54	9	0	2	54	N	-1	0.880	1
-Contig102_chr6_38743009_38743435	290	A	G	178.0	chr6	38743311	A	11	0	2	60	13	0	2	66	9	0	2	54	11	0	2	60	12	0	2	63	13	0	2	66	Y	34	0.148	4
-Contig81_chr6_49018353_49019532	179	C	A	72.5	chr6	49018530	A	15	0	2	72	13	0	2	66	19	0	2	72	8	0	2	51	12	0	2	63	16	0	2	75	Y	15	0.145	1
-Contig112_chr6_51024554_51024851	100	A	G	121.0	chr6	51024654	A	10	0	2	57	12	0	2	63	9	0	2	54	13	0	2	66	14	0	2	69	17	0	2	78	N	75	4.287	0
-Contig40_chr6_51412751_51413807	227	T	C	94.5	chr6	51412975	C	5	0	2	42	8	0	2	51	7	0	2	48	9	0	2	54	11	0	2	60	10	0	2	57	Y	4	5.661	0
-Contig47_chr6_69073222_69074767	1315	T	C	212.0	chr6	69074558	T	20	0	2	87	17	0	2	78	18	0	2	81	12	0	2	63	17	0	2	78	7	0	2	48	Y	9	0.652	0
-Contig30_chr6_74848932_74849059	57	C	G	46.3	chr6	74848993	C	7	0	2	48	7	0	2	33	6	0	2	45	7	0	2	48	5	0	2	42	6	0	2	45	N	-1	+99.	1
-Contig84_chr7_6648683_6650255	1297	G	A	110.0	chr7	6649988	G	18	0	2	81	9	0	2	54	22	0	2	77	16	0	2	75	20	0	2	87	6	0	2	45	Y	83	0.166	0
-Contig239_chr7_13007379_13007700	275	A	G	39.8	chr7	13007642	A	8	0	2	51	5	0	2	42	8	0	2	51	3	0	2	36	3	0	2	36	5	0	2	42	N	46	1.511	3
-Contig119_chr7_18310707_18310948	23	A	T	133.0	chr7	18310729	A	6	0	2	45	5	0	2	42	10	0	2	57	5	0	2	42	2	0	2	33	2	0	2	33	N	4553	+99.	0
-Contig93_chr7_18513377_18513741	173	T	C	130.0	chr7	18513533	C	15	0	2	72	11	0	2	60	18	0	2	81	6	0	2	45	10	0	2	57	14	0	2	69	Y	115	0.174	0
-Contig133_chr7_19603333_19603776	414	C	G	31.9	chr7	19603734	G	10	0	2	57	4	0	2	39	4	0	2	39	5	0	2	42	9	0	2	54	9	0	2	54	N	78	+99.	5
-Contig132_chr7_20426224_20428145	1815	A	G	28.3	chr7	20428041	A	11	1	2	43	12	0	2	63	19	0	2	84	23	0	2	96	14	0	2	69	10	0	2	57	N	11	0.264	0
-Contig206_chr7_26281823_26282074	103	C	A	101.0	chr7	26281925	T	11	0	2	60	16	0	2	61	19	0	2	84	6	0	2	45	19	0	2	84	16	0	2	75	N	-1	0.947	1
-Contig116_chr7_45858984_45859111	38	T	C	73.2	chr7	-1	N	2	0	2	33	1	0	2	30	3	0	2	36	2	0	2	33	2	0	2	33	1	0	2	30	N	-1	3.442	0
-Contig38_chr7_50681997_50682600	42	T	C	92.4	chr7	50682037	G	6	0	2	45	2	0	2	33	10	0	2	57	12	0	2	63	5	0	2	42	6	0	2	45	Y	94	0.146	0
-Contig55_chr7_53147505_53148974	894	A	G	68.4	chr7	53148397	G	22	0	2	93	13	0	2	66	16	0	2	75	8	0	2	51	16	0	2	75	11	0	2	60	Y	19	0.060	0
-Contig4_chr7_53685534_53688206	1709	C	G	76.2	chr7	53687225	C	18	0	2	81	17	0	2	78	18	0	2	81	15	0	2	72	14	0	2	69	14	0	2	69	Y	32	0.659	1
-Contig61_chr7_55832923_55834065	506	T	C	185.0	chr7	55833450	C	9	0	2	54	10	0	2	57	22	0	2	93	12	0	2	63	12	0	2	63	7	0	2	48	Y	1	0.019	0
-Contig91_chr8_12804505_12805470	409	C	A	111.0	chr8	12804906	C	8	0	2	51	10	0	2	57	15	0	2	72	12	0	2	63	14	0	2	69	15	0	2	72	N	145	0.175	0
-Contig30_chr8_17147743_17147923	13	G	A	105.0	chr8	17147756	A	1	3	1	19	1	0	2	30	3	0	2	36	1	0	2	30	1	0	2	30	3	0	2	36	N	6	+99.	0
-Contig8_chr8_27811135_27812620	333	C	T	37.9	chr8	27811458	C	4	0	2	39	11	0	2	60	18	0	2	81	5	0	2	42	6	0	2	45	5	0	2	42	Y	1	0.272	0
-Contig66_chr8_28273102_28273660	175	G	C	81.6	chr8	28273263	T	9	0	2	54	17	0	2	78	19	0	2	84	8	0	2	51	16	0	2	75	19	0	2	84	Y	3	2.735	0
-Contig84_chr8_31375511_31376456	443	T	C	125.0	chr8	31375954	T	10	0	2	57	15	0	2	72	27	0	2	108	18	0	2	81	16	0	2	75	9	0	2	54	Y	2	0.650	0
-Contig18_chr8_32575859_32577431	264	T	C	151.0	chr8	32576124	T	20	0	2	87	14	0	2	69	17	0	2	78	14	0	2	69	13	0	2	66	14	0	2	69	Y	17	0.915	1
-Contig54_chr8_40913908_40916451	1275	G	A	175.0	chr8	40915190	G	10	0	2	57	8	0	2	51	11	0	2	60	7	0	2	48	8	0	2	51	9	0	2	54	Y	21	0.056	3
-Contig93_chr8_44658786_44659075	180	T	G	55.3	chr8	44658964	T	4	0	2	39	3	0	2	36	6	0	2	45	5	0	2	45	5	0	2	42	4	0	2	39	N	14	0.188	0
-Contig17_chr8_57490059_57490498	69	G	T	97.4	chr8	57490127	A	2	0	2	33	11	0	2	60	15	0	2	72	16	0	2	75	8	0	2	51	10	0	2	57	N	40	0.522	5
-Contig66_chr8_58562376_58563446	345	C	G	5.74	chr8	58562721	C	14	0	2	69	12	0	2	63	9	0	2	57	10	0	2	57	9	0	2	54	10	0	2	57	Y	6	0.685	0
-Contig44_chr8_71186368_71188207	1455	G	T	147.0	chr8	71187818	G	4	10	1	74	3	0	2	36	20	0	2	87	12	0	2	63	8	0	2	51	10	0	2	57	Y	88	0.036	0
-Contig73_chr9_29451535_29452248	616	A	G	24.7	chr9	29452127	G	4	0	2	39	7	0	2	48	1	0	2	30	4	0	2	39	7	0	2	48	6	0	2	45	N	49	0.448	4
-Contig96_chr9_39008495_39009278	215	A	C	98.7	chr9	39008708	C	7	0	2	48	13	0	2	66	28	0	2	111	16	0	2	75	17	0	2	78	17	0	2	78	Y	8	0.427	1
-Contig22_chr10_15505382_15505589	172	T	C	38.5	chr10	15505548	T	2	0	2	33	6	0	2	45	8	0	2	51	8	0	2	51	9	0	2	54	12	0	2	63	N	284	2.861	0
-Contig69_chr10_40547265_40548153	371	G	A	58.1	chr10	40547649	A	9	0	2	54	8	0	2	51	8	0	2	51	9	0	2	54	4	0	2	39	5	0	2	42	Y	20	0.138	4
-Contig63_chr10_42716594_42719945	1018	A	G	88.7	chr10	42717616	G	13	0	2	66	14	0	2	69	13	0	2	66	12	0	2	63	18	0	2	81	5	0	2	42	Y	25	1.740	0
-Contig22_chr10_43255307_43255570	81	C	A	37.2	chr10	43255383	C	15	0	2	72	18	0	2	81	22	0	2	93	16	0	2	75	11	0	2	60	12	0	2	63	N	62	0.450	0
-Contig9_chr10_51475063_51476054	770	C	T	57.3	chr10	51475839	C	6	0	2	45	16	0	2	75	16	0	2	75	13	0	2	66	9	0	2	54	9	2	2	21	N	80	0.394	0
-Contig42_chr10_53816543_53818392	1642	G	A	27.5	chr10	53818172	A	7	0	2	48	13	0	2	66	17	0	2	78	14	0	2	69	19	0	2	84	16	0	2	75	N	1	0.433	0
-Contig36_chr10_53992615_53993741	229	G	C	86.2	chr10	53992846	G	17	0	2	78	14	0	2	69	13	0	2	66	15	0	2	72	12	0	2	63	15	0	2	72	N	23	1.912	0
-Contig20_chr10_58141129_58141750	575	C	T	46.1	chr10	58141701	C	7	0	2	48	8	0	2	51	9	0	2	54	3	0	2	36	4	0	2	39	9	0	2	54	N	1	4.264	0
-Contig26_chr10_59510973_59511899	146	C	A	29.0	chr10	59511126	C	8	0	2	51	13	0	2	66	18	0	2	81	13	0	2	66	10	0	2	57	7	0	2	48	Y	208	1.077	0
-Contig72_chr11_7142765_7143772	146	G	A	152.0	chr11	7142911	A	8	0	2	51	8	0	2	51	24	0	2	99	10	0	2	57	17	0	2	78	11	0	2	60	Y	90	1.137	0
-Contig103_chr11_8844784_8845095	214	T	G	135.0	chr11	8844993	T	1	1	2	12	10	0	2	57	5	4	1	26	2	3	1	13	2	7	1	34	1	1	2	13	Y	75	0.731	0
-Contig9_chr11_9904571_9905983	1284	C	T	151.0	chr11	9905857	C	16	0	2	75	19	0	2	84	17	0	2	78	16	0	2	75	12	0	2	63	13	1	2	44	Y	11	0.422	1
-Contig35_chr11_22459883_22460855	714	T	G	54.9	chr11	22460577	T	3	0	2	36	1	0	2	30	3	0	2	36	2	0	2	33	2	0	2	33	0	0	-1	0	N	24	0.382	0
-Contig7_chr11_40017076_40017630	352	C	T	46.3	chr11	40017422	C	7	0	2	48	9	0	2	54	6	0	2	45	8	0	2	51	16	0	2	75	9	0	2	54	Y	44	0.336	0
-Contig108_chr11_42953408_42955156	367	A	G	89.4	chr11	42953779	A	17	0	2	78	11	0	2	60	14	0	2	69	20	0	2	87	14	0	2	69	17	0	2	78	Y	118	0.784	1
-Contig82_chr11_43490732_43490862	60	C	T	47.3	chr11	-1	N	0	0	-1	0	0	0	-1	0	1	0	2	30	3	0	2	36	1	1	2	19	1	0	2	30	N	-1	6.763	0
-Contig16_chr11_53408448_53408790	187	A	G	153.0	chr11	53408638	A	7	0	2	48	9	0	2	54	18	0	2	81	10	0	2	57	11	0	2	60	12	0	2	63	Y	116	1.367	0
-Contig21_chr12_18403415_18404381	586	G	T	34.5	chr12	18403983	-	13	0	2	66	16	0	2	75	25	0	2	102	12	0	2	63	12	0	2	63	14	0	2	69	Y	12	0.068	0
-Contig33_chr12_19804073_19804529	178	T	C	69.4	chr12	19804261	T	13	0	2	66	13	0	2	66	22	0	2	93	11	0	2	60	12	0	2	63	18	0	2	81	Y	11	1.571	0
-Contig41_chr12_25565452_25566993	475	G	T	6.29	chr12	25565926	G	15	0	2	72	14	0	2	69	10	0	2	57	15	0	2	72	18	0	2	81	19	0	2	84	N	10	2.231	1
-Contig9_chr12_27204351_27204696	239	A	G	145.0	chr12	27204587	A	7	0	2	48	8	0	2	51	12	0	2	63	8	0	2	51	11	0	2	60	11	0	2	60	Y	14	0.046	0
-Contig45_chr12_30548282_30550498	448	C	T	124.0	chr12	30548703	-	9	0	2	54	11	0	2	60	22	0	2	93	19	0	2	84	12	0	2	63	12	0	2	63	Y	66	0.305	0
-Contig46_chr12_35571846_35572563	58	G	C	83.2	chr12	35571906	G	4	0	2	39	10	0	2	57	11	0	2	60	6	0	2	45	10	0	2	57	6	0	2	45	Y	55	+99.	1
-Contig28_chr12_42075871_42076044	136	G	A	134.0	chr12	42076006	A	6	0	2	45	5	0	2	42	7	0	2	48	7	0	2	48	2	0	2	33	4	0	2	39	N	3	9.479	0
-Contig16_chr12_42386141_42387454	194	A	G	161.0	chr12	42386323	A	11	0	2	60	8	0	2	54	23	0	2	96	17	0	2	78	6	0	2	45	13	0	2	66	Y	7	0.927	1
-Contig42_chr12_44424628_44425829	255	A	G	84.4	chr12	44424879	A	12	0	2	63	19	0	2	84	23	0	2	96	15	0	2	72	18	0	2	81	14	0	2	69	Y	18	1.190	2
-Contig10_chr12_44447953_44449698	63	C	T	105.0	chr12	44448020	C	11	0	2	60	9	0	2	54	12	0	2	63	10	0	2	57	15	0	2	72	8	0	2	51	Y	31	11.791	0
-Contig5_chr12_53880670_53882675	1221	A	C	99.4	chr12	53881888	A	16	0	2	75	18	0	2	81	23	0	2	96	10	0	2	57	15	0	2	72	17	0	2	78	Y	31	0.061	0
-Contig86_chr12_56715356_56716464	818	T	C	166.0	chr12	56716164	T	20	0	2	87	16	0	2	75	16	0	2	75	14	0	2	69	13	0	2	66	7	0	2	48	Y	22	1.092	0
-Contig3_chr12_65021967_65024097	238	T	G	92.6	chr12	65022205	T	17	0	2	78	14	0	2	69	16	0	2	75	9	0	2	54	13	0	2	66	15	0	2	72	Y	258	0.117	0
-Contig43_chr12_66499742_66500010	121	G	T	41.5	chr12	66499866	G	12	0	2	63	4	0	2	39	8	0	2	51	6	0	2	45	10	0	2	57	6	0	2	45	N	42	0.421	0
-Contig14_chr12_71364692_71365311	20	A	C	103.0	chr12	71364712	A	7	0	2	48	3	0	2	36	5	0	2	42	1	0	2	30	2	0	2	33	3	0	2	36	Y	35	+99.	0
-Contig37_chr13_15910164_15910426	245	G	A	32.9	chr13	-1	N	3	4	1	41	4	0	2	39	3	0	2	36	4	0	2	39	3	0	2	36	10	0	2	57	N	-1	2.159	1
-Contig107_chr13_26045881_26046290	341	C	G	81.4	chr13	26046230	C	16	0	2	75	20	0	2	90	14	0	2	69	15	0	2	72	9	0	2	54	9	0	2	54	Y	51	4.510	0
-Contig251_chr13_28498333_28501066	864	T	G	296.0	chr13	28499180	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	5	0	2	42	6	0	2	45	Y	9	0.068	0
-Contig154_chr13_36777857_36778736	356	G	A	95.5	chr13	36778225	A	6	0	2	45	11	0	2	60	11	0	2	60	9	0	2	54	13	0	2	66	8	0	2	51	Y	59	0.192	0
-Contig37_chr13_42529793_42530857	150	G	T	192.0	chr13	42529926	G	18	0	2	81	14	0	2	69	16	0	2	75	14	0	2	69	8	0	2	51	11	0	2	60	N	22	0.795	5
-Contig47_chr13_47045833_47046626	257	A	C	28.5	chr13	47046097	A	13	0	2	66	10	0	2	57	17	0	2	78	20	0	2	87	15	0	2	72	9	0	2	57	N	129	0.468	0
-Contig42_chr13_47730018_47730856	254	A	G	75.1	chr13	47730294	A	13	0	2	66	6	0	2	45	12	0	2	63	9	0	2	54	16	0	2	75	11	0	2	63	Y	630	0.049	1
-Contig55_chr13_53467708_53468101	221	T	G	132.0	chr13	53467925	T	25	0	2	102	12	0	2	63	26	0	2	105	7	0	2	48	16	0	2	75	16	0	2	75	N	20	5.717	1
-Contig49_chr13_55103679_55105532	503	G	A	76.0	chr13	55104178	G	21	0	2	90	19	0	2	84	18	0	2	81	20	0	2	87	8	9	1	89	17	0	2	78	Y	20	0.259	1
-Contig42_chr13_64785759_64786045	14	C	G	22.8	chr13	64785772	C	2	0	2	33	2	0	2	33	4	0	2	39	7	0	2	48	8	0	2	51	2	0	2	33	N	527	+99.	1
-Contig66_chr13_66021813_66022244	319	C	T	125.0	chr13	66022136	C	11	0	2	60	16	0	2	75	15	0	2	75	12	0	2	63	17	0	2	78	8	0	2	51	N	14	0.055	3
-Contig48_chr14_11839435_11843272	3014	A	G	163.0	chr14	11842446	A	10	0	2	57	8	0	2	51	13	0	2	66	10	0	2	57	5	0	2	42	10	0	2	57	Y	31	0.908	0
-Contig9_chr14_23353717_23354432	80	G	A	61.3	chr14	23353797	G	3	0	2	36	6	0	2	45	11	0	2	60	8	0	2	51	4	0	2	39	2	4	1	35	Y	11	0.444	0
-Contig14_chr14_24131180_24133488	1633	G	A	131.0	chr14	24132818	G	21	0	2	90	16	0	2	75	12	0	2	63	10	0	2	57	11	0	2	60	20	0	2	87	Y	36	0.347	0
-Contig28_chr14_26905747_26909514	975	G	C	3.13	chr14	26906723	G	16	0	2	75	10	0	2	57	12	0	2	63	15	0	2	72	10	0	2	57	7	0	2	48	N	287	0.117	2
-Contig14_chr14_29616948_29618316	109	G	A	80.3	chr14	29617053	-	17	0	2	78	16	0	2	75	16	0	2	75	10	0	2	57	17	0	2	78	19	0	2	84	Y	32	1.051	0
-Contig24_chr14_29728478_29728839	242	T	A	107.0	chr14	29728724	T	2	0	2	33	12	0	2	63	10	0	2	57	12	0	2	63	5	0	2	42	9	0	2	54	N	70	2.712	0
-Contig76_chr14_30028102_30029179	1046	C	T	38.5	chr14	30029169	T	3	0	2	36	6	0	2	45	9	0	2	54	7	0	2	48	9	0	2	54	8	0	2	51	Y	96	+99.	0
-Contig115_chr14_31417207_31417574	259	A	G	12.1	chr14	31417454	G	13	0	2	66	15	0	2	72	21	0	2	90	12	0	2	63	13	0	2	66	9	0	2	54	N	28	5.379	2
-Contig70_chr14_46653662_46653790	111	G	A	46.7	chr14	46653768	G	7	0	2	48	5	0	2	42	11	0	2	60	11	0	2	60	8	0	2	51	10	0	2	57	N	21	+99.	2
-Contig43_chr14_49991855_49993511	918	A	G	112.0	chr14	49992767	G	15	0	2	72	10	0	2	57	11	0	2	63	9	0	2	54	12	0	2	63	9	0	2	54	Y	6	0.314	1
-Contig64_chr14_56768376_56768902	473	C	T	29.0	chr14	56768832	C	15	0	2	72	11	0	2	60	14	0	2	69	14	0	2	69	7	0	2	48	9	0	2	54	Y	91	8.281	0
-Contig60_chr15_18493036_18494316	150	G	A	92.6	chr15	18493188	G	9	0	2	54	13	0	2	66	9	0	2	54	6	0	2	45	5	0	2	42	12	0	2	63	Y	45	0.125	0
-Contig213_chr15_19567788_19568626	196	A	C	13.9	chr15	19567992	A	4	0	2	39	2	0	2	33	7	0	2	48	4	0	2	39	4	0	2	39	6	0	2	45	Y	111	0.043	0
-Contig59_chr15_22138344_22138535	120	G	C	142.0	chr15	22138470	C	11	0	2	60	10	0	2	57	18	0	2	81	4	0	2	39	10	0	2	57	15	0	2	72	N	8	2.553	0
-Contig112_chr15_26772864_26773267	374	C	T	21.6	chr15	26773244	C	4	0	2	39	4	0	2	39	5	0	2	42	2	0	2	33	4	0	2	39	3	0	2	36	N	18	+99.	0
-Contig24_chr15_26894765_26895003	155	G	A	87.6	chr15	-1	N	6	0	2	45	5	0	2	42	7	0	2	48	4	0	2	39	4	0	2	39	2	0	2	33	N	-1	0.178	0
-Contig2_chr15_33944796_33947182	1860	G	A	99.5	chr15	33946654	G	10	0	2	57	11	0	2	60	16	0	2	75	14	0	2	69	14	0	2	69	16	0	2	75	Y	16	0.252	0
-Contig73_chr15_34690052_34691332	714	T	C	130.0	chr15	34690769	T	7	0	2	48	7	0	2	48	17	0	2	78	9	0	2	54	9	0	2	54	4	0	2	39	Y	7	6.003	0
-Contig68_chr15_37747190_37747426	126	G	A	130.0	chr15	37747331	G	14	0	2	69	14	0	2	69	11	0	2	63	19	0	2	84	13	0	2	66	21	0	2	90	N	229	0.255	0
-Contig35_chr15_41400484_41400672	160	A	C	143.0	chr15	-1	N	1	0	2	30	2	0	2	33	0	0	-1	0	2	0	2	33	3	0	2	36	2	0	2	33	N	-1	+99.	0
-Contig104_chr15_45106954_45107158	70	A	T	64.4	chr15	45107015	A	6	0	2	45	6	0	2	45	19	0	2	84	7	0	2	48	7	0	2	48	3	0	2	36	N	202	4.319	0
-Contig119_chr16_6160274_6160477	180	G	A	54.8	chr16	6160457	G	7	0	2	48	6	0	2	45	12	0	2	63	3	0	2	36	11	0	2	60	10	0	2	57	N	42	+99.	0
-Contig126_chr16_10611887_10612152	150	G	T	145.0	chr16	10612037	G	14	0	2	69	9	0	2	54	11	0	2	63	8	0	2	51	8	0	2	51	11	0	2	60	N	15	0.104	6
-Contig114_chr16_12565220_12565676	10	G	A	134.0	chr16	12565230	G	0	0	-1	0	2	0	2	33	2	0	2	33	0	0	-1	0	1	0	2	30	1	0	2	30	N	333	+99.	0
-Contig43_chr16_20200090_20200514	70	A	G	58.6	chr16	20200154	A	11	0	2	60	15	0	2	72	15	0	2	72	6	0	2	45	9	0	2	54	12	0	2	63	Y	2	0.466	1
-Contig60_chr16_28079136_28080263	588	T	G	157.0	chr16	28079739	T	22	0	2	93	20	0	2	87	22	0	2	93	17	0	2	78	12	0	2	63	10	0	2	57	Y	105	5.999	1
-Contig70_chr16_33758668_33759655	104	A	T	58.1	chr16	33758772	A	6	0	2	45	7	0	2	48	17	0	2	78	14	0	2	69	8	0	2	51	10	0	2	57	N	54	0.162	0
-Contig66_chr16_37935682_37935831	116	T	C	99.2	chr16	37935802	C	12	0	2	63	6	0	2	45	19	0	2	84	12	0	2	63	13	0	2	66	17	0	2	78	N	266	+99.	2
-Contig16_chr16_40451506_40451643	84	A	G	59.8	chr16	40451592	A	7	0	2	48	5	0	2	42	7	0	2	48	13	0	2	66	14	0	2	69	19	0	2	84	N	45	5.061	0
-Contig53_chr16_49888293_49888587	260	G	A	108.0	chr16	49888550	A	4	0	2	39	1	0	2	30	3	0	2	36	5	0	2	42	2	0	2	33	2	0	2	33	Y	9	0.261	1
-Contig31_chr17_12128267_12129637	205	G	A	90.5	chr17	12128484	G	7	0	2	48	6	0	2	45	6	0	2	45	11	0	2	60	7	0	2	48	4	0	2	39	Y	10	0.246	0
-Contig50_chr17_12247973_12249183	889	G	T	47.6	chr17	12248878	G	0	1	2	9	8	0	2	51	9	2	2	21	7	2	2	21	15	0	2	72	0	3	0	9	Y	1	1.181	0
-Contig1_chr17_12979232_12980380	808	G	T	12.3	chr17	12980028	G	18	0	2	81	12	0	2	63	21	0	2	90	13	0	2	66	22	0	2	93	18	0	2	81	Y	9	0.336	1
-Contig63_chr17_14186372_14186928	54	C	T	70.7	chr17	14186427	C	6	0	2	45	2	0	2	33	5	0	2	42	6	0	2	45	3	0	2	36	3	0	2	36	Y	11	0.560	3
-Contig42_chr17_23434859_23438330	2100	C	T	39.5	chr17	23436985	T	4	0	2	39	7	0	2	48	7	0	2	48	3	0	2	36	6	0	2	45	2	0	2	33	Y	25	0.344	0
-Contig63_chr17_23796320_23796814	220	A	G	54.0	chr17	23796536	G	6	0	2	45	4	0	2	39	5	0	2	42	6	0	2	45	4	0	2	39	6	0	2	45	Y	139	0.067	1
-Contig76_chr17_24107434_24107834	316	T	C	141.0	chr17	24107726	T	19	0	2	84	15	0	2	72	20	0	2	87	16	0	2	75	11	0	2	60	18	0	2	81	Y	30	0.175	2
-Contig99_chr17_26021506_26022200	505	C	T	88.8	chr17	26022017	T	15	0	2	72	13	0	2	66	19	0	2	84	9	0	2	54	10	0	2	57	11	0	2	60	Y	1	0.172	1
-Contig59_chr17_26790302_26795045	287	C	T	45.1	chr17	26790582	C	8	0	2	51	6	0	2	45	13	0	2	66	6	0	2	45	15	0	2	72	12	0	2	63	Y	75	0.019	1
-Contig99_chr17_27018324_27019378	446	G	A	31.1	chr17	27018776	G	14	0	2	69	12	0	2	63	14	0	2	69	10	0	2	57	9	0	2	54	11	0	2	60	Y	13	0.290	4
-Contig125_chr17_27739115_27739410	63	G	A	107.0	chr17	27739177	G	8	0	2	51	11	0	2	60	16	0	2	75	8	0	2	51	4	0	2	39	15	0	2	72	N	100	0.819	0
-Contig115_chr17_37489899_37490101	159	G	A	62.4	chr17	37490067	G	4	0	2	39	3	0	2	36	4	0	2	39	4	0	2	39	3	0	2	36	6	0	2	45	N	4	1.411	1
-Contig180_chr17_45154356_45154925	524	A	G	146.0	chr17	45154886	G	7	0	2	48	9	0	2	54	7	0	2	48	9	0	2	54	4	0	2	39	8	0	2	51	Y	11	+99.	2
-Contig61_chr17_48221795_48223545	1404	T	A	177.0	chr17	48223216	T	15	0	2	72	14	0	2	69	24	0	2	99	17	0	2	78	18	0	2	81	24	0	2	99	Y	161	0.633	2
-Contig27_chr17_61713766_61716585	1056	G	C	40.0	chr17	61714821	G	4	0	2	39	8	0	2	51	10	0	2	57	6	0	2	45	6	0	2	45	3	0	2	36	N	6	2.200	4
-Contig229_chr18_3706523_3708577	1076	A	G	83.9	chr18	3707630	A	11	0	2	60	13	0	2	66	26	0	2	105	11	0	2	60	15	0	2	72	17	0	2	78	Y	63	0.445	0
-Contig24_chr18_14049894_14050480	24	A	G	123.0	chr18	14049918	A	5	0	2	42	5	0	2	42	4	0	2	39	6	0	2	45	7	0	2	48	5	0	2	42	Y	17	+99.	0
-Contig30_chr18_18771753_18772121	39	C	G	48.5	chr18	18771787	C	2	0	2	33	5	0	2	42	2	0	2	33	6	0	2	45	3	0	2	36	2	0	2	33	N	5	0.135	0
-Contig123_chr18_19916160_19916379	116	G	A	79.2	chr18	19916272	A	14	0	2	69	12	0	2	63	14	0	2	69	6	0	2	45	11	0	2	60	10	0	2	57	N	26	0.172	0
-Contig82_chr18_27305489_27306229	566	C	T	49.5	chr18	27306051	A	6	0	2	45	6	0	2	45	10	0	2	57	11	0	2	60	6	0	2	45	7	0	2	48	N	1	0.349	0
-Contig71_chr18_34324706_34326687	136	G	A	151.0	chr18	34324841	G	9	0	2	54	9	0	2	54	17	0	2	78	8	0	2	51	11	0	2	60	10	0	2	57	Y	2	2.129	2
-Contig16_chr18_34672093_34673044	538	T	C	58.2	chr18	34672635	T	8	0	2	51	15	0	2	72	16	0	2	75	15	0	2	72	9	0	2	57	18	0	2	81	Y	8	0.214	1
-Contig96_chr18_38492535_38493333	624	G	A	119.0	chr18	38493162	T	17	0	2	78	12	0	2	63	13	0	2	66	16	0	2	75	8	0	2	51	15	0	2	72	Y	127	0.131	0
-Contig226_chr18_47753756_47754666	427	T	C	21.1	chr18	47754215	T	10	0	2	57	4	0	2	39	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	42	0.522	0
-Contig170_chr18_49411558_49412230	94	C	A	74.3	chr18	49411655	C	14	0	2	69	10	0	2	57	9	0	2	54	10	0	2	57	3	0	2	36	3	0	2	36	N	9	1.457	0
-Contig192_chr18_49419342_49420737	1058	C	T	42.8	chr18	49420381	A	3	0	2	36	4	0	2	39	5	0	2	42	8	0	2	51	3	0	2	36	3	0	2	36	Y	34	2.107	2
-Contig64_chr18_55979770_55980315	49	G	A	89.1	chr18	55979824	G	3	0	2	36	9	0	2	54	7	0	2	51	4	0	2	39	3	0	2	36	3	0	2	36	Y	-1	2.124	0
-Contig20_chr18_58130301_58130735	112	A	G	74.4	chr18	58130413	A	12	0	2	66	11	0	2	60	11	0	2	60	12	0	2	63	6	0	2	45	6	0	2	45	Y	10	0.290	0
-Contig146_chr19_5221790_5223013	143	A	G	114.0	chr19	5221916	-	1	0	2	30	4	0	2	39	3	0	2	36	5	0	2	42	2	0	2	33	5	0	2	42	Y	12	0.870	0
-Contig13_chr19_7739961_7740118	26	C	G	220.0	chr19	-1	N	3	0	2	36	1	0	2	30	2	0	2	33	3	0	2	36	1	0	2	30	2	0	2	33	N	-1	+99.	0
-Contig67_chr19_12398520_12399367	499	C	T	161.0	chr19	12399017	C	10	0	2	57	11	0	2	60	20	0	2	87	14	0	2	69	24	0	2	99	8	0	2	51	Y	137	5.634	0
-Contig66_chr19_16285672_16287223	996	C	T	190.0	chr19	16286674	C	9	0	2	57	14	0	2	69	16	0	2	78	17	0	2	78	8	0	2	51	22	0	2	93	Y	40	0.110	0
-Contig129_chr19_25541958_25542221	202	T	C	68.1	chr19	25542154	C	11	0	2	60	19	0	2	84	10	0	2	60	17	0	2	78	9	0	2	54	12	0	2	63	N	-1	2.551	1
-Contig152_chr19_34274440_34275622	1072	C	T	48.0	chr19	34275509	T	1	0	2	30	2	0	2	33	1	0	2	30	1	0	2	30	4	0	2	39	5	0	2	42	N	71	0.309	0
-Contig29_chr19_37339947_37341911	1692	C	T	211.0	chr19	37341631	C	15	0	2	72	20	0	2	87	11	0	2	60	15	0	2	72	3	0	2	36	12	0	2	63	Y	7	0.096	0
-Contig39_chr19_47709708_47711327	444	C	T	36.8	chr19	47710148	T	10	0	2	57	4	0	2	39	8	0	2	51	9	0	2	54	6	0	2	45	6	0	2	45	Y	95	1.251	1
-Contig60_chr19_54013816_54014398	281	A	G	138.0	chr19	54014103	C	6	0	2	45	15	0	2	72	7	0	2	48	10	0	2	57	15	0	2	72	10	0	2	57	Y	188	1.271	0
-Contig251_chr19_56559098_56559626	452	T	C	3.36	chr19	56559549	T	12	0	2	63	13	0	2	66	21	0	2	90	15	0	2	72	14	0	2	69	11	0	2	60	N	1	0.117	0
-Contig50_chr20_12138509_12141975	3206	C	A	248.0	chr20	12141763	C	8	0	2	51	15	0	2	72	14	0	2	69	6	0	2	45	10	0	2	57	7	0	2	48	Y	2	0.384	0
-Contig36_chr20_32631363_32632049	176	G	A	24.1	chr20	32631526	G	7	0	2	48	14	0	2	69	19	0	2	84	14	0	2	69	15	0	2	72	16	0	2	75	N	50	1.150	0
-Contig39_chr20_36316398_36316498	57	C	T	30.3	chr20	36316455	C	2	0	2	33	0	1	2	8	0	0	-1	0	0	1	2	10	0	0	-1	0	0	0	-1	0	N	-483	+99.	0
-Contig32_chr20_36468058_36468869	66	C	T	40.4	chr20	36468127	C	6	0	2	45	3	0	2	36	4	0	2	39	5	0	2	42	3	0	2	36	4	0	2	39	N	59	0.281	0
-Contig24_chr20_38203888_38204900	834	C	T	132.0	chr20	38204731	C	9	0	2	54	17	0	2	78	20	0	2	87	8	0	2	51	11	0	2	60	17	0	2	78	Y	14	0.397	0
-Contig79_chr20_44263127_44264103	456	G	T	31.5	chr20	44263573	G	22	0	2	93	16	0	2	75	15	0	2	72	19	0	2	84	13	0	2	66	26	0	2	105	Y	8	3.250	0
-Contig26_chr20_45878482_45878787	197	A	G	160.0	chr20	45878672	A	17	0	2	78	15	0	2	72	11	0	2	63	17	0	2	78	12	0	2	63	10	0	2	57	N	14	0.535	0
-Contig119_chr20_46550670_46551383	609	G	A	139.0	chr20	46551277	G	7	0	2	48	17	0	2	78	19	0	2	84	20	0	2	87	9	0	2	54	15	0	2	72	Y	7	0.488	1
-Contig50_chr21_4178523_4178687	121	G	A	362.0	chr21	4178640	G	8	0	2	51	14	0	2	69	5	0	2	42	3	0	2	36	11	0	2	60	4	0	2	39	N	392	0.483	0
-Contig103_chr21_10177255_10177765	121	G	A	125.0	chr21	10177367	G	12	0	2	63	10	0	2	57	10	0	2	57	17	0	2	78	14	0	2	69	7	0	2	51	Y	37	0.213	3
-Contig1_chr21_10805534_10806399	766	A	G	146.0	chr21	10806301	G	10	0	2	57	6	0	2	45	9	0	2	54	6	0	2	45	7	0	2	48	5	0	2	42	Y	20	0.319	0
-Contig46_chr21_21029492_21030645	443	C	T	5.37	chr21	21029910	C	15	0	2	72	11	0	2	60	16	0	2	75	15	0	2	72	13	0	2	66	6	0	2	45	Y	96	3.737	0
-Contig129_chr21_31045749_31046924	381	A	G	129.0	chr21	31046141	A	19	0	2	84	8	0	2	51	23	0	2	96	12	0	2	63	15	0	2	72	18	0	2	81	Y	69	0.028	2
-Contig23_chr21_31651123_31651986	840	C	T	71.3	chr21	31651957	T	6	0	2	45	9	0	2	54	8	0	2	51	10	0	2	57	4	0	2	39	7	0	2	48	Y	105	2.977	3
-Contig64_chr21_43341847_43342031	84	T	C	114.0	chr21	43341926	T	11	0	2	60	9	0	2	54	10	0	2	57	6	0	2	45	6	0	2	45	7	0	2	48	N	10	3.954	2
-Contig60_chr21_43475347_43475824	175	C	T	8.05	chr21	43475551	T	6	0	2	45	7	0	2	48	13	0	2	66	6	0	2	45	14	0	2	69	14	0	2	69	N	45	0.058	0
-Contig64_chr21_45377513_45377872	19	C	T	60.7	chr21	-1	N	3	0	2	36	2	0	2	33	1	0	2	30	0	0	-1	0	3	0	2	36	1	0	2	30	N	-1	+99.	1
-Contig159_chr22_7896450_7896974	109	G	C	151.0	chr22	7896570	G	16	0	2	75	5	7	1	62	14	0	2	69	16	0	2	75	13	0	2	66	13	0	2	66	Y	16	0.465	0
-Contig46_chr22_9416920_9417467	381	G	A	145.0	chr22	9417259	G	10	0	2	57	9	0	2	54	10	0	2	57	6	0	2	45	13	0	2	66	7	0	2	48	Y	154	0.242	0
-Contig86_chr22_9440787_9441725	713	T	G	119.0	chr22	9441488	G	6	0	2	45	12	0	2	63	10	0	2	57	11	0	2	60	13	0	2	66	16	0	2	75	Y	132	0.218	0
-Contig16_chr22_15636960_15637372	236	A	C	9.79	chr22	15637192	T	4	0	2	39	5	0	2	42	12	0	2	63	7	0	2	48	6	0	2	45	11	0	2	60	Y	5	2.163	0
-Contig4_chr22_16114310_16114546	128	G	C	101.0	chr22	16114432	G	10	0	2	57	13	0	2	66	20	0	2	87	20	0	2	87	16	0	2	75	9	0	2	54	N	19	0.526	0
-Contig23_chr22_34612023_34612568	167	C	G	92.3	chr22	34612181	C	11	0	2	60	18	0	2	81	13	0	2	66	8	0	2	51	12	0	2	63	14	0	2	69	Y	7	0.409	0
-Contig4_chr22_38252245_38253712	799	A	C	159.0	chr22	38253064	A	18	0	2	81	15	0	2	72	15	0	2	72	20	0	2	87	27	0	2	108	15	0	2	72	Y	90	4.330	0
-Contig122_chr22_48412466_48414788	1888	C	T	125.0	chr22	48414355	T	16	0	2	75	15	0	2	72	16	0	2	75	14	0	2	72	12	0	2	63	7	0	2	48	N	42	0.122	0
-Contig77_chr22_49764414_49764875	353	C	A	148.0	chr22	49764777	C	7	4	1	65	18	0	2	81	16	0	2	75	20	0	2	87	4	3	1	52	9	4	1	67	Y	12	0.941	0
-Contig26_chr22_57817664_57819633	1453	A	G	150.0	chr22	57819121	G	9	0	2	54	9	0	2	54	13	0	2	66	15	0	2	72	11	0	2	60	14	0	2	69	N	15	0.471	1
-Contig348_chr22_62406104_62406495	189	C	A	134.0	chr22	62406302	A	9	0	2	54	14	0	2	69	11	0	2	60	10	0	2	57	12	0	2	63	6	0	2	45	Y	5	0.912	0
-Contig133_chr23_3525134_3526502	1223	A	G	201.0	chr23	3526387	A	11	0	2	60	13	0	2	66	23	0	2	96	21	0	2	90	13	0	2	66	10	0	2	57	Y	61	1.359	0
-Contig111_chr23_7058063_7058181	107	G	A	108.0	chr23	7058162	A	8	0	2	51	8	0	2	51	7	0	2	48	2	0	2	33	5	0	2	42	6	0	2	45	N	3	+99.	0
-Contig79_chr23_7844129_7844837	110	C	A	141.0	chr23	7844237	T	13	0	2	66	15	0	2	72	17	0	2	78	12	0	2	63	15	0	2	72	16	0	2	75	Y	40	0.339	0
-Contig38_chr23_9201002_9201725	597	C	T	155.0	chr23	9201609	T	17	0	2	78	8	0	2	51	13	0	2	66	5	0	2	42	11	0	2	60	7	0	2	48	Y	167	0.633	1
-Contig33_chr23_20672540_20674320	347	T	A	91.4	chr23	20672885	A	11	0	2	60	14	0	2	69	15	0	2	72	7	0	2	48	12	0	2	63	18	0	2	81	Y	31	0.452	1
-Contig35_chr23_28447813_28449115	70	T	A	21.3	chr23	28447881	T	9	0	2	54	8	0	2	51	10	0	2	57	9	0	2	54	10	0	2	57	12	0	2	63	N	251	0.163	1
-Contig51_chr23_30590939_30591162	140	C	T	142.0	chr23	30591080	C	14	0	2	69	4	0	2	39	10	0	2	57	12	0	2	63	14	0	2	69	4	0	2	39	N	13	1.658	0
-Contig57_chr23_32216351_32216721	179	T	G	143.0	chr23	32216534	T	15	0	2	72	15	0	2	72	23	0	2	96	13	0	2	66	16	0	2	75	15	0	2	72	N	32	1.387	1
-Contig93_chr23_35744841_35745791	40	A	T	30.4	chr23	35744880	T	6	0	2	45	7	0	2	48	7	0	2	48	2	0	2	33	5	0	2	42	5	0	2	42	Y	50	2.173	0
-Contig99_chr23_42543966_42544147	14	G	A	357.0	chr23	42543980	G	4	0	2	39	2	0	2	33	3	0	2	36	3	0	2	36	1	0	2	30	2	0	2	33	N	69	+99.	0
-Contig32_chr23_48285289_48286638	186	T	C	176.0	chr23	48285470	T	18	0	2	81	12	0	2	63	16	0	2	75	13	0	2	66	9	0	2	54	9	0	2	54	Y	4	4.238	1
-Contig50_chr24_22515247_22516072	761	C	T	243.0	chr24	22515981	T	11	0	2	60	10	0	2	57	8	0	2	51	9	0	2	54	18	0	2	81	8	0	2	51	Y	1	0.190	0
-Contig92_chr24_28935897_28936321	13	G	A	47.1	chr24	-1	N	2	0	2	33	1	0	2	30	0	0	-1	0	0	0	-1	0	1	0	2	30	0	0	-1	0	Y	-1	+99.	2
-Contig84_chr24_29196623_29199644	466	C	T	126.0	chr24	29197091	T	7	0	2	48	11	0	2	60	8	0	2	51	7	0	2	48	11	0	2	60	15	0	2	72	Y	42	0.215	0
-Contig35_chr24_30150986_30151507	492	A	C	114.0	chr24	30151448	A	5	0	2	42	2	0	2	33	2	0	2	33	3	0	2	36	3	0	2	36	5	0	2	42	N	41	2.587	6
-Contig61_chr24_30465488_30465834	149	G	T	68.2	chr24	30465637	G	13	0	2	66	4	2	2	11	18	0	2	81	11	0	2	60	11	0	2	60	9	0	2	54	N	99	0.105	2
-Contig145_chr24_34778364_34778898	163	T	C	372.0	chr24	34778541	C	10	0	2	57	8	0	2	51	12	0	2	63	12	0	2	63	6	1	2	31	7	0	2	48	Y	40	0.037	0
-Contig34_chr24_36147443_36150244	2679	C	T	140.0	chr24	36150125	C	13	0	2	66	7	0	2	48	14	0	2	69	14	0	2	69	10	0	2	57	13	0	2	66	N	282	0.099	1
-Contig164_chr24_46598127_46599206	84	C	T	105.0	chr24	46598214	C	13	0	2	66	12	0	2	63	15	0	2	72	15	0	2	72	11	0	2	60	8	0	2	51	Y	22	1.262	1
-Contig144_chr25_4011170_4013134	541	A	G	160.0	chr25	4011690	A	12	0	2	63	17	0	2	78	13	0	2	66	13	0	2	66	13	0	2	66	13	0	2	66	Y	5	0.087	0
-Contig81_chr25_6103472_6104760	699	G	A	378.0	chr25	6104190	A	14	0	2	69	16	0	2	75	13	0	2	66	11	0	2	60	11	0	2	60	12	0	2	63	Y	33	0.789	2
-Contig152_chr25_7486442_7487609	75	A	G	11.6	chr25	7486515	A	17	0	2	78	13	0	2	66	8	0	2	51	16	0	2	75	8	0	2	51	6	0	2	45	N	2	0.158	0
-Contig24_chr25_7695778_7698612	2714	C	T	130.0	chr25	7698446	C	16	0	2	75	13	0	2	66	22	0	2	93	17	0	2	78	10	0	2	57	17	0	2	78	Y	27	0.346	0
-Contig89_chr25_8635170_8636009	586	G	C	209.0	chr25	8635744	G	13	0	2	66	13	0	2	66	21	0	2	93	14	0	2	69	15	0	2	72	15	0	2	72	Y	14	0.067	0
-Contig77_chr25_10796299_10796481	2	T	C	17.3	chr25	-1	N	1	0	2	30	0	0	-1	0	1	0	2	30	0	0	-1	0	0	0	-1	0	0	0	-1	0	N	-1	+99.	0
-Contig73_chr25_14177327_14177474	125	A	C	6.85	chr25	14177464	A	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	N	27	+99.	1
-Contig59_chr25_18196776_18197707	785	G	A	112.0	chr25	18197551	G	8	10	1	42	27	0	2	108	21	0	2	90	18	0	2	81	10	0	2	57	14	0	2	69	N	36	3.625	0
-Contig103_chr25_38891221_38892140	407	G	A	131.0	chr25	38891644	G	8	0	2	51	14	0	2	69	18	0	2	81	8	0	2	51	8	0	2	51	11	0	2	60	Y	149	0.167	4
-Contig84_chr25_42407960_42408708	55	C	T	119.0	chr25	42408013	C	6	0	2	45	9	0	2	54	11	0	2	60	9	0	2	54	7	0	2	48	8	0	2	51	Y	11	0.121	0
-Contig73_chr25_43562500_43564110	955	T	C	52.1	chr25	43563469	C	9	0	2	57	4	0	2	39	6	0	2	45	5	0	2	42	7	0	2	48	10	0	2	57	Y	4	1.406	0
-Contig37_chr25_51074433_51074885	170	A	G	102.0	chr25	51074589	G	11	0	2	60	7	0	2	48	6	0	2	45	15	0	2	72	9	0	2	54	7	0	2	48	Y	68	0.207	1
-Contig204_chr26_4311195_4311778	170	C	T	16.9	chr26	4311363	T	20	0	2	87	8	0	2	51	13	0	2	66	18	0	2	81	11	0	2	60	14	0	2	69	N	35	0.085	0
-Contig122_chr26_7622321_7623491	106	C	G	139.0	chr26	7622423	C	3	0	2	36	9	0	2	54	10	0	2	57	12	0	2	63	9	0	2	54	5	0	2	42	N	19	0.458	0
-Contig11_chr26_11062142_11062902	707	C	A	108.0	chr26	11062836	T	7	0	2	48	8	0	2	51	16	0	2	75	10	0	2	57	6	0	2	45	14	0	2	69	Y	-1	4.709	0
-Contig133_chr26_17695661_17696368	39	T	G	98.7	chr26	17695700	T	10	0	2	57	3	0	2	36	11	0	2	60	9	0	2	54	2	0	2	33	1	0	2	30	N	85	3.402	0
-Contig157_chr26_23894107_23895229	25	C	T	50.2	chr26	23894140	C	0	0	-1	0	4	0	2	39	2	0	2	33	4	0	2	39	3	0	2	36	3	0	2	36	Y	51	+99.	0
-Contig146_chr26_26622638_26623906	574	G	A	186.0	chr26	26623219	A	11	0	2	60	12	0	2	63	9	0	2	54	11	0	2	60	9	0	2	54	12	0	2	63	Y	1	0.318	0
-Contig8_chr26_27834126_27834326	140	G	A	41.7	chr26	27834268	G	13	0	2	66	7	0	2	48	13	0	2	66	11	0	2	60	12	0	2	63	6	0	2	45	N	29	0.142	1
-Contig78_chr26_31128839_31129005	123	T	C	145.0	chr26	-1	N	11	0	2	60	3	0	2	36	7	0	2	48	8	0	2	51	10	0	2	46	7	0	2	48	N	-1	1.230	1
-Contig28_chr26_32935355_32935833	289	T	C	77.9	chr26	32935638	T	15	0	2	72	22	0	2	93	15	0	2	72	9	0	2	54	15	0	2	72	17	0	2	78	Y	10	2.258	1
-Contig36_chr26_36606876_36607240	115	A	T	139.0	chr26	36606979	A	1	0	2	30	7	0	2	48	14	0	2	69	13	0	2	66	9	0	2	54	3	0	2	36	Y	8	0.071	0
-Contig135_chr27_6853874_6854079	158	C	T	116.0	chr27	6854032	T	18	0	2	81	19	0	2	84	13	0	2	66	7	0	2	48	8	0	2	51	11	0	2	60	N	4	0.060	1
-Contig47_chr27_11777710_11777915	25	A	G	67.3	chr27	11777731	A	3	0	2	36	5	0	2	42	6	0	2	45	10	0	2	57	9	0	2	54	6	0	2	45	N	97	+99.	0
-Contig23_chr27_14633002_14633153	23	G	A	128.0	chr27	14633023	A	3	0	2	36	4	0	2	39	5	0	2	42	5	0	2	42	3	0	2	36	2	0	2	33	N	240	3.881	0
-Contig31_chr27_14987233_14988055	630	A	G	48.5	chr27	14987850	G	10	0	2	57	2	0	2	33	4	0	2	39	4	0	2	39	1	0	2	30	4	0	2	39	Y	9	0.089	1
-Contig29_chr27_15428166_15429413	380	T	C	140.0	chr27	15428539	T	15	0	2	72	15	0	2	72	17	0	2	78	15	0	2	72	15	0	2	72	15	0	2	72	Y	47	0.916	1
-Contig31_chr27_19519489_19520891	129	G	T	14.9	chr27	19519624	T	12	0	2	63	19	0	2	84	20	0	2	87	16	0	2	75	10	0	2	57	11	0	2	60	Y	48	2.756	0
-Contig64_chr27_34654435_34654621	132	C	A	115.0	chr27	34654567	T	2	0	2	33	2	0	2	33	5	0	2	42	3	0	2	36	3	0	2	36	8	0	2	51	N	12	0.297	1
-Contig35_chr27_40596169_40596445	20	G	C	133.0	chr27	40596189	G	8	0	2	51	3	0	2	36	4	0	2	39	2	0	2	33	4	0	2	39	4	0	2	39	Y	4	+99.	1
-Contig85_chr27_45471750_45472022	211	G	A	53.1	chr27	45471964	G	18	0	2	81	10	0	2	57	15	0	2	72	0	13	0	36	16	0	2	75	14	0	2	69	N	75	2.502	1
-Contig131_chr28_6481806_6483783	138	C	T	36.2	chr28	6481953	C	12	0	2	63	12	0	2	63	20	0	2	87	11	0	2	60	10	0	2	57	12	0	2	63	Y	10	0.387	0
-Contig141_chr28_10027332_10028242	780	T	G	74.8	chr28	10028095	T	10	0	2	57	11	0	2	60	14	0	2	69	10	0	2	57	7	0	2	48	9	0	2	54	Y	19	3.348	0
-Contig144_chr28_15468203_15470548	743	G	A	20.0	chr28	15468942	G	13	0	2	66	12	0	2	63	10	0	2	57	11	0	2	60	16	0	2	75	7	0	2	48	N	14	0.053	0
-Contig47_chr28_21311718_21312366	541	G	A	116.0	chr28	21312258	G	9	0	2	54	6	0	2	45	12	0	2	63	6	0	2	45	5	0	2	45	12	0	2	63	N	9	0.240	0
-Contig60_chr28_30197166_30197364	92	T	C	164.0	chr28	30197258	T	10	0	2	57	13	0	2	66	15	0	2	72	16	0	2	75	12	0	2	63	11	0	2	60	N	369	1.139	0
-Contig201_chr28_36339953_36341322	260	C	T	6.36	chr28	36340213	T	4	0	2	39	0	0	-1	0	2	0	2	33	2	0	2	33	3	0	2	36	4	0	2	39	N	4	0.183	0
-Contig175_chr28_36441165_36441915	68	T	C	3.83	chr28	36441234	T	4	4	1	15	6	0	2	45	12	0	2	63	15	0	2	72	6	0	2	45	9	0	2	54	N	4	1.610	2
-Contig29_chr29_4726399_4727143	559	A	T	163.0	chr29	4726955	A	15	0	2	72	18	0	2	81	18	0	2	81	16	0	2	75	11	0	2	60	14	0	2	72	Y	161	3.114	0
-Contig48_chr29_13129286_13130137	232	A	G	92.2	chr29	13129514	G	13	0	2	66	11	0	2	60	19	0	2	84	16	0	2	75	11	0	2	60	17	0	2	78	Y	337	2.581	1
-Contig64_chr29_15736891_15737257	344	T	C	40.4	chr29	15737233	C	1	0	2	30	0	0	-1	0	0	0	-1	0	2	0	2	33	0	0	-1	0	0	0	-1	0	N	58	+99.	0
-Contig33_chr29_17000374_17000921	71	C	T	48.6	chr29	17000441	-	4	0	2	39	9	0	2	54	12	0	2	66	10	0	2	57	7	0	2	48	4	0	2	39	N	26	5.491	0
-Contig34_chr29_17581796_17584016	2105	C	T	126.0	chr29	17583890	T	14	0	2	69	11	0	2	60	18	0	2	81	12	0	2	63	10	0	2	57	10	0	2	57	Y	22	2.208	0
-Contig19_chr29_20976080_20977761	1007	G	A	115.0	chr29	20977076	G	19	0	2	84	22	0	2	93	22	0	2	93	22	0	2	93	11	0	2	60	13	0	2	66	Y	4	1.915	0
-Contig51_chr29_21149853_21150467	266	C	T	146.0	chr29	21150118	C	12	0	2	63	12	0	2	63	23	0	2	96	14	0	2	69	13	0	2	66	10	0	2	57	Y	4	0.051	0
-Contig1_chr30_5992217_5993068	106	C	T	129.0	chr30	5992319	C	10	0	2	57	11	0	2	60	7	0	2	48	11	0	2	60	10	0	2	57	12	0	2	63	Y	76	1.079	0
-Contig1_chr30_8232878_8233406	402	C	T	127.0	chr30	8233264	C	8	0	2	51	19	0	2	84	16	0	2	75	18	0	2	81	10	0	2	57	14	0	2	69	Y	358	5.283	0
-Contig108_chr30_9436961_9437520	546	C	T	39.8	chr30	9437502	C	7	0	2	48	5	0	2	42	2	0	2	33	7	0	2	48	5	0	2	42	7	0	2	48	Y	64	+99.	0
-Contig165_chr30_25804389_25804926	190	T	C	126.0	chr30	25804592	C	3	0	2	36	8	0	2	51	7	0	2	48	10	0	2	57	7	0	2	48	4	0	2	39	Y	113	0.329	0
-Contig193_chr30_27495616_27496125	434	C	A	234.0	chr30	27496024	C	13	0	2	66	16	0	2	75	25	0	2	102	16	0	2	75	13	0	2	66	14	0	2	69	Y	76	2.621	0
-Contig114_chr30_33636712_33637208	34	C	T	142.0	chr30	33636744	C	7	0	2	48	4	1	2	20	6	0	2	45	6	0	2	45	3	4	1	29	5	0	2	42	Y	14	8.028	0
-Contig38_chr31_5164423_5166573	2074	C	T	134.0	chr31	5166501	T	13	0	2	66	10	0	2	57	17	0	2	78	11	0	2	60	17	0	2	78	10	0	2	57	Y	58	+99.	0
-Contig6_chr31_9649308_9650149	431	G	T	162.0	chr31	9649742	G	31	0	2	120	23	0	2	96	17	0	2	78	17	0	2	78	10	0	2	57	16	0	2	75	Y	98	2.200	0
-Contig85_chr31_12242872_12245082	38	G	C	92.4	chr31	12242910	G	1	0	2	30	6	0	2	45	9	0	2	54	8	0	2	51	5	0	2	42	9	0	2	54	N	2	2.340	0
-Contig7_chr31_12384974_12386400	305	C	T	69.6	chr31	12385267	C	6	0	2	45	10	0	2	57	11	0	2	60	11	0	2	60	9	0	2	54	12	0	2	63	Y	44	1.165	0
-Contig90_chr31_17267583_17267778	81	C	A	143.0	chr31	17267665	C	20	0	2	87	6	0	2	45	14	0	2	72	22	0	2	93	17	0	2	78	15	0	2	72	N	7	0.565	0
-Contig68_chr31_20000241_20000597	215	C	T	131.0	chr31	20000454	T	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	0	0	-1	0	Y	5	3.383	1
-Contig137_chr31_23357653_23358568	885	G	A	119.0	chr31	23358545	G	5	0	2	42	3	0	2	36	3	0	2	36	2	0	2	33	3	0	2	36	4	0	2	39	Y	11	+99.	0
-Contig17_chr31_26433828_26434459	498	T	C	9.79	chr31	26434322	T	18	0	2	81	10	0	2	57	15	0	2	72	13	0	2	66	16	0	2	75	15	0	2	72	Y	137	4.814	0
-Contig9_chr32_19479532_19479735	12	A	G	20.7	chr32	19479544	A	1	0	2	30	2	0	2	33	1	0	2	30	5	0	2	42	3	0	2	36	3	0	2	36	N	17	+99.	0
-Contig30_chr32_25902721_25905783	208	C	G	162.0	chr32	25902927	G	11	0	2	60	13	0	2	66	11	0	2	60	12	0	2	63	7	0	2	48	11	0	2	60	Y	145	0.322	2
-Contig7_chr32_27789513_27789926	20	G	A	7.19	chr32	27789530	A	0	0	-1	0	4	0	2	39	4	0	2	39	4	0	2	39	2	0	2	33	6	0	2	45	Y	14	+99.	0
-Contig42_chr32_38900713_38901320	320	A	G	134.0	chr32	38901021	T	12	0	2	63	10	0	2	57	9	11	1	104	5	0	2	42	19	0	2	84	7	6	1	56	Y	71	0.165	0
-Contig18_chr33_22207246_22209159	1363	G	T	51.5	chr33	22208619	-	16	0	2	75	8	0	2	51	11	0	2	60	10	0	2	57	15	0	2	72	12	0	2	63	Y	59	2.560	0
-Contig104_chr33_22483642_22484187	424	C	T	140.0	chr33	22484054	T	13	0	2	66	16	0	2	75	9	0	2	54	15	0	2	72	13	0	2	66	10	0	2	57	Y	36	0.404	0
-Contig170_chr33_26189421_26189940	292	T	C	98.4	chr33	26189703	T	21	0	2	90	13	0	2	66	15	0	2	72	13	0	2	66	19	0	2	84	13	0	2	66	Y	23	0.307	0
-Contig113_chr34_13341080_13341643	236	C	T	90.7	chr34	13341316	C	4	0	2	39	2	0	2	33	8	0	2	51	4	0	2	39	8	0	2	51	3	0	2	36	Y	47	0.412	3
-Contig405_chr34_14415672_14415979	59	A	G	36.2	chr34	14415731	G	8	0	2	51	2	0	2	33	8	0	2	51	6	0	2	48	3	0	2	36	7	0	2	48	Y	45	0.405	1
-Contig21_chr34_16422980_16425681	2009	G	A	19.4	chr34	16424960	G	0	0	-1	0	0	0	-1	0	0	0	-1	0	5	0	2	42	0	0	-1	0	0	0	-1	0	Y	28	0.196	0
-Contig41_chr34_16544482_16545449	46	T	C	102.0	chr34	16544523	T	5	0	2	42	11	0	2	60	6	0	2	45	0	2	0	3	7	0	2	48	8	0	2	51	Y	215	1.156	0
-Contig8_chr34_18474513_18475673	1122	C	A	129.0	chr34	18475628	A	8	0	2	51	15	0	2	72	13	0	2	66	17	0	2	78	13	0	2	66	6	0	2	45	Y	61	0.123	2
-Contig152_chr34_31794848_31795540	242	G	A	93.2	chr34	31795093	G	11	0	2	60	24	0	2	99	17	0	2	78	15	0	2	72	18	0	2	81	17	0	2	78	Y	123	2.780	0
-Contig28_chr34_41708848_41712034	1381	A	G	78.2	chr34	41710232	A	11	0	2	60	17	0	2	78	15	0	2	72	16	0	2	75	15	0	2	72	14	0	2	69	Y	236	0.234	0
-Contig85_chr34_42798284_42800584	1845	C	T	171.0	chr34	42800126	T	5	0	2	42	7	0	2	48	6	0	2	45	7	0	2	48	6	0	2	45	2	0	2	33	Y	5	2.787	0
-Contig47_chr35_3666773_3667898	348	G	T	124.0	chr35	3667121	G	9	0	2	54	20	0	2	87	18	0	2	81	15	0	2	72	12	0	2	63	14	0	2	69	Y	285	0.235	0
-Contig195_chr35_15722500_15722741	205	G	A	4.08	chr35	15722718	G	3	0	2	36	5	0	2	42	1	0	2	30	6	0	2	45	1	0	2	30	1	0	2	30	N	43	+99.	0
-Contig101_chr35_19513178_19513697	62	C	T	112.0	chr35	19513238	C	12	0	2	63	7	0	2	48	13	0	2	66	7	0	2	48	5	0	2	42	8	0	2	51	N	115	3.135	0
-Contig19_chr35_23887144_23888282	90	C	A	10.1	chr35	23887242	-	3	3	1	12	4	4	1	19	8	6	1	37	4	3	1	11	8	3	2	7	9	3	2	11	Y	105	0.199	0
-Contig47_chr35_24382042_24382526	33	G	A	87.0	chr35	24382076	G	5	0	2	42	4	0	2	39	6	0	2	45	7	0	2	48	4	0	2	39	2	0	2	33	Y	71	+99.	0
-Contig77_chr35_24796947_24797172	65	A	G	52.1	chr35	24797009	A	7	0	2	48	5	0	2	42	8	0	2	51	6	0	2	45	12	0	2	63	10	0	2	57	N	11	1.401	3
-Contig74_chr35_25394343_25394813	303	A	T	221.0	chr35	25394646	G	23	0	2	96	15	0	2	72	25	0	2	105	7	7	1	49	18	0	2	81	16	0	2	75	Y	58	4.298	0
-Contig5_chr36_4562983_4563634	343	C	T	151.0	chr36	4563324	T	20	0	2	87	20	0	2	87	23	0	2	96	24	0	2	99	9	0	2	54	8	0	2	51	Y	40	1.169	0
-Contig75_chr36_7885319_7885588	53	G	A	25.7	chr36	7885372	G	10	0	2	57	8	0	2	51	13	0	2	66	7	0	2	48	4	0	2	39	7	0	2	48	N	7	2.653	0
-Contig184_chr36_18956191_18958552	187	A	G	11.5	chr36	18956371	G	10	0	2	57	11	0	2	60	21	0	2	90	14	0	2	69	7	0	2	48	4	0	2	39	N	278	1.434	2
-Contig12_chr36_21557176_21557828	513	T	A	159.0	chr36	21557695	A	11	0	2	60	14	0	2	69	21	0	2	90	12	0	2	63	15	0	2	72	11	0	2	60	Y	55	0.222	0
-Contig2_chr36_22436067_22436794	653	C	T	73.0	chr36	22436730	C	11	0	2	60	16	0	2	75	13	0	2	66	11	0	2	60	21	0	2	90	21	0	2	90	Y	9	0.534	0
-Contig133_chr36_32954045_32955409	136	A	G	116.0	chr36	32954182	A	16	0	2	75	15	0	2	72	20	0	2	87	11	0	2	60	18	0	2	81	13	0	2	66	Y	74	3.772	1
-Contig53_chr37_6665763_6665919	116	C	T	111.0	chr37	6665875	C	9	0	2	54	9	0	2	54	5	0	2	42	9	0	2	54	8	0	2	51	10	0	2	57	N	15	10.875	1
-Contig42_chr37_9589176_9591269	252	G	A	25.1	chr37	9589430	G	10	0	2	40	13	0	2	66	18	0	2	81	21	0	2	90	9	0	2	54	17	0	2	78	N	67	1.170	2
-Contig2_chr37_17134963_17136513	1140	A	C	158.0	chr37	17136092	A	14	0	2	69	24	0	2	99	17	0	2	78	16	0	2	75	15	0	2	75	13	0	2	66	Y	12	0.053	1
-Contig18_chr37_17147806_17149851	291	T	G	112.0	chr37	17148084	T	4	6	1	45	16	0	2	75	17	0	2	78	14	0	2	69	22	0	2	93	13	0	2	66	Y	41	4.442	0
-Contig64_chr37_17606895_17607534	565	C	T	30.2	chr37	17607439	A	9	0	2	54	16	0	2	75	20	0	2	87	14	0	2	69	16	0	2	75	10	0	2	57	N	20	1.622	0
-Contig126_chr37_21587881_21590621	373	G	T	132.0	chr37	21588256	G	11	0	2	60	11	0	2	60	23	0	2	96	12	0	2	63	8	0	2	51	18	0	2	81	Y	12	0.549	0
-Contig2_chr37_31197993_31198256	182	C	T	39.6	chr37	31198171	T	6	0	2	45	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	12	0	2	63	N	2	0.595	0
-Contig46_chr37_31852376_31853555	825	A	G	111.0	chr37	31853191	G	19	0	2	84	14	0	2	69	15	0	2	72	7	0	2	48	8	0	2	51	16	0	2	75	Y	17	0.128	1
-Contig7_chr38_12217200_12218387	1163	A	T	44.4	chr38	12218353	A	11	0	2	60	13	0	2	66	17	0	2	78	10	0	2	57	11	0	2	60	11	0	2	60	Y	67	+99.	0
-Contig15_chr38_12282020_12282253	150	C	T	156.0	chr38	12282164	A	17	0	2	78	11	0	2	60	19	0	2	84	14	0	2	69	5	0	2	42	14	0	2	69	Y	26	2.952	1
-Contig4_chr38_14807432_14807747	275	A	G	36.5	chr38	14807715	G	1	0	2	30	2	0	2	33	2	0	2	33	4	0	2	39	1	0	2	30	0	0	-1	0	Y	28	+99.	1
-Contig6_chr38_16185744_16186110	325	A	G	74.9	chr38	16186061	A	5	0	2	42	3	0	2	36	9	0	2	54	7	0	2	48	1	0	2	30	12	0	2	63	Y	40	+99.	0
-Contig265_chrX_2689247_2689484	114	C	G	103.0	chrX	2689356	C	11	0	2	60	9	0	2	54	13	0	2	66	16	0	2	75	14	0	2	69	10	0	2	57	N	2	9.232	1
-Contig122_chrX_6026976_6027327	330	C	T	79.4	chrX	6027303	C	3	0	2	36	3	0	2	36	3	0	2	36	4	0	2	39	3	0	2	36	6	0	2	45	Y	30	+99.	0
-Contig15_chrX_15659909_15660340	15	A	C	14.9	chrX	15659924	C	1	0	2	30	1	0	2	30	3	0	2	36	6	0	2	45	2	0	2	33	0	0	-1	0	Y	216	+99.	1
-Contig12_chrX_23243561_23244412	479	C	G	67.7	chrX	23244037	C	2	0	2	33	4	2	2	8	2	6	1	43	7	0	2	48	6	0	2	45	4	0	2	39	Y	208	1.620	0
-Contig113_chrX_26287829_26288398	385	C	T	59.6	chrX	26288213	C	9	0	2	54	9	0	2	54	17	0	2	78	11	0	2	60	3	8	1	44	4	0	2	39	N	13	0.077	0
-Contig186_chrX_29118735_29118939	192	G	A	7.01	chrX	29118931	G	1	0	2	30	7	0	2	48	4	0	2	39	5	0	2	42	8	0	2	51	4	0	2	39	N	50	+99.	0
-Contig237_chrX_31256648_31257654	165	T	A	246.0	chrX	31256814	T	7	0	2	48	23	0	2	96	19	0	2	84	17	0	2	78	14	0	2	69	8	0	2	51	Y	37	1.481	0
-Contig25_chrX_40729418_40730089	332	C	T	31.2	chrX	40729745	C	0	0	-1	0	2	0	2	33	4	0	2	39	5	0	2	42	3	0	2	36	3	0	2	36	Y	34	0.212	0
-Contig90_chrX_57430715_57431566	548	C	T	116.0	chrX	57431266	T	9	0	2	54	18	0	2	81	13	0	2	66	14	0	2	69	8	0	2	54	7	0	2	48	Y	261	0.154	1
-Contig133_chrX_84833782_84834125	182	G	A	69.7	chrX	84833962	G	5	0	2	42	18	0	2	81	12	0	2	63	19	0	2	84	6	3	1	27	7	0	2	48	N	619	0.278	0
-Contig129_chrX_90586053_90586467	135	A	T	120.0	chrX	90586195	A	1	0	2	30	6	0	2	45	8	0	2	51	5	0	2	42	1	0	2	30	2	0	2	33	N	637	0.245	0
-Contig125_chrX_93319363_93320877	349	A	C	145.0	chrX	93319721	A	4	0	2	39	6	0	2	45	11	0	2	60	10	0	2	57	13	0	2	66	6	0	2	45	Y	59	1.686	0
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/add_fst_column/add_fst_column.gd_snp
--- a/test-data/test_out/add_fst_column/add_fst_column.gd_snp	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-Contig113_chr5_11052263_11052603	28	C	T	38.2	chr5	11052280	C	1	2	1	12	3	2	1	10	5	0	2	42	2	1	2	13	3	0	2	36	8	0	2	51	Y	161	+99.	0	0.1636
-Contig215_chr5_70946445_70947428	363	T	G	28.2	chr5	70946809	C	4	0	2	39	0	5	0	12	9	0	2	54	6	0	2	45	3	3	2	1	9	0	2	54	N	43	0.153	0	0.3846
-Contig132_chr7_20426224_20428145	1815	A	G	28.3	chr7	20428041	A	11	1	2	43	12	0	2	63	19	0	2	84	23	0	2	96	14	0	2	69	10	0	2	57	N	11	0.264	0	0.0213
-Contig30_chr8_17147743_17147923	13	G	A	105.0	chr8	17147756	A	1	3	1	19	1	0	2	30	3	0	2	36	1	0	2	30	1	0	2	30	3	0	2	36	N	6	+99.	0	0.4286
-Contig44_chr8_71186368_71188207	1455	G	T	147.0	chr8	71187818	G	4	10	1	74	3	0	2	36	20	0	2	87	12	0	2	63	8	0	2	51	10	0	2	57	Y	88	0.036	0	0.4167
-Contig103_chr11_8844784_8845095	214	T	G	135.0	chr11	8844993	T	1	1	2	12	10	0	2	57	5	4	1	26	2	3	1	13	2	7	1	34	1	1	2	13	Y	75	0.731	0	0.2101
-Contig37_chr13_15910164_15910426	245	G	A	32.9	chr13	-1	N	3	4	1	41	4	0	2	39	3	0	2	36	4	0	2	39	3	0	2	36	10	0	2	57	N	-1	2.159	1	0.2222
-Contig50_chr17_12247973_12249183	889	G	T	47.6	chr17	12248878	G	0	1	2	9	8	0	2	51	9	2	2	21	7	2	2	21	15	0	2	72	0	3	0	9	Y	1	1.181	0	0.0150
-Contig159_chr22_7896450_7896974	109	G	C	151.0	chr22	7896570	G	16	0	2	75	5	7	1	62	14	0	2	69	16	0	2	75	13	0	2	66	13	0	2	66	Y	16	0.465	0	0.1429
-Contig77_chr22_49764414_49764875	353	C	A	148.0	chr22	49764777	C	7	4	1	65	18	0	2	81	16	0	2	75	20	0	2	87	4	3	1	52	9	4	1	67	Y	12	0.941	0	0.0741
-Contig61_chr24_30465488_30465834	149	G	T	68.2	chr24	30465637	G	13	0	2	66	4	2	2	11	18	0	2	81	11	0	2	60	11	0	2	60	9	0	2	54	N	99	0.105	2	0.0556
-Contig59_chr25_18196776_18197707	785	G	A	112.0	chr25	18197551	G	8	10	1	42	27	0	2	108	21	0	2	90	18	0	2	81	10	0	2	57	14	0	2	69	N	36	3.625	0	0.1250
-Contig85_chr27_45471750_45472022	211	G	A	53.1	chr27	45471964	G	18	0	2	81	10	0	2	57	15	0	2	72	0	13	0	36	16	0	2	75	14	0	2	69	N	75	2.502	1	0.3023
-Contig175_chr28_36441165_36441915	68	T	C	3.83	chr28	36441234	T	4	4	1	15	6	0	2	45	12	0	2	63	15	0	2	72	6	0	2	45	9	0	2	54	N	4	1.610	2	0.1667
-Contig114_chr30_33636712_33637208	34	C	T	142.0	chr30	33636744	C	7	0	2	48	4	1	2	20	6	0	2	45	6	0	2	45	3	4	1	29	5	0	2	42	Y	14	8.028	0	0.0435
-Contig42_chr32_38900713_38901320	320	A	G	134.0	chr32	38901021	T	12	0	2	63	10	0	2	57	9	11	1	104	5	0	2	42	19	0	2	84	7	6	1	56	Y	71	0.165	0	0.2821
-Contig41_chr34_16544482_16545449	46	T	C	102.0	chr34	16544523	T	5	0	2	42	11	0	2	60	6	0	2	45	0	2	0	3	7	0	2	48	8	0	2	51	Y	215	1.156	0	0.1429
-Contig19_chr35_23887144_23888282	90	C	A	10.1	chr35	23887242	-	3	3	1	12	4	4	1	19	8	6	1	37	4	3	1	11	8	3	2	7	9	3	2	11	Y	105	0.199	0	0.0051
-Contig74_chr35_25394343_25394813	303	A	T	221.0	chr35	25394646	G	23	0	2	96	15	0	2	72	25	0	2	105	7	7	1	49	18	0	2	81	16	0	2	75	Y	58	4.298	0	0.0986
-Contig18_chr37_17147806_17149851	291	T	G	112.0	chr37	17148084	T	4	6	1	45	16	0	2	75	17	0	2	78	14	0	2	69	22	0	2	93	13	0	2	66	Y	41	4.442	0	0.1304
-Contig12_chrX_23243561_23244412	479	C	G	67.7	chrX	23244037	C	2	0	2	33	4	2	2	8	2	6	1	43	7	0	2	48	6	0	2	45	4	0	2	39	Y	208	1.620	0	0.0256
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/average_fst/average_fst.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/average_fst/average_fst.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,1 @@
+average Fst is 0.16461, using 21 SNPs
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/coverage_distributions/coverage.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/coverage_distributions/coverage.html	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,39 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>Coverage distributions Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 01:57:24 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="coverage.pdf">coverage.pdf</a></li>
+            <li><a href="coverage.txt">coverage.txt</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Data source: sequence coverage</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Individuals
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+<li>PB3</li>
+<li>PB4</li>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+      </div>
+    </div>
+  </body>
+</html>
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/coverage_distributions/coverage.pdf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/coverage_distributions/coverage.pdf	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,363 @@
+%PDF-1.4
+%���ρ�\r
+1 0 obj
+<<
+/CreationDate (D:20120403135724)
+/ModDate (D:20120403135724)
+/Title (R Graphics Output)
+/Producer (R 2.11.0)
+/Creator (R)
+>>
+endobj
+2 0 obj
+<<
+/Type /Catalog
+/Pages 3 0 R
+>>
+endobj
+5 0 obj
+<<
+/Type /Page
+/Parent 3 0 R
+/Contents 6 0 R
+/Resources 4 0 R
+>>
+endobj
+6 0 obj
+<<
+/Length 7 0 R
+>>
+stream
+1 J 1 j q
+Q q 59.04 73.44 630.72 299.52 re W n
+1.000 0.000 0.000 RG
+2.25 w
+[] 0 d
+1 J
+1 j
+10.00 M
+82.40 174.26 m
+106.73 206.89 l
+131.07 206.89 l
+155.40 263.98 l
+179.73 263.98 l
+204.07 223.20 l
+228.40 312.93 l
+252.73 304.77 l
+277.07 255.83 l
+301.40 280.30 l
+325.73 312.93 l
+350.07 321.08 l
+374.40 255.83 l
+398.73 263.98 l
+423.07 231.36 l
+447.40 231.36 l
+471.73 174.26 l
+496.07 215.04 l
+520.40 174.26 l
+544.73 133.47 l
+569.07 157.95 l
+593.40 109.00 l
+617.73 109.00 l
+642.07 92.69 l
+666.40 84.53 l
+S
+Q q
+0.000 0.000 0.000 RG
+0.75 w
+[] 0 d
+1 J
+1 j
+10.00 M
+82.40 73.44 m 569.07 73.44 l S
+82.40 73.44 m 82.40 66.24 l S
+204.07 73.44 m 204.07 66.24 l S
+325.73 73.44 m 325.73 66.24 l S
+447.40 73.44 m 447.40 66.24 l S
+569.07 73.44 m 569.07 66.24 l S
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 79.06 47.52 Tm (0) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 200.73 47.52 Tm (5) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 319.06 47.52 Tm (10) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 440.73 47.52 Tm (15) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 562.39 47.52 Tm (20) Tj
+ET
+59.04 84.53 m 59.04 345.55 l S
+59.04 84.53 m 51.84 84.53 l S
+59.04 149.79 m 51.84 149.79 l S
+59.04 215.04 m 51.84 215.04 l S
+59.04 280.30 m 51.84 280.30 l S
+59.04 345.55 m 51.84 345.55 l S
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 72.86 Tm (0.00) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 138.11 Tm (0.02) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 203.37 Tm (0.04) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 268.62 Tm (0.06) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 333.88 Tm (0.08) Tj
+ET
+59.04 73.44 m
+689.76 73.44 l
+689.76 372.96 l
+59.04 372.96 l
+59.04 73.44 l
+S
+Q q
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 348.69 18.72 Tm [(Co) 15 (v) 25 (er) 10 (age)] TJ
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 195.28 Tm [(Propor) -40 (tion)] TJ
+ET
+Q q 59.04 73.44 630.72 299.52 re W n
+1.000 1.000 0.000 RG
+2.25 w
+[] 0 d
+1 J
+1 j
+10.00 M
+82.40 157.95 m
+106.73 166.10 l
+131.07 231.36 l
+155.40 215.04 l
+179.73 280.30 l
+204.07 263.98 l
+228.40 272.14 l
+252.73 231.36 l
+277.07 345.55 l
+301.40 321.08 l
+325.73 288.45 l
+350.07 329.24 l
+374.40 255.83 l
+398.73 280.30 l
+423.07 247.67 l
+447.40 239.51 l
+471.73 215.04 l
+496.07 157.95 l
+520.40 174.26 l
+544.73 166.10 l
+569.07 133.47 l
+593.40 92.69 l
+617.73 100.85 l
+642.07 100.85 l
+666.40 100.85 l
+S
+0.000 1.000 0.000 RG
+82.40 141.63 m
+106.73 166.10 l
+131.07 182.42 l
+155.40 182.42 l
+179.73 231.36 l
+204.07 198.73 l
+228.40 206.89 l
+252.73 263.98 l
+277.07 263.98 l
+301.40 263.98 l
+325.73 239.51 l
+350.07 280.30 l
+374.40 198.73 l
+398.73 304.77 l
+423.07 231.36 l
+447.40 247.67 l
+471.73 239.51 l
+496.07 239.51 l
+520.40 215.04 l
+544.73 198.73 l
+569.07 231.36 l
+593.40 149.79 l
+617.73 166.10 l
+642.07 166.10 l
+666.40 100.85 l
+S
+0.000 1.000 1.000 RG
+82.40 133.47 m
+106.73 133.47 l
+131.07 255.83 l
+155.40 231.36 l
+179.73 272.14 l
+204.07 272.14 l
+228.40 337.40 l
+252.73 280.30 l
+277.07 280.30 l
+301.40 280.30 l
+325.73 337.40 l
+350.07 288.45 l
+374.40 296.61 l
+398.73 223.20 l
+423.07 272.14 l
+447.40 255.83 l
+471.73 239.51 l
+496.07 190.57 l
+520.40 117.16 l
+544.73 125.32 l
+569.07 149.79 l
+593.40 109.00 l
+617.73 109.00 l
+642.07 92.69 l
+666.40 92.69 l
+S
+0.000 0.000 1.000 RG
+82.40 157.95 m
+106.73 190.57 l
+131.07 215.04 l
+155.40 288.45 l
+179.73 231.36 l
+204.07 272.14 l
+228.40 272.14 l
+252.73 280.30 l
+277.07 296.61 l
+301.40 361.87 l
+325.73 329.24 l
+350.07 329.24 l
+374.40 296.61 l
+398.73 272.14 l
+423.07 215.04 l
+447.40 239.51 l
+471.73 190.57 l
+496.07 157.95 l
+520.40 166.10 l
+544.73 125.32 l
+569.07 100.85 l
+593.40 92.69 l
+617.73 109.00 l
+642.07 84.53 l
+666.40 92.69 l
+S
+1.000 0.000 1.000 RG
+82.40 198.73 m
+106.73 157.95 l
+131.07 215.04 l
+155.40 215.04 l
+179.73 304.77 l
+204.07 223.20 l
+228.40 321.08 l
+252.73 361.87 l
+277.07 280.30 l
+301.40 280.30 l
+325.73 329.24 l
+350.07 280.30 l
+374.40 337.40 l
+398.73 231.36 l
+423.07 272.14 l
+447.40 223.20 l
+471.73 174.26 l
+496.07 198.73 l
+520.40 149.79 l
+544.73 117.16 l
+569.07 100.85 l
+593.40 109.00 l
+617.73 100.85 l
+642.07 84.53 l
+666.40 100.85 l
+S
+1.000 0.000 0.000 rg
+0.000 0.000 0.000 RG
+0.75 w
+[] 0 d
+642.24 362.16 8.64 -7.20 re B
+1.000 1.000 0.000 rg
+642.24 347.76 8.64 -7.20 re B
+0.000 1.000 0.000 rg
+642.24 333.36 8.64 -7.20 re B
+0.000 1.000 1.000 rg
+642.24 318.96 8.64 -7.20 re B
+0.000 0.000 1.000 rg
+642.24 304.56 8.64 -7.20 re B
+1.000 0.000 1.000 rg
+642.24 290.16 8.64 -7.20 re B
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 354.25 Tm (PB1) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 339.85 Tm (PB2) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 325.45 Tm (PB3) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 311.05 Tm (PB4) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 296.65 Tm (PB6) Tj
+ET
+BT
+/F2 1 Tf 12.00 0.00 -0.00 12.00 661.68 282.25 Tm (PB8) Tj
+ET
+Q
+endstream
+endobj
+7 0 obj
+4763
+endobj
+3 0 obj
+<<
+/Type /Pages
+/Kids [
+5 0 R
+]
+/Count 1
+/MediaBox [0 0 720 432]
+>>
+endobj
+4 0 obj
+<<
+/ProcSet [/PDF /Text]
+/Font <</F2 9 0 R >>
+/ExtGState << >>
+>>
+endobj
+8 0 obj
+<<
+/Type /Encoding
+/BaseEncoding /WinAnsiEncoding
+/Differences [ 45/minus 96/quoteleft
+144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
+>>
+endobj
+9 0 obj <<
+/Type /Font
+/Subtype /Type1
+/Name /F2
+/BaseFont /Helvetica
+/Encoding 8 0 R
+>> endobj
+xref
+0 10
+0000000000 65535 f 
+0000000021 00000 n 
+0000000164 00000 n 
+0000005129 00000 n 
+0000005212 00000 n 
+0000000213 00000 n 
+0000000293 00000 n 
+0000005109 00000 n 
+0000005293 00000 n 
+0000005550 00000 n 
+trailer
+<<
+/Size 10
+/Info 1 0 R
+/Root 2 0 R
+>>
+startxref
+5646
+%%EOF
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/coverage_distributions/coverage.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/coverage_distributions/coverage.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,18 @@
+
+            0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19
+       PB1  2  6 10 15 21 25 32 39 44 50 57 64 70 75 80 84 87 91 94 95
+       PB2  2  4  9 13 19 24 30 35 43 50 56 64 69 75 80 85 89 91 94 96
+       PB3  1  4  7 10 14 18 22 27 33 38 43 49 52 59 64 69 73 78 82 86
+       PB4  1  3  8 12 18 24 32 38 44 50 57 64 70 74 80 85 90 93 94 96
+       PB6  2  5  9 15 20 26 31 37 44 52 60 67 74 80 84 88 92 94 96 98
+       PB8  3  5  9 13 20 24 32 40 46 52 60 66 73 78 84 88 91 94 96 97
+
+
+           20 21 22 23 24
+       PB1 97 98 99 99 99
+       PB2 98 98 98 99 99
+       PB3 90 92 95 97 98
+       PB4 98 98 99 99 99
+       PB6 98 98 99 99 99
+       PB8 98 98 99 99 99
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/dpmix/dpmix.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/dpmix/dpmix.html	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,56 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>dpmix Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 02:22:23 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="dpmix.pdf">dpmix.pdf</a></li>
+            <li><a href="misc.txt">misc.txt</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Data source: sequence coverage</li>
+            <li>Switch penalty: 10</li>
+            <li>Also analyze random chromosome: no</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Populations
+<ul>
+<li>
+Ancestral population 1
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+</ol>
+</li>
+<li>
+Ancestral population 2
+<ol>
+<li>PB3</li>
+<li>PB4</li>
+</ol>
+</li>
+<li>
+Potentially admixed
+<ol>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+</li>
+</ul>
+      </div>
+    </div>
+  </body>
+</html>
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/dpmix/dpmix.pdf
Binary file test-data/test_out/dpmix/dpmix.pdf has changed
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/dpmix/dpmix.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/dpmix/dpmix.tabular	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,78 @@
+chr1	0	125154818	0	PB6
+chr1	0	125154818	0	PB8
+chr2	0	85243509	0	PB6
+chr2	0	85243509	0	PB8
+chr3	0	92410450	0	PB6
+chr3	0	92410450	0	PB8
+chr4	0	75619257	0	PB6
+chr4	0	75619257	0	PB8
+chr5	0	90203461	0	PB6
+chr5	0	90203461	0	PB8
+chr6	0	74848993	0	PB6
+chr6	0	74848993	0	PB8
+chr7	0	55833450	0	PB6
+chr7	0	55833450	0	PB8
+chr8	0	71187818	0	PB6
+chr8	0	71187818	0	PB8
+chr9	0	39008708	0	PB6
+chr9	0	39008708	0	PB8
+chr10	0	59511126	0	PB6
+chr10	0	59511126	0	PB8
+chr11	0	53408638	0	PB6
+chr11	0	53408638	2	PB8
+chr12	0	71364712	0	PB6
+chr12	0	71364712	0	PB8
+chr13	0	66022136	0	PB6
+chr13	0	66022136	0	PB8
+chr14	0	56768832	0	PB6
+chr14	0	56768832	0	PB8
+chr15	0	45107015	0	PB6
+chr15	0	45107015	0	PB8
+chr16	0	49888550	0	PB6
+chr16	0	49888550	0	PB8
+chr17	0	61714821	2	PB6
+chr17	0	61714821	0	PB8
+chr18	0	58130413	0	PB6
+chr18	0	58130413	0	PB8
+chr19	0	56559549	0	PB6
+chr19	0	56559549	0	PB8
+chr20	0	46551277	0	PB6
+chr20	0	46551277	0	PB8
+chr21	0	43475551	0	PB6
+chr21	0	43475551	0	PB8
+chr22	0	62406302	0	PB6
+chr22	0	62406302	0	PB8
+chr23	0	48285470	0	PB6
+chr23	0	48285470	0	PB8
+chr24	0	46598214	0	PB6
+chr24	0	46598214	0	PB8
+chr25	0	51074589	0	PB6
+chr25	0	51074589	0	PB8
+chr26	0	36606979	0	PB6
+chr26	0	36606979	0	PB8
+chr27	0	45471964	2	PB6
+chr27	0	45471964	2	PB8
+chr28	0	36441234	0	PB6
+chr28	0	36441234	0	PB8
+chr29	0	21150118	0	PB6
+chr29	0	21150118	0	PB8
+chr30	0	33636744	2	PB6
+chr30	0	33636744	0	PB8
+chr31	0	26434322	0	PB6
+chr31	0	26434322	0	PB8
+chr32	0	38901021	2	PB6
+chr32	0	38901021	0	PB8
+chr33	0	26189703	0	PB6
+chr33	0	26189703	0	PB8
+chr34	0	42800126	2	PB6
+chr34	0	42800126	2	PB8
+chr35	0	25394646	2	PB6
+chr35	0	25394646	2	PB8
+chr36	0	32954182	0	PB6
+chr36	0	32954182	0	PB8
+chr37	0	31853191	0	PB6
+chr37	0	31853191	0	PB8
+chr38	0	16186061	0	PB6
+chr38	0	16186061	0	PB8
+chrX	0	93319721	2	PB6
+chrX	0	93319721	2	PB8
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/dpmix/misc.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/dpmix/misc.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,11 @@
+state 2 agrees with: PB1 PB2
+state 0 agrees with: PB3 PB4
+
+PB6: 360 SNPs where state 2 is as likely as state 0
+PB6: 12 SNPs where state 0 is more likely than state 2
+
+PB8: 358 SNPs where state 2 is as likely as state 0
+PB8: 14 SNPs where state 0 is more likely than state 2
+
+PB6: 0 = 83.7%, 1 = 0.0%, 2 = 16.3%
+PB8: 0 = 87.6%, 1 = 0.0%, 2 = 12.4%
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/evaluate_population_numbers/evaluate_population_numbers.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/evaluate_population_numbers/evaluate_population_numbers.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,2 @@
+CV error (K=1): 0.07423
+CV error (K=2): 0.07708
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/extract_flanking_dna/extract_flanking_dna.txt
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/extract_primers/extract_primers.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/extract_primers/extract_primers.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,1265 @@
+> Contig161_chr1_4641264_4641879 115 C T 0.323016
+
+  1 TCCGAACCGCTAAATCCTGACGACTGTTCAGTGAGAACGGGnTTCCAGCTCAGTGGAGAC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ACTCAGAGCTTATGTGATGCACCGTCGTGCCCGTGTCTGACTAAATGTGTTGCCAGAGAA
+                                                            <<<<
+
+121 CAAAACGAAAGCCCCTATT
+    <<<<<<<<<<<<<<<<   
+
+> Contig86_chr1_30984450_30985684 670 C T 0.031427
+
+  1 TAATTCATGACGACTGCAGAAGGGCACTCAGAGGCAATTCTACTTGAGGATATTGTCTGG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TATACTCTGTCCTTGCTCAGGACATCAGTGAGAACATAGAAACATTCACnTCCCCACACC
+                                                                
+
+121 GAAAGCGTCTGTAGACCGGCCCACGGGCCGAAGTCTTTGCATTTCCTCTTGCCATGCACG
+                                                                
+
+181 AGCATTCCCAGTGGCAATCAGGGGCCAGCCCTTCTGTTTGGCCTCTGCAAGCTTGTATCC
+                                            <<<<<<<<<<<<<<<<<<<<
+
+241 TTG
+       
+
+> Contig21_chr1_60697952_60699446 307 G A 0.507396
+
+  1 TCTGGGGCCATGTTTCTGAAGTAAGGCTGTTTCTGCAGCCTTGCGGGCTGTGTCTTGCTC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 nCACCCCTTAATTCTTACCTGTAGGTGGTATTTGGTAGAGTGGAGTAAAACTGGAAACTG
+                                                              <<
+
+121 GTTCTCTGTGTTCCTGCATCT
+    <<<<<<<<<<<<<<<<<<   
+
+> Contig64_chr1_87343284_87345672 163 T A 0.038702
+ VspI
+  1 ATGGCCAATTCTGGTTTAcGCATCATTGTTAACAACTCTTCCATTCATTCTCAGAATTTT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CCCAATTCACATGATAAATTGTATGGTCACCTACcTACAACTAAACACTTAGTTTATTTC
+                                                                
+
+121 TATTATTATTATTATTATTATTATTATTATTAnTAtTATTATTGAAATACATTTTTTTTT
+                                                                
+
+181 CATAAACCGTTCACcCTTGTGAGAAC
+       <<<<<<<<<<<<<<<<<<<<   
+
+> Contig20_chr1_110679280_110679687 181 C T 0.659726
+
+  1 GAGCACTCAATGAGGGGTTCGACCCTTTGCAGACACAGCATGTAGGAGGAAGAAATGCAA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 cGGGGCACCCCTGCGGGGGCAGGCTTCCAGTTCAAACTGATCnGGTCTGGTCCTGGGGCC
+                                                                
+
+121 GGGCCAAAGTTGTGGTTTCcCGCACTCAAGTCTCCAC
+                  <<<<<<<<<<<<<<<<<<<<   
+
+> Contig222_chr2_9817738_9818143 220 C T 0.092668
+ SpeI
+  1 AGATTTAGCTGGAGCATGCCTTTGCCCTTTTTAGCCTTTCCCTTTTACCTTTATCCTTCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TATTCTTGAAATGTTGAAATAGATGGAAGTATAGCAGCTATCTTGTCCCATAATGATGAA
+                                                                
+
+121 AACCAGGTACAAAGTTGGTGAAAACTAAAAGAGAGGAGGAGCCTGGGTTCTTGGTGGCAT
+                                                                
+
+181 CATGAACACCTGCACnAGTCTAGCATGGTCTGTGCAAAATCTCCTGATCCAAGAAAAATA
+                                                                
+
+241 TAAACATCCTTCTGTAGGGTTTTATTgCCTGAAGCAAAA
+                    <<<<<<<<<<<<<<<<<<<<   
+
+> Contig47_chr2_25470778_25471576 126 G A 0.289103
+ Bsp1286I
+  1 GCCAGGCGTCCCTCTTTTTGAGTTCtAATTGTGTACATCCAATCCCCATCTCAACAAATA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GCTGAACCAGCTTCCTaTTTATTTGGTAGGTnAGCACTCTAGAAATTTGCTACACTGAAC
+                                                                
+
+121 TCACCAAATTTATAATGTaAATTATGACCATTCTTTGCCATAATAATTTGGGGTAGGTCA
+                                                                
+
+181 GATTTGGTTTTGGGGGCAGAAGAAATCATCATATCACAAGCATGTGACAGCTTCCAGCCC
+                                                             <<<
+
+241 CATCTCAACTCCAAGAAATT
+    <<<<<<<<<<<<<<<<<   
+
+> Contig6_chr2_56859179_56859956 671 T C 5.308026
+ MspA1I
+  1 TATCCCAAAGACGTGTGTCTCAAAGCCCTGAGGTTTACAGCCAAACATGATGGACTGCCC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ATGACAAcGGATACAAATGCTAGCgTGGGTTTAATTATGCTAGAATTTTTATGATAATTA
+                                                                
+
+121 TAATGATATTGTTATGAAGTATGCTAGGCTTTnAGCGGCTAGTCTCTAAACCTATTTTCC
+                                                                
+
+181 tTATAAATCCTTTTATTTTTAGTGCACTATTTTATAGAATAAGAGGTTTTTCAGGAACAC
+                                                <<<<<<<<<<<<<<<<
+
+241 ATATATTGCATT
+    <<<<<<<<<   
+
+> Contig163_chr2_76402959_76404830 221 C T 0.178077
+
+  1 GCCCCTTCGAGTCCATCTTaCGCgCAGCAGCAGGAGGGATGGTCCCAACCACAAACCTAC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CCGCTGCCTGAACGCTTnAAGTGCCCTCCGAAGAAAGCCCAACTCCACAGCCTGGCAACT
+                                                                
+
+121 GAGGTCCTTGTGATCTTAGCTTCCTCTGCCCCACTCCACAGCTCAGCCTCACCgGACTCC
+                                                                
+
+181 CGAGCTCCTTAAAGGAGCCCCCGAGCCCCCGCACATGCTGTTCCCTGTAACCGGGTACTC
+                                                                
+
+241 CACGGCTCGTCTGTCCTTGGAGGCTCAGCTG
+            <<<<<<<<<<<<<<<<<<<<   
+
+> Contig56_chr3_17326225_17327548 387 G C 0.224947
+ AgeI,HpaII,MspI
+  1 CAAAGGCAGTGATATGGGAGTGGAATGGAGAGGATGGGTGCCCCAGACTGGGTGCAGATC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TGTTCTATCTGGTGTTTGGTGGCTGACCATACnGGTGAGAAGAAGTGTcCAGGTTTCTGG
+                                                                
+
+121 CTTGATGATGCCGACAGTTATGGCAGGAAATGCTGAAGGGGTGCACATGAGCTCCTGTTC
+                                                                
+
+181 ATTCTTCACTCTTCCTCTTCTACCTCCAACCTTGCTACCTGTGTGTACCCGACTC
+                                    <<<<<<<<<<<<<<<<<<<<   
+
+> Contig108_chr3_46210055_46210874 367 A G 0.027845
+
+  1 TTCACTCACCTGCTTCCCTGCTAACTGTCACCGCCCTCCCAATGCCTTAAACCAGCTTAG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 AAACACAAAATTTAAAAAACATTATGTTGAGACAAAAATATGTATAACCTGGAATATTGA
+                                                                
+
+121 ATAACAAAATGAAAGGGAAAATGATTCAAGAACACTTGGATAAGGAAAACTACAAATATT
+                                                                
+
+181 nAAGATGTACCTTTGAACTTCCTATCACTGAAAGCAACCATGGAACCAGTACAATGTAGA
+                                                                
+
+241 CCTTCTGATCTGACTTTCTTTTGTCTCTTGCTGCTGGGAAGTAGAATGCCCC
+                                 <<<<<<<<<<<<<<<<<<<<   
+
+> Contig1_chr3_51588422_51589409 926 A G 1.147200
+
+  1 AGATTATGGCCTGTGTTTACcCCAGCCTcGCAGAACATTTTACTGGGGACACCTGCCAGG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TGGCAGATCAGAAGCCCGTGAGGCAGCCAGCCAATGGGAtGGCCAAAACCTAGGGCTTCG
+                                                                
+
+121 TAnGGGAGGGAGATGTTTTCCTCgTCCCTCT
+            <<<<<<<<<<<<<<<<<<<<   
+
+> Contig65_chr3_80727952_80728283 39 T C 7.077725
+
+  1 CAAAGGCTTACTTTTTaGATCAACACTCTAAATTCTTAAnAAACAACAAAGCCAAATTTT
+    >>>>>>>>>>>>>>>>>>>>>>>>>>                                  
+
+ 61 CCTATATCATTGAGTAGTTGATACGTCTTTGGTTTTGCGCTAGCAGT
+                            <<<<<<<<<<<<<<<<<<<<   
+
+> Contig134_chr4_12145648_12148225 1326 C T 0.079565
+ BalI
+  1 AACCCAGAtCAGAAACGTCCCATGGCTAGTCATCTTCCTACACAGACTTCTgAGAGCCAA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GCATCGTCAAcCGGCCAtTCTnGGCCATTCTCCCGAGCAGATGCTGCCGGGATAATCTGC
+                                                                
+
+121 AGCATGAAGCCCTCCCTCGGGGGAGACCCGACcgGGTCCACACAGGTCTGTcTAGC
+                                     <<<<<<<<<<<<<<<<<<<<   
+
+> Contig19_chr4_26233601_26233991 146 G C 0.163005
+ DpnI,MboI,Sau3AI
+  1 AATTTGGCTTCCTCTGGAGTtGTCCCTTAATGCTAGGTATCAAGTGCTGACAGGCCACAG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ATnAGGGTAACACATGATTACAGGGCAACACACTGTAACACGTATTCCCTTGCCTTGTCT
+                                          <<<<<<<<<<<<<<<<<<<<  
+
+121 T
+     
+
+> Contig17_chr4_61310346_61311158 267 C T 0.097708
+
+  1 TATTCCAGACCAACCAAAAGGTCTAAGGAATAATAGAAGCTTCACCCACAGACCTGCCAC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CCAACTTGAGAAACAGCACTTGCTTCCTCATAGAGTCGAAACGTCTTCGGTGGGTCCCCT
+                                                                
+
+121 CCTGAAGCATCACCGCTACCTTTCCTCTTGGGAGTCACTGCCACCCnGAACTTGTTGCTG
+                                                                
+
+181 CTTATTCTCTTTTATTTTTCTTGTTTTTGAAAGAACCCTGTCTTGGGTGTTAGGATAC
+                                       <<<<<<<<<<<<<<<<<<<<   
+
+> Contig31_chr5_4734956_4736547 1166 C T 0.020932
+
+  1 TGTTCTGCCATGCACACTTCTTCAACCCTTCAACCTGTGGGAGTCACCTCACATTCCCAC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 AGcGAATGGAATATCTATCTATCTgnCTTTAGGGATTTGTTACGTTTTCTTTTTCTTCCT
+                                                                
+
+121 TTTCCTTCCAATATCTTAATGGGCAATTTTGTGGACAGTTGATAGAGACAACGTCAGGAG
+                                                         <<<<<<<
+
+181 CTGTTGGCCTAGTAAA
+    <<<<<<<<<<<<<   
+
+> Contig6_chr5_26899813_26900498 97 A C 7.369943
+ AvaII,Sau96I,SinI
+  1 AACTGAAAGTGAGAATTCTTTGTATTTGCTAGTCAAAAGGATTTCTAAGTCAAAAAAGTA
+       >>>>>>>>>>>>>>>>>>>>>>>>>>                               
+
+ 61 ATTTGGGAnCATTAAGTCATATTTATAGACTAAAATTTCATTCCTAAAGACAATTTAGTA
+                                                                
+
+121 AAAATGCTAGGCTTTCTAGAAATTTAACCTAACATAAAAAATTACAGTAAGTTTGCTAAA
+                                                                
+
+181 GAATCACAGAGTTGACTGACAGTTTCCCAGGTTA
+               <<<<<<<<<<<<<<<<<<<<   
+
+> Contig45_chr5_50892738_50892968 169 C A 0.496871
+
+  1 TGAAAGGGGCACTGGGAATTATCAGAACCTTCTGGGTAATTAAACTGGGGAAAGCATAAT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ACCATTTAGAAAAAGTTCAAGTGAGTCTTTTCCTTATTCTCCCnTGTACCCAGAAAAACC
+                                                     <<<<<<<<<<<
+
+121 TGGACATGGTAC
+    <<<<<<<<<   
+
+> Contig45_chr5_76133561_76134403 388 A G 0.038045
+
+  1 CATGAGCATGCTGTCTGCACAAtGGGAGCACCCGTGATGTGAGAGTAGCCAGGCCACCCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GGCTTGAGTGCTTTGTCCAAAAGGCACAATGGGAACTACACAGAAACAATCAGATTCACT
+                                                                
+
+121 GCCTTCGAGGGTTTGAAGAAGACAGCTGAAGAGTAGGAGGTAGAAnCAAAAAGGCATGAG
+                                                                
+
+181 AGGGGGAAGCAGAGGCTGCAAGACATGAGCTGGGCAGTACTGACgGGCCACACAGAGCAC
+                                                               <
+
+241 TGGAGACAAGGTCAGGAGCCCT
+    <<<<<<<<<<<<<<<<<<<   
+
+> Contig111_chr6_5821219_5822519 1060 A G 0.230765
+ AvaI
+  1 CGTCAGAGCTGTCTTCCCTCCAGCCAGAGGGGCCCTGAGAAGGAAGGGGGCTGAACCCAG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GCgCCAGCCCAAGCTGCAGCGTGATCTGGGGGTGAGGCCCCCCGCTGCACAGGGGGCACG
+                                                                
+
+121 GGGGTTCGGGCAGAGATCGGCTACCCATGGCCGGCGAGGCCACAgTGGCAATGGGCAGCC
+                                                                
+
+181 AGCCTCCGACCAGCcGCCCCCnAGCTGCCTATTTAAGTCAGGAGCTTCTCCTTCCCgTGG
+                                                         <<<<<<<
+
+241 AAGTAGAGGACAAATT
+    <<<<<<<<<<<<<   
+
+> Contig102_chr6_30271329_30271577 39 T G 1.158547
+
+  1 TCTTCCTTTATGCATCAGGGCAGCACCCTGGGGAGAAGnGGGGGGGACAcGTGTGTCCTG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GGGAAAGGGGTGTTCCCACTCCCTGCAATGCCTTCCCCCGCCCAGACCAGCAGTTCTCAG
+                                                                
+
+121 TCTTGACTGCATGGACTCTCCTGGAAGGCTTTAAAAAATGTGGAGGCCGAGGCTTACCCA
+                                                                
+
+181 tGACGGTTCTGACTGAATTGCTCTGGAGTAGGGCTTAGGCACTG
+                         <<<<<<<<<<<<<<<<<<<<   
+
+> Contig112_chr6_51024554_51024851 100 A G 4.286925
+
+  1 CTTCATCATACCTATCATTGCCTATCGTTATACTATAGAGGTATTGTTCATTCTTTTTTA
+       >>>>>>>>>>>>>>>>>>>>>>>>                                 
+
+ 61 TAGACTCATTGAGTAAAACTCAGGnCATGAGGGAAGGAACTTTGTCTCTTGTGCAATTCC
+                                                                
+
+121 CTATCCTCAGTCCTTAAATATATGTATGCTAcCCAATAGGCACCAAATAT
+                               <<<<<<<<<<<<<<<<<<<<   
+
+> Contig84_chr7_6648683_6650255 1297 G A 0.165637
+
+  1 GTTAGTTGTGAACACTCCCCAGGTAAACTGGTGTAACTCTTGGGGCAAAGCATGGAGTCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ACCCAAAAATGTAGAATTCTGCAGAGACAGCTGTTTCTTGGTTGGGTTTCTAGACCAGAA
+                                                                
+
+121 AATAGAAGATTATAATTATGGGTGGGAAATATATGTGCAAAAAAGTATAAAAGAAGAGGA
+                                                                
+
+181 ACAGAATAAAnGGAAATGGAAATGTTTGTAATTGATAGGGATGTGGATGTAAATGCCTGG
+                                                    <<<<<<<<<<<<
+
+241 CAGAGAGGAGG
+    <<<<<<<<   
+
+> Contig206_chr7_26281823_26282074 103 C A 0.947486
+ NheI
+  1 ATCCACATTCGCACAGCTCCTAATATAATATTTCATTGTTAAAATACTTCTGATTGGCCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 AGGACATATTTTTACAACTGCCTTGACTTCAATTGCTAGnAGTAGCTTGCCAAAGAgGTG
+                                                                
+
+121 CTTTAATAAAGGAAATTAACTTCTTTTAATATGTTGACTGATATACCAAGGTTTTAGTGC
+                                                                
+
+181 TATTAGTTTACCTTCCCCAAAAGTGCTTA
+          <<<<<<<<<<<<<<<<<<<<   
+
+> Contig38_chr7_50681997_50682600 42 T C 0.145997
+
+  1 TAGAGCTCTCAGCATCCAAGCAGAATCTACTGGGTCTGACTGnGTTCTGCTCTGTCACTG
+     >>>>>>>>>>>>>>>>>>>>                                       
+
+ 61 GAATGACATTTCATTGCAGAGTACTCCTGCAGTACAACCAGGGCACAGCCTTTAAATTGA
+                                                                
+
+121 CCATGTCCCCTGGTCTaCTCTGCTGAGCTaTGCACGGGTCCCTTCTGGTTCAAACACAGA
+                                                                
+
+181 CTGATACAGCTCAGATGGAAGGGAGGCAGTTGCAGAGAAACAAA
+                         <<<<<<<<<<<<<<<<<<<<   
+
+> Contig91_chr8_12804505_12805470 409 C A 0.175272
+
+  1 CTGTTTTCAGGGGCTACCTGCTATCTCCAGAACATGCCTGGCTCTCCTCCAAACACTGTT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CAAnCTGACCAAAGCAGAGAGCTGTATATGGACCACACATACCAAAAAAAAAAAAAAGAC
+                                                                
+
+121 AGTCCACACCCTCTGTATAATTATATGGTACAAATAATAGAGTTTTTGTTAACTACCAGC
+                                                                
+
+181 TCTTTTTACAAAGCCTATCAAgTATCATAGACAGTATAATGCTGTGATTGCATCTGTGAA
+                                           <<<<<<<<<<<<<<<<<<<< 
+
+241 CC
+      
+
+> Contig8_chr8_27811135_27812620 333 C T 0.272485
+
+  1 CTTCAAGGAAAGGAGGCAGTTTGGACAAGTCAAAAAAATCCCAAAACtTTGTACTATATA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 AATCTGGCATATTTGTTGATGACanAATTGAGTTAGAAGCAAGAGTCAGAAGCTGACTTT
+                                                                
+
+121 CATGCTGTTTTTCTGTTGTTTTCTGCGGCTCCCCTATGTACTAGTTCTCTTCCgGTGTGC
+                                                                
+
+181 TGACAACTTCCAACTTcTCATAcCCTCTGCATTTCACGTTCTGC
+                         <<<<<<<<<<<<<<<<<<<<   
+
+> Contig17_chr8_57490059_57490498 69 G T 0.522227
+ BglII,DpnI,MboI,Sau3AI,XhoII
+  1 CACCAGAAAACAGGCATGGAACAGATTCTTTCAnATCTTTAAGAACAAACCAGTCCTGCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GACACATAGATTTTTGGACTTTTGGCCTCTGTAACTGTGAGAATAAATTTCTATTTTAAG
+                                                                
+
+121 CCATCTACTTTGTAGTAATTTGTTATGGCAGCCCTGAGAAATTA
+                         <<<<<<<<<<<<<<<<<<<<   
+
+> Contig73_chr9_29451535_29452248 616 A G 0.448230
+ Eco47III,HaeII
+  1 ACCCAAGAGTCTGAGAGGCCCAGAGGCAGCTGGAGGCTGGAGGAGTCCCaCAGGCAAACC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CTCCATTCCATGCGCCCCAGGGAGGCCAGGAAATCAGCnCTCCCAGGAGCAGGGAAGCAG
+                                                                
+
+121 CAGTCCCTGGCATTGCCAGGGCAAGTGGCCACTCAGGGGAGAAAGGGGTGAGCTGGGGAG
+                                                                
+
+181 GGGGAAGAGGGGAGGGGAGGGAAGGCAGAGACGAAGAGAA
+                       <<<<<<<<<<<<<<<<<<<< 
+
+> Contig96_chr9_39008495_39009278 215 A C 0.426539
+ SspI
+  1 TGGCAAACTCCTTGTGAATGCCACTACACTTTCTGGTCTCTGTATGTAATGCTAGATATT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ACTGACACTTACcGCTACAAAGGCAAGACAAGCAAGACAACTGACATACACCCAgGTATG
+                                                                
+
+121 GATCTATGAAGGGAGCTCCTTCTGCTAGAAAACAATATGTAAnTATTTACaTAACACCTA
+                                                                
+
+181 CAATTCTAAATGGGTAGTTTCCCACATGTGAGATTACATCTTCAAGAGCCAAAGGACAAT
+                                                  <<<<<<<<<<<<<<
+
+241 TTGTGCATC
+    <<<<<<   
+
+> Contig22_chr10_15505382_15505589 172 T C 2.860867
+
+  1 CGCAGGCGCCCCAATTATTCTTAACTCCTTATCAAAAGTTTTCCTAATTGAAACTTAAGC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ATCACCTGTTTATTTCCTCTAAAATAAATGTATACATATAGAATTTCAGTAAGATAATGT
+                                                                
+
+121 CTCAAAGAAGATGATAGCCATGGGAGAGGCTTATATGTACTTCnTATAATAAACAACGTC
+                                                      <<<<<<<<<<
+
+181 CAGGTGTGATATT
+    <<<<<<<<<<   
+
+> Contig69_chr10_40547265_40548153 371 G A 0.137642
+ Bsp1286I,CfoI,HaeII,HhaI
+  1 AAGGGGAAGAACTGAAGCGAGTGAGAAGCACGGAAGGACTTTTAGGTTTACAGCTGGGGT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CACTGGTCTTCGCTATGGATGCCTCTCTTAAAGGAAAGACTAATTCTCTGTGGGTACTGA
+                                                                
+
+121 AGGTGgGAGATGAATGTAGATGGGCnCTCGCATGTGTCAATGCTGACGGCTTGGTGAGAG
+                                                                
+
+181 GTTTGGTGCGAGGCCAAAAAGGCgGGGATgAGAGAAGGATGACCTAGGGAGACTGCAGGG
+                                                                
+
+241 TATTTAAAAGTTTGGGTCCAATTTTTCTCAAAGTGTGGCCAGTGCAC
+                            <<<<<<<<<<<<<<<<<<<<   
+
+> Contig9_chr10_51475063_51476054 770 C T 0.393903
+
+  1 GTCTTCCTTCTAATCCCCaAGCcGTGAGAAGCTGTCTGAGCGCTCCTTGCTGGGCGTCCC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TGCATGCCTGTACTGGGGCACACCTACGCCCTGGGTCCTGCTnCTGAAACGGTGTCTCAT
+                                                                
+
+121 TTCTGTAATCGCTCCAAGCTTAATGGCTCTCAGCCTTGTGGGTTGCAGTGGAGAGAAAGC
+                                            <<<<<<<<<<<<<<<<<<<<
+
+181 ATT
+       
+
+> Contig72_chr11_7142765_7143772 146 G A 1.137400
+
+  1 GGTGTAGTGAGGCTTCCACGAGCAGCCAGGCTTACAAACTCATCCTTAGCCTAAAAACTC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CACnAAGTCAAGTATCTTGTGGGTGTTGAAAACTGTTCCACTCTGCAGAGCACCTCTATA
+                                                                
+
+121 TGAAGTAATAATCATGGTATAATGTCCTTCTTCACATACCTGCCAAGAT
+                             <<<<<<<<<<<<<<<<<<<<<   
+
+> Contig7_chr11_40017076_40017630 352 C T 0.336170
+
+  1 TCGGTCCTTCCTTGATCACATCTCCATGATCCTCCCACTGTTACTTGGAGGAGAATTGTT
+     >>>>>>>>>>>>>>>>>>>>                                       
+
+ 61 GCTTCCACAAATCAGATCTCTTTATTTTTCATTTATTCAACAAATGTGGACTGAGCTCTT
+                                                                
+
+121 TGtATAGTACATTCTGTGGGCACTATTCACTAGACACACTGTAAACACTTCTGCTTCCTG
+                                                                
+
+181 ACTTTGTTCAGATCTACCCCCnTGCCTGATCTGCCCTCCCCACCTGGTTTTCATCTCAGG
+                                                                
+
+241 CTTAGGTCAAGCCTCATtTGCACTTC
+       <<<<<<<<<<<<<<<<<<<<   
+
+> Contig16_chr11_53408448_53408790 187 A G 1.366749
+
+  1 ATATTGCCAGTTTTAATGGGTGATATTTAGTCCTCCAATTAGACCTCTTTAGTGCATTGG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ATACCAGTGAGCAATCATTCTGACAnAATTTCTGCTGCCTTGATTTTTGTGACAA
+                                    <<<<<<<<<<<<<<<<<<<<   
+
+> Contig21_chr12_18403415_18404381 586 G T 0.068025
+
+  1 AGTTCCAATGTCAGAGTCCCTCCCTCTACCTCCTATCCAACCcGCTACTTTTTTTnTTTT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GTTTTACAACAAAAATAAACCTTCTTGTAACAATTCCAACAATTACAAAATAGAGTAAAA
+                                                                
+
+121 TGTTTAAGTCTCTACCTAAACACACTCATCCTCAGAGAAACTCACAGGTAATTTCGGTtC
+                                                                
+
+181 GTATCTTCCCAGACCCTCTTCTCAGcTTTCACACATACTACATACACATGAACTTCGAGC
+                                                 <<<<<<<<<<<<<<<
+
+241 TGGCTGTT
+    <<<<<   
+
+> Contig41_chr12_25565452_25566993 475 G T 2.230501
+
+  1 TTACATAGCcAAGTGGGAAACAAAGcTACATTTTTnAATATTAATAAATCTGTTTTTTTA
+       >>>>>>>>>>>>>>>>>>>>>                                    
+
+ 61 AAGGGTTaTTATACAATATTATCAAACTTCTTGTGAATGTCAGAATCCAGAACAAACCTA
+                                                                
+
+121 AAATCAGTAATACTTGGGAAAGACGCAAATAGTCCCTCTTTCCACT
+                           <<<<<<<<<<<<<<<<<<<<   
+
+> Contig5_chr12_53880670_53882675 1221 A C 0.061001
+
+  1 AAGCCATCCATGTGTGTGCTTTCATAATATATTATGACAGGAGATTAAATTCTAAGTAAA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GATTAGTCCCCAGTACAGTAGTGTAAAATAGGACTTTTCTCCCTTTTTCTCTCCnCGATA
+                                                                
+
+121 TTCCAAATCAGAGTTTGGCCAAAAAgAAGTCAATAAGGACTTAcAAAAAAAAAATCTCCA
+                                                                
+
+181 TTCACTGAAAGTAGCTTGCTAGCATTTTCCTTTCTcCTGATGTTGCTCCATAACTTCAAC
+                                                                
+
+241 CCTTTTTAAAACTGTCTACTGTGGGGTAGACAGAAGGCGTGGTCGTGAGGTAAAGGTCAA
+                                         <<<<<<<<<<<<<<<<<<<<   
+
+> Contig107_chr13_26045881_26046290 341 C G 4.509990
+
+  1 CATAGATTGCCTTTTCCAGTCcAGAAGTTTAGAACAGACTGCCCTGAGATCATGGTGGGA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 AATATAATACTCATTAGGTTgTTGAAATTCTTGTAGGAATGGAAGAATTTCAGCTTAGGC
+                                                                
+
+121 ATTCTGCTnCTGTATTCCCAGATTACAGTGGGAACTGTATGAAA
+                     <<<<<<<<<<<<<<<<<<<<<<<<   
+
+> Contig251_chr13_28498333_28501066 864 T G 0.067573
+
+  1 GCCCTCTGGCTTCTGTTTGGGAGGTAGGGCGGGTGGGCAGGAAGGGAGGACGGTCGGGGT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ATTGGTTCnCCTCCTCCtGCTGGGTCCCAGATGGATACAGGCCAGGTCTG
+                               <<<<<<<<<<<<<<<<<<<<   
+
+> Contig55_chr13_53467708_53468101 221 T G 5.717222
+ HinfI
+  1 AATACGGTGAAGAGCAAATGAGAAACATTTCTTCAAACATTTGTAAAGTGAAAATATTTA
+       >>>>>>>>>>>>>>>>>>>>>                                    
+
+ 61 AAATGAAATAGATnCCAAATTTTTTCTTCCAAtGGATTATCTACTGGGTTCTGAATATCA
+                                                 <<<<<<<<<<<<<<<
+
+121 CAAAGACAAATG
+    <<<<<<<<<   
+
+> Contig48_chr14_11839435_11843272 3014 A G 0.907583
+
+  1 GTGCTTCCAGTCAAAGGGGAAAACTTGATAGACAAAAGTTTGGATTTTTTTTTTTTTCCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TCTCCTTGGGAGTATGTCTGAGTTACCGTTTTTAGTTTTGATCTGTGGAAAAAGTGATTA
+                                                                
+
+121 TATAGGTTCCAAATCTTACTTTTCCCTTTTTGTTTTCAATAGACTTTTTGTGATCATTTC
+                                                                
+
+181 AnCATAGTTTGTATTATTAAGTAGGGGTTTTtTTTTGTTTTGGTTTTTTTGTGGTTGTGC
+                                                 <<<<<<<<<<<<<<<
+
+241 GTTGTAAG
+    <<<<<   
+
+> Contig28_chr14_26905747_26909514 975 G C 0.116622
+ AluI
+  1 CTGGTACGTGCTTCTCCTCCTGCAGCCCACCGTTTACTTGGTAAGTCGCTGCCGATCCGG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CGCCCCCGCAATCCCACCCTCGTCGCGAGGACAGACAACCAGGGGCGCGCGGGAGGAGGG
+                                                                
+
+121 TGAGACCGCCAGTTCAGCGGAGCAGCGTTCCTAGCGACCGTGTTGGAACAACTTTGGCAA
+                                                                
+
+181 nCTGGTCTTTGGATCCCTGCGGGATTTTTCGGGTTTCCCACCCTCATTTCTTGCTT
+                                     <<<<<<<<<<<<<<<<<<<<   
+
+> Contig64_chr14_56768376_56768902 473 C T 8.281311
+
+  1 ATAAGAATCTCCTCAGTAGAGAGAAGCCTGATCTACCATGATTTTATTTGAGTAAAACCA
+       >>>>>>>>>>>>>>>>>>>>>>>>>                                
+
+ 61 TTGAAACAAACAnTTCAAGAAAGATGGTCAGAGAAGCAAAATGTAA
+                        <<<<<<<<<<<<<<<<<<<<<<<   
+
+> Contig60_chr15_18493036_18494316 150 G A 0.125024
+
+  1 CGCCTGGAATAGCATGGTGCCTTTAGGAAATTACATCTAACTCTCTAGGGCTGGAAGGAA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CACTGAGTnAACGTAAAGAATTGTGGGAGAGAAGCCTTTAGTTAGATCATGCAGGGCtCC
+                                                                
+
+121 GTGCTCCAAATGGGCTTTGTGTTTTG
+       <<<<<<<<<<<<<<<<<<<<   
+
+> Contig112_chr15_26772864_26773267 374 C T
+
+> Contig119_chr16_6160274_6160477 180 G A
+
+> Contig60_chr16_28079136_28080263 588 T G 5.998983
+ NsiI
+  1 TTAGAGAATTATTCACTCCCCCAAAAGTAATAAAAATATAAGAAACAAAGCATAATCATA
+       >>>>>>>>>>>>>>>>>>>>>>                                   
+
+ 61 ATGCAnTGGTTGAGTTAGTAGTAAATAACATTTTAGGGTCATAAATTAAAAACTGAATTG
+                                                                
+
+121 AGATTTAGCTGGAAATTGTGATATAAATGTCAGGATAAGAGAAGCAAGATTGAAAGAAAG
+                                                                
+
+181 ATGGATTAAAAATGCTAAATCCTTCTCTACTATTACAGGAAATTGATAAAAGAAGAGAGA
+                                                     <<<<<<<<<<<
+
+241 GGAAACAGCACATAT
+    <<<<<<<<<<<<   
+
+> Contig31_chr17_12128267_12129637 205 G A 0.246305
+
+  1 TGGAGGCAATGGAGGTGAATGAGCCCCAGTCCTGGACCTCgAAGCAGACTGGCCAGAGAC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ACCAGGATTTAAGGCATGTGATGAAGACACAGTTCAAAGTGACGAGCCCTGCAGACTCTT
+                                                                
+
+121 CnGGAGCAGAGgTAGAGTGATGACCCGTACCTGGAAGGTTTTAGGAAGGATAACAATGAA
+                                                               <
+
+181 TTTACCAGAAGGCAGGGGTAGA
+    <<<<<<<<<<<<<<<<<<<   
+
+> Contig99_chr17_26021506_26022200 505 C T 0.171977
+ RsaI
+  1 TGTTGCCATGTTGCCAGTATGTTTTTTTAAGTTTTCCTTTTTAATTTCATTTATGATATT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TTTTGGAGTAnaGAAGTTATCATTTCACATGATCAACTTTTCAGTCTTTTTCTTTATAAT
+                                                                
+
+121 TTTTAAcTTTGTTGTCATGTTTAGAAAGGTTAAATTTATACCTTGTAAAATAcCTTCgCA
+                                                                
+
+181 AATTTACATTTGGGAAATTATTAGTAGTATTATTTcAGGAAGTTaTTATTTTTAAGTGTT
+                                                     <<<<<<<<<<<
+
+241 GGGTTCCCGTGA
+    <<<<<<<<<   
+
+> Contig27_chr17_61713766_61716585 1056 G C 2.199527
+ Eco47III,HaeII
+  1 AAGAGGCGCAGGAAGGAGAGTCCGCCcGCCGCAGCCCGCCCGCCGGCTCCTCAGACAGCn
+       >>>>>>>>>>>>>>>>>>                                       
+
+ 61 CTCGCaGGTCCTCCAGCCTTCCAGCGAGAAGAAAGAAAGAGCGTCACCGGAAACCACCGA
+                                                          <<<<<<
+
+121 AACTCTGGGGTAGAGCG
+    <<<<<<<<<<<<<<   
+
+> Contig229_chr18_3706523_3708577 1076 A G 0.444778
+
+  1 TTTAAACTCCCGTGTCTGTGCTTGATTATGGCACCGTTAcTCTCGGACGTATTTAATTTT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CTGATTCTGATTCATTGGTCTATTACATGAGCAATTGGTGGnAAGTGATGTCTGTCTGTG
+                                                                
+
+121 GCCCTTACATTATTTATAATAAAACTCTCCTTCAAAGAACCTTTGGACGATGTCTCCACA
+                                                                
+
+181 ATTACAGAATGAGTACAAATTAGTTTTCTAAAACAGCAACTGGTGGTTAATTAAGTTTTG
+                                                                
+
+241 TCATGTTTTCTGGAGATGAGTGTCTCATGGTTTGGATACTATGAAGGCATTTCTGCAAGG
+                                            <<<<<<<<<<<<<<<<<<<<
+
+301 TT
+      
+
+> Contig82_chr18_27305489_27306229 566 C T 0.348750
+
+  1 CCTGAGAACTTCAAGCTCAGCGGAGGGCTGAAAGGGAGGTAACCACTTTTGTACTAAATT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GTCACCTCCTTGCTTATTTTCGTGAAGTTCTAAAGAACACAACTATCTCACTAACACAAc
+                                                                
+
+121 AGATTTATTATTGAGTTGTCAGAATCAGCAGCTTTTAGTCACngGTCACTTGTGTGCCTC
+                                                        <<<<<<<<
+
+181 CACTCCATCATAACT
+    <<<<<<<<<<<<   
+
+> Contig64_chr18_55979770_55980315 49 G A 2.123800
+
+  1 CCCCAAGGAGACAGGAGGGCAGGCTGTGTGGGTTTCCTGGCCCGCAAnCCCTGTGCAGGT
+       >>>>>>>>>>>>>>>>>>>                                      
+
+ 61 GCGgTTCTGCCAGGCCCGCAAATCTCGGTCTCACTTAACTGCGGCATCATTTATGCTAAT
+                                          <<<<<<<<<<<<<<<<<<<<  
+
+121 G
+     
+
+> Contig146_chr19_5221790_5223013 143 A G 0.869806
+
+  1 TTAGAATGGCTTTTTCACGGAAGGAGATGAGTTATAAAGTACGGgTGACATTTTTTTGTC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TTGnGTTTTTTTTTTTTGTCTTGTTTTTAACTGTTGTTTAAGTCAGCCAACAAGTACATA
+                                                                
+
+121 ATTTCTCAGCCCACATTTAAAAATTATCAACTCATTTTCACTTGGAGGTGTGGACATAAA
+                                                                
+
+181 GCCATAAATATAATTTGCATTCTGCTGACCTGTTTC
+                 <<<<<<<<<<<<<<<<<<<<   
+
+> Contig129_chr19_25541958_25542221 202 T C 2.550968
+ HinfI
+  1 AAGAATCAAGCATGCATTCTGCCTTCCCCATGTGAAAAGTACCAGGTGAGGATATGTACC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TCTTTATATCCATGTTCCAAGAACAACAACAACAACAAAAGAATGAGAGTnACCACTTTA
+                                                                
+
+121 CAACCCCCAAAGAATTAATGGATT
+    <<<<<<<<<<<<<<<<<<<<<   
+
+> Contig60_chr19_54013816_54014398 281 A G 1.271267
+
+  1 ATTTCTCTCGCCGATATTGAGGTTAAGTATCCCTCTAGGCTAAAAGACCAGCAGCTTTTC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TTAAACCTATTACAGGAATCCCAATAATGGAAAGAAACGAGGGGAGGCAGTGCTCATGTC
+                                                                
+
+121 ACATTCTTCCAGAAATCAAATATAnTGGGTTTTTTTGTTGACGTAAATACATAGGTTGGA
+                                                                
+
+181 AAAAAGGTAGGGGGAAAGGAAAA
+    <<<<<<<<<<<<<<<<<<<<   
+
+> Contig50_chr20_12138509_12141975 3206 C A 0.383804
+
+  1 TCACACCAGGCTCAAGGTTAAGGCAGAACACAAGATAAGAGAGCAAGCTGGCTTCCTGTC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CCCCAGCTGGCtTnCCCATGGGAGCAGAAGCTGGATGGGTGCAGCTGCTGGCTAGGGATC
+                                                                
+
+121 CTGTAAAAACTGAAGACCTCCaGTCTCCAGGGCTGGAGGaGGGATTCCTGCCCTGGGGGC
+                                                                
+
+181 AGGCCaGATGAGAGGGATGCGATAATGGCAGGTGTCTCCACAAGA
+                          <<<<<<<<<<<<<<<<<<<<   
+
+> Contig36_chr20_32631363_32632049 176 G A 1.149790
+
+  1 CTGCCCGAAACAAGTTCCTCATTGTTTCCTnCGTTCTGTGCTGTGGCGGTTTCTTCCTGG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ACCCAGAGTCCTTTTCcGAACATTAGCAACTCCATTATGCCACACAGAGATGAGATTTGA
+                                                                
+
+121 GAAAGGAAAATAAAGTTGTCTCGTGATATGGAGGGCAAAGCTGATAG
+                            <<<<<<<<<<<<<<<<<<<<   
+
+> Contig50_chr21_4178523_4178687 121 G A 0.483377
+
+  1 GTAGGAATCTCAAGCCCCAATCTACTTTTCAGGAAGCTGAGGCTCAGAGAAGTAAAGTAA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CATGCTCAAATTCACACCAGTAAGTGAGAGAGTTnTAAGTAACTATAGTAAGTGACAGAG
+                                                            <<<<
+
+121 CTGGGATTTGAACCCTCAT
+    <<<<<<<<<<<<<<<<   
+
+> Contig129_chr21_31045749_31046924 381 A G 0.028026
+ AcyI,Hsp92I
+  1 CAGCTGAAGCACCCTCTCTGACCAAACCTGATCTTTCTTTTGGGGATCCTTGACnTCTCA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TAAGTCTTTATGAACCATTTGTCCTTCCAGCCATCATTTCCTTCAAC
+                            <<<<<<<<<<<<<<<<<<<<   
+
+> Contig159_chr22_7896450_7896974 109 G C 0.465232
+
+  1 TAACTGAGTGATAGTGCTTGGcGCAAGACACTAGCAAnCCTGTACTCACCTTCCATTCAT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TTATGTCATAATAATAATTCTTTAAATATGGAAAGcGTAgAAACAAAATAGGAACACTGC
+                                                                
+
+121 TAAGTATTCATTTAGGTAATAAGTTTAGTGCTAGATGTGTGACAGGAATTATTTTcATTA
+                                                                
+
+181 ACCACAAGCAAACATTTATGGAATGTCCATTGCATGCTGAAATGTA
+                           <<<<<<<<<<<<<<<<<<<<   
+
+> Contig23_chr22_34612023_34612568 167 C G 0.409430
+
+  1 TATTCTACCACTCAAAGCCAGCCTGAAGGAAnCCTGGGcTCTTTCCATCAGCTATCTGAC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 AAGTTGATCTAAAcgTGTAGAAAGCATGCCTGGCTCCACACCTGATTTCATGTGGAGCCA
+                                                                
+
+121 TCAGCTCTCACACGATCACCTTG
+    <<<<<<<<<<<<<<<<<<<<   
+
+> Contig26_chr22_57817664_57819633 1453 A G 0.471213
+ RsaI
+  1 TGCcCACCCACATCAcTGAACAATTCAGAGAAGATTCCTTTAACATATGCATTCAATGTT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TAAGCCTCGCTAACATTTTTTAAGCACCGAACCTTTTTAAAAAGGGCTCTAAAAAATAAG
+                                                                
+
+121 CATGAAACTAAATCTCTCTAATACgTCACGTGACACACATGTAtATAACCCAGAAGGTnC
+                                                                
+
+181 ATCTAGGGAAACGCAAAAGGAATTATG
+        <<<<<<<<<<<<<<<<<<<<   
+
+> Contig133_chr23_3525134_3526502 1223 A G 1.358849
+
+  1 TCCTATTTTGTCCCCAAGTCCCAGGTTCAGGAGCTCCATTAAGTCACAGGTAATTCAGCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GAGAGCCTGCAAAATGGCAACCCCACCTGAGGCACTTTCTTTAAATCAACTGTATCAAGG
+                                                                
+
+121 TAACATTTACACAGAATAAnAAGCACTCATTTTAAAGAAATAGCTTGATGAGTTTAGTCT
+                                                                
+
+181 AATTGTATCTGTGTAGCCACcACACAGTCAAGATa
+               <<<<<<<<<<<<<<<<<<<<<   
+
+> Contig35_chr23_28447813_28449115 70 T A 0.163155
+ DdeI
+  1 CCTTTTCTCTCCATTCACACCCCATCCTTCTTnGTCCCTCCAAAACTCCTAGCTGTTTCC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CATTTTAGGGTCTCTGCATTTGCTGTTCCAAGCAAGCTCTGCCCCCAAATGATCTGGTGG
+                                                    <<<<<<<<<<<<
+
+121 CTTGTTCCCTC
+    <<<<<<<<   
+
+> Contig50_chr24_22515247_22516072 761 C T 0.190253
+
+  1 GGGGAGACCCTGATCCATCCTCATTCTACTGCTTCCCGATGTCCCAGGCCTGCtGTTCTA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CACGAAAGCCCATTCngTGCCTCCAAGTAGGGAGCAGAAGGGAAGAACACA
+                                <<<<<<<<<<<<<<<<<<<<   
+
+> Contig84_chr24_29196623_29199644 466 C T 0.214603
+
+  1 TAATTGGACaCTTTTGACTTGCGTTTCATGATTTTGCCCCATTTTTCTCTGCnGCAATTT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GGCCAGTGATTCCTGTCTTTCCCTCTATTATCCACTCTGATTAACTCAGCTGCACCTGCC
+                                                         <<<<<<<
+
+121 AGCCTTTATTCCTGCA
+    <<<<<<<<<<<<<   
+
+> Contig144_chr25_4011170_4013134 541 A G 0.086768
+
+  1 GGTGAGGtGGAGAGTGGCAAGAGCTGTTGGTGGGCGTGTGTGAGCCAGAGGGCAAGCGGG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GAGCTCCTAACTGCAAcATCCAGGGGCAGTCGATACTGCCTGGGAAGTAGGAACTGCTCT
+                                                                
+
+121 GGAGCATGAGTGGAATTAGCAGATGGATAACAAGGGAGnGCGAcaAGGGCATTTTATGAA
+                                                                
+
+181 GATGGAACACCTTGGAAAAGATCAGATTGCTGAAGCATCCGTTTGAGAAAGCACAGATAA
+                                                                
+
+241 CTTTTCAAATCTGAAGAGGAGGGACATGACGGGGAGATGAGACTAG
+                           <<<<<<<<<<<<<<<<<<<<   
+
+> Contig103_chr25_38891221_38892140 407 G A 0.166581
+ Alw44I,Bsp1286I,CfoI,HhaI
+  1 GTGGGATGCAGGTGCTGTGTCTACCCACTTCTTCCGGGGACCAGCCCCTCTCTGGCCACA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CCCACTTCCTCTCATCTTAACTGTCCAAATTTGCTGACTCAAAGGGATGTGTGTGCGTAT
+                                                                
+
+121 GTGTGTGTGTGCnCACATGTGCATGCATGTGTTTTGTGTCTTTCACTCTCAAAATTATTT
+                                                                
+
+181 AAGTTCCCATGGCCCTGCCCTGATTTATCTCCCAAAT
+                  <<<<<<<<<<<<<<<<<<<<   
+
+> Contig204_chr26_4311195_4311778 170 C T 0.085422
+
+  1 AACAGAAGCCTGTCCCAGCTACAGGAGGGAAACGGGCTCGGCAgCgTGGCACTGCCTCAC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 tGTCACCCCCAGGAGCCCGGGAAGCCGTCCCTTGAnTCCTCAGTGACGGTGACCATGACC
+                                                                
+
+121 AAGGGCAGTAACTCTGCCCGCGGGACACAGCgCTCCTGCTCCCgACgGAAGGTGTGCCGG
+                                                                
+
+181 CCACAGAGCGCACGTTGgGGCCgAGTTCAGGGGCAGAtAGGAAGACACAGGa
+                                 <<<<<<<<<<<<<<<<<<<<   
+
+> Contig146_chr26_26622638_26623906 574 G A 0.318381
+
+  1 TTTCTGAGATCACACAGCCAGGAAATGGGGAGCCAAGATTTGAACCCAAGCCTGTCTGAT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TCTGGAACCTGCACCAGAaCCACACCTCAgCCCTGCCTTCCCTTGGAAGGCTtACcnTTG
+                                                                
+
+121 TGCCTGGAACATAGTAAGTGCTCAAAAAATGGTcTAAATCATCATCGTGTaTTAGGAAGC
+                                                                
+
+181 CTGGGTCCACACCCCTTGGGCTGTGGAGTGTCTTGAG
+                  <<<<<<<<<<<<<<<<<<<<   
+
+> Contig135_chr27_6853874_6854079 158 C T 0.060201
+
+  1 AAAGGGTTCCAAGTTACGGGATTcATACGGGAAGGCTCCcGAAATAGAAATGATCGTTGT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 AACATGGGGAGATTTGTCAGGGACAgACAnGAACTGTCTTATAAAATGCAGCCCAGTTTT
+                                                   <<<<<<<<<<<<<
+
+121 CTTcTTGAGA
+    <<<<<<<   
+
+> Contig64_chr27_34654435_34654621 132 C A 0.296658
+ RsaI
+  1 AAATTGGTCAGTGACTGGGAACACGTTCCGAACCAGCTCCGTGGATTTACAAGTTTTCCA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GTAGAAACGGTCCTCCAAGCGTnCCTGAAGTGCTcATTCATTACCGCAAGGTG
+                                  <<<<<<<<<<<<<<<<<<<<   
+
+> Contig131_chr28_6481806_6483783 138 C T 0.387007
+
+  1 AGaCCCTCGAAATTCTCCAGTTGTCAAATTCTTCCCCAGTnTCTGCTTGAgAGATTTTCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CCTAGCTTCAGAGCCTTAACTACAGAATACTGAGTCTTTGCTCAAGCAGCGGCTCAACAC
+                                                                
+
+121 ATAACCCCTAAGCTGCCAAGGCTTTTCTCCCCCAAGACTTTGTTTCCTTCCACGAAACCT
+                                           <<<<<<<<<<<<<<<<<<<< 
+
+181 TC
+      
+
+> Contig60_chr28_30197166_30197364 92 T C 1.139483
+
+  1 AATCAGAAAGTCCCAGAGGTGGAGACTACAGCAAATTACCTGACATTTGTCTTTGATGCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 nTATGTAAAAACTCTGGGTGGCAGGAAAGCACTTAAATTTACCTTGTAGAGCTTTGCTAC
+                                                                
+
+121 CCAATAGAACATTCTGTGCTGATGGGAATG
+           <<<<<<<<<<<<<<<<<<<<   
+
+> Contig29_chr29_4726399_4727143 559 A T 3.113735
+
+  1 CTTTTTGTGGCCAAAAgTGACAACATAATTTTCAAAATGGGAAACGATGATTACAAATGA
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 GTGGAACACATGTTACAGTGGCAAGATGTGTGAGCAATGCTGATTCAGGGTATAATGGGT
+                                                                
+
+121 TGGTTGTAAAACAAATATGAGTTTCTAATATTCGGGCATATTAAACAATCTAAGTTnTAC
+                                                                
+
+181 AAAATCTCTCTTGTACTATTTATTGGGTAACTACTAGTAAAGGAAAGGCCTAATAGGCTG
+                                                            <<<<
+
+241 TTCCCATAAAAAGAAGCTAC
+    <<<<<<<<<<<<<<<<<   
+
+> Contig1_chr30_5992217_5993068 106 C T 1.078937
+
+  1 CTTGAGACAGCCATGGTGTTTGTTTCTACCTTTCCTCTAAGAAGACACCTGTATACAGAT
+       >>>>>>>>>>>>>>>>>>>>>                                    
+
+ 61 ATTCCnTGTGACTCACACTCATCCTCATAGACATCCCCAGTATCATTTCTGTGAAGCCTT
+                                                           <<<<<
+
+121 CCTTGACATTTTCCAACA
+    <<<<<<<<<<<<<<<   
+
+> Contig165_chr30_25804389_25804926 190 T C 0.328844
+
+  1 CCGCTTGTCCCGCTCTGTGATTTAGATGTTTCACGAGCGGGAAGGTGGGGGGATTGATTC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TCTnATTCGCGCTTCTCCGCCCAGGCTGCGCATTAGAATCACTTGGGGAGCTTTAAAACA
+                                               <<<<<<<<<<<<<<<<<
+
+121 TGCCAG
+    <<<   
+
+> Contig38_chr31_5164423_5166573 2074 C T
+
+> Contig17_chr31_26433828_26434459 498 T C 4.814134
+
+  1 CCATGCAATCTCATGCAATGGTTAAAAGCAATGAATTTGTTGTACTCAAAATACCTTGGT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TGGGTATTAAAAAGTTTTnAGTAAACATAATGAAAAAAATAGAAGTAGATAAGATCAACA
+                                                      <<<<<<<<<<
+
+121 CACCTACACGAATTA
+    <<<<<<<<<<<<   
+
+> Contig9_chr32_19479532_19479735 12 A G
+
+> Contig30_chr32_25902721_25905783 208 C G 0.322381
+ AluI,HindIII
+  1 TTCACAGTGTTCTCCCAAGGCACAAATAGAATGCTCAGTCATTGGTTATTTTACTTAGAT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ATTCCTTTCTAAATATAGACTTACCTGTTATTTTTTCCTATACTAATAACATTCAAATTA
+                                                                
+
+121 TCTATGTGTACAATAATAAACACTAGGCATAACTGTATCTCAGTACCAATTTCCTTAGAA
+                                                                
+
+181 GGTAAAnCTTATTTCAGTCAAGGTCTAGGCCAAGCATTGA
+                     <<<<<<<<<<<<<<<<<<<<   
+
+> Contig18_chr33_22207246_22209159 1363 G T 2.559961
+
+  1 ACGACTGCCCTTTTTCCCTCTGTCTCTATTTCTCCTACACACACACACACACACACACAC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ACACAgAGTGAGCTTTTAGCAACCTTGTTTAACATTTGGAAAGGAATAGCTGACACAACA
+                                                                
+
+121 GAGGGGGGnATAAGTAAATACATTGCATGGCTGTATATAATTGAACATTCTTCAAATTCT
+                                                                
+
+181 TTAAACAGAAATTTCAGTACCATGGAGATCCTTGAAAT
+                 <<<<<<<<<<<<<<<<<<<<<<   
+
+> Contig170_chr33_26189421_26189940 292 T C 0.307330
+
+  1 TTCAAGTTCCCCTTTTGTGCCTTCACACACTTGTnTTTATGGTCTCTATTTAAAAAAAAg
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 AGACAAAAACCTCTTAATAAATTTAGGAAGTAGTCCTCACTCTTTAAAGGAATTGTGCTT
+                                                                
+
+121 AAAGCAGCAGCTCTTCCTCACTCCTTG
+        <<<<<<<<<<<<<<<<<<<<   
+
+> Contig113_chr34_13341080_13341643 236 C T 0.412222
+ Hsp92II,NcoI,StyI
+  1 AAATGCTCATTTCCCAACATTCAGTGAAATGCCTGATGACTAATCCTTGCTCCaTGGATC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CTGGGGGTCCCGTGCACAGAAGAGGGTGAGGTCTCTGCCAnGGACTGTGTCCCTGGAAAT
+                                                  <<<<<<<<<<<<<<
+
+121 GACAGGGCA
+    <<<<<<   
+
+> Contig152_chr34_31794848_31795540 242 G A 2.779642
+
+  1 ATAGGAAATAAAACCCCAGCTCTCAGAGnAAAGCAAAATACTTTTAAAAAGATGAAAAAG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 CATCAGAGCTATGAGACACAGAAGATCTAGAGTATAATTGTGTTTTTGTATAGAAGGGAG
+                                                                
+
+121 AGAAGGAATGCTGCAGGAGCCACATTTCTCCATCTA
+                 <<<<<<<<<<<<<<<<<<<<   
+
+> Contig47_chr35_3666773_3667898 348 G T 0.234571
+
+  1 GCTGCCTCAGCAGTTATCTTGGGTTCTGTTAACTTTGACACACCTTTCACGAAGAAATTC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TTCATTGCAGTGCTTGAACAATCTGATTGTTCAATCTGATTnGATTCTATTTCTTGCTGA
+                                                                
+
+121 GATAATGTTCTAGCACCTTCTCTGTGGATCCCCTTAT
+                  <<<<<<<<<<<<<<<<<<<<   
+
+> Contig74_chr35_25394343_25394813 303 A T 4.297720
+
+  1 AGTTTCCCCAAATGTTCATGATTAACCAGGTAAACTGAAGATTAACCTTAAATATATATT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TCTTTGAGTCATTATAATTAAATTAACTAGGTTGTTTTCAAATATACTAATAATAGAAAC
+                                                                
+
+121 TGAAAAAATAATCCAAGTAATATaTCTGAATTGAAAAAAAAAGTAAGGCCATTGTATAAA
+                                                                
+
+181 ACAACTGAAAGTTTTTGGAnAAGGTACTATTTTTAATTTACAGTGCATTTTTTTAATCGG
+                                                                
+
+241 CATTTCAAATAATAACTTCAATCaCACACACAAAAATAAACCAAATCAACTGCATGTAAG
+                                             <<<<<<<<<<<<<<<<<<<
+
+301 GGaAGT
+    <<<   
+
+> Contig5_chr36_4562983_4563634 343 C T 1.168507
+
+  1 ATATGAATGGTGGTGATGGATTCAGCATCTTGACTCTTTTTCAACTATGTCAAGATTTGC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ACTGGATCTTGTCTAAAGTCACTCTTCTAGGGGAAGTCAAAGAGACTGGGTCaGTCCtCA
+                                                                
+
+121 AGATAcGATGTAAGCAGGTAAGATAGCACTATAGTAGGTCTTCTTGTCATGGTGAGTCAA
+                                                                
+
+181 TAACCATTCAATATTCTTTCnACCTACTCTTTACCTGCTCAATCAAGGTAGGGGTC
+                                     <<<<<<<<<<<<<<<<<<<<   
+
+> Contig133_chr36_32954045_32955409 136 A G 3.772017
+ TaqI
+  1 ATTAAATGAAAACAGTGTCAGGCAATAAGATGTATTAAGTACAGTATGCCTGAGGATATA
+       >>>>>>>>>>>>>>>>>>>>>>                                   
+
+ 61 ATATTAAACACAGATTCTGCTGTTACTATCnAAGTGGATATTAAAATAACAGTGCTACTT
+                                                                
+
+121 TGAGGGTAATGCTACTTTGGAGAATATTTTCTAATAAGCTCACCaTAAAATGACggATAA
+                                         <<<<<<<<<<<<<<<<<<<<   
+
+> Contig53_chr37_6665763_6665919 116 C T 10.874746
+ BstOI
+  1 AGTCCTCATGTTGTACTTTACCTCACCTGAATTTACTCATCtGATAGTTGGAAATTTGTA
+       >>>>>>>>>>>>>>>>>>>>>>>>                                 
+
+ 61 TCCATTGCCCATCtTCACCACCCCATGTCnCTGGAAACCAACAAtCTGTTCTCTGTATGa
+                                       <<<<<<<<<<<<<<<<<<<<<<<<<
+
+121 CTT
+       
+
+> Contig2_chr37_31197993_31198256 182 C T 0.594606
+
+  1 CTCTCACCACATGGAGAATCCTGTATGTTCAGCTGTATGACGTGGGGGGAACGTCAGAGC
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TCAGTTTCATAGCAGTCAGCTCCATGTTATGGGTTCAAgAnGAAAACAGGTGGCAGGCtT
+                                                                
+
+121 GCCACAGCCTCCCTCAGGGGTGgCCTTGACAGATAAAcGT
+                     <<<<<<<<<<<<<<<<<<<<   
+
+> Contig7_chr38_12217200_12218387 1163 A T
+
+> Contig265_chrX_2689247_2689484 114 C G 9.232233
+
+  1 CTTAGAGAATTCCCTGATTCACTGAGTTAAATTATTACCAAATCTGATAATAATAAAAGA
+       >>>>>>>>>>>>>>>>>>>>>>>                                  
+
+ 61 AGTAATTACAGATCAATAATTAATCTATATGTCTGAATACATTTTAATAAGTCCnAcTCA
+                                                                
+
+121 ACAATATGCTGACAAAACAATACATCTTGTCT
+          <<<<<<<<<<<<<<<<<<<<<<<   
+
+> Contig113_chrX_26287829_26288398 385 C T 0.077485
+
+  1 AAAGCCGTAACAGTCGCTAGGAGAATCATAATTTTAAGCTTTGTGTGTCCCGGGcTTGAG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 TCCCTCAGGAGTAGTTAGATGCGGCCTTAAATTCTCcCAGTAAATTCACnTTGACGGCCT
+                                                                
+
+121 ATTTTTGACCTGGGGGCACACGCTGCTATACACTCTAGCCACCTCTGATCCTCTGGCCTC
+                                                                
+
+181 CTCTGTTACAATGACAGAAACGACAGAAGCATTTCTTTAAAATAAGTCCCAGTACGTGCA
+                                                                
+
+241 CACAAACGTTCAGGGCAGCCTTCTCCATAAACGGCACGAAATGGC
+                          <<<<<<<<<<<<<<<<<<<<   
+
+> Contig90_chrX_57430715_57431566 548 C T 0.153995
+ EcoRV
+  1 CTCATTCCCAGCTACCTCCACCTCTATACCAACCCCTAGTTCCTGTACATCCCTGCTTCT
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ATAGGAAATCTTCCTGGTGTTGATATnATTCCCAAGGTCAGGCTGTCCTCCTAGCTCCCT
+                                                                
+
+121 CTCCTCATCTGCATCAAGTCCTCCAAACTGGGCAGTAGAC
+                     <<<<<<<<<<<<<<<<<<<<   
+
+> Contig133_chrX_84833782_84834125 182 G A 0.277794
+
+  1 CACCAGAGTGCAATCGAGAACCATCTGATCACAGAACCATAGAAAAGATTGCTGTACAAG
+       >>>>>>>>>>>>>>>>>>>>                                     
+
+ 61 ACTTAGGAACTCATTCTGTTCAGGATGGAGAAGCTGATGCCCAAAAAGGGAAAGGAACTT
+                                                                
+
+121 AACCAAAGTCCATACAnTATCAACTCTACACATAAAGGAAGGGAGTGGAGGGAGCAGTAA
+                                                                
+
+181 GACCAGAGATATAGACCCCAGTGAGGAGGCTGTGAGCTCCTG
+                       <<<<<<<<<<<<<<<<<<<<   
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/find_intervals/find_intervals.interval
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/find_intervals/find_intervals.interval	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,1 @@
+chr2	9817960	67331624	1272.2000
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/map_ensembl_transcripts/map_ensembl_transcripts.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/map_ensembl_transcripts/map_ensembl_transcripts.tabular	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,150 @@
+ENSCAFT00000000001	476153	cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+ENSCAFT00000000144	483960	N
+ENSCAFT00000000160	610160	N
+ENSCAFT00000000215	U	N
+ENSCAFT00000000233	483973	N
+ENSCAFT00000000365	474414	cfa00450=Selenocompound metabolism.cfa00970=Aminoacyl-tRNA biosynthesis
+ENSCAFT00000000507	484023	N
+ENSCAFT00000000517	476233	N
+ENSCAFT00000000674	611986	N
+ENSCAFT00000000724	609478	N
+ENSCAFT00000000760	U	N
+ENSCAFT00000000762	U	N
+ENSCAFT00000001047	475067	cfa00240=Pyrimidine metabolism.cfa00410=beta-Alanine metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa00983=Drug metabolism - other enzymes.cfa01100=Metabolic pathways
+ENSCAFT00000001052	U	N
+ENSCAFT00000001063	481999	N
+ENSCAFT00000001076	U	N
+ENSCAFT00000001104	607591	N
+ENSCAFT00000001141	484064	N
+ENSCAFT00000001146	475076	N
+ENSCAFT00000001204	481203	N
+ENSCAFT00000001219	474465	N
+ENSCAFT00000001250	481729.481731	cfa04145=Phagosome.cfa04514=Cell adhesion molecules (CAMs).cfa04612=Antigen processing and presentation.cfa04672=Intestinal immune network for IgA production.cfa04940=Type I diabetes mellitus.cfa05140=Leishmaniasis.cfa05145=Toxoplasmosis.cfa05150=Staphylococcus aureus infection.cfa05152=Tuberculosis.cfa05164=Influenza A.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05310=Asthma.cfa05320=Autoimmune thyroid disease.cfa05322=Systemic lupus erythematosus.cfa05323=Rheumatoid arthritis.cfa05330=Allograft rejection.cfa05332=Graft-versus-host disease.cfa05416=Viral myocarditis
+ENSCAFT00000001352	482026	cfa00565=Ether lipid metabolism
+ENSCAFT00000001363	475084	cfa03022=Basal transcription factors
+ENSCAFT00000001421	484096	N
+ENSCAFT00000001523	475088	N
+ENSCAFT00000001575	481744	cfa04141=Protein processing in endoplasmic reticulum
+ENSCAFT00000001587	482035	N
+ENSCAFT00000001597	609411	N
+ENSCAFT00000002056	610014	N
+ENSCAFT00000002100	U	N
+ENSCAFT00000002110	481249	N
+ENSCAFT00000002175	476310	N
+ENSCAFT00000002259	484151	N
+ENSCAFT00000002460	481785	N
+ENSCAFT00000002537	U	N
+ENSCAFT00000002577	484157	N
+ENSCAFT00000002578	608906	N
+ENSCAFT00000002660	U	N
+ENSCAFT00000002792	474523	N
+ENSCAFT00000002849	475216	N
+ENSCAFT00000002999	U	N
+ENSCAFT00000003163	474921	cfa03040=Spliceosome
+ENSCAFT00000003223	474925	N
+ENSCAFT00000003307	609995	N
+ENSCAFT00000003515	482316	N
+ENSCAFT00000003560	U	N
+ENSCAFT00000003644	484216	cfa00970=Aminoacyl-tRNA biosynthesis
+ENSCAFT00000003824	475249	N
+ENSCAFT00000003840	482333	N
+ENSCAFT00000004092	474960	N
+ENSCAFT00000004103	484298	N
+ENSCAFT00000004208	481637	N
+ENSCAFT00000004253	100534006.100534007.474588	N
+ENSCAFT00000004311	482346	N
+ENSCAFT00000004464	481892	N
+ENSCAFT00000004511	481893	N
+ENSCAFT00000004609	611755	N
+ENSCAFT00000004673	611817	N
+ENSCAFT00000004726	610047	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa03430=Mismatch repair.cfa03440=Homologous recombination.cfa05166=HTLV-I infection
+ENSCAFT00000004799	U	N
+ENSCAFT00000004933	482382	cfa04621=NOD-like receptor signaling pathway.cfa05133=Pertussis
+ENSCAFT00000004993	474995	cfa03008=Ribosome biogenesis in eukaryotes
+ENSCAFT00000005126	U	N
+ENSCAFT00000005142	606804	N
+ENSCAFT00000005225	475647	N
+ENSCAFT00000005323	U	N
+ENSCAFT00000005467	U	N
+ENSCAFT00000005496	481925	N
+ENSCAFT00000005518	492302	cfa02010=ABC transporters.cfa04971=Gastric acid secretion.cfa04972=Pancreatic secretion.cfa04976=Bile secretion
+ENSCAFT00000005653	403417	cfa04145=Phagosome.cfa04620=Toll-like receptor signaling pathway.cfa05132=Salmonella infection.cfa05133=Pertussis.cfa05134=Legionellosis.cfa05140=Leishmaniasis.cfa05142=Chagas disease (American trypanosomiasis).cfa05144=Malaria.cfa05145=Toxoplasmosis.cfa05146=Amoebiasis.cfa05152=Tuberculosis.cfa05162=Measles.cfa05164=Influenza A.cfa05323=Rheumatoid arthritis
+ENSCAFT00000005746	476410	cfa00071=Fatty acid metabolism.cfa03320=PPAR signaling pathway.cfa04920=Adipocytokine signaling pathway
+ENSCAFT00000005749	610007	N
+ENSCAFT00000005832	403584	cfa04060=Cytokine-cytokine receptor interaction.cfa04630=Jak-STAT signaling pathway.cfa04672=Intestinal immune network for IgA production.cfa05166=HTLV-I infection.cfa05168=Herpes simplex infection.cfa05323=Rheumatoid arthritis
+ENSCAFT00000005972	475012	N
+ENSCAFT00000006025	482980	N
+ENSCAFT00000006114	483829	N
+ENSCAFT00000006157	475021	N
+ENSCAFT00000006219	483261	cfa04972=Pancreatic secretion.cfa04978=Mineral absorption
+ENSCAFT00000006272	484394	cfa00280=Valine, leucine and isoleucine degradation.cfa00290=Valine, leucine and isoleucine biosynthesis.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
+ENSCAFT00000006453	475893	N
+ENSCAFT00000006479	U	N
+ENSCAFT00000006507	484622	cfa03030=DNA replication.cfa04110=Cell cycle
+ENSCAFT00000006669	476094	N
+ENSCAFT00000006689	475897	N
+ENSCAFT00000006827	U	N
+ENSCAFT00000006891	610021	N
+ENSCAFT00000007130	485445	cfa04020=Calcium signaling pathway.cfa04080=Neuroactive ligand-receptor interaction
+ENSCAFT00000007145	607961	N
+ENSCAFT00000007244	476781	N
+ENSCAFT00000007375	403767	cfa04977=Vitamin digestion and absorption
+ENSCAFT00000007440	482516	N
+ENSCAFT00000007467	485576	N
+ENSCAFT00000007484	609336	N
+ENSCAFT00000007527	607108	N
+ENSCAFT00000007553	487123	cfa03450=Non-homologous end-joining.cfa05340=Primary immunodeficiency
+ENSCAFT00000007697	475382	N
+ENSCAFT00000007703	477019	cfa03430=Mismatch repair.cfa03460=Fanconi anemia pathway.cfa05200=Pathways in cancer.cfa05210=Colorectal cancer.cfa05213=Endometrial cancer
+ENSCAFT00000007747	U	N
+ENSCAFT00000007774	477021	cfa04510=Focal adhesion.cfa04512=ECM-receptor interaction.cfa04514=Cell adhesion molecules (CAMs).cfa04810=Regulation of actin cytoskeleton.cfa05410=Hypertrophic cardiomyopathy (HCM).cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC).cfa05414=Dilated cardiomyopathy
+ENSCAFT00000007776	U	N
+ENSCAFT00000007779	478007.478008	cfa03060=Protein export.cfa04141=Protein processing in endoplasmic reticulum.cfa04145=Phagosome
+ENSCAFT00000007859	483010	N
+ENSCAFT00000007951	U	N
+ENSCAFT00000007959	482810.611087	N
+ENSCAFT00000008012	485173	N
+ENSCAFT00000008063	484489	N
+ENSCAFT00000008142	476128	N
+ENSCAFT00000008198	612489	N
+ENSCAFT00000008413	U	N
+ENSCAFT00000008540	483021	N
+ENSCAFT00000008586	484499	N
+ENSCAFT00000008588	U	N
+ENSCAFT00000008673	478018	N
+ENSCAFT00000008678	485188	N
+ENSCAFT00000008728	U	N
+ENSCAFT00000008769	485523	cfa02010=ABC transporters.cfa04976=Bile secretion
+ENSCAFT00000008831	475398	N
+ENSCAFT00000009074	485769	cfa04330=Notch signaling pathway
+ENSCAFT00000009114	483354	N
+ENSCAFT00000009614	475416	N
+ENSCAFT00000009698	486001	N
+ENSCAFT00000009710	486002	N
+ENSCAFT00000010094	486223	cfa00230=Purine metabolism.cfa00240=Pyrimidine metabolism.cfa01100=Metabolic pathways.cfa03030=DNA replication.cfa03410=Base excision repair.cfa03420=Nucleotide excision repair.cfa05166=HTLV-I infection
+ENSCAFT00000010141	482857	cfa04360=Axon guidance
+ENSCAFT00000010439	610992	N
+ENSCAFT00000010496	415126	cfa04380=Osteoclast differentiation.cfa04916=Melanogenesis.cfa05200=Pathways in cancer.cfa05218=Melanoma
+ENSCAFT00000010516	U	N
+ENSCAFT00000010531	484693	N
+ENSCAFT00000010559	483405	N
+ENSCAFT00000010593	U	N
+ENSCAFT00000010616	474176	cfa03450=Non-homologous end-joining.cfa04110=Cell cycle
+ENSCAFT00000010630	486770	N
+ENSCAFT00000010829	486944	N
+ENSCAFT00000010865	U	N
+ENSCAFT00000010931	485368	N
+ENSCAFT00000010977	U	N
+ENSCAFT00000010988	482891	cfa04145=Phagosome
+ENSCAFT00000011187	475441	N
+ENSCAFT00000011380	U	N
+ENSCAFT00000011397	475750	cfa04110=Cell cycle.cfa04114=Oocyte meiosis.cfa04120=Ubiquitin mediated proteolysis.cfa04914=Progesterone-mediated oocyte maturation.cfa05166=HTLV-I infection
+ENSCAFT00000011721	475621	N
+ENSCAFT00000011730	486534	N
+ENSCAFT00000011771	477193	N
+ENSCAFT00000011789	609978	N
+ENSCAFT00000011968	488881	cfa00760=Nicotinate and nicotinamide metabolism.cfa04146=Peroxisome
+ENSCAFT00000012081	478082	cfa04621=NOD-like receptor signaling pathway
+ENSCAFT00000012133	611998	N
+ENSCAFT00000012159	484609	N
+ENSCAFT00000012254	U	N
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/modify_snp_table/modify.gd_snp
--- a/test-data/test_out/modify_snp_table/modify.gd_snp	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,338 +0,0 @@
-Contig161_chr1_4641264_4641879	115	C	T	73.5	chr1	4641382	C	6	0	2	45	8	0	2	51	15	0	2	72	5	0	2	42	6	0	2	45	10	0	2	57	Y	54	0.323	0
-Contig20_chr1_21313469_21313570	66	C	T	54.0	chr1	21313534	C	4	0	2	39	4	0	2	39	5	0	2	42	4	0	2	39	4	0	2	39	5	0	2	42	N	1	+99.	0
-Contig86_chr1_30984450_30985684	670	C	T	365.0	chr1	30985133	C	9	0	2	54	10	0	2	57	13	0	2	66	3	0	2	36	9	0	2	54	7	0	2	48	Y	145	0.031	0
-Contig5_chr1_32562160_32563940	1215	G	T	163.0	chr1	32563356	G	17	0	2	78	19	0	2	84	20	0	2	87	14	0	2	69	12	0	2	63	10	0	2	57	Y	17	0.251	0
-Contig110_chr1_33385093_33386888	510	C	T	270.0	chr1	33385587	A	14	0	2	69	11	0	2	60	19	0	2	84	11	0	2	60	10	0	2	57	13	0	2	66	Y	13	0.126	0
-Contig100_chr1_33562920_33564288	743	C	T	178.0	chr1	33563655	C	6	0	2	45	10	0	2	57	8	0	2	51	5	0	2	42	13	0	2	66	7	0	2	48	Y	13	0.090	3
-Contig7_chr1_37302355_37302489	97	A	G	59.2	chr1	37302452	G	3	0	2	36	8	0	2	51	5	0	2	42	8	0	2	51	7	0	2	48	6	0	2	45	N	56	2.812	0
-Contig62_chr1_41880715_41882180	1078	T	G	57.6	chr1	41881785	T	14	0	2	69	15	0	2	72	16	0	2	75	13	0	2	66	8	0	2	51	10	0	2	57	Y	21	0.477	0
-Contig47_chr1_48409178_48409384	37	C	T	134.0	chr1	48409215	T	5	0	2	42	6	0	2	45	8	0	2	51	9	0	2	54	4	0	2	39	6	0	2	45	N	66	+99.	0
-Contig119_chr1_49647683_49650077	1618	C	A	99.7	chr1	49649276	A	8	0	2	51	11	0	2	60	10	0	2	57	9	0	2	54	10	0	2	57	14	0	2	69	Y	16	0.166	0
-Contig21_chr1_60697952_60699446	307	G	A	51.9	chr1	60698265	G	12	0	2	63	9	0	2	54	4	0	2	39	6	0	2	45	9	0	2	54	4	0	2	39	Y	98	0.507	0
-Contig131_chr1_62319542_62320564	169	C	G	103.0	chr1	62319709	C	12	0	2	63	12	0	2	66	14	0	2	69	12	0	2	63	9	0	2	54	9	0	2	54	Y	73	0.307	1
-Contig14_chr1_63450425_63450680	101	T	A	102.0	chr1	63450530	T	8	0	2	51	10	0	2	57	18	0	2	81	8	0	2	51	8	0	2	34	8	0	2	51	N	99	1.085	0
-Contig83_chr1_63869778_63869942	40	T	C	23.7	chr1	63869819	C	5	0	2	42	7	0	2	48	2	0	2	33	4	0	2	39	6	0	2	48	4	0	2	39	N	654	1.364	0
-Contig30_chr1_64702572_64703138	178	A	T	117.0	chr1	64702750	T	10	0	2	57	10	0	2	57	20	0	2	87	21	0	2	90	6	0	2	45	12	0	2	63	Y	50	3.872	0
-Contig101_chr1_69868406_69868872	287	G	A	14.6	chr1	69868689	G	13	0	2	66	17	0	2	78	10	0	2	57	8	0	2	51	7	0	2	48	8	0	2	51	N	137	0.305	0
-Contig35_chr1_74482577_74482791	170	G	A	45.4	chr1	74482751	A	3	0	2	36	4	0	2	39	13	0	2	66	2	0	2	33	5	0	2	42	2	0	2	33	N	20	+99.	3
-Contig49_chr1_83865731_83865944	85	G	A	34.1	chr1	-1	N	4	0	2	39	4	0	2	39	8	0	2	51	2	0	2	33	5	0	2	42	4	0	2	39	N	-1	1.485	0
-Contig129_chr1_117547123_117548666	926	G	A	126.0	chr1	117548059	G	19	0	2	84	9	0	2	54	11	0	2	60	10	0	2	57	12	0	2	63	11	0	2	60	Y	64	0.049	0
-Contig7_chr1_125154638_125154844	190	G	T	130.0	chr1	125154818	A	5	0	2	42	4	0	2	39	7	0	2	48	2	0	2	33	7	0	2	48	4	0	2	39	N	33	+99.	0
-Contig222_chr2_9817738_9818143	220	C	T	888.0	chr2	9817960	C	17	0	2	78	12	0	2	63	20	0	2	87	8	0	2	51	11	0	2	60	12	0	2	63	Y	76	0.093	1
-Contig47_chr2_25470778_25471576	126	G	A	888.0	chr2	25470896	G	12	0	2	63	14	0	2	69	14	0	2	69	10	0	2	57	18	0	2	81	13	0	2	66	N	11	0.289	1
-Contig10_chr2_40859744_40860534	637	G	A	888.0	chr2	40860397	A	3	0	2	36	3	0	2	36	2	0	2	33	7	0	2	48	6	0	2	45	8	0	2	51	Y	42	1.435	0
-Contig52_chr2_41421981_41422725	604	C	A	888.0	chr2	41422583	A	17	0	2	78	18	0	2	81	14	0	2	69	17	0	2	78	12	0	2	63	14	0	2	69	Y	44	0.882	0
-Contig94_chr2_43869105_43870358	220	G	A	888.0	chr2	43869333	G	12	0	2	63	18	0	2	81	11	0	2	60	15	0	2	72	12	0	2	63	13	0	2	66	Y	1	0.156	0
-Contig34_chr2_48444129_48444939	695	C	T	134.0	chr2	48444828	C	14	0	2	69	8	0	2	51	16	0	2	75	17	0	2	78	9	0	2	54	15	0	2	72	Y	161	0.375	0
-Contig6_chr2_56859179_56859956	671	T	C	999.9	chr2	56859851	T	15	0	2	72	18	0	2	81	20	0	2	90	19	0	2	84	19	0	2	84	24	0	2	99	N	28	5.308	1
-Contig115_chr2_61631913_61632510	310	G	T	999.3	chr2	61632216	G	7	0	2	48	9	0	2	54	7	0	2	48	11	0	2	60	10	0	2	57	10	0	2	57	N	13	0.184	0
-Contig31_chr2_67331584_67331785	39	C	T	999.0	chr2	67331623	C	11	0	2	60	10	0	2	57	7	0	2	48	9	0	2	54	2	0	2	33	4	0	2	39	N	110	0.647	1
-Contig92_chr2_75906683_75907774	773	T	C	85.4	chr2	75907438	C	12	0	2	63	12	0	2	63	17	0	2	78	8	0	2	51	8	0	2	51	13	0	2	66	Y	93	0.166	0
-Contig163_chr2_76402959_76404830	221	C	T	127.0	chr2	76403181	C	4	0	2	42	10	0	2	57	9	0	2	54	11	0	2	60	7	0	2	48	9	0	2	54	Y	54	0.178	1
-Contig59_chr2_85243022_85243758	506	G	A	96.3	chr2	85243509	T	9	0	2	54	11	0	2	60	12	0	2	63	14	0	2	69	10	0	2	57	7	0	2	48	Y	6	0.459	0
-Contig56_chr3_17326225_17327548	387	G	C	91.2	chr3	17326591	G	14	0	2	69	13	0	2	66	15	0	2	72	15	0	2	72	13	0	2	66	12	0	2	63	Y	20	0.225	3
-Contig108_chr3_46210055_46210874	367	A	G	21.0	chr3	46210423	A	19	0	2	84	10	0	2	57	16	0	2	75	14	0	2	69	20	0	2	87	11	0	2	60	N	236	0.028	1
-Contig16_chr3_47113407_47114449	322	G	A	105.0	chr3	47113713	G	13	0	2	66	17	0	2	78	15	0	2	72	6	0	2	45	11	0	2	60	11	0	2	60	Y	114	0.132	5
-Contig3_chr3_47564810_47565251	262	T	G	112.0	chr3	47565104	T	14	0	2	69	16	0	2	75	20	0	2	87	10	0	2	57	9	0	2	54	8	0	2	51	Y	24	0.073	1
-Contig35_chr3_49662401_49662929	270	A	T	96.1	chr3	49662652	A	14	0	2	69	11	0	2	60	23	0	2	96	13	0	2	66	12	0	2	63	11	0	2	60	Y	36	3.583	2
-Contig97_chr3_49820354_49821631	1069	G	A	44.1	chr3	49821402	G	9	0	2	54	9	0	2	54	6	0	2	45	10	0	2	57	5	0	2	42	8	0	2	51	N	6	0.201	2
-Contig25_chr3_53260697_53262560	402	G	A	211.0	chr3	53261095	G	17	0	2	78	14	0	2	69	15	0	2	75	12	0	2	63	14	0	2	69	12	0	2	63	Y	116	1.033	0
-Contig11_chr3_53992739_53995954	2392	G	A	82.4	chr3	53995143	A	12	0	2	66	11	0	2	60	14	0	2	69	6	0	2	45	11	0	2	60	17	0	2	78	Y	358	0.321	1
-Contig236_chr3_72676275_72676473	128	G	A	278.0	chr3	72676410	G	12	0	2	63	11	0	2	60	13	0	2	66	10	0	2	57	11	0	2	60	8	0	2	51	N	36	0.496	1
-Contig48_chr3_74792236_74792388	63	T	C	111.0	chr3	74792289	-	17	0	2	78	9	0	2	54	9	0	2	54	5	0	2	42	11	0	2	60	9	0	2	54	N	-1	3.528	0
-Contig65_chr3_80727952_80728283	39	T	C	71.2	chr3	80727990	T	7	0	2	48	3	0	2	36	8	0	2	51	6	0	2	45	8	0	2	51	11	0	2	60	N	22	7.078	0
-Contig53_chr3_86407941_86409349	1406	G	A	86.9	chr3	86409317	A	5	0	2	42	5	0	2	42	4	0	2	39	10	0	2	57	8	0	2	51	12	0	2	63	N	14	3.285	1
-Contig13_chr3_92409738_92412300	718	A	G	23.3	chr3	92410450	A	12	0	2	63	16	0	2	75	18	0	2	81	13	0	2	66	22	0	2	93	7	0	2	48	Y	23	0.224	2
-Contig134_chr4_12145648_12148225	1326	C	T	164.0	chr4	12146961	C	9	0	2	54	8	0	2	51	7	0	2	48	3	0	2	36	5	0	2	42	5	0	2	42	Y	4	0.080	1
-Contig88_chr4_15557471_15557833	268	A	G	145.0	chr4	15557737	A	6	0	2	45	6	0	2	45	11	0	2	60	9	0	2	54	5	0	2	42	6	0	2	45	Y	46	4.138	0
-Contig53_chr4_18823968_18824478	149	A	G	91.3	chr4	18824115	A	18	0	2	81	15	0	2	72	21	0	2	90	13	0	2	66	9	0	2	54	12	0	2	63	N	51	0.251	0
-Contig19_chr4_26233601_26233991	146	G	C	51.6	chr4	26233744	G	10	0	2	57	8	0	2	51	9	0	2	54	5	0	2	42	9	0	2	54	4	0	2	39	N	41	0.163	3
-Contig78_chr4_28579975_28580134	30	T	G	19.6	chr4	28579994	-	4	0	2	39	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	2	0	2	33	N	33	0.499	0
-Contig16_chr4_30177226_30179725	621	C	T	88.4	chr4	30177859	C	20	0	2	87	13	0	2	66	13	0	2	66	11	0	2	60	8	0	2	51	8	0	2	51	Y	45	0.797	1
-Contig30_chr4_46196500_46197672	1045	A	C	33.4	chr4	46197522	C	16	0	2	75	9	0	2	54	4	0	2	39	7	0	2	48	14	0	2	69	6	0	2	45	Y	43	0.306	0
-Contig2_chr4_47039007_47039323	158	G	C	35.1	chr4	47039160	-	8	0	2	51	9	0	2	54	13	0	2	66	8	0	2	51	10	0	2	60	9	0	2	54	N	0	0.131	0
-Contig17_chr4_61310346_61311158	267	C	T	49.9	chr4	61310604	T	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	14	0	2	69	7	0	2	48	Y	219	0.098	0
-Contig26_chr4_64190783_64191295	64	A	G	162.0	chr4	64190843	A	10	0	2	57	6	0	2	45	20	0	2	87	12	0	2	63	17	0	2	78	7	0	2	48	Y	306	7.428	0
-Contig11_chr4_65500960_65501654	634	T	C	107.0	chr4	65501585	T	13	0	2	66	14	0	2	69	13	0	2	66	13	0	2	66	6	0	2	45	18	0	2	81	Y	10	6.849	0
-Contig38_chr4_67768488_67768982	113	A	G	102.0	chr4	67768598	A	9	0	2	54	8	0	2	51	9	0	2	54	11	0	2	60	10	0	2	57	7	0	2	48	Y	188	3.175	0
-Contig30_chr4_70978564_70979580	596	A	G	164.0	chr4	70979151	A	15	0	2	72	12	0	2	63	20	0	2	87	14	0	2	69	15	0	2	72	15	0	2	72	Y	111	2.458	2
-Contig72_chr4_74225793_74226492	674	A	G	110.0	chr4	74226472	A	5	0	2	42	3	0	2	36	2	0	2	33	3	0	2	36	7	0	2	48	4	0	2	39	Y	115	+99.	1
-Contig32_chr4_75618955_75620254	301	T	C	333.0	chr4	75619257	C	10	0	2	57	8	0	2	51	12	0	2	63	20	0	2	87	12	0	2	63	14	0	2	69	Y	34	0.163	2
-Contig31_chr5_4734956_4736547	1166	C	T	133.0	chr5	4736132	C	14	0	2	69	8	0	2	51	17	0	2	78	4	0	2	39	9	0	2	54	12	0	2	63	Y	1	0.021	0
-Contig30_chr5_15698241_15699076	396	G	T	76.6	chr5	15698633	T	8	0	2	51	9	0	2	54	10	0	2	57	7	0	2	48	11	0	2	60	8	0	2	54	Y	65	0.009	0
-Contig36_chr5_17709244_17710004	373	T	C	281.0	chr5	17709624	T	6	0	2	45	9	0	2	54	7	0	2	48	4	0	2	39	10	0	2	57	4	0	2	39	Y	16	0.131	0
-Contig13_chr5_21881138_21881562	227	A	G	251.0	chr5	21881356	A	11	0	2	60	20	0	2	87	22	0	2	93	10	0	2	57	10	0	2	57	21	0	2	90	Y	182	2.013	0
-Contig5_chr5_23188121_23190168	1841	C	T	141.0	chr5	23189975	C	20	0	2	87	19	0	2	84	22	0	2	93	16	0	2	75	18	0	2	81	14	0	2	69	N	45	0.355	0
-Contig6_chr5_26899813_26900498	97	A	C	88.6	chr5	26899910	A	15	0	2	72	14	0	2	69	27	0	2	108	15	0	2	72	13	0	2	69	12	0	2	63	Y	92	7.370	3
-Contig314_chr5_34019166_34019319	72	C	A	20.1	chr5	-1	N	6	0	2	45	9	0	2	54	4	0	2	39	4	0	2	39	9	0	2	54	5	0	2	42	N	-1	+99.	4
-Contig147_chr5_38980258_38980559	221	C	T	40.8	chr5	38980477	C	15	0	2	72	15	0	2	72	19	0	2	84	10	0	2	57	12	0	2	63	20	0	2	87	Y	11	4.576	0
-Contig115_chr5_48119079_48120169	151	C	T	78.3	chr5	48119234	C	17	0	2	78	10	0	2	57	14	0	2	69	16	0	2	75	8	0	2	51	12	0	2	63	Y	205	0.320	0
-Contig45_chr5_50892738_50892968	169	C	A	25.8	chr5	50892911	C	10	0	2	57	7	0	2	48	10	0	2	60	6	0	2	45	6	0	2	45	13	0	2	66	N	244	0.497	1
-Contig40_chr5_51484164_51484696	14	A	G	53.3	chr5	51484180	A	6	0	2	45	4	0	2	39	4	0	2	39	3	0	2	36	0	0	2	13	3	0	2	36	N	63	+99.	1
-Contig40_chr5_51664286_51667573	861	C	T	148.0	chr5	51665149	C	20	0	2	87	21	0	2	90	20	0	2	87	11	0	2	60	16	0	2	75	15	0	2	72	Y	207	0.080	1
-Contig15_chr5_51889708_51891244	882	A	G	149.0	chr5	51890581	G	13	0	2	66	18	0	2	81	17	0	2	78	22	0	2	93	15	0	2	72	22	0	2	93	Y	7	0.025	1
-Contig143_chr5_57231364_57232010	294	T	C	78.5	chr5	57231644	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	10	0	2	57	6	0	2	45	Y	73	0.337	2
-Contig13_chr5_57609985_57610584	496	C	T	50.5	chr5	57610476	C	17	0	2	78	9	0	2	54	6	0	2	45	8	0	2	51	10	0	2	57	12	0	2	63	N	77	2.022	1
-Contig230_chr5_58486998_58487280	227	T	C	192.0	chr5	58487232	T	3	0	2	36	4	0	2	39	9	0	2	54	6	0	2	45	4	0	2	39	7	0	2	48	N	24	0.100	2
-Contig32_chr5_70852360_70853289	282	G	A	114.0	chr5	70852623	G	16	0	2	75	11	0	2	60	13	0	2	66	12	0	2	63	13	0	2	66	7	0	2	48	Y	33	0.276	0
-Contig100_chr5_71189678_71190590	813	C	T	30.8	chr5	71190523	C	11	0	2	60	11	0	2	60	9	0	2	54	10	0	2	57	6	0	2	45	13	0	2	66	Y	8	0.362	1
-Contig45_chr5_76133561_76134403	388	A	G	103.0	chr5	76133941	G	3	0	2	36	8	0	2	51	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	57	0.038	0
-Contig61_chr5_90202541_90204393	909	C	T	101.0	chr5	90203461	T	7	0	2	48	5	0	2	42	14	0	2	69	3	0	2	36	5	0	2	42	8	0	2	51	Y	64	1.448	0
-Contig111_chr6_5821219_5822519	1060	A	G	68.1	chr6	5822321	T	7	0	2	48	6	0	2	45	11	0	2	60	9	0	2	54	3	0	2	36	12	0	2	63	Y	7	0.231	1
-Contig220_chr6_10671338_10672441	999	T	C	36.3	chr6	10672322	T	5	0	2	42	11	0	2	60	11	0	2	60	8	0	2	51	5	0	2	42	9	0	2	54	Y	1	1.667	0
-Contig226_chr6_17361986_17362884	418	G	C	251.0	chr6	17362406	G	6	0	2	45	8	0	2	51	7	0	2	48	9	0	2	54	7	0	2	48	7	0	2	48	Y	7	0.147	0
-Contig51_chr6_20231207_20231785	161	A	G	70.5	chr6	20231375	G	13	0	2	66	5	0	2	42	8	0	2	51	2	0	2	36	5	0	2	42	5	0	2	42	Y	153	1.754	0
-Contig102_chr6_30271329_30271577	39	T	G	139.0	chr6	30271371	G	3	0	2	36	4	0	2	39	6	0	2	45	1	0	2	30	4	0	2	39	4	0	2	39	N	15	1.159	0
-Contig217_chr6_31393824_31394218	97	G	A	115.0	chr6	31393921	G	9	0	2	54	19	0	2	84	15	0	2	72	12	0	2	63	7	0	2	48	10	0	2	57	N	45	0.477	0
-Contig186_chr6_31928098_31928245	73	G	A	117.0	chr6	-1	N	5	0	2	42	8	0	2	51	2	0	2	33	4	0	2	39	1	0	2	30	5	0	2	42	N	-1	0.276	1
-Contig52_chr6_33188498_33188724	123	G	A	59.0	chr6	-1	N	5	0	2	42	13	0	2	66	8	0	2	51	4	0	2	39	9	0	2	54	9	0	2	54	N	-1	0.880	1
-Contig102_chr6_38743009_38743435	290	A	G	178.0	chr6	38743311	A	11	0	2	60	13	0	2	66	9	0	2	54	11	0	2	60	12	0	2	63	13	0	2	66	Y	34	0.148	4
-Contig81_chr6_49018353_49019532	179	C	A	72.5	chr6	49018530	A	15	0	2	72	13	0	2	66	19	0	2	72	8	0	2	51	12	0	2	63	16	0	2	75	Y	15	0.145	1
-Contig112_chr6_51024554_51024851	100	A	G	121.0	chr6	51024654	A	10	0	2	57	12	0	2	63	9	0	2	54	13	0	2	66	14	0	2	69	17	0	2	78	N	75	4.287	0
-Contig40_chr6_51412751_51413807	227	T	C	94.5	chr6	51412975	C	5	0	2	42	8	0	2	51	7	0	2	48	9	0	2	54	11	0	2	60	10	0	2	57	Y	4	5.661	0
-Contig47_chr6_69073222_69074767	1315	T	C	212.0	chr6	69074558	T	20	0	2	87	17	0	2	78	18	0	2	81	12	0	2	63	17	0	2	78	7	0	2	48	Y	9	0.652	0
-Contig30_chr6_74848932_74849059	57	C	G	46.3	chr6	74848993	C	7	0	2	48	7	0	2	33	6	0	2	45	7	0	2	48	5	0	2	42	6	0	2	45	N	-1	+99.	1
-Contig84_chr7_6648683_6650255	1297	G	A	110.0	chr7	6649988	G	18	0	2	81	9	0	2	54	22	0	2	77	16	0	2	75	20	0	2	87	6	0	2	45	Y	83	0.166	0
-Contig239_chr7_13007379_13007700	275	A	G	39.8	chr7	13007642	A	8	0	2	51	5	0	2	42	8	0	2	51	3	0	2	36	3	0	2	36	5	0	2	42	N	46	1.511	3
-Contig119_chr7_18310707_18310948	23	A	T	133.0	chr7	18310729	A	6	0	2	45	5	0	2	42	10	0	2	57	5	0	2	42	2	0	2	33	2	0	2	33	N	4553	+99.	0
-Contig93_chr7_18513377_18513741	173	T	C	130.0	chr7	18513533	C	15	0	2	72	11	0	2	60	18	0	2	81	6	0	2	45	10	0	2	57	14	0	2	69	Y	115	0.174	0
-Contig133_chr7_19603333_19603776	414	C	G	31.9	chr7	19603734	G	10	0	2	57	4	0	2	39	4	0	2	39	5	0	2	42	9	0	2	54	9	0	2	54	N	78	+99.	5
-Contig132_chr7_20426224_20428145	1815	A	G	28.3	chr7	20428041	A	11	1	2	43	12	0	2	63	19	0	2	84	23	0	2	96	14	0	2	69	10	0	2	57	N	11	0.264	0
-Contig206_chr7_26281823_26282074	103	C	A	101.0	chr7	26281925	T	11	0	2	60	16	0	2	61	19	0	2	84	6	0	2	45	19	0	2	84	16	0	2	75	N	-1	0.947	1
-Contig55_chr7_53147505_53148974	894	A	G	68.4	chr7	53148397	G	22	0	2	93	13	0	2	66	16	0	2	75	8	0	2	51	16	0	2	75	11	0	2	60	Y	19	0.060	0
-Contig4_chr7_53685534_53688206	1709	C	G	76.2	chr7	53687225	C	18	0	2	81	17	0	2	78	18	0	2	81	15	0	2	72	14	0	2	69	14	0	2	69	Y	32	0.659	1
-Contig61_chr7_55832923_55834065	506	T	C	185.0	chr7	55833450	C	9	0	2	54	10	0	2	57	22	0	2	93	12	0	2	63	12	0	2	63	7	0	2	48	Y	1	0.019	0
-Contig91_chr8_12804505_12805470	409	C	A	111.0	chr8	12804906	C	8	0	2	51	10	0	2	57	15	0	2	72	12	0	2	63	14	0	2	69	15	0	2	72	N	145	0.175	0
-Contig8_chr8_27811135_27812620	333	C	T	37.9	chr8	27811458	C	4	0	2	39	11	0	2	60	18	0	2	81	5	0	2	42	6	0	2	45	5	0	2	42	Y	1	0.272	0
-Contig66_chr8_28273102_28273660	175	G	C	81.6	chr8	28273263	T	9	0	2	54	17	0	2	78	19	0	2	84	8	0	2	51	16	0	2	75	19	0	2	84	Y	3	2.735	0
-Contig84_chr8_31375511_31376456	443	T	C	125.0	chr8	31375954	T	10	0	2	57	15	0	2	72	27	0	2	108	18	0	2	81	16	0	2	75	9	0	2	54	Y	2	0.650	0
-Contig18_chr8_32575859_32577431	264	T	C	151.0	chr8	32576124	T	20	0	2	87	14	0	2	69	17	0	2	78	14	0	2	69	13	0	2	66	14	0	2	69	Y	17	0.915	1
-Contig54_chr8_40913908_40916451	1275	G	A	175.0	chr8	40915190	G	10	0	2	57	8	0	2	51	11	0	2	60	7	0	2	48	8	0	2	51	9	0	2	54	Y	21	0.056	3
-Contig93_chr8_44658786_44659075	180	T	G	55.3	chr8	44658964	T	4	0	2	39	3	0	2	36	6	0	2	45	5	0	2	45	5	0	2	42	4	0	2	39	N	14	0.188	0
-Contig66_chr8_58562376_58563446	345	C	G	5.74	chr8	58562721	C	14	0	2	69	12	0	2	63	9	0	2	57	10	0	2	57	9	0	2	54	10	0	2	57	Y	6	0.685	0
-Contig44_chr8_71186368_71188207	1455	G	T	147.0	chr8	71187818	G	4	10	1	74	3	0	2	36	20	0	2	87	12	0	2	63	8	0	2	51	10	0	2	57	Y	88	0.036	0
-Contig73_chr9_29451535_29452248	616	A	G	24.7	chr9	29452127	G	4	0	2	39	7	0	2	48	1	0	2	30	4	0	2	39	7	0	2	48	6	0	2	45	N	49	0.448	4
-Contig96_chr9_39008495_39009278	215	A	C	98.7	chr9	39008708	C	7	0	2	48	13	0	2	66	28	0	2	111	16	0	2	75	17	0	2	78	17	0	2	78	Y	8	0.427	1
-Contig69_chr10_40547265_40548153	371	G	A	58.1	chr10	40547649	A	9	0	2	54	8	0	2	51	8	0	2	51	9	0	2	54	4	0	2	39	5	0	2	42	Y	20	0.138	4
-Contig63_chr10_42716594_42719945	1018	A	G	88.7	chr10	42717616	G	13	0	2	66	14	0	2	69	13	0	2	66	12	0	2	63	18	0	2	81	5	0	2	42	Y	25	1.740	0
-Contig22_chr10_43255307_43255570	81	C	A	37.2	chr10	43255383	C	15	0	2	72	18	0	2	81	22	0	2	93	16	0	2	75	11	0	2	60	12	0	2	63	N	62	0.450	0
-Contig9_chr10_51475063_51476054	770	C	T	57.3	chr10	51475839	C	6	0	2	45	16	0	2	75	16	0	2	75	13	0	2	66	9	0	2	54	9	2	2	21	N	80	0.394	0
-Contig42_chr10_53816543_53818392	1642	G	A	27.5	chr10	53818172	A	7	0	2	48	13	0	2	66	17	0	2	78	14	0	2	69	19	0	2	84	16	0	2	75	N	1	0.433	0
-Contig36_chr10_53992615_53993741	229	G	C	86.2	chr10	53992846	G	17	0	2	78	14	0	2	69	13	0	2	66	15	0	2	72	12	0	2	63	15	0	2	72	N	23	1.912	0
-Contig20_chr10_58141129_58141750	575	C	T	46.1	chr10	58141701	C	7	0	2	48	8	0	2	51	9	0	2	54	3	0	2	36	4	0	2	39	9	0	2	54	N	1	4.264	0
-Contig26_chr10_59510973_59511899	146	C	A	29.0	chr10	59511126	C	8	0	2	51	13	0	2	66	18	0	2	81	13	0	2	66	10	0	2	57	7	0	2	48	Y	208	1.077	0
-Contig72_chr11_7142765_7143772	146	G	A	152.0	chr11	7142911	A	8	0	2	51	8	0	2	51	24	0	2	99	10	0	2	57	17	0	2	78	11	0	2	60	Y	90	1.137	0
-Contig9_chr11_9904571_9905983	1284	C	T	151.0	chr11	9905857	C	16	0	2	75	19	0	2	84	17	0	2	78	16	0	2	75	12	0	2	63	13	1	2	44	Y	11	0.422	1
-Contig7_chr11_40017076_40017630	352	C	T	46.3	chr11	40017422	C	7	0	2	48	9	0	2	54	6	0	2	45	8	0	2	51	16	0	2	75	9	0	2	54	Y	44	0.336	0
-Contig108_chr11_42953408_42955156	367	A	G	89.4	chr11	42953779	A	17	0	2	78	11	0	2	60	14	0	2	69	20	0	2	87	14	0	2	69	17	0	2	78	Y	118	0.784	1
-Contig16_chr11_53408448_53408790	187	A	G	153.0	chr11	53408638	A	7	0	2	48	9	0	2	54	18	0	2	81	10	0	2	57	11	0	2	60	12	0	2	63	Y	116	1.367	0
-Contig21_chr12_18403415_18404381	586	G	T	34.5	chr12	18403983	-	13	0	2	66	16	0	2	75	25	0	2	102	12	0	2	63	12	0	2	63	14	0	2	69	Y	12	0.068	0
-Contig33_chr12_19804073_19804529	178	T	C	69.4	chr12	19804261	T	13	0	2	66	13	0	2	66	22	0	2	93	11	0	2	60	12	0	2	63	18	0	2	81	Y	11	1.571	0
-Contig41_chr12_25565452_25566993	475	G	T	6.29	chr12	25565926	G	15	0	2	72	14	0	2	69	10	0	2	57	15	0	2	72	18	0	2	81	19	0	2	84	N	10	2.231	1
-Contig9_chr12_27204351_27204696	239	A	G	145.0	chr12	27204587	A	7	0	2	48	8	0	2	51	12	0	2	63	8	0	2	51	11	0	2	60	11	0	2	60	Y	14	0.046	0
-Contig45_chr12_30548282_30550498	448	C	T	124.0	chr12	30548703	-	9	0	2	54	11	0	2	60	22	0	2	93	19	0	2	84	12	0	2	63	12	0	2	63	Y	66	0.305	0
-Contig46_chr12_35571846_35572563	58	G	C	83.2	chr12	35571906	G	4	0	2	39	10	0	2	57	11	0	2	60	6	0	2	45	10	0	2	57	6	0	2	45	Y	55	+99.	1
-Contig28_chr12_42075871_42076044	136	G	A	134.0	chr12	42076006	A	6	0	2	45	5	0	2	42	7	0	2	48	7	0	2	48	2	0	2	33	4	0	2	39	N	3	9.479	0
-Contig16_chr12_42386141_42387454	194	A	G	161.0	chr12	42386323	A	11	0	2	60	8	0	2	54	23	0	2	96	17	0	2	78	6	0	2	45	13	0	2	66	Y	7	0.927	1
-Contig42_chr12_44424628_44425829	255	A	G	84.4	chr12	44424879	A	12	0	2	63	19	0	2	84	23	0	2	96	15	0	2	72	18	0	2	81	14	0	2	69	Y	18	1.190	2
-Contig10_chr12_44447953_44449698	63	C	T	105.0	chr12	44448020	C	11	0	2	60	9	0	2	54	12	0	2	63	10	0	2	57	15	0	2	72	8	0	2	51	Y	31	11.791	0
-Contig5_chr12_53880670_53882675	1221	A	C	99.4	chr12	53881888	A	16	0	2	75	18	0	2	81	23	0	2	96	10	0	2	57	15	0	2	72	17	0	2	78	Y	31	0.061	0
-Contig86_chr12_56715356_56716464	818	T	C	166.0	chr12	56716164	T	20	0	2	87	16	0	2	75	16	0	2	75	14	0	2	69	13	0	2	66	7	0	2	48	Y	22	1.092	0
-Contig3_chr12_65021967_65024097	238	T	G	92.6	chr12	65022205	T	17	0	2	78	14	0	2	69	16	0	2	75	9	0	2	54	13	0	2	66	15	0	2	72	Y	258	0.117	0
-Contig43_chr12_66499742_66500010	121	G	T	41.5	chr12	66499866	G	12	0	2	63	4	0	2	39	8	0	2	51	6	0	2	45	10	0	2	57	6	0	2	45	N	42	0.421	0
-Contig14_chr12_71364692_71365311	20	A	C	103.0	chr12	71364712	A	7	0	2	48	3	0	2	36	5	0	2	42	1	0	2	30	2	0	2	33	3	0	2	36	Y	35	+99.	0
-Contig37_chr13_15910164_15910426	245	G	A	32.9	chr13	-1	N	3	4	1	41	4	0	2	39	3	0	2	36	4	0	2	39	3	0	2	36	10	0	2	57	N	-1	2.159	1
-Contig107_chr13_26045881_26046290	341	C	G	81.4	chr13	26046230	C	16	0	2	75	20	0	2	90	14	0	2	69	15	0	2	72	9	0	2	54	9	0	2	54	Y	51	4.510	0
-Contig251_chr13_28498333_28501066	864	T	G	296.0	chr13	28499180	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	5	0	2	42	6	0	2	45	Y	9	0.068	0
-Contig154_chr13_36777857_36778736	356	G	A	95.5	chr13	36778225	A	6	0	2	45	11	0	2	60	11	0	2	60	9	0	2	54	13	0	2	66	8	0	2	51	Y	59	0.192	0
-Contig37_chr13_42529793_42530857	150	G	T	192.0	chr13	42529926	G	18	0	2	81	14	0	2	69	16	0	2	75	14	0	2	69	8	0	2	51	11	0	2	60	N	22	0.795	5
-Contig47_chr13_47045833_47046626	257	A	C	28.5	chr13	47046097	A	13	0	2	66	10	0	2	57	17	0	2	78	20	0	2	87	15	0	2	72	9	0	2	57	N	129	0.468	0
-Contig42_chr13_47730018_47730856	254	A	G	75.1	chr13	47730294	A	13	0	2	66	6	0	2	45	12	0	2	63	9	0	2	54	16	0	2	75	11	0	2	63	Y	630	0.049	1
-Contig55_chr13_53467708_53468101	221	T	G	132.0	chr13	53467925	T	25	0	2	102	12	0	2	63	26	0	2	105	7	0	2	48	16	0	2	75	16	0	2	75	N	20	5.717	1
-Contig49_chr13_55103679_55105532	503	G	A	76.0	chr13	55104178	G	21	0	2	90	19	0	2	84	18	0	2	81	20	0	2	87	8	9	1	89	17	0	2	78	Y	20	0.259	1
-Contig66_chr13_66021813_66022244	319	C	T	125.0	chr13	66022136	C	11	0	2	60	16	0	2	75	15	0	2	75	12	0	2	63	17	0	2	78	8	0	2	51	N	14	0.055	3
-Contig48_chr14_11839435_11843272	3014	A	G	163.0	chr14	11842446	A	10	0	2	57	8	0	2	51	13	0	2	66	10	0	2	57	5	0	2	42	10	0	2	57	Y	31	0.908	0
-Contig9_chr14_23353717_23354432	80	G	A	61.3	chr14	23353797	G	3	0	2	36	6	0	2	45	11	0	2	60	8	0	2	51	4	0	2	39	2	4	1	35	Y	11	0.444	0
-Contig14_chr14_24131180_24133488	1633	G	A	131.0	chr14	24132818	G	21	0	2	90	16	0	2	75	12	0	2	63	10	0	2	57	11	0	2	60	20	0	2	87	Y	36	0.347	0
-Contig28_chr14_26905747_26909514	975	G	C	3.13	chr14	26906723	G	16	0	2	75	10	0	2	57	12	0	2	63	15	0	2	72	10	0	2	57	7	0	2	48	N	287	0.117	2
-Contig14_chr14_29616948_29618316	109	G	A	80.3	chr14	29617053	-	17	0	2	78	16	0	2	75	16	0	2	75	10	0	2	57	17	0	2	78	19	0	2	84	Y	32	1.051	0
-Contig76_chr14_30028102_30029179	1046	C	T	38.5	chr14	30029169	T	3	0	2	36	6	0	2	45	9	0	2	54	7	0	2	48	9	0	2	54	8	0	2	51	Y	96	+99.	0
-Contig115_chr14_31417207_31417574	259	A	G	12.1	chr14	31417454	G	13	0	2	66	15	0	2	72	21	0	2	90	12	0	2	63	13	0	2	66	9	0	2	54	N	28	5.379	2
-Contig70_chr14_46653662_46653790	111	G	A	46.7	chr14	46653768	G	7	0	2	48	5	0	2	42	11	0	2	60	11	0	2	60	8	0	2	51	10	0	2	57	N	21	+99.	2
-Contig43_chr14_49991855_49993511	918	A	G	112.0	chr14	49992767	G	15	0	2	72	10	0	2	57	11	0	2	63	9	0	2	54	12	0	2	63	9	0	2	54	Y	6	0.314	1
-Contig64_chr14_56768376_56768902	473	C	T	29.0	chr14	56768832	C	15	0	2	72	11	0	2	60	14	0	2	69	14	0	2	69	7	0	2	48	9	0	2	54	Y	91	8.281	0
-Contig60_chr15_18493036_18494316	150	G	A	92.6	chr15	18493188	G	9	0	2	54	13	0	2	66	9	0	2	54	6	0	2	45	5	0	2	42	12	0	2	63	Y	45	0.125	0
-Contig59_chr15_22138344_22138535	120	G	C	142.0	chr15	22138470	C	11	0	2	60	10	0	2	57	18	0	2	81	4	0	2	39	10	0	2	57	15	0	2	72	N	8	2.553	0
-Contig112_chr15_26772864_26773267	374	C	T	21.6	chr15	26773244	C	4	0	2	39	4	0	2	39	5	0	2	42	2	0	2	33	4	0	2	39	3	0	2	36	N	18	+99.	0
-Contig24_chr15_26894765_26895003	155	G	A	87.6	chr15	-1	N	6	0	2	45	5	0	2	42	7	0	2	48	4	0	2	39	4	0	2	39	2	0	2	33	N	-1	0.178	0
-Contig2_chr15_33944796_33947182	1860	G	A	99.5	chr15	33946654	G	10	0	2	57	11	0	2	60	16	0	2	75	14	0	2	69	14	0	2	69	16	0	2	75	Y	16	0.252	0
-Contig73_chr15_34690052_34691332	714	T	C	130.0	chr15	34690769	T	7	0	2	48	7	0	2	48	17	0	2	78	9	0	2	54	9	0	2	54	4	0	2	39	Y	7	6.003	0
-Contig68_chr15_37747190_37747426	126	G	A	130.0	chr15	37747331	G	14	0	2	69	14	0	2	69	11	0	2	63	19	0	2	84	13	0	2	66	21	0	2	90	N	229	0.255	0
-Contig104_chr15_45106954_45107158	70	A	T	64.4	chr15	45107015	A	6	0	2	45	6	0	2	45	19	0	2	84	7	0	2	48	7	0	2	48	3	0	2	36	N	202	4.319	0
-Contig119_chr16_6160274_6160477	180	G	A	54.8	chr16	6160457	G	7	0	2	48	6	0	2	45	12	0	2	63	3	0	2	36	11	0	2	60	10	0	2	57	N	42	+99.	0
-Contig126_chr16_10611887_10612152	150	G	T	145.0	chr16	10612037	G	14	0	2	69	9	0	2	54	11	0	2	63	8	0	2	51	8	0	2	51	11	0	2	60	N	15	0.104	6
-Contig43_chr16_20200090_20200514	70	A	G	58.6	chr16	20200154	A	11	0	2	60	15	0	2	72	15	0	2	72	6	0	2	45	9	0	2	54	12	0	2	63	Y	2	0.466	1
-Contig60_chr16_28079136_28080263	588	T	G	157.0	chr16	28079739	T	22	0	2	93	20	0	2	87	22	0	2	93	17	0	2	78	12	0	2	63	10	0	2	57	Y	105	5.999	1
-Contig70_chr16_33758668_33759655	104	A	T	58.1	chr16	33758772	A	6	0	2	45	7	0	2	48	17	0	2	78	14	0	2	69	8	0	2	51	10	0	2	57	N	54	0.162	0
-Contig66_chr16_37935682_37935831	116	T	C	99.2	chr16	37935802	C	12	0	2	63	6	0	2	45	19	0	2	84	12	0	2	63	13	0	2	66	17	0	2	78	N	266	+99.	2
-Contig16_chr16_40451506_40451643	84	A	G	59.8	chr16	40451592	A	7	0	2	48	5	0	2	42	7	0	2	48	13	0	2	66	14	0	2	69	19	0	2	84	N	45	5.061	0
-Contig31_chr17_12128267_12129637	205	G	A	90.5	chr17	12128484	G	7	0	2	48	6	0	2	45	6	0	2	45	11	0	2	60	7	0	2	48	4	0	2	39	Y	10	0.246	0
-Contig1_chr17_12979232_12980380	808	G	T	12.3	chr17	12980028	G	18	0	2	81	12	0	2	63	21	0	2	90	13	0	2	66	22	0	2	93	18	0	2	81	Y	9	0.336	1
-Contig42_chr17_23434859_23438330	2100	C	T	39.5	chr17	23436985	T	4	0	2	39	7	0	2	48	7	0	2	48	3	0	2	36	6	0	2	45	2	0	2	33	Y	25	0.344	0
-Contig63_chr17_23796320_23796814	220	A	G	54.0	chr17	23796536	G	6	0	2	45	4	0	2	39	5	0	2	42	6	0	2	45	4	0	2	39	6	0	2	45	Y	139	0.067	1
-Contig76_chr17_24107434_24107834	316	T	C	141.0	chr17	24107726	T	19	0	2	84	15	0	2	72	20	0	2	87	16	0	2	75	11	0	2	60	18	0	2	81	Y	30	0.175	2
-Contig99_chr17_26021506_26022200	505	C	T	88.8	chr17	26022017	T	15	0	2	72	13	0	2	66	19	0	2	84	9	0	2	54	10	0	2	57	11	0	2	60	Y	1	0.172	1
-Contig59_chr17_26790302_26795045	287	C	T	45.1	chr17	26790582	C	8	0	2	51	6	0	2	45	13	0	2	66	6	0	2	45	15	0	2	72	12	0	2	63	Y	75	0.019	1
-Contig99_chr17_27018324_27019378	446	G	A	31.1	chr17	27018776	G	14	0	2	69	12	0	2	63	14	0	2	69	10	0	2	57	9	0	2	54	11	0	2	60	Y	13	0.290	4
-Contig125_chr17_27739115_27739410	63	G	A	107.0	chr17	27739177	G	8	0	2	51	11	0	2	60	16	0	2	75	8	0	2	51	4	0	2	39	15	0	2	72	N	100	0.819	0
-Contig115_chr17_37489899_37490101	159	G	A	62.4	chr17	37490067	G	4	0	2	39	3	0	2	36	4	0	2	39	4	0	2	39	3	0	2	36	6	0	2	45	N	4	1.411	1
-Contig180_chr17_45154356_45154925	524	A	G	146.0	chr17	45154886	G	7	0	2	48	9	0	2	54	7	0	2	48	9	0	2	54	4	0	2	39	8	0	2	51	Y	11	+99.	2
-Contig61_chr17_48221795_48223545	1404	T	A	177.0	chr17	48223216	T	15	0	2	72	14	0	2	69	24	0	2	99	17	0	2	78	18	0	2	81	24	0	2	99	Y	161	0.633	2
-Contig27_chr17_61713766_61716585	1056	G	C	40.0	chr17	61714821	G	4	0	2	39	8	0	2	51	10	0	2	57	6	0	2	45	6	0	2	45	3	0	2	36	N	6	2.200	4
-Contig229_chr18_3706523_3708577	1076	A	G	83.9	chr18	3707630	A	11	0	2	60	13	0	2	66	26	0	2	105	11	0	2	60	15	0	2	72	17	0	2	78	Y	63	0.445	0
-Contig24_chr18_14049894_14050480	24	A	G	123.0	chr18	14049918	A	5	0	2	42	5	0	2	42	4	0	2	39	6	0	2	45	7	0	2	48	5	0	2	42	Y	17	+99.	0
-Contig123_chr18_19916160_19916379	116	G	A	79.2	chr18	19916272	A	14	0	2	69	12	0	2	63	14	0	2	69	6	0	2	45	11	0	2	60	10	0	2	57	N	26	0.172	0
-Contig82_chr18_27305489_27306229	566	C	T	49.5	chr18	27306051	A	6	0	2	45	6	0	2	45	10	0	2	57	11	0	2	60	6	0	2	45	7	0	2	48	N	1	0.349	0
-Contig71_chr18_34324706_34326687	136	G	A	151.0	chr18	34324841	G	9	0	2	54	9	0	2	54	17	0	2	78	8	0	2	51	11	0	2	60	10	0	2	57	Y	2	2.129	2
-Contig16_chr18_34672093_34673044	538	T	C	58.2	chr18	34672635	T	8	0	2	51	15	0	2	72	16	0	2	75	15	0	2	72	9	0	2	57	18	0	2	81	Y	8	0.214	1
-Contig96_chr18_38492535_38493333	624	G	A	119.0	chr18	38493162	T	17	0	2	78	12	0	2	63	13	0	2	66	16	0	2	75	8	0	2	51	15	0	2	72	Y	127	0.131	0
-Contig226_chr18_47753756_47754666	427	T	C	21.1	chr18	47754215	T	10	0	2	57	4	0	2	39	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	42	0.522	0
-Contig170_chr18_49411558_49412230	94	C	A	74.3	chr18	49411655	C	14	0	2	69	10	0	2	57	9	0	2	54	10	0	2	57	3	0	2	36	3	0	2	36	N	9	1.457	0
-Contig192_chr18_49419342_49420737	1058	C	T	42.8	chr18	49420381	A	3	0	2	36	4	0	2	39	5	0	2	42	8	0	2	51	3	0	2	36	3	0	2	36	Y	34	2.107	2
-Contig64_chr18_55979770_55980315	49	G	A	89.1	chr18	55979824	G	3	0	2	36	9	0	2	54	7	0	2	51	4	0	2	39	3	0	2	36	3	0	2	36	Y	-1	2.124	0
-Contig20_chr18_58130301_58130735	112	A	G	74.4	chr18	58130413	A	12	0	2	66	11	0	2	60	11	0	2	60	12	0	2	63	6	0	2	45	6	0	2	45	Y	10	0.290	0
-Contig67_chr19_12398520_12399367	499	C	T	161.0	chr19	12399017	C	10	0	2	57	11	0	2	60	20	0	2	87	14	0	2	69	24	0	2	99	8	0	2	51	Y	137	5.634	0
-Contig66_chr19_16285672_16287223	996	C	T	190.0	chr19	16286674	C	9	0	2	57	14	0	2	69	16	0	2	78	17	0	2	78	8	0	2	51	22	0	2	93	Y	40	0.110	0
-Contig129_chr19_25541958_25542221	202	T	C	68.1	chr19	25542154	C	11	0	2	60	19	0	2	84	10	0	2	60	17	0	2	78	9	0	2	54	12	0	2	63	N	-1	2.551	1
-Contig29_chr19_37339947_37341911	1692	C	T	211.0	chr19	37341631	C	15	0	2	72	20	0	2	87	11	0	2	60	15	0	2	72	3	0	2	36	12	0	2	63	Y	7	0.096	0
-Contig39_chr19_47709708_47711327	444	C	T	36.8	chr19	47710148	T	10	0	2	57	4	0	2	39	8	0	2	51	9	0	2	54	6	0	2	45	6	0	2	45	Y	95	1.251	1
-Contig60_chr19_54013816_54014398	281	A	G	138.0	chr19	54014103	C	6	0	2	45	15	0	2	72	7	0	2	48	10	0	2	57	15	0	2	72	10	0	2	57	Y	188	1.271	0
-Contig251_chr19_56559098_56559626	452	T	C	3.36	chr19	56559549	T	12	0	2	63	13	0	2	66	21	0	2	90	15	0	2	72	14	0	2	69	11	0	2	60	N	1	0.117	0
-Contig50_chr20_12138509_12141975	3206	C	A	248.0	chr20	12141763	C	8	0	2	51	15	0	2	72	14	0	2	69	6	0	2	45	10	0	2	57	7	0	2	48	Y	2	0.384	0
-Contig36_chr20_32631363_32632049	176	G	A	24.1	chr20	32631526	G	7	0	2	48	14	0	2	69	19	0	2	84	14	0	2	69	15	0	2	72	16	0	2	75	N	50	1.150	0
-Contig32_chr20_36468058_36468869	66	C	T	40.4	chr20	36468127	C	6	0	2	45	3	0	2	36	4	0	2	39	5	0	2	42	3	0	2	36	4	0	2	39	N	59	0.281	0
-Contig24_chr20_38203888_38204900	834	C	T	132.0	chr20	38204731	C	9	0	2	54	17	0	2	78	20	0	2	87	8	0	2	51	11	0	2	60	17	0	2	78	Y	14	0.397	0
-Contig79_chr20_44263127_44264103	456	G	T	31.5	chr20	44263573	G	22	0	2	93	16	0	2	75	15	0	2	72	19	0	2	84	13	0	2	66	26	0	2	105	Y	8	3.250	0
-Contig26_chr20_45878482_45878787	197	A	G	160.0	chr20	45878672	A	17	0	2	78	15	0	2	72	11	0	2	63	17	0	2	78	12	0	2	63	10	0	2	57	N	14	0.535	0
-Contig119_chr20_46550670_46551383	609	G	A	139.0	chr20	46551277	G	7	0	2	48	17	0	2	78	19	0	2	84	20	0	2	87	9	0	2	54	15	0	2	72	Y	7	0.488	1
-Contig50_chr21_4178523_4178687	121	G	A	362.0	chr21	4178640	G	8	0	2	51	14	0	2	69	5	0	2	42	3	0	2	36	11	0	2	60	4	0	2	39	N	392	0.483	0
-Contig103_chr21_10177255_10177765	121	G	A	125.0	chr21	10177367	G	12	0	2	63	10	0	2	57	10	0	2	57	17	0	2	78	14	0	2	69	7	0	2	51	Y	37	0.213	3
-Contig1_chr21_10805534_10806399	766	A	G	146.0	chr21	10806301	G	10	0	2	57	6	0	2	45	9	0	2	54	6	0	2	45	7	0	2	48	5	0	2	42	Y	20	0.319	0
-Contig46_chr21_21029492_21030645	443	C	T	5.37	chr21	21029910	C	15	0	2	72	11	0	2	60	16	0	2	75	15	0	2	72	13	0	2	66	6	0	2	45	Y	96	3.737	0
-Contig129_chr21_31045749_31046924	381	A	G	129.0	chr21	31046141	A	19	0	2	84	8	0	2	51	23	0	2	96	12	0	2	63	15	0	2	72	18	0	2	81	Y	69	0.028	2
-Contig23_chr21_31651123_31651986	840	C	T	71.3	chr21	31651957	T	6	0	2	45	9	0	2	54	8	0	2	51	10	0	2	57	4	0	2	39	7	0	2	48	Y	105	2.977	3
-Contig64_chr21_43341847_43342031	84	T	C	114.0	chr21	43341926	T	11	0	2	60	9	0	2	54	10	0	2	57	6	0	2	45	6	0	2	45	7	0	2	48	N	10	3.954	2
-Contig60_chr21_43475347_43475824	175	C	T	8.05	chr21	43475551	T	6	0	2	45	7	0	2	48	13	0	2	66	6	0	2	45	14	0	2	69	14	0	2	69	N	45	0.058	0
-Contig159_chr22_7896450_7896974	109	G	C	151.0	chr22	7896570	G	16	0	2	75	5	7	1	62	14	0	2	69	16	0	2	75	13	0	2	66	13	0	2	66	Y	16	0.465	0
-Contig46_chr22_9416920_9417467	381	G	A	145.0	chr22	9417259	G	10	0	2	57	9	0	2	54	10	0	2	57	6	0	2	45	13	0	2	66	7	0	2	48	Y	154	0.242	0
-Contig86_chr22_9440787_9441725	713	T	G	119.0	chr22	9441488	G	6	0	2	45	12	0	2	63	10	0	2	57	11	0	2	60	13	0	2	66	16	0	2	75	Y	132	0.218	0
-Contig16_chr22_15636960_15637372	236	A	C	9.79	chr22	15637192	T	4	0	2	39	5	0	2	42	12	0	2	63	7	0	2	48	6	0	2	45	11	0	2	60	Y	5	2.163	0
-Contig4_chr22_16114310_16114546	128	G	C	101.0	chr22	16114432	G	10	0	2	57	13	0	2	66	20	0	2	87	20	0	2	87	16	0	2	75	9	0	2	54	N	19	0.526	0
-Contig23_chr22_34612023_34612568	167	C	G	92.3	chr22	34612181	C	11	0	2	60	18	0	2	81	13	0	2	66	8	0	2	51	12	0	2	63	14	0	2	69	Y	7	0.409	0
-Contig4_chr22_38252245_38253712	799	A	C	159.0	chr22	38253064	A	18	0	2	81	15	0	2	72	15	0	2	72	20	0	2	87	27	0	2	108	15	0	2	72	Y	90	4.330	0
-Contig122_chr22_48412466_48414788	1888	C	T	125.0	chr22	48414355	T	16	0	2	75	15	0	2	72	16	0	2	75	14	0	2	72	12	0	2	63	7	0	2	48	N	42	0.122	0
-Contig77_chr22_49764414_49764875	353	C	A	148.0	chr22	49764777	C	7	4	1	65	18	0	2	81	16	0	2	75	20	0	2	87	4	3	1	52	9	4	1	67	Y	12	0.941	0
-Contig26_chr22_57817664_57819633	1453	A	G	150.0	chr22	57819121	G	9	0	2	54	9	0	2	54	13	0	2	66	15	0	2	72	11	0	2	60	14	0	2	69	N	15	0.471	1
-Contig348_chr22_62406104_62406495	189	C	A	134.0	chr22	62406302	A	9	0	2	54	14	0	2	69	11	0	2	60	10	0	2	57	12	0	2	63	6	0	2	45	Y	5	0.912	0
-Contig133_chr23_3525134_3526502	1223	A	G	201.0	chr23	3526387	A	11	0	2	60	13	0	2	66	23	0	2	96	21	0	2	90	13	0	2	66	10	0	2	57	Y	61	1.359	0
-Contig111_chr23_7058063_7058181	107	G	A	108.0	chr23	7058162	A	8	0	2	51	8	0	2	51	7	0	2	48	2	0	2	33	5	0	2	42	6	0	2	45	N	3	+99.	0
-Contig79_chr23_7844129_7844837	110	C	A	141.0	chr23	7844237	T	13	0	2	66	15	0	2	72	17	0	2	78	12	0	2	63	15	0	2	72	16	0	2	75	Y	40	0.339	0
-Contig38_chr23_9201002_9201725	597	C	T	155.0	chr23	9201609	T	17	0	2	78	8	0	2	51	13	0	2	66	5	0	2	42	11	0	2	60	7	0	2	48	Y	167	0.633	1
-Contig33_chr23_20672540_20674320	347	T	A	91.4	chr23	20672885	A	11	0	2	60	14	0	2	69	15	0	2	72	7	0	2	48	12	0	2	63	18	0	2	81	Y	31	0.452	1
-Contig35_chr23_28447813_28449115	70	T	A	21.3	chr23	28447881	T	9	0	2	54	8	0	2	51	10	0	2	57	9	0	2	54	10	0	2	57	12	0	2	63	N	251	0.163	1
-Contig51_chr23_30590939_30591162	140	C	T	142.0	chr23	30591080	C	14	0	2	69	4	0	2	39	10	0	2	57	12	0	2	63	14	0	2	69	4	0	2	39	N	13	1.658	0
-Contig57_chr23_32216351_32216721	179	T	G	143.0	chr23	32216534	T	15	0	2	72	15	0	2	72	23	0	2	96	13	0	2	66	16	0	2	75	15	0	2	72	N	32	1.387	1
-Contig93_chr23_35744841_35745791	40	A	T	30.4	chr23	35744880	T	6	0	2	45	7	0	2	48	7	0	2	48	2	0	2	33	5	0	2	42	5	0	2	42	Y	50	2.173	0
-Contig32_chr23_48285289_48286638	186	T	C	176.0	chr23	48285470	T	18	0	2	81	12	0	2	63	16	0	2	75	13	0	2	66	9	0	2	54	9	0	2	54	Y	4	4.238	1
-Contig50_chr24_22515247_22516072	761	C	T	243.0	chr24	22515981	T	11	0	2	60	10	0	2	57	8	0	2	51	9	0	2	54	18	0	2	81	8	0	2	51	Y	1	0.190	0
-Contig84_chr24_29196623_29199644	466	C	T	126.0	chr24	29197091	T	7	0	2	48	11	0	2	60	8	0	2	51	7	0	2	48	11	0	2	60	15	0	2	72	Y	42	0.215	0
-Contig145_chr24_34778364_34778898	163	T	C	372.0	chr24	34778541	C	10	0	2	57	8	0	2	51	12	0	2	63	12	0	2	63	6	1	2	31	7	0	2	48	Y	40	0.037	0
-Contig34_chr24_36147443_36150244	2679	C	T	140.0	chr24	36150125	C	13	0	2	66	7	0	2	48	14	0	2	69	14	0	2	69	10	0	2	57	13	0	2	66	N	282	0.099	1
-Contig164_chr24_46598127_46599206	84	C	T	105.0	chr24	46598214	C	13	0	2	66	12	0	2	63	15	0	2	72	15	0	2	72	11	0	2	60	8	0	2	51	Y	22	1.262	1
-Contig144_chr25_4011170_4013134	541	A	G	160.0	chr25	4011690	A	12	0	2	63	17	0	2	78	13	0	2	66	13	0	2	66	13	0	2	66	13	0	2	66	Y	5	0.087	0
-Contig81_chr25_6103472_6104760	699	G	A	378.0	chr25	6104190	A	14	0	2	69	16	0	2	75	13	0	2	66	11	0	2	60	11	0	2	60	12	0	2	63	Y	33	0.789	2
-Contig152_chr25_7486442_7487609	75	A	G	11.6	chr25	7486515	A	17	0	2	78	13	0	2	66	8	0	2	51	16	0	2	75	8	0	2	51	6	0	2	45	N	2	0.158	0
-Contig24_chr25_7695778_7698612	2714	C	T	130.0	chr25	7698446	C	16	0	2	75	13	0	2	66	22	0	2	93	17	0	2	78	10	0	2	57	17	0	2	78	Y	27	0.346	0
-Contig89_chr25_8635170_8636009	586	G	C	209.0	chr25	8635744	G	13	0	2	66	13	0	2	66	21	0	2	93	14	0	2	69	15	0	2	72	15	0	2	72	Y	14	0.067	0
-Contig59_chr25_18196776_18197707	785	G	A	112.0	chr25	18197551	G	8	10	1	42	27	0	2	108	21	0	2	90	18	0	2	81	10	0	2	57	14	0	2	69	N	36	3.625	0
-Contig103_chr25_38891221_38892140	407	G	A	131.0	chr25	38891644	G	8	0	2	51	14	0	2	69	18	0	2	81	8	0	2	51	8	0	2	51	11	0	2	60	Y	149	0.167	4
-Contig84_chr25_42407960_42408708	55	C	T	119.0	chr25	42408013	C	6	0	2	45	9	0	2	54	11	0	2	60	9	0	2	54	7	0	2	48	8	0	2	51	Y	11	0.121	0
-Contig73_chr25_43562500_43564110	955	T	C	52.1	chr25	43563469	C	9	0	2	57	4	0	2	39	6	0	2	45	5	0	2	42	7	0	2	48	10	0	2	57	Y	4	1.406	0
-Contig37_chr25_51074433_51074885	170	A	G	102.0	chr25	51074589	G	11	0	2	60	7	0	2	48	6	0	2	45	15	0	2	72	9	0	2	54	7	0	2	48	Y	68	0.207	1
-Contig204_chr26_4311195_4311778	170	C	T	16.9	chr26	4311363	T	20	0	2	87	8	0	2	51	13	0	2	66	18	0	2	81	11	0	2	60	14	0	2	69	N	35	0.085	0
-Contig122_chr26_7622321_7623491	106	C	G	139.0	chr26	7622423	C	3	0	2	36	9	0	2	54	10	0	2	57	12	0	2	63	9	0	2	54	5	0	2	42	N	19	0.458	0
-Contig11_chr26_11062142_11062902	707	C	A	108.0	chr26	11062836	T	7	0	2	48	8	0	2	51	16	0	2	75	10	0	2	57	6	0	2	45	14	0	2	69	Y	-1	4.709	0
-Contig133_chr26_17695661_17696368	39	T	G	98.7	chr26	17695700	T	10	0	2	57	3	0	2	36	11	0	2	60	9	0	2	54	2	0	2	33	1	0	2	30	N	85	3.402	0
-Contig146_chr26_26622638_26623906	574	G	A	186.0	chr26	26623219	A	11	0	2	60	12	0	2	63	9	0	2	54	11	0	2	60	9	0	2	54	12	0	2	63	Y	1	0.318	0
-Contig8_chr26_27834126_27834326	140	G	A	41.7	chr26	27834268	G	13	0	2	66	7	0	2	48	13	0	2	66	11	0	2	60	12	0	2	63	6	0	2	45	N	29	0.142	1
-Contig78_chr26_31128839_31129005	123	T	C	145.0	chr26	-1	N	11	0	2	60	3	0	2	36	7	0	2	48	8	0	2	51	10	0	2	46	7	0	2	48	N	-1	1.230	1
-Contig28_chr26_32935355_32935833	289	T	C	77.9	chr26	32935638	T	15	0	2	72	22	0	2	93	15	0	2	72	9	0	2	54	15	0	2	72	17	0	2	78	Y	10	2.258	1
-Contig135_chr27_6853874_6854079	158	C	T	116.0	chr27	6854032	T	18	0	2	81	19	0	2	84	13	0	2	66	7	0	2	48	8	0	2	51	11	0	2	60	N	4	0.060	1
-Contig47_chr27_11777710_11777915	25	A	G	67.3	chr27	11777731	A	3	0	2	36	5	0	2	42	6	0	2	45	10	0	2	57	9	0	2	54	6	0	2	45	N	97	+99.	0
-Contig23_chr27_14633002_14633153	23	G	A	128.0	chr27	14633023	A	3	0	2	36	4	0	2	39	5	0	2	42	5	0	2	42	3	0	2	36	2	0	2	33	N	240	3.881	0
-Contig29_chr27_15428166_15429413	380	T	C	140.0	chr27	15428539	T	15	0	2	72	15	0	2	72	17	0	2	78	15	0	2	72	15	0	2	72	15	0	2	72	Y	47	0.916	1
-Contig31_chr27_19519489_19520891	129	G	T	14.9	chr27	19519624	T	12	0	2	63	19	0	2	84	20	0	2	87	16	0	2	75	10	0	2	57	11	0	2	60	Y	48	2.756	0
-Contig35_chr27_40596169_40596445	20	G	C	133.0	chr27	40596189	G	8	0	2	51	3	0	2	36	4	0	2	39	2	0	2	33	4	0	2	39	4	0	2	39	Y	4	+99.	1
-Contig85_chr27_45471750_45472022	211	G	A	53.1	chr27	45471964	G	18	0	2	81	10	0	2	57	15	0	2	72	0	13	0	36	16	0	2	75	14	0	2	69	N	75	2.502	1
-Contig131_chr28_6481806_6483783	138	C	T	36.2	chr28	6481953	C	12	0	2	63	12	0	2	63	20	0	2	87	11	0	2	60	10	0	2	57	12	0	2	63	Y	10	0.387	0
-Contig141_chr28_10027332_10028242	780	T	G	74.8	chr28	10028095	T	10	0	2	57	11	0	2	60	14	0	2	69	10	0	2	57	7	0	2	48	9	0	2	54	Y	19	3.348	0
-Contig144_chr28_15468203_15470548	743	G	A	20.0	chr28	15468942	G	13	0	2	66	12	0	2	63	10	0	2	57	11	0	2	60	16	0	2	75	7	0	2	48	N	14	0.053	0
-Contig47_chr28_21311718_21312366	541	G	A	116.0	chr28	21312258	G	9	0	2	54	6	0	2	45	12	0	2	63	6	0	2	45	5	0	2	45	12	0	2	63	N	9	0.240	0
-Contig60_chr28_30197166_30197364	92	T	C	164.0	chr28	30197258	T	10	0	2	57	13	0	2	66	15	0	2	72	16	0	2	75	12	0	2	63	11	0	2	60	N	369	1.139	0
-Contig29_chr29_4726399_4727143	559	A	T	163.0	chr29	4726955	A	15	0	2	72	18	0	2	81	18	0	2	81	16	0	2	75	11	0	2	60	14	0	2	72	Y	161	3.114	0
-Contig48_chr29_13129286_13130137	232	A	G	92.2	chr29	13129514	G	13	0	2	66	11	0	2	60	19	0	2	84	16	0	2	75	11	0	2	60	17	0	2	78	Y	337	2.581	1
-Contig33_chr29_17000374_17000921	71	C	T	48.6	chr29	17000441	-	4	0	2	39	9	0	2	54	12	0	2	66	10	0	2	57	7	0	2	48	4	0	2	39	N	26	5.491	0
-Contig34_chr29_17581796_17584016	2105	C	T	126.0	chr29	17583890	T	14	0	2	69	11	0	2	60	18	0	2	81	12	0	2	63	10	0	2	57	10	0	2	57	Y	22	2.208	0
-Contig19_chr29_20976080_20977761	1007	G	A	115.0	chr29	20977076	G	19	0	2	84	22	0	2	93	22	0	2	93	22	0	2	93	11	0	2	60	13	0	2	66	Y	4	1.915	0
-Contig51_chr29_21149853_21150467	266	C	T	146.0	chr29	21150118	C	12	0	2	63	12	0	2	63	23	0	2	96	14	0	2	69	13	0	2	66	10	0	2	57	Y	4	0.051	0
-Contig1_chr30_5992217_5993068	106	C	T	129.0	chr30	5992319	C	10	0	2	57	11	0	2	60	7	0	2	48	11	0	2	60	10	0	2	57	12	0	2	63	Y	76	1.079	0
-Contig1_chr30_8232878_8233406	402	C	T	127.0	chr30	8233264	C	8	0	2	51	19	0	2	84	16	0	2	75	18	0	2	81	10	0	2	57	14	0	2	69	Y	358	5.283	0
-Contig108_chr30_9436961_9437520	546	C	T	39.8	chr30	9437502	C	7	0	2	48	5	0	2	42	2	0	2	33	7	0	2	48	5	0	2	42	7	0	2	48	Y	64	+99.	0
-Contig165_chr30_25804389_25804926	190	T	C	126.0	chr30	25804592	C	3	0	2	36	8	0	2	51	7	0	2	48	10	0	2	57	7	0	2	48	4	0	2	39	Y	113	0.329	0
-Contig193_chr30_27495616_27496125	434	C	A	234.0	chr30	27496024	C	13	0	2	66	16	0	2	75	25	0	2	102	16	0	2	75	13	0	2	66	14	0	2	69	Y	76	2.621	0
-Contig38_chr31_5164423_5166573	2074	C	T	134.0	chr31	5166501	T	13	0	2	66	10	0	2	57	17	0	2	78	11	0	2	60	17	0	2	78	10	0	2	57	Y	58	+99.	0
-Contig6_chr31_9649308_9650149	431	G	T	162.0	chr31	9649742	G	31	0	2	120	23	0	2	96	17	0	2	78	17	0	2	78	10	0	2	57	16	0	2	75	Y	98	2.200	0
-Contig7_chr31_12384974_12386400	305	C	T	69.6	chr31	12385267	C	6	0	2	45	10	0	2	57	11	0	2	60	11	0	2	60	9	0	2	54	12	0	2	63	Y	44	1.165	0
-Contig90_chr31_17267583_17267778	81	C	A	143.0	chr31	17267665	C	20	0	2	87	6	0	2	45	14	0	2	72	22	0	2	93	17	0	2	78	15	0	2	72	N	7	0.565	0
-Contig137_chr31_23357653_23358568	885	G	A	119.0	chr31	23358545	G	5	0	2	42	3	0	2	36	3	0	2	36	2	0	2	33	3	0	2	36	4	0	2	39	Y	11	+99.	0
-Contig17_chr31_26433828_26434459	498	T	C	9.79	chr31	26434322	T	18	0	2	81	10	0	2	57	15	0	2	72	13	0	2	66	16	0	2	75	15	0	2	72	Y	137	4.814	0
-Contig30_chr32_25902721_25905783	208	C	G	162.0	chr32	25902927	G	11	0	2	60	13	0	2	66	11	0	2	60	12	0	2	63	7	0	2	48	11	0	2	60	Y	145	0.322	2
-Contig42_chr32_38900713_38901320	320	A	G	134.0	chr32	38901021	T	12	0	2	63	10	0	2	57	9	11	1	104	5	0	2	42	19	0	2	84	7	6	1	56	Y	71	0.165	0
-Contig18_chr33_22207246_22209159	1363	G	T	51.5	chr33	22208619	-	16	0	2	75	8	0	2	51	11	0	2	60	10	0	2	57	15	0	2	72	12	0	2	63	Y	59	2.560	0
-Contig104_chr33_22483642_22484187	424	C	T	140.0	chr33	22484054	T	13	0	2	66	16	0	2	75	9	0	2	54	15	0	2	72	13	0	2	66	10	0	2	57	Y	36	0.404	0
-Contig170_chr33_26189421_26189940	292	T	C	98.4	chr33	26189703	T	21	0	2	90	13	0	2	66	15	0	2	72	13	0	2	66	19	0	2	84	13	0	2	66	Y	23	0.307	0
-Contig41_chr34_16544482_16545449	46	T	C	102.0	chr34	16544523	T	5	0	2	42	11	0	2	60	6	0	2	45	0	2	0	3	7	0	2	48	8	0	2	51	Y	215	1.156	0
-Contig8_chr34_18474513_18475673	1122	C	A	129.0	chr34	18475628	A	8	0	2	51	15	0	2	72	13	0	2	66	17	0	2	78	13	0	2	66	6	0	2	45	Y	61	0.123	2
-Contig152_chr34_31794848_31795540	242	G	A	93.2	chr34	31795093	G	11	0	2	60	24	0	2	99	17	0	2	78	15	0	2	72	18	0	2	81	17	0	2	78	Y	123	2.780	0
-Contig28_chr34_41708848_41712034	1381	A	G	78.2	chr34	41710232	A	11	0	2	60	17	0	2	78	15	0	2	72	16	0	2	75	15	0	2	72	14	0	2	69	Y	236	0.234	0
-Contig85_chr34_42798284_42800584	1845	C	T	171.0	chr34	42800126	T	5	0	2	42	7	0	2	48	6	0	2	45	7	0	2	48	6	0	2	45	2	0	2	33	Y	5	2.787	0
-Contig47_chr35_3666773_3667898	348	G	T	124.0	chr35	3667121	G	9	0	2	54	20	0	2	87	18	0	2	81	15	0	2	72	12	0	2	63	14	0	2	69	Y	285	0.235	0
-Contig195_chr35_15722500_15722741	205	G	A	4.08	chr35	15722718	G	3	0	2	36	5	0	2	42	1	0	2	30	6	0	2	45	1	0	2	30	1	0	2	30	N	43	+99.	0
-Contig101_chr35_19513178_19513697	62	C	T	112.0	chr35	19513238	C	12	0	2	63	7	0	2	48	13	0	2	66	7	0	2	48	5	0	2	42	8	0	2	51	N	115	3.135	0
-Contig47_chr35_24382042_24382526	33	G	A	87.0	chr35	24382076	G	5	0	2	42	4	0	2	39	6	0	2	45	7	0	2	48	4	0	2	39	2	0	2	33	Y	71	+99.	0
-Contig77_chr35_24796947_24797172	65	A	G	52.1	chr35	24797009	A	7	0	2	48	5	0	2	42	8	0	2	51	6	0	2	45	12	0	2	63	10	0	2	57	N	11	1.401	3
-Contig74_chr35_25394343_25394813	303	A	T	221.0	chr35	25394646	G	23	0	2	96	15	0	2	72	25	0	2	105	7	7	1	49	18	0	2	81	16	0	2	75	Y	58	4.298	0
-Contig5_chr36_4562983_4563634	343	C	T	151.0	chr36	4563324	T	20	0	2	87	20	0	2	87	23	0	2	96	24	0	2	99	9	0	2	54	8	0	2	51	Y	40	1.169	0
-Contig75_chr36_7885319_7885588	53	G	A	25.7	chr36	7885372	G	10	0	2	57	8	0	2	51	13	0	2	66	7	0	2	48	4	0	2	39	7	0	2	48	N	7	2.653	0
-Contig184_chr36_18956191_18958552	187	A	G	11.5	chr36	18956371	G	10	0	2	57	11	0	2	60	21	0	2	90	14	0	2	69	7	0	2	48	4	0	2	39	N	278	1.434	2
-Contig12_chr36_21557176_21557828	513	T	A	159.0	chr36	21557695	A	11	0	2	60	14	0	2	69	21	0	2	90	12	0	2	63	15	0	2	72	11	0	2	60	Y	55	0.222	0
-Contig2_chr36_22436067_22436794	653	C	T	73.0	chr36	22436730	C	11	0	2	60	16	0	2	75	13	0	2	66	11	0	2	60	21	0	2	90	21	0	2	90	Y	9	0.534	0
-Contig133_chr36_32954045_32955409	136	A	G	116.0	chr36	32954182	A	16	0	2	75	15	0	2	72	20	0	2	87	11	0	2	60	18	0	2	81	13	0	2	66	Y	74	3.772	1
-Contig53_chr37_6665763_6665919	116	C	T	111.0	chr37	6665875	C	9	0	2	54	9	0	2	54	5	0	2	42	9	0	2	54	8	0	2	51	10	0	2	57	N	15	10.875	1
-Contig42_chr37_9589176_9591269	252	G	A	25.1	chr37	9589430	G	10	0	2	40	13	0	2	66	18	0	2	81	21	0	2	90	9	0	2	54	17	0	2	78	N	67	1.170	2
-Contig2_chr37_17134963_17136513	1140	A	C	158.0	chr37	17136092	A	14	0	2	69	24	0	2	99	17	0	2	78	16	0	2	75	15	0	2	75	13	0	2	66	Y	12	0.053	1
-Contig18_chr37_17147806_17149851	291	T	G	112.0	chr37	17148084	T	4	6	1	45	16	0	2	75	17	0	2	78	14	0	2	69	22	0	2	93	13	0	2	66	Y	41	4.442	0
-Contig64_chr37_17606895_17607534	565	C	T	30.2	chr37	17607439	A	9	0	2	54	16	0	2	75	20	0	2	87	14	0	2	69	16	0	2	75	10	0	2	57	N	20	1.622	0
-Contig126_chr37_21587881_21590621	373	G	T	132.0	chr37	21588256	G	11	0	2	60	11	0	2	60	23	0	2	96	12	0	2	63	8	0	2	51	18	0	2	81	Y	12	0.549	0
-Contig2_chr37_31197993_31198256	182	C	T	39.6	chr37	31198171	T	6	0	2	45	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	12	0	2	63	N	2	0.595	0
-Contig46_chr37_31852376_31853555	825	A	G	111.0	chr37	31853191	G	19	0	2	84	14	0	2	69	15	0	2	72	7	0	2	48	8	0	2	51	16	0	2	75	Y	17	0.128	1
-Contig7_chr38_12217200_12218387	1163	A	T	44.4	chr38	12218353	A	11	0	2	60	13	0	2	66	17	0	2	78	10	0	2	57	11	0	2	60	11	0	2	60	Y	67	+99.	0
-Contig15_chr38_12282020_12282253	150	C	T	156.0	chr38	12282164	A	17	0	2	78	11	0	2	60	19	0	2	84	14	0	2	69	5	0	2	42	14	0	2	69	Y	26	2.952	1
-Contig6_chr38_16185744_16186110	325	A	G	74.9	chr38	16186061	A	5	0	2	42	3	0	2	36	9	0	2	54	7	0	2	48	1	0	2	30	12	0	2	63	Y	40	+99.	0
-Contig265_chrX_2689247_2689484	114	C	G	103.0	chrX	2689356	C	11	0	2	60	9	0	2	54	13	0	2	66	16	0	2	75	14	0	2	69	10	0	2	57	N	2	9.232	1
-Contig122_chrX_6026976_6027327	330	C	T	79.4	chrX	6027303	C	3	0	2	36	3	0	2	36	3	0	2	36	4	0	2	39	3	0	2	36	6	0	2	45	Y	30	+99.	0
-Contig113_chrX_26287829_26288398	385	C	T	59.6	chrX	26288213	C	9	0	2	54	9	0	2	54	17	0	2	78	11	0	2	60	3	8	1	44	4	0	2	39	N	13	0.077	0
-Contig237_chrX_31256648_31257654	165	T	A	246.0	chrX	31256814	T	7	0	2	48	23	0	2	96	19	0	2	84	17	0	2	78	14	0	2	69	8	0	2	51	Y	37	1.481	0
-Contig90_chrX_57430715_57431566	548	C	T	116.0	chrX	57431266	T	9	0	2	54	18	0	2	81	13	0	2	66	14	0	2	69	8	0	2	54	7	0	2	48	Y	261	0.154	1
-Contig133_chrX_84833782_84834125	182	G	A	69.7	chrX	84833962	G	5	0	2	42	18	0	2	81	12	0	2	63	19	0	2	84	6	3	1	27	7	0	2	48	N	619	0.278	0
-Contig125_chrX_93319363_93320877	349	A	C	145.0	chrX	93319721	A	4	0	2	39	6	0	2	45	11	0	2	60	10	0	2	57	13	0	2	66	6	0	2	45	Y	59	1.686	0
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/pathway_image/pathway_image.png
Binary file test-data/test_out/pathway_image/pathway_image.png has changed
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/pca/PCA.pdf
Binary file test-data/test_out/pca/PCA.pdf has changed
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/pca/admix.gd_indivs
--- a/test-data/test_out/pca/admix.gd_indivs	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-PB1 M All_Individuals
-PB2 M All_Individuals
-PB3 M All_Individuals
-PB4 M All_Individuals
-PB6 M All_Individuals
-PB8 M All_Individuals
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/pca/admix.gd_snp
--- a/test-data/test_out/pca/admix.gd_snp	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,303 +0,0 @@
-  snp1 11 0.002 2000 A T
-  snp3 11 0.002 2000 A T
-  snp4 11 0.002 2000 A T
-  snp5 11 0.002 2000 A T
-  snp6 11 0.002 2000 A T
-  snp7 11 0.002 2000 A T
-  snp8 11 0.002 2000 A T
-  snp9 11 0.002 2000 A T
-  snp10 11 0.002 2000 A T
-  snp11 11 0.002 2000 A T
-  snp12 11 0.002 2000 A T
-  snp13 11 0.002 2000 A T
-  snp14 11 0.002 2000 A T
-  snp16 11 0.002 2000 A T
-  snp17 11 0.002 2000 A T
-  snp22 11 0.002 2000 A T
-  snp24 11 0.002 2000 A T
-  snp25 11 0.002 2000 A T
-  snp27 11 0.002 2000 A T
-  snp28 11 0.002 2000 A T
-  snp29 11 0.002 2000 A T
-  snp30 11 0.002 2000 A T
-  snp31 11 0.002 2000 A T
-  snp33 11 0.002 2000 A T
-  snp34 11 0.002 2000 A T
-  snp37 11 0.002 2000 A T
-  snp38 11 0.002 2000 A T
-  snp39 11 0.002 2000 A T
-  snp40 11 0.002 2000 A T
-  snp41 11 0.002 2000 A T
-  snp42 11 0.002 2000 A T
-  snp43 11 0.002 2000 A T
-  snp45 11 0.002 2000 A T
-  snp46 11 0.002 2000 A T
-  snp47 11 0.002 2000 A T
-  snp48 11 0.002 2000 A T
-  snp49 11 0.002 2000 A T
-  snp50 11 0.002 2000 A T
-  snp51 11 0.002 2000 A T
-  snp52 11 0.002 2000 A T
-  snp53 11 0.002 2000 A T
-  snp54 11 0.002 2000 A T
-  snp56 11 0.002 2000 A T
-  snp58 11 0.002 2000 A T
-  snp59 11 0.002 2000 A T
-  snp60 11 0.002 2000 A T
-  snp61 11 0.002 2000 A T
-  snp62 11 0.002 2000 A T
-  snp63 11 0.002 2000 A T
-  snp64 11 0.002 2000 A T
-  snp65 11 0.002 2000 A T
-  snp67 11 0.002 2000 A T
-  snp68 11 0.002 2000 A T
-  snp70 11 0.002 2000 A T
-  snp71 11 0.002 2000 A T
-  snp72 11 0.002 2000 A T
-  snp73 11 0.002 2000 A T
-  snp74 11 0.002 2000 A T
-  snp75 11 0.002 2000 A T
-  snp76 11 0.002 2000 A T
-  snp77 11 0.002 2000 A T
-  snp78 11 0.002 2000 A T
-  snp80 11 0.002 2000 A T
-  snp81 11 0.002 2000 A T
-  snp83 11 0.002 2000 A T
-  snp84 11 0.002 2000 A T
-  snp87 11 0.002 2000 A T
-  snp89 11 0.002 2000 A T
-  snp90 11 0.002 2000 A T
-  snp91 11 0.002 2000 A T
-  snp92 11 0.002 2000 A T
-  snp93 11 0.002 2000 A T
-  snp94 11 0.002 2000 A T
-  snp98 11 0.002 2000 A T
-  snp100 11 0.002 2000 A T
-  snp101 11 0.002 2000 A T
-  snp102 11 0.002 2000 A T
-  snp103 11 0.002 2000 A T
-  snp104 11 0.002 2000 A T
-  snp105 11 0.002 2000 A T
-  snp106 11 0.002 2000 A T
-  snp107 11 0.002 2000 A T
-  snp108 11 0.002 2000 A T
-  snp110 11 0.002 2000 A T
-  snp111 11 0.002 2000 A T
-  snp112 11 0.002 2000 A T
-  snp113 11 0.002 2000 A T
-  snp116 11 0.002 2000 A T
-  snp117 11 0.002 2000 A T
-  snp118 11 0.002 2000 A T
-  snp119 11 0.002 2000 A T
-  snp121 11 0.002 2000 A T
-  snp122 11 0.002 2000 A T
-  snp123 11 0.002 2000 A T
-  snp124 11 0.002 2000 A T
-  snp125 11 0.002 2000 A T
-  snp126 11 0.002 2000 A T
-  snp128 11 0.002 2000 A T
-  snp129 11 0.002 2000 A T
-  snp131 11 0.002 2000 A T
-  snp133 11 0.002 2000 A T
-  snp134 11 0.002 2000 A T
-  snp135 11 0.002 2000 A T
-  snp137 11 0.002 2000 A T
-  snp138 11 0.002 2000 A T
-  snp139 11 0.002 2000 A T
-  snp140 11 0.002 2000 A T
-  snp141 11 0.002 2000 A T
-  snp143 11 0.002 2000 A T
-  snp145 11 0.002 2000 A T
-  snp146 11 0.002 2000 A T
-  snp148 11 0.002 2000 A T
-  snp149 11 0.002 2000 A T
-  snp150 11 0.002 2000 A T
-  snp151 11 0.002 2000 A T
-  snp152 11 0.002 2000 A T
-  snp153 11 0.002 2000 A T
-  snp154 11 0.002 2000 A T
-  snp156 11 0.002 2000 A T
-  snp157 11 0.002 2000 A T
-  snp158 11 0.002 2000 A T
-  snp159 11 0.002 2000 A T
-  snp160 11 0.002 2000 A T
-  snp161 11 0.002 2000 A T
-  snp162 11 0.002 2000 A T
-  snp164 11 0.002 2000 A T
-  snp165 11 0.002 2000 A T
-  snp167 11 0.002 2000 A T
-  snp168 11 0.002 2000 A T
-  snp169 11 0.002 2000 A T
-  snp170 11 0.002 2000 A T
-  snp171 11 0.002 2000 A T
-  snp172 11 0.002 2000 A T
-  snp174 11 0.002 2000 A T
-  snp175 11 0.002 2000 A T
-  snp176 11 0.002 2000 A T
-  snp177 11 0.002 2000 A T
-  snp178 11 0.002 2000 A T
-  snp179 11 0.002 2000 A T
-  snp181 11 0.002 2000 A T
-  snp182 11 0.002 2000 A T
-  snp183 11 0.002 2000 A T
-  snp184 11 0.002 2000 A T
-  snp185 11 0.002 2000 A T
-  snp186 11 0.002 2000 A T
-  snp188 11 0.002 2000 A T
-  snp191 11 0.002 2000 A T
-  snp192 11 0.002 2000 A T
-  snp193 11 0.002 2000 A T
-  snp195 11 0.002 2000 A T
-  snp196 11 0.002 2000 A T
-  snp197 11 0.002 2000 A T
-  snp199 11 0.002 2000 A T
-  snp200 11 0.002 2000 A T
-  snp201 11 0.002 2000 A T
-  snp202 11 0.002 2000 A T
-  snp203 11 0.002 2000 A T
-  snp205 11 0.002 2000 A T
-  snp207 11 0.002 2000 A T
-  snp210 11 0.002 2000 A T
-  snp211 11 0.002 2000 A T
-  snp212 11 0.002 2000 A T
-  snp213 11 0.002 2000 A T
-  snp214 11 0.002 2000 A T
-  snp215 11 0.002 2000 A T
-  snp216 11 0.002 2000 A T
-  snp217 11 0.002 2000 A T
-  snp218 11 0.002 2000 A T
-  snp219 11 0.002 2000 A T
-  snp220 11 0.002 2000 A T
-  snp221 11 0.002 2000 A T
-  snp223 11 0.002 2000 A T
-  snp224 11 0.002 2000 A T
-  snp225 11 0.002 2000 A T
-  snp226 11 0.002 2000 A T
-  snp227 11 0.002 2000 A T
-  snp228 11 0.002 2000 A T
-  snp229 11 0.002 2000 A T
-  snp230 11 0.002 2000 A T
-  snp231 11 0.002 2000 A T
-  snp232 11 0.002 2000 A T
-  snp235 11 0.002 2000 A T
-  snp236 11 0.002 2000 A T
-  snp237 11 0.002 2000 A T
-  snp239 11 0.002 2000 A T
-  snp240 11 0.002 2000 A T
-  snp241 11 0.002 2000 A T
-  snp242 11 0.002 2000 A T
-  snp243 11 0.002 2000 A T
-  snp244 11 0.002 2000 A T
-  snp246 11 0.002 2000 A T
-  snp247 11 0.002 2000 A T
-  snp248 11 0.002 2000 A T
-  snp249 11 0.002 2000 A T
-  snp250 11 0.002 2000 A T
-  snp251 11 0.002 2000 A T
-  snp252 11 0.002 2000 A T
-  snp253 11 0.002 2000 A T
-  snp254 11 0.002 2000 A T
-  snp255 11 0.002 2000 A T
-  snp256 11 0.002 2000 A T
-  snp257 11 0.002 2000 A T
-  snp258 11 0.002 2000 A T
-  snp260 11 0.002 2000 A T
-  snp261 11 0.002 2000 A T
-  snp262 11 0.002 2000 A T
-  snp263 11 0.002 2000 A T
-  snp264 11 0.002 2000 A T
-  snp265 11 0.002 2000 A T
-  snp266 11 0.002 2000 A T
-  snp267 11 0.002 2000 A T
-  snp268 11 0.002 2000 A T
-  snp269 11 0.002 2000 A T
-  snp270 11 0.002 2000 A T
-  snp271 11 0.002 2000 A T
-  snp273 11 0.002 2000 A T
-  snp274 11 0.002 2000 A T
-  snp275 11 0.002 2000 A T
-  snp276 11 0.002 2000 A T
-  snp277 11 0.002 2000 A T
-  snp278 11 0.002 2000 A T
-  snp281 11 0.002 2000 A T
-  snp282 11 0.002 2000 A T
-  snp284 11 0.002 2000 A T
-  snp287 11 0.002 2000 A T
-  snp288 11 0.002 2000 A T
-  snp289 11 0.002 2000 A T
-  snp290 11 0.002 2000 A T
-  snp291 11 0.002 2000 A T
-  snp292 11 0.002 2000 A T
-  snp293 11 0.002 2000 A T
-  snp294 11 0.002 2000 A T
-  snp297 11 0.002 2000 A T
-  snp298 11 0.002 2000 A T
-  snp299 11 0.002 2000 A T
-  snp300 11 0.002 2000 A T
-  snp301 11 0.002 2000 A T
-  snp302 11 0.002 2000 A T
-  snp303 11 0.002 2000 A T
-  snp304 11 0.002 2000 A T
-  snp307 11 0.002 2000 A T
-  snp308 11 0.002 2000 A T
-  snp309 11 0.002 2000 A T
-  snp310 11 0.002 2000 A T
-  snp312 11 0.002 2000 A T
-  snp313 11 0.002 2000 A T
-  snp316 11 0.002 2000 A T
-  snp317 11 0.002 2000 A T
-  snp320 11 0.002 2000 A T
-  snp321 11 0.002 2000 A T
-  snp322 11 0.002 2000 A T
-  snp323 11 0.002 2000 A T
-  snp324 11 0.002 2000 A T
-  snp325 11 0.002 2000 A T
-  snp328 11 0.002 2000 A T
-  snp329 11 0.002 2000 A T
-  snp331 11 0.002 2000 A T
-  snp332 11 0.002 2000 A T
-  snp333 11 0.002 2000 A T
-  snp334 11 0.002 2000 A T
-  snp335 11 0.002 2000 A T
-  snp336 11 0.002 2000 A T
-  snp338 11 0.002 2000 A T
-  snp339 11 0.002 2000 A T
-  snp341 11 0.002 2000 A T
-  snp342 11 0.002 2000 A T
-  snp344 11 0.002 2000 A T
-  snp345 11 0.002 2000 A T
-  snp348 11 0.002 2000 A T
-  snp350 11 0.002 2000 A T
-  snp352 11 0.002 2000 A T
-  snp353 11 0.002 2000 A T
-  snp354 11 0.002 2000 A T
-  snp355 11 0.002 2000 A T
-  snp360 11 0.002 2000 A T
-  snp361 11 0.002 2000 A T
-  snp362 11 0.002 2000 A T
-  snp364 11 0.002 2000 A T
-  snp366 11 0.002 2000 A T
-  snp369 11 0.002 2000 A T
-  snp370 11 0.002 2000 A T
-  snp371 11 0.002 2000 A T
-  snp372 11 0.002 2000 A T
-  snp373 11 0.002 2000 A T
-  snp374 11 0.002 2000 A T
-  snp375 11 0.002 2000 A T
-  snp376 11 0.002 2000 A T
-  snp377 11 0.002 2000 A T
-  snp378 11 0.002 2000 A T
-  snp379 11 0.002 2000 A T
-  snp380 11 0.002 2000 A T
-  snp381 11 0.002 2000 A T
-  snp382 11 0.002 2000 A T
-  snp383 11 0.002 2000 A T
-  snp384 11 0.002 2000 A T
-  snp385 11 0.002 2000 A T
-  snp386 11 0.002 2000 A T
-  snp389 11 0.002 2000 A T
-  snp390 11 0.002 2000 A T
-  snp393 11 0.002 2000 A T
-  snp395 11 0.002 2000 A T
-  snp397 11 0.002 2000 A T
-  snp400 11 0.002 2000 A T
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/pca/admix.geno
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/pca/admix.geno	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,303 @@
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122222
+222222
+222222
+222222
+222222
+222222
+222222
+222212
+222222
+222222
+222221
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+212222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122211
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222022
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+221221
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222122
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+122222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222222
+222212
+222222
+222222
+222222
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/pca/coordinates.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/pca/coordinates.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,7 @@
+           #eigvals:     3.243     1.103 
+                 PB1     0.1887      0.4703  All_Individuals
+                 PB2     0.0398      0.0455  All_Individuals
+                 PB3     0.1647     -0.6945  All_Individuals
+                 PB4    -0.8954     -0.0220  All_Individuals
+                 PB6     0.1887      0.4703  All_Individuals
+                 PB8     0.3135     -0.2696  All_Individuals
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/pca/explained.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/pca/explained.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,4 @@
+Percentage explained by eigenvectors:
+1: 64.9%
+2: 22.1%
+3: 13.1%
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/pca/par.admix
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/pca/par.admix	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,7 @@
+genotypename: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.geno
+snpname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.snp
+indivname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.ind
+evecoutname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/coordinates.txt
+evaloutname: /scratch/galaxy/home/oocyte/galaxy_oocyte/database/files/000/dataset_260_files/admix.eval
+altnormstyle: NO
+numoutevec: 2
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/pca/pca.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/pca/pca.html	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,37 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>PCA Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 02:19:05 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="PCA.pdf">PCA.pdf</a></li>
+            <li><a href="coordinates.txt">coordinates.txt</a></li>
+            <li><a href="explained.txt">explained.txt</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li><a href="par.admix">par.admix</a></li>
+            <li><a href="admix.geno">admix.geno</a></li>
+            <li><a href="admix.snp">admix.snp</a></li>
+            <li><a href="admix.ind">admix.ind</a></li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Stats<p/><pre>
+
+</pre>
+      </div>
+    </div>
+  </body>
+</html>
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/phylogenetic_tree/distance_matrix.phylip
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/phylogenetic_tree/distance_matrix.phylip	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,8 @@
+7
+  canFam2 0.0000 0.3205 0.3085 0.3193 0.3101 0.3138 0.3170
+      PB1 0.3205 0.0000 0.0103 0.0100 0.0130 0.0119 0.0112
+      PB2 0.3085 0.0103 0.0000 0.0033 0.0062 0.0094 0.0062
+      PB3 0.3193 0.0100 0.0033 0.0000 0.0081 0.0091 0.0054
+      PB4 0.3101 0.0130 0.0062 0.0081 0.0000 0.0099 0.0088
+      PB6 0.3138 0.0119 0.0094 0.0091 0.0099 0.0000 0.0079
+      PB8 0.3170 0.0112 0.0062 0.0054 0.0088 0.0079 0.0000
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/phylogenetic_tree/informative_snps.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/phylogenetic_tree/informative_snps.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,7 @@
+  canFam2        0      338      339      350      345      342      344
+      PB1      338        0      338      344      338      336      339
+      PB2      339      338        0      345      338      339      338
+      PB3      350      344      345        0      347      342      347
+      PB4      345      338      338      347        0      337      341
+      PB6      342      336      339      342      337        0      343
+      PB8      344      339      338      347      341      343        0
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/phylogenetic_tree/mega_distance_matrix.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/phylogenetic_tree/mega_distance_matrix.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,27 @@
+#mega
+!Title: Galaxy;
+!Format DataType=Distance DataFormat=LowerLeft NTaxa=7;
+
+[1] #canFam2
+[2] #PB1
+[3] #PB2
+[4] #PB3
+[5] #PB4
+[6] #PB6
+[7] #PB8
+
+
+
+[   1   2   3   4   5   6   7 ]
+[1]   
+[2]  0.3205  
+[3]  0.3085 0.0103  
+[4]  0.3193 0.0100 0.0033  
+[5]  0.3101 0.0130 0.0062 0.0081  
+[6]  0.3138 0.0119 0.0094 0.0091 0.0099  
+[7]  0.3170 0.0112 0.0062 0.0054 0.0088 0.0079  
+
+
+
+
+
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/phylogenetic_tree/phylogenetic_tree.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/phylogenetic_tree/phylogenetic_tree.html	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,49 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>Phylogenetic tree Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 01:57:44 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="tree.pdf">tree.pdf</a></li>
+            <li><a href="phylogenetic_tree.newick">phylogenetic tree (newick)</a></li>
+            <li><a href="distance_matrix.phylip">Phylip distance matrix</a></li>
+            <li><a href="mega_distance_matrix.txt">Mega distance matrix</a></li>
+            <li><a href="informative_snps.txt">informative SNPs</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Minimum coverage: 3</li>
+            <li>Minimum quality: 30</li>
+            <li>Include reference sequence: yes</li>
+            <li>Data source: sequence coverage</li>
+            <li>Branch type: square</li>
+            <li>Draw branches to scale: yes</li>
+            <li>Show branch lengths: yes</li>
+            <li>Tree layout: horizontal</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Individuals
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+<li>PB3</li>
+<li>PB4</li>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+      </div>
+    </div>
+  </body>
+</html>
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/phylogenetic_tree/phylogenetic_tree.newick
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/phylogenetic_tree/phylogenetic_tree.newick	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,16 @@
+(
+(
+(
+PB4:0.00174,
+canFam2:0.30836)
+:0.00188,
+PB2:0.00042)
+:0.00210,
+(
+PB6:0.00470,
+PB1:0.00720)
+:0.00035,
+(
+PB8:0.00288,
+PB3:0.00252)
+:0.00055);
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/phylogenetic_tree/tree.pdf
Binary file test-data/test_out/phylogenetic_tree/tree.pdf has changed
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/population_structure/graphical.pdf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/population_structure/graphical.pdf	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,147 @@
+%PDF-1.4
+%���ρ�\r
+1 0 obj
+<<
+/CreationDate (D:20120403142055)
+/ModDate (D:20120403142055)
+/Title (R Graphics Output)
+/Producer (R 2.11.0)
+/Creator (R)
+>>
+endobj
+2 0 obj
+<<
+/Type /Catalog
+/Pages 3 0 R
+>>
+endobj
+5 0 obj
+<<
+/Type /Page
+/Parent 3 0 R
+/Contents 6 0 R
+/Resources 4 0 R
+>>
+endobj
+6 0 obj
+<<
+/Length 7 0 R
+>>
+stream
+1 J 1 j q
+Q q
+1.000 0.000 0.000 rg
+74.40 74.27 54.86 0.00 re f
+0.000 1.000 1.000 rg
+74.40 74.27 54.86 82.69 re f
+1.000 0.000 0.000 rg
+140.23 74.27 54.86 82.69 re f
+0.000 1.000 1.000 rg
+140.23 156.96 54.86 0.00 re f
+1.000 0.000 0.000 rg
+206.06 74.27 54.86 82.69 re f
+0.000 1.000 1.000 rg
+206.06 156.96 54.86 0.00 re f
+1.000 0.000 0.000 rg
+271.89 74.27 54.86 0.00 re f
+0.000 1.000 1.000 rg
+271.89 74.27 54.86 82.69 re f
+1.000 0.000 0.000 rg
+337.71 74.27 54.86 82.69 re f
+0.000 1.000 1.000 rg
+337.71 156.96 54.86 0.00 re f
+1.000 0.000 0.000 rg
+403.54 74.27 54.86 82.69 re f
+0.000 1.000 1.000 rg
+403.54 156.96 54.86 0.00 re f
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 12.00 0.00 -0.00 12.00 236.05 18.72 Tm (Individual #) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 12.96 91.68 Tm [(Ancestr) -30 (y)] TJ
+ET
+Q q
+0.000 0.000 0.000 RG
+0.75 w
+[] 0 d
+1 J
+1 j
+10.00 M
+59.04 74.27 m 59.04 156.96 l S
+59.04 74.27 m 51.84 74.27 l S
+59.04 90.81 m 51.84 90.81 l S
+59.04 107.34 m 51.84 107.34 l S
+59.04 123.88 m 51.84 123.88 l S
+59.04 140.42 m 51.84 140.42 l S
+59.04 156.96 m 51.84 156.96 l S
+BT
+0.000 0.000 0.000 rg
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 65.93 Tm (0.0) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 99.00 Tm (0.4) Tj
+ET
+BT
+/F2 1 Tf 0.00 12.00 -12.00 0.00 41.76 132.08 Tm (0.8) Tj
+ET
+Q
+endstream
+endobj
+7 0 obj
+1275
+endobj
+3 0 obj
+<<
+/Type /Pages
+/Kids [
+5 0 R
+]
+/Count 1
+/MediaBox [0 0 504 216]
+>>
+endobj
+4 0 obj
+<<
+/ProcSet [/PDF /Text]
+/Font <</F2 9 0 R >>
+/ExtGState << >>
+>>
+endobj
+8 0 obj
+<<
+/Type /Encoding
+/BaseEncoding /WinAnsiEncoding
+/Differences [ 45/minus 96/quoteleft
+144/dotlessi /grave /acute /circumflex /tilde /macron /breve /dotaccent
+/dieresis /.notdef /ring /cedilla /.notdef /hungarumlaut /ogonek /caron /space]
+>>
+endobj
+9 0 obj <<
+/Type /Font
+/Subtype /Type1
+/Name /F2
+/BaseFont /Helvetica
+/Encoding 8 0 R
+>> endobj
+xref
+0 10
+0000000000 65535 f 
+0000000021 00000 n 
+0000000164 00000 n 
+0000001641 00000 n 
+0000001724 00000 n 
+0000000213 00000 n 
+0000000293 00000 n 
+0000001621 00000 n 
+0000001805 00000 n 
+0000002062 00000 n 
+trailer
+<<
+/Size 10
+/Info 1 0 R
+/Root 2 0 R
+>>
+startxref
+2158
+%%EOF
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/population_structure/numeric.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/population_structure/numeric.txt	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,6 @@
+0.000010 0.999990
+0.999990 0.000010
+0.999990 0.000010
+0.000010 0.999990
+0.999990 0.000010
+0.999990 0.000010
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/population_structure/population_structure.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/population_structure/population_structure.html	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,44 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>Population structure Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 02:20:55 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="graphical.pdf">graphical.pdf</a></li>
+            <li><a href="numeric.txt">numeric.txt</a></li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Number of populations: 2</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Populations
+<ul>
+<li>
+All Individuals
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+<li>PB3</li>
+<li>PB4</li>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+</li>
+</ul>
+      </div>
+    </div>
+  </body>
+</html>
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/prepare_population_structure/admix.map
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/prepare_population_structure/admix.map	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,303 @@
+1 snp1 0 2
+1 snp3 0 4
+1 snp4 0 5
+1 snp5 0 6
+1 snp6 0 7
+1 snp7 0 8
+1 snp8 0 9
+1 snp9 0 10
+1 snp10 0 11
+1 snp11 0 12
+1 snp12 0 13
+1 snp13 0 14
+1 snp14 0 15
+1 snp16 0 17
+1 snp17 0 18
+1 snp22 0 23
+1 snp24 0 25
+1 snp25 0 26
+1 snp27 0 28
+1 snp28 0 29
+1 snp29 0 30
+1 snp30 0 31
+1 snp31 0 32
+1 snp33 0 34
+1 snp34 0 35
+1 snp37 0 38
+1 snp38 0 39
+1 snp39 0 40
+1 snp40 0 41
+1 snp41 0 42
+1 snp42 0 43
+1 snp43 0 44
+1 snp45 0 46
+1 snp46 0 47
+1 snp47 0 48
+1 snp48 0 49
+1 snp49 0 50
+1 snp50 0 51
+1 snp51 0 52
+1 snp52 0 53
+1 snp53 0 54
+1 snp54 0 55
+1 snp56 0 57
+1 snp58 0 59
+1 snp59 0 60
+1 snp60 0 61
+1 snp61 0 62
+1 snp62 0 63
+1 snp63 0 64
+1 snp64 0 65
+1 snp65 0 66
+1 snp67 0 68
+1 snp68 0 69
+1 snp70 0 71
+1 snp71 0 72
+1 snp72 0 73
+1 snp73 0 74
+1 snp74 0 75
+1 snp75 0 76
+1 snp76 0 77
+1 snp77 0 78
+1 snp78 0 79
+1 snp80 0 81
+1 snp81 0 82
+1 snp83 0 84
+1 snp84 0 85
+1 snp87 0 88
+1 snp89 0 90
+1 snp90 0 91
+1 snp91 0 92
+1 snp92 0 93
+1 snp93 0 94
+1 snp94 0 95
+1 snp98 0 99
+1 snp100 0 101
+1 snp101 0 102
+1 snp102 0 103
+1 snp103 0 104
+1 snp104 0 105
+1 snp105 0 106
+1 snp106 0 107
+1 snp107 0 108
+1 snp108 0 109
+1 snp110 0 111
+1 snp111 0 112
+1 snp112 0 113
+1 snp113 0 114
+1 snp116 0 117
+1 snp117 0 118
+1 snp118 0 119
+1 snp119 0 120
+1 snp121 0 122
+1 snp122 0 123
+1 snp123 0 124
+1 snp124 0 125
+1 snp125 0 126
+1 snp126 0 127
+1 snp128 0 129
+1 snp129 0 130
+1 snp131 0 132
+1 snp133 0 134
+1 snp134 0 135
+1 snp135 0 136
+1 snp137 0 138
+1 snp138 0 139
+1 snp139 0 140
+1 snp140 0 141
+1 snp141 0 142
+1 snp143 0 144
+1 snp145 0 146
+1 snp146 0 147
+1 snp148 0 149
+1 snp149 0 150
+1 snp150 0 151
+1 snp151 0 152
+1 snp152 0 153
+1 snp153 0 154
+1 snp154 0 155
+1 snp156 0 157
+1 snp157 0 158
+1 snp158 0 159
+1 snp159 0 160
+1 snp160 0 161
+1 snp161 0 162
+1 snp162 0 163
+1 snp164 0 165
+1 snp165 0 166
+1 snp167 0 168
+1 snp168 0 169
+1 snp169 0 170
+1 snp170 0 171
+1 snp171 0 172
+1 snp172 0 173
+1 snp174 0 175
+1 snp175 0 176
+1 snp176 0 177
+1 snp177 0 178
+1 snp178 0 179
+1 snp179 0 180
+1 snp181 0 182
+1 snp182 0 183
+1 snp183 0 184
+1 snp184 0 185
+1 snp185 0 186
+1 snp186 0 187
+1 snp188 0 189
+1 snp191 0 192
+1 snp192 0 193
+1 snp193 0 194
+1 snp195 0 196
+1 snp196 0 197
+1 snp197 0 198
+1 snp199 0 200
+1 snp200 0 201
+1 snp201 0 202
+1 snp202 0 203
+1 snp203 0 204
+1 snp205 0 206
+1 snp207 0 208
+1 snp210 0 211
+1 snp211 0 212
+1 snp212 0 213
+1 snp213 0 214
+1 snp214 0 215
+1 snp215 0 216
+1 snp216 0 217
+1 snp217 0 218
+1 snp218 0 219
+1 snp219 0 220
+1 snp220 0 221
+1 snp221 0 222
+1 snp223 0 224
+1 snp224 0 225
+1 snp225 0 226
+1 snp226 0 227
+1 snp227 0 228
+1 snp228 0 229
+1 snp229 0 230
+1 snp230 0 231
+1 snp231 0 232
+1 snp232 0 233
+1 snp235 0 236
+1 snp236 0 237
+1 snp237 0 238
+1 snp239 0 240
+1 snp240 0 241
+1 snp241 0 242
+1 snp242 0 243
+1 snp243 0 244
+1 snp244 0 245
+1 snp246 0 247
+1 snp247 0 248
+1 snp248 0 249
+1 snp249 0 250
+1 snp250 0 251
+1 snp251 0 252
+1 snp252 0 253
+1 snp253 0 254
+1 snp254 0 255
+1 snp255 0 256
+1 snp256 0 257
+1 snp257 0 258
+1 snp258 0 259
+1 snp260 0 261
+1 snp261 0 262
+1 snp262 0 263
+1 snp263 0 264
+1 snp264 0 265
+1 snp265 0 266
+1 snp266 0 267
+1 snp267 0 268
+1 snp268 0 269
+1 snp269 0 270
+1 snp270 0 271
+1 snp271 0 272
+1 snp273 0 274
+1 snp274 0 275
+1 snp275 0 276
+1 snp276 0 277
+1 snp277 0 278
+1 snp278 0 279
+1 snp281 0 282
+1 snp282 0 283
+1 snp284 0 285
+1 snp287 0 288
+1 snp288 0 289
+1 snp289 0 290
+1 snp290 0 291
+1 snp291 0 292
+1 snp292 0 293
+1 snp293 0 294
+1 snp294 0 295
+1 snp297 0 298
+1 snp298 0 299
+1 snp299 0 300
+1 snp300 0 301
+1 snp301 0 302
+1 snp302 0 303
+1 snp303 0 304
+1 snp304 0 305
+1 snp307 0 308
+1 snp308 0 309
+1 snp309 0 310
+1 snp310 0 311
+1 snp312 0 313
+1 snp313 0 314
+1 snp316 0 317
+1 snp317 0 318
+1 snp320 0 321
+1 snp321 0 322
+1 snp322 0 323
+1 snp323 0 324
+1 snp324 0 325
+1 snp325 0 326
+1 snp328 0 329
+1 snp329 0 330
+1 snp331 0 332
+1 snp332 0 333
+1 snp333 0 334
+1 snp334 0 335
+1 snp335 0 336
+1 snp336 0 337
+1 snp338 0 339
+1 snp339 0 340
+1 snp341 0 342
+1 snp342 0 343
+1 snp344 0 345
+1 snp345 0 346
+1 snp348 0 349
+1 snp350 0 351
+1 snp352 0 353
+1 snp353 0 354
+1 snp354 0 355
+1 snp355 0 356
+1 snp360 0 361
+1 snp361 0 362
+1 snp362 0 363
+1 snp364 0 365
+1 snp366 0 367
+1 snp369 0 370
+1 snp370 0 371
+1 snp371 0 372
+1 snp372 0 373
+1 snp373 0 374
+1 snp374 0 375
+1 snp375 0 376
+1 snp376 0 377
+1 snp377 0 378
+1 snp378 0 379
+1 snp379 0 380
+1 snp380 0 381
+1 snp381 0 382
+1 snp382 0 383
+1 snp383 0 384
+1 snp384 0 385
+1 snp385 0 386
+1 snp386 0 387
+1 snp389 0 390
+1 snp390 0 391
+1 snp393 0 394
+1 snp395 0 396
+1 snp397 0 398
+1 snp400 0 401
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/prepare_population_structure/admix.ped
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/prepare_population_structure/admix.ped	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,6 @@
+PB1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+PB2 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+PB3 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+PB4 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
+PB6 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1
+PB8 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/prepare_population_structure/prepare_population_structure.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/prepare_population_structure/prepare_population_structure.html	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,47 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
+    <title>Prepare to look for population structure Galaxy Composite Dataset</title>
+  </head>
+  <body>
+    <div class="document">
+      Output completed: 2012-04-03 02:17:44 PM
+      <p/>
+      <div id="gd_outputs">
+        Outputs
+        <ul>
+            <li><a href="admix.ped">admix.ped</a></li>
+            <li><a href="admix.map">admix.map</a></li>
+            <li>Using 303 of 400 SNPs</li>
+        </ul>
+      </div>
+      <div id="gd_inputs">
+        Inputs
+        <ul>
+            <li>Minimum reads covering a SNP, per individual: 3</li>
+            <li>Minimum quality value, per individual: 30</li>
+            <li>Minimum spacing between SNPs on the same scaffold: 0</li>
+        </ul>
+      </div>
+      <div id="gd_misc">
+        Populations
+<ul>
+<li>
+All Individuals
+<ol>
+<li>PB1</li>
+<li>PB2</li>
+<li>PB3</li>
+<li>PB4</li>
+<li>PB6</li>
+<li>PB8</li>
+</ol>
+</li>
+</ul>
+      </div>
+    </div>
+  </body>
+</html>
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/rank_pathways/rank_pathways.tabular
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_out/rank_pathways/rank_pathways.tabular	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,240 @@
+3	0.25	1	cfa03450=Non-homologous end-joining
+1	0.25	1	cfa00750=Vitamin B6 metabolism
+2	0.2	3	cfa00290=Valine, leucine and isoleucine biosynthesis
+3	0.18	4	cfa00770=Pantothenate and CoA biosynthesis
+5	0.17	5	cfa05310=Asthma
+4	0.16	6	cfa00760=Nicotinate and nicotinamide metabolism
+2	0.12	7	cfa00450=Selenocompound metabolism
+4	0.11	8	cfa05330=Allograft rejection
+5	0.098	9	cfa04672=Intestinal immune network for IgA production
+4	0.098	9	cfa02010=ABC transporters
+2	0.095	11	cfa03430=Mismatch repair
+4	0.089	12	cfa05320=Autoimmune thyroid disease
+4	0.089	12	cfa00280=Valine, leucine and isoleucine degradation
+3	0.088	14	cfa03410=Base excision repair
+3	0.088	14	cfa03030=DNA replication
+3	0.088	14	cfa00565=Ether lipid metabolism
+6	0.087	17	cfa05140=Leishmaniasis
+2	0.087	17	cfa04977=Vitamin digestion and absorption
+1	0.083	19	cfa00740=Riboflavin metabolism
+4	0.08	20	cfa05150=Staphylococcus aureus infection
+2	0.08	20	cfa03060=Protein export
+3	0.079	22	cfa05340=Primary immunodeficiency
+3	0.079	22	cfa05143=African trypanosomiasis
+6	0.078	24	cfa00564=Glycerophospholipid metabolism
+2	0.077	25	cfa00410=beta-Alanine metabolism
+2	0.071	26	cfa05332=Graft-versus-host disease
+5	0.069	27	cfa03320=PPAR signaling pathway
+6	0.066	28	cfa05323=Rheumatoid arthritis
+5	0.063	29	cfa04664=Fc epsilon RI signaling pathway
+3	0.062	30	cfa00561=Glycerolipid metabolism
+2	0.062	30	cfa00350=Tyrosine metabolism
+2	0.062	30	cfa00020=Citrate cycle (TCA cycle)
+2	0.059	33	cfa00260=Glycine, serine and threonine metabolism
+1	0.059	33	cfa04614=Renin-angiotensin system
+1	0.059	33	cfa00360=Phenylalanine metabolism
+9	0.058	36	cfa04145=Phagosome
+3	0.058	36	cfa05213=Endometrial cancer
+4	0.057	38	cfa05416=Viral myocarditis
+2	0.057	38	cfa00500=Starch and sucrose metabolism
+2	0.056	40	cfa04130=SNARE interactions in vesicular transport
+1	0.056	40	cfa00592=alpha-Linolenic acid metabolism
+1	0.053	42	cfa04964=Proximal tubule bicarbonate reclamation
+1	0.053	42	cfa00630=Glyoxylate and dicarboxylate metabolism
+3	0.052	44	cfa04621=NOD-like receptor signaling pathway
+2	0.05	45	cfa05219=Bladder cancer
+2	0.05	45	cfa04940=Type I diabetes mellitus
+2	0.05	45	cfa00380=Tryptophan metabolism
+2	0.047	48	cfa03420=Nucleotide excision repair
+3	0.045	49	cfa04920=Adipocytokine signaling pathway
+3	0.045	49	cfa00970=Aminoacyl-tRNA biosynthesis
+2	0.045	49	cfa00071=Fatty acid metabolism
+1	0.045	49	cfa00591=Linoleic acid metabolism
+1	0.045	49	cfa00340=Histidine metabolism
+4	0.043	54	cfa04972=Pancreatic secretion
+2	0.043	54	cfa03022=Basal transcription factors
+2	0.043	54	cfa00982=Drug metabolism - cytochrome P450
+3	0.042	57	cfa05218=Melanoma
+3	0.042	57	cfa05211=Renal cell carcinoma
+4	0.041	59	cfa05414=Dilated cardiomyopathy
+2	0.04	60	cfa00590=Arachidonic acid metabolism
+1	0.04	60	cfa04320=Dorso-ventral axis formation
+3	0.039	62	cfa04662=B cell receptor signaling pathway
+2	0.039	62	cfa00310=Lysine degradation
+3	0.038	64	cfa04512=ECM-receptor interaction
+2	0.038	64	cfa05144=Malaria
+2	0.038	64	cfa00270=Cysteine and methionine metabolism
+1	0.038	64	cfa03440=Homologous recombination
+1	0.038	64	cfa00052=Galactose metabolism
+8	0.037	69	cfa04810=Regulation of actin cytoskeleton
+4	0.037	69	cfa05146=Amoebiasis
+4	0.037	69	cfa04666=Fc gamma R-mediated phagocytosis
+2	0.037	69	cfa05223=Non-small cell lung cancer
+6	0.036	73	cfa05168=Herpes simplex infection
+6	0.036	73	cfa05152=Tuberculosis
+3	0.036	73	cfa04640=Hematopoietic cell lineage
+7	0.034	76	cfa04510=Focal adhesion
+3	0.034	76	cfa00240=Pyrimidine metabolism
+3	0.033	78	cfa03008=Ribosome biogenesis in eukaryotes
+1	0.033	78	cfa00983=Drug metabolism - other enzymes
+2	0.032	80	cfa04976=Bile secretion
+6	0.031	81	cfa04060=Cytokine-cytokine receptor interaction
+4	0.031	81	cfa04110=Cell cycle
+1	0.031	81	cfa00250=Alanine, aspartate and glutamate metabolism
+4	0.029	84	cfa05145=Toxoplasmosis
+3	0.029	84	cfa04650=Natural killer cell mediated cytotoxicity
+2	0.029	84	cfa05214=Glioma
+4	0.028	87	cfa05162=Measles
+2	0.028	87	cfa05412=Arrhythmogenic right ventricular cardiomyopathy (ARVC)
+7	0.027	89	cfa05166=HTLV-I infection
+4	0.027	89	cfa05322=Systemic lupus erythematosus
+2	0.027	89	cfa05212=Pancreatic cancer
+2	0.026	92	cfa04146=Peroxisome
+2	0.026	92	cfa04070=Phosphatidylinositol signaling system
+1	0.026	92	cfa04978=Mineral absorption
+2	0.025	95	cfa05133=Pertussis
+2	0.025	95	cfa04612=Antigen processing and presentation
+2	0.025	95	cfa04350=TGF-beta signaling pathway
+1	0.025	95	cfa00830=Retinol metabolism
+3	0.024	99	cfa04514=Cell adhesion molecules (CAMs)
+2	0.024	99	cfa05410=Hypertrophic cardiomyopathy (HCM)
+2	0.024	99	cfa04012=ErbB signaling pathway
+1	0.024	99	cfa00980=Metabolism of xenobiotics by cytochrome P450
+1	0.024	99	cfa00640=Propanoate metabolism
+3	0.023	104	cfa04360=Axon guidance
+2	0.023	104	cfa04620=Toll-like receptor signaling pathway
+1	0.023	104	cfa04975=Fat digestion and absorption
+1	0.023	104	cfa04330=Notch signaling pathway
+7	0.022	108	cfa05200=Pathways in cancer
+3	0.022	108	cfa04910=Insulin signaling pathway
+2	0.022	108	cfa05215=Prostate cancer
+1	0.022	108	cfa03460=Fanconi anemia pathway
+24	0.021	112	cfa01100=Metabolic pathways
+3	0.021	112	cfa04630=Jak-STAT signaling pathway
+1	0.021	112	cfa00480=Glutathione metabolism
+3	0.020	115	cfa00230=Purine metabolism
+2	0.020	115	cfa04540=Gap junction
+1	0.02	115	cfa00620=Pyruvate metabolism
+2	0.019	118	cfa04912=GnRH signaling pathway
+2	0.018	119	cfa05142=Chagas disease (American trypanosomiasis)
+2	0.018	119	cfa04380=Osteoclast differentiation
+1	0.018	119	cfa05221=Acute myeloid leukemia
+1	0.018	119	cfa00330=Arginine and proline metabolism
+3	0.017	123	cfa05164=Influenza A
+2	0.017	123	cfa04270=Vascular smooth muscle contraction
+2	0.017	123	cfa04114=Oocyte meiosis
+3	0.016	126	cfa04141=Protein processing in endoplasmic reticulum
+3	0.016	126	cfa04020=Calcium signaling pathway
+2	0.016	126	cfa05160=Hepatitis C
+2	0.016	126	cfa04670=Leukocyte transendothelial migration
+1	0.016	126	cfa05210=Colorectal cancer
+1	0.016	126	cfa04610=Complement and coagulation cascades
+1	0.016	126	cfa04150=mTOR signaling pathway
+4	0.015	133	cfa04010=MAPK signaling pathway
+1	0.015	133	cfa04974=Protein digestion and absorption
+1	0.015	133	cfa04730=Long-term depression
+1	0.015	133	cfa04115=p53 signaling pathway
+1	0.014	137	cfa05220=Chronic myeloid leukemia
+1	0.014	137	cfa04971=Gastric acid secretion
+1	0.014	137	cfa04720=Long-term potentiation
+1	0.014	137	cfa04370=VEGF signaling pathway
+1	0.014	137	cfa04260=Cardiac muscle contraction
+1	0.014	137	cfa03018=RNA degradation
+2	0.013	143	cfa00010=Glycolysis / Gluconeogenesis
+1	0.013	143	cfa04970=Salivary secretion
+1	0.013	143	cfa04520=Adherens junction
+2	0.012	146	cfa04062=Chemokine signaling pathway
+1	0.012	146	cfa05134=Legionellosis
+1	0.012	146	cfa05132=Salmonella infection
+1	0.012	146	cfa04727=GABAergic synapse
+1	0.012	146	cfa04210=Apoptosis
+1	0.011	151	cfa03015=mRNA surveillance pathway
+1	0.010	152	cfa04914=Progesterone-mediated oocyte maturation
+1	0.0098	153	cfa04916=Melanogenesis
+2	0.0095	154	cfa04144=Endocytosis
+1	0.0087	155	cfa04142=Lysosome
+1	0.0086	156	cfa04660=T cell receptor signaling pathway
+1	0.0082	157	cfa04724=Glutamatergic synapse
+2	0.0081	158	cfa04080=Neuroactive ligand-receptor interaction
+1	0.0079	159	cfa04728=Dopaminergic synapse
+2	0.0074	160	cfa05010=Alzheimer's disease
+1	0.0074	160	cfa04722=Neurotrophin signaling pathway
+1	0.0074	160	cfa04120=Ubiquitin mediated proteolysis
+1	0.0068	163	cfa00190=Oxidative phosphorylation
+1	0.0067	164	cfa05012=Parkinson's disease
+1	0.0057	165	cfa03013=RNA transport
+1	0.0056	166	cfa03040=Spliceosome
+1	0.0049	167	cfa05016=Huntington's disease
+1	0.0023	168	cfa04740=Olfactory transduction
+0	0	169	cfa05222=Small cell lung cancer
+0	0	169	cfa05217=Basal cell carcinoma
+0	0	169	cfa05216=Thyroid cancer
+0	0	169	cfa05100=Bacterial invasion of epithelial cells
+0	0	169	cfa05020=Prion diseases
+0	0	169	cfa05014=Amyotrophic lateral sclerosis (ALS)
+0	0	169	cfa04973=Carbohydrate digestion and absorption
+0	0	169	cfa04966=Collecting duct acid secretion
+0	0	169	cfa04962=Vasopressin-regulated water reabsorption
+0	0	169	cfa04961=Endocrine and other factor-regulated calcium reabsorption
+0	0	169	cfa04960=Aldosterone-regulated sodium reabsorption
+0	0	169	cfa04950=Maturity onset diabetes of the young
+0	0	169	cfa04930=Type II diabetes mellitus
+0	0	169	cfa04744=Phototransduction
+0	0	169	cfa04742=Taste transduction
+0	0	169	cfa04725=Cholinergic synapse
+0	0	169	cfa04721=Synaptic vesicle cycle
+0	0	169	cfa04710=Circadian rhythm - mammal
+0	0	169	cfa04623=Cytosolic DNA-sensing pathway
+0	0	169	cfa04622=RIG-I-like receptor signaling pathway
+0	0	169	cfa04530=Tight junction
+0	0	169	cfa04340=Hedgehog signaling pathway
+0	0	169	cfa04310=Wnt signaling pathway
+0	0	169	cfa04140=Regulation of autophagy
+0	0	169	cfa04122=Sulfur relay system
+0	0	169	cfa03050=Proteasome
+0	0	169	cfa03020=RNA polymerase
+0	0	169	cfa03010=Ribosome
+0	0	169	cfa01040=Biosynthesis of unsaturated fatty acids
+0	0	169	cfa00920=Sulfur metabolism
+0	0	169	cfa00910=Nitrogen metabolism
+0	0	169	cfa00900=Terpenoid backbone biosynthesis
+0	0	169	cfa00860=Porphyrin and chlorophyll metabolism
+0	0	169	cfa00790=Folate biosynthesis
+0	0	169	cfa00785=Lipoic acid metabolism
+0	0	169	cfa00780=Biotin metabolism
+0	0	169	cfa00730=Thiamine metabolism
+0	0	169	cfa00670=One carbon pool by folate
+0	0	169	cfa00650=Butanoate metabolism
+0	0	169	cfa00604=Glycosphingolipid biosynthesis - ganglio series
+0	0	169	cfa00603=Glycosphingolipid biosynthesis - globo series
+0	0	169	cfa00601=Glycosphingolipid biosynthesis - lacto and neolacto series
+0	0	169	cfa00600=Sphingolipid metabolism
+0	0	169	cfa00563=Glycosylphosphatidylinositol(GPI)-anchor biosynthesis
+0	0	169	cfa00562=Inositol phosphate metabolism
+0	0	169	cfa00534=Glycosaminoglycan biosynthesis - heparan sulfate
+0	0	169	cfa00533=Glycosaminoglycan biosynthesis - keratan sulfate
+0	0	169	cfa00532=Glycosaminoglycan biosynthesis - chondroitin sulfate
+0	0	169	cfa00531=Glycosaminoglycan degradation
+0	0	169	cfa00520=Amino sugar and nucleotide sugar metabolism
+0	0	169	cfa00514=Other types of O-glycan biosynthesis
+0	0	169	cfa00512=Mucin type O-Glycan biosynthesis
+0	0	169	cfa00511=Other glycan degradation
+0	0	169	cfa00510=N-Glycan biosynthesis
+0	0	169	cfa00472=D-Arginine and D-ornithine metabolism
+0	0	169	cfa00471=D-Glutamine and D-glutamate metabolism
+0	0	169	cfa00460=Cyanoamino acid metabolism
+0	0	169	cfa00430=Taurine and hypotaurine metabolism
+0	0	169	cfa00400=Phenylalanine, tyrosine and tryptophan biosynthesis
+0	0	169	cfa00300=Lysine biosynthesis
+0	0	169	cfa00232=Caffeine metabolism
+0	0	169	cfa00140=Steroid hormone biosynthesis
+0	0	169	cfa00130=Ubiquinone and other terpenoid-quinone biosynthesis
+0	0	169	cfa00120=Primary bile acid biosynthesis
+0	0	169	cfa00100=Steroid biosynthesis
+0	0	169	cfa00072=Synthesis and degradation of ketone bodies
+0	0	169	cfa00062=Fatty acid elongation in mitochondria
+0	0	169	cfa00061=Fatty acid biosynthesis
+0	0	169	cfa00053=Ascorbate and aldarate metabolism
+0	0	169	cfa00051=Fructose and mannose metabolism
+0	0	169	cfa00040=Pentose and glucuronate interconversions
+0	0	169	cfa00030=Pentose phosphate pathway
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/select_snps/select_snps.gd_snp
--- a/test-data/test_out/select_snps/select_snps.gd_snp	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist",
-#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
-Contig161_chr1_4641264_4641879	115	C	T	73.5	chr1	4641382	C	6	0	2	45	8	0	2	51	15	0	2	72	5	0	2	42	6	0	2	45	10	0	2	57	Y	54	0.323	0
-Contig86_chr1_30984450_30985684	670	C	T	365.0	chr1	30985133	C	9	0	2	54	10	0	2	57	13	0	2	66	3	0	2	36	9	0	2	54	7	0	2	48	Y	145	0.031	0
-Contig21_chr1_60697952_60699446	307	G	A	51.9	chr1	60698265	G	12	0	2	63	9	0	2	54	4	0	2	39	6	0	2	45	9	0	2	54	4	0	2	39	Y	98	0.507	0
-Contig64_chr1_87343284_87345672	163	T	A	3.76	chr1	87343443	C	0	2	2	1	0	0	-1	0	5	0	2	42	2	0	2	33	0	1	2	14	0	0	-1	0	N	3	0.039	2
-Contig20_chr1_110679280_110679687	181	C	T	87.4	chr1	110679454	-	1	0	2	30	7	0	2	48	4	0	2	39	2	0	2	33	2	0	2	33	0	0	-1	0	N	31	0.660	2
-Contig222_chr2_9817738_9818143	220	C	T	888.0	chr2	9817960	C	17	0	2	78	12	0	2	63	20	0	2	87	8	0	2	51	11	0	2	60	12	0	2	63	Y	76	0.093	1
-Contig47_chr2_25470778_25471576	126	G	A	888.0	chr2	25470896	G	12	0	2	63	14	0	2	69	14	0	2	69	10	0	2	57	18	0	2	81	13	0	2	66	N	11	0.289	1
-Contig6_chr2_56859179_56859956	671	T	C	999.9	chr2	56859851	T	15	0	2	72	18	0	2	81	20	0	2	90	19	0	2	84	19	0	2	84	24	0	2	99	N	28	5.308	1
-Contig163_chr2_76402959_76404830	221	C	T	127.0	chr2	76403181	C	4	0	2	42	10	0	2	57	9	0	2	54	11	0	2	60	7	0	2	48	9	0	2	54	Y	54	0.178	1
-Contig56_chr3_17326225_17327548	387	G	C	91.2	chr3	17326591	G	14	0	2	69	13	0	2	66	15	0	2	72	15	0	2	72	13	0	2	66	12	0	2	63	Y	20	0.225	3
-Contig108_chr3_46210055_46210874	367	A	G	21.0	chr3	46210423	A	19	0	2	84	10	0	2	57	16	0	2	75	14	0	2	69	20	0	2	87	11	0	2	60	N	236	0.028	1
-Contig1_chr3_51588422_51589409	926	A	G	51.0	chr3	51589353	G	2	0	2	33	2	0	2	33	6	0	2	45	4	0	2	39	9	0	2	54	11	0	2	60	N	21	1.147	0
-Contig65_chr3_80727952_80728283	39	T	C	71.2	chr3	80727990	T	7	0	2	48	3	0	2	36	8	0	2	51	6	0	2	45	8	0	2	51	11	0	2	60	N	22	7.078	0
-Contig134_chr4_12145648_12148225	1326	C	T	164.0	chr4	12146961	C	9	0	2	54	8	0	2	51	7	0	2	48	3	0	2	36	5	0	2	42	5	0	2	42	Y	4	0.080	1
-Contig19_chr4_26233601_26233991	146	G	C	51.6	chr4	26233744	G	10	0	2	57	8	0	2	51	9	0	2	54	5	0	2	42	9	0	2	54	4	0	2	39	N	41	0.163	3
-Contig17_chr4_61310346_61311158	267	C	T	49.9	chr4	61310604	T	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	14	0	2	69	7	0	2	48	Y	219	0.098	0
-Contig31_chr5_4734956_4736547	1166	C	T	133.0	chr5	4736132	C	14	0	2	69	8	0	2	51	17	0	2	78	4	0	2	39	9	0	2	54	12	0	2	63	Y	1	0.021	0
-Contig6_chr5_26899813_26900498	97	A	C	88.6	chr5	26899910	A	15	0	2	72	14	0	2	69	27	0	2	108	15	0	2	72	13	0	2	69	12	0	2	63	Y	92	7.370	3
-Contig45_chr5_50892738_50892968	169	C	A	25.8	chr5	50892911	C	10	0	2	57	7	0	2	48	10	0	2	60	6	0	2	45	6	0	2	45	13	0	2	66	N	244	0.497	1
-Contig45_chr5_76133561_76134403	388	A	G	103.0	chr5	76133941	G	3	0	2	36	8	0	2	51	8	0	2	51	5	0	2	42	6	0	2	45	7	0	2	48	Y	57	0.038	0
-Contig111_chr6_5821219_5822519	1060	A	G	68.1	chr6	5822321	T	7	0	2	48	6	0	2	45	11	0	2	60	9	0	2	54	3	0	2	36	12	0	2	63	Y	7	0.231	1
-Contig102_chr6_30271329_30271577	39	T	G	139.0	chr6	30271371	G	3	0	2	36	4	0	2	39	6	0	2	45	1	0	2	30	4	0	2	39	4	0	2	39	N	15	1.159	0
-Contig112_chr6_51024554_51024851	100	A	G	121.0	chr6	51024654	A	10	0	2	57	12	0	2	63	9	0	2	54	13	0	2	66	14	0	2	69	17	0	2	78	N	75	4.287	0
-Contig84_chr7_6648683_6650255	1297	G	A	110.0	chr7	6649988	G	18	0	2	81	9	0	2	54	22	0	2	77	16	0	2	75	20	0	2	87	6	0	2	45	Y	83	0.166	0
-Contig206_chr7_26281823_26282074	103	C	A	101.0	chr7	26281925	T	11	0	2	60	16	0	2	61	19	0	2	84	6	0	2	45	19	0	2	84	16	0	2	75	N	-1	0.947	1
-Contig38_chr7_50681997_50682600	42	T	C	92.4	chr7	50682037	G	6	0	2	45	2	0	2	33	10	0	2	57	12	0	2	63	5	0	2	42	6	0	2	45	Y	94	0.146	0
-Contig91_chr8_12804505_12805470	409	C	A	111.0	chr8	12804906	C	8	0	2	51	10	0	2	57	15	0	2	72	12	0	2	63	14	0	2	69	15	0	2	72	N	145	0.175	0
-Contig8_chr8_27811135_27812620	333	C	T	37.9	chr8	27811458	C	4	0	2	39	11	0	2	60	18	0	2	81	5	0	2	42	6	0	2	45	5	0	2	42	Y	1	0.272	0
-Contig17_chr8_57490059_57490498	69	G	T	97.4	chr8	57490127	A	2	0	2	33	11	0	2	60	15	0	2	72	16	0	2	75	8	0	2	51	10	0	2	57	N	40	0.522	5
-Contig73_chr9_29451535_29452248	616	A	G	24.7	chr9	29452127	G	4	0	2	39	7	0	2	48	1	0	2	30	4	0	2	39	7	0	2	48	6	0	2	45	N	49	0.448	4
-Contig96_chr9_39008495_39009278	215	A	C	98.7	chr9	39008708	C	7	0	2	48	13	0	2	66	28	0	2	111	16	0	2	75	17	0	2	78	17	0	2	78	Y	8	0.427	1
-Contig22_chr10_15505382_15505589	172	T	C	38.5	chr10	15505548	T	2	0	2	33	6	0	2	45	8	0	2	51	8	0	2	51	9	0	2	54	12	0	2	63	N	284	2.861	0
-Contig69_chr10_40547265_40548153	371	G	A	58.1	chr10	40547649	A	9	0	2	54	8	0	2	51	8	0	2	51	9	0	2	54	4	0	2	39	5	0	2	42	Y	20	0.138	4
-Contig9_chr10_51475063_51476054	770	C	T	57.3	chr10	51475839	C	6	0	2	45	16	0	2	75	16	0	2	75	13	0	2	66	9	0	2	54	9	2	2	21	N	80	0.394	0
-Contig72_chr11_7142765_7143772	146	G	A	152.0	chr11	7142911	A	8	0	2	51	8	0	2	51	24	0	2	99	10	0	2	57	17	0	2	78	11	0	2	60	Y	90	1.137	0
-Contig7_chr11_40017076_40017630	352	C	T	46.3	chr11	40017422	C	7	0	2	48	9	0	2	54	6	0	2	45	8	0	2	51	16	0	2	75	9	0	2	54	Y	44	0.336	0
-Contig16_chr11_53408448_53408790	187	A	G	153.0	chr11	53408638	A	7	0	2	48	9	0	2	54	18	0	2	81	10	0	2	57	11	0	2	60	12	0	2	63	Y	116	1.367	0
-Contig21_chr12_18403415_18404381	586	G	T	34.5	chr12	18403983	-	13	0	2	66	16	0	2	75	25	0	2	102	12	0	2	63	12	0	2	63	14	0	2	69	Y	12	0.068	0
-Contig41_chr12_25565452_25566993	475	G	T	6.29	chr12	25565926	G	15	0	2	72	14	0	2	69	10	0	2	57	15	0	2	72	18	0	2	81	19	0	2	84	N	10	2.231	1
-Contig5_chr12_53880670_53882675	1221	A	C	99.4	chr12	53881888	A	16	0	2	75	18	0	2	81	23	0	2	96	10	0	2	57	15	0	2	72	17	0	2	78	Y	31	0.061	0
-Contig107_chr13_26045881_26046290	341	C	G	81.4	chr13	26046230	C	16	0	2	75	20	0	2	90	14	0	2	69	15	0	2	72	9	0	2	54	9	0	2	54	Y	51	4.510	0
-Contig251_chr13_28498333_28501066	864	T	G	296.0	chr13	28499180	T	3	0	2	36	5	0	2	42	4	0	2	39	2	0	2	33	5	0	2	42	6	0	2	45	Y	9	0.068	0
-Contig55_chr13_53467708_53468101	221	T	G	132.0	chr13	53467925	T	25	0	2	102	12	0	2	63	26	0	2	105	7	0	2	48	16	0	2	75	16	0	2	75	N	20	5.717	1
-Contig48_chr14_11839435_11843272	3014	A	G	163.0	chr14	11842446	A	10	0	2	57	8	0	2	51	13	0	2	66	10	0	2	57	5	0	2	42	10	0	2	57	Y	31	0.908	0
-Contig28_chr14_26905747_26909514	975	G	C	3.13	chr14	26906723	G	16	0	2	75	10	0	2	57	12	0	2	63	15	0	2	72	10	0	2	57	7	0	2	48	N	287	0.117	2
-Contig64_chr14_56768376_56768902	473	C	T	29.0	chr14	56768832	C	15	0	2	72	11	0	2	60	14	0	2	69	14	0	2	69	7	0	2	48	9	0	2	54	Y	91	8.281	0
-Contig60_chr15_18493036_18494316	150	G	A	92.6	chr15	18493188	G	9	0	2	54	13	0	2	66	9	0	2	54	6	0	2	45	5	0	2	42	12	0	2	63	Y	45	0.125	0
-Contig112_chr15_26772864_26773267	374	C	T	21.6	chr15	26773244	C	4	0	2	39	4	0	2	39	5	0	2	42	2	0	2	33	4	0	2	39	3	0	2	36	N	18	+99.	0
-Contig119_chr16_6160274_6160477	180	G	A	54.8	chr16	6160457	G	7	0	2	48	6	0	2	45	12	0	2	63	3	0	2	36	11	0	2	60	10	0	2	57	N	42	+99.	0
-Contig60_chr16_28079136_28080263	588	T	G	157.0	chr16	28079739	T	22	0	2	93	20	0	2	87	22	0	2	93	17	0	2	78	12	0	2	63	10	0	2	57	Y	105	5.999	1
-Contig31_chr17_12128267_12129637	205	G	A	90.5	chr17	12128484	G	7	0	2	48	6	0	2	45	6	0	2	45	11	0	2	60	7	0	2	48	4	0	2	39	Y	10	0.246	0
-Contig99_chr17_26021506_26022200	505	C	T	88.8	chr17	26022017	T	15	0	2	72	13	0	2	66	19	0	2	84	9	0	2	54	10	0	2	57	11	0	2	60	Y	1	0.172	1
-Contig27_chr17_61713766_61716585	1056	G	C	40.0	chr17	61714821	G	4	0	2	39	8	0	2	51	10	0	2	57	6	0	2	45	6	0	2	45	3	0	2	36	N	6	2.200	4
-Contig229_chr18_3706523_3708577	1076	A	G	83.9	chr18	3707630	A	11	0	2	60	13	0	2	66	26	0	2	105	11	0	2	60	15	0	2	72	17	0	2	78	Y	63	0.445	0
-Contig82_chr18_27305489_27306229	566	C	T	49.5	chr18	27306051	A	6	0	2	45	6	0	2	45	10	0	2	57	11	0	2	60	6	0	2	45	7	0	2	48	N	1	0.349	0
-Contig64_chr18_55979770_55980315	49	G	A	89.1	chr18	55979824	G	3	0	2	36	9	0	2	54	7	0	2	51	4	0	2	39	3	0	2	36	3	0	2	36	Y	-1	2.124	0
-Contig146_chr19_5221790_5223013	143	A	G	114.0	chr19	5221916	-	1	0	2	30	4	0	2	39	3	0	2	36	5	0	2	42	2	0	2	33	5	0	2	42	Y	12	0.870	0
-Contig129_chr19_25541958_25542221	202	T	C	68.1	chr19	25542154	C	11	0	2	60	19	0	2	84	10	0	2	60	17	0	2	78	9	0	2	54	12	0	2	63	N	-1	2.551	1
-Contig60_chr19_54013816_54014398	281	A	G	138.0	chr19	54014103	C	6	0	2	45	15	0	2	72	7	0	2	48	10	0	2	57	15	0	2	72	10	0	2	57	Y	188	1.271	0
-Contig50_chr20_12138509_12141975	3206	C	A	248.0	chr20	12141763	C	8	0	2	51	15	0	2	72	14	0	2	69	6	0	2	45	10	0	2	57	7	0	2	48	Y	2	0.384	0
-Contig36_chr20_32631363_32632049	176	G	A	24.1	chr20	32631526	G	7	0	2	48	14	0	2	69	19	0	2	84	14	0	2	69	15	0	2	72	16	0	2	75	N	50	1.150	0
-Contig50_chr21_4178523_4178687	121	G	A	362.0	chr21	4178640	G	8	0	2	51	14	0	2	69	5	0	2	42	3	0	2	36	11	0	2	60	4	0	2	39	N	392	0.483	0
-Contig129_chr21_31045749_31046924	381	A	G	129.0	chr21	31046141	A	19	0	2	84	8	0	2	51	23	0	2	96	12	0	2	63	15	0	2	72	18	0	2	81	Y	69	0.028	2
-Contig159_chr22_7896450_7896974	109	G	C	151.0	chr22	7896570	G	16	0	2	75	5	7	1	62	14	0	2	69	16	0	2	75	13	0	2	66	13	0	2	66	Y	16	0.465	0
-Contig23_chr22_34612023_34612568	167	C	G	92.3	chr22	34612181	C	11	0	2	60	18	0	2	81	13	0	2	66	8	0	2	51	12	0	2	63	14	0	2	69	Y	7	0.409	0
-Contig26_chr22_57817664_57819633	1453	A	G	150.0	chr22	57819121	G	9	0	2	54	9	0	2	54	13	0	2	66	15	0	2	72	11	0	2	60	14	0	2	69	N	15	0.471	1
-Contig133_chr23_3525134_3526502	1223	A	G	201.0	chr23	3526387	A	11	0	2	60	13	0	2	66	23	0	2	96	21	0	2	90	13	0	2	66	10	0	2	57	Y	61	1.359	0
-Contig35_chr23_28447813_28449115	70	T	A	21.3	chr23	28447881	T	9	0	2	54	8	0	2	51	10	0	2	57	9	0	2	54	10	0	2	57	12	0	2	63	N	251	0.163	1
-Contig50_chr24_22515247_22516072	761	C	T	243.0	chr24	22515981	T	11	0	2	60	10	0	2	57	8	0	2	51	9	0	2	54	18	0	2	81	8	0	2	51	Y	1	0.190	0
-Contig84_chr24_29196623_29199644	466	C	T	126.0	chr24	29197091	T	7	0	2	48	11	0	2	60	8	0	2	51	7	0	2	48	11	0	2	60	15	0	2	72	Y	42	0.215	0
-Contig144_chr25_4011170_4013134	541	A	G	160.0	chr25	4011690	A	12	0	2	63	17	0	2	78	13	0	2	66	13	0	2	66	13	0	2	66	13	0	2	66	Y	5	0.087	0
-Contig103_chr25_38891221_38892140	407	G	A	131.0	chr25	38891644	G	8	0	2	51	14	0	2	69	18	0	2	81	8	0	2	51	8	0	2	51	11	0	2	60	Y	149	0.167	4
-Contig204_chr26_4311195_4311778	170	C	T	16.9	chr26	4311363	T	20	0	2	87	8	0	2	51	13	0	2	66	18	0	2	81	11	0	2	60	14	0	2	69	N	35	0.085	0
-Contig146_chr26_26622638_26623906	574	G	A	186.0	chr26	26623219	A	11	0	2	60	12	0	2	63	9	0	2	54	11	0	2	60	9	0	2	54	12	0	2	63	Y	1	0.318	0
-Contig135_chr27_6853874_6854079	158	C	T	116.0	chr27	6854032	T	18	0	2	81	19	0	2	84	13	0	2	66	7	0	2	48	8	0	2	51	11	0	2	60	N	4	0.060	1
-Contig64_chr27_34654435_34654621	132	C	A	115.0	chr27	34654567	T	2	0	2	33	2	0	2	33	5	0	2	42	3	0	2	36	3	0	2	36	8	0	2	51	N	12	0.297	1
-Contig131_chr28_6481806_6483783	138	C	T	36.2	chr28	6481953	C	12	0	2	63	12	0	2	63	20	0	2	87	11	0	2	60	10	0	2	57	12	0	2	63	Y	10	0.387	0
-Contig60_chr28_30197166_30197364	92	T	C	164.0	chr28	30197258	T	10	0	2	57	13	0	2	66	15	0	2	72	16	0	2	75	12	0	2	63	11	0	2	60	N	369	1.139	0
-Contig29_chr29_4726399_4727143	559	A	T	163.0	chr29	4726955	A	15	0	2	72	18	0	2	81	18	0	2	81	16	0	2	75	11	0	2	60	14	0	2	72	Y	161	3.114	0
-Contig1_chr30_5992217_5993068	106	C	T	129.0	chr30	5992319	C	10	0	2	57	11	0	2	60	7	0	2	48	11	0	2	60	10	0	2	57	12	0	2	63	Y	76	1.079	0
-Contig165_chr30_25804389_25804926	190	T	C	126.0	chr30	25804592	C	3	0	2	36	8	0	2	51	7	0	2	48	10	0	2	57	7	0	2	48	4	0	2	39	Y	113	0.329	0
-Contig38_chr31_5164423_5166573	2074	C	T	134.0	chr31	5166501	T	13	0	2	66	10	0	2	57	17	0	2	78	11	0	2	60	17	0	2	78	10	0	2	57	Y	58	+99.	0
-Contig17_chr31_26433828_26434459	498	T	C	9.79	chr31	26434322	T	18	0	2	81	10	0	2	57	15	0	2	72	13	0	2	66	16	0	2	75	15	0	2	72	Y	137	4.814	0
-Contig9_chr32_19479532_19479735	12	A	G	20.7	chr32	19479544	A	1	0	2	30	2	0	2	33	1	0	2	30	5	0	2	42	3	0	2	36	3	0	2	36	N	17	+99.	0
-Contig30_chr32_25902721_25905783	208	C	G	162.0	chr32	25902927	G	11	0	2	60	13	0	2	66	11	0	2	60	12	0	2	63	7	0	2	48	11	0	2	60	Y	145	0.322	2
-Contig18_chr33_22207246_22209159	1363	G	T	51.5	chr33	22208619	-	16	0	2	75	8	0	2	51	11	0	2	60	10	0	2	57	15	0	2	72	12	0	2	63	Y	59	2.560	0
-Contig170_chr33_26189421_26189940	292	T	C	98.4	chr33	26189703	T	21	0	2	90	13	0	2	66	15	0	2	72	13	0	2	66	19	0	2	84	13	0	2	66	Y	23	0.307	0
-Contig113_chr34_13341080_13341643	236	C	T	90.7	chr34	13341316	C	4	0	2	39	2	0	2	33	8	0	2	51	4	0	2	39	8	0	2	51	3	0	2	36	Y	47	0.412	3
-Contig152_chr34_31794848_31795540	242	G	A	93.2	chr34	31795093	G	11	0	2	60	24	0	2	99	17	0	2	78	15	0	2	72	18	0	2	81	17	0	2	78	Y	123	2.780	0
-Contig47_chr35_3666773_3667898	348	G	T	124.0	chr35	3667121	G	9	0	2	54	20	0	2	87	18	0	2	81	15	0	2	72	12	0	2	63	14	0	2	69	Y	285	0.235	0
-Contig74_chr35_25394343_25394813	303	A	T	221.0	chr35	25394646	G	23	0	2	96	15	0	2	72	25	0	2	105	7	7	1	49	18	0	2	81	16	0	2	75	Y	58	4.298	0
-Contig5_chr36_4562983_4563634	343	C	T	151.0	chr36	4563324	T	20	0	2	87	20	0	2	87	23	0	2	96	24	0	2	99	9	0	2	54	8	0	2	51	Y	40	1.169	0
-Contig133_chr36_32954045_32955409	136	A	G	116.0	chr36	32954182	A	16	0	2	75	15	0	2	72	20	0	2	87	11	0	2	60	18	0	2	81	13	0	2	66	Y	74	3.772	1
-Contig53_chr37_6665763_6665919	116	C	T	111.0	chr37	6665875	C	9	0	2	54	9	0	2	54	5	0	2	42	9	0	2	54	8	0	2	51	10	0	2	57	N	15	10.875	1
-Contig2_chr37_31197993_31198256	182	C	T	39.6	chr37	31198171	T	6	0	2	45	10	0	2	57	7	0	2	48	9	0	2	54	10	0	2	57	12	0	2	63	N	2	0.595	0
-Contig7_chr38_12217200_12218387	1163	A	T	44.4	chr38	12218353	A	11	0	2	60	13	0	2	66	17	0	2	78	10	0	2	57	11	0	2	60	11	0	2	60	Y	67	+99.	0
-Contig265_chrX_2689247_2689484	114	C	G	103.0	chrX	2689356	C	11	0	2	60	9	0	2	54	13	0	2	66	16	0	2	75	14	0	2	69	10	0	2	57	N	2	9.232	1
-Contig113_chrX_26287829_26288398	385	C	T	59.6	chrX	26288213	C	9	0	2	54	9	0	2	54	17	0	2	78	11	0	2	60	3	8	1	44	4	0	2	39	N	13	0.077	0
-Contig90_chrX_57430715_57431566	548	C	T	116.0	chrX	57431266	T	9	0	2	54	18	0	2	81	13	0	2	66	14	0	2	69	8	0	2	54	7	0	2	48	Y	261	0.154	1
-Contig133_chrX_84833782_84834125	182	G	A	69.7	chrX	84833962	G	5	0	2	42	18	0	2	81	12	0	2	63	19	0	2	84	6	3	1	27	7	0	2	48	N	619	0.278	0
diff -r fdb4240fb565 -r 8ae67e9fb6ff test-data/test_out/specify_restriction_enzymes/specify_restriction_enzymes.gd_snp
--- a/test-data/test_out/specify_restriction_enzymes/specify_restriction_enzymes.gd_snp	Fri Sep 28 11:34:31 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-#{"column_names":["scaf","pos","A","B","qual","ref","rpos","rnuc","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","pair","dist",
-#"prim","rflp"],"dbkey":"canFam2","individuals":[["PB1",9],["PB2",13],["PB3",17],["PB4",21],["PB6",25],["PB8",29]],"pos":2,"rPos":7,"ref":6,"scaffold":1,"species":"bear"}
-Contig47_chr2_25470778_25471576	126	G	A	888.0	chr2	25470896	G	12	0	2	63	14	0	2	69	14	0	2	69	10	0	2	57	18	0	2	81	13	0	2	66	N	11	0.289	1
-Contig73_chr9_29451535_29452248	616	A	G	24.7	chr9	29452127	G	4	0	2	39	7	0	2	48	1	0	2	30	4	0	2	39	7	0	2	48	6	0	2	45	N	49	0.448	4
-Contig69_chr10_40547265_40548153	371	G	A	58.1	chr10	40547649	A	9	0	2	54	8	0	2	51	8	0	2	51	9	0	2	54	4	0	2	39	5	0	2	42	Y	20	0.138	4
-Contig99_chr17_26021506_26022200	505	C	T	88.8	chr17	26022017	T	15	0	2	72	13	0	2	66	19	0	2	84	9	0	2	54	10	0	2	57	11	0	2	60	Y	1	0.172	1
-Contig27_chr17_61713766_61716585	1056	G	C	40.0	chr17	61714821	G	4	0	2	39	8	0	2	51	10	0	2	57	6	0	2	45	6	0	2	45	3	0	2	36	N	6	2.200	4
-Contig26_chr22_57817664_57819633	1453	A	G	150.0	chr22	57819121	G	9	0	2	54	9	0	2	54	13	0	2	66	15	0	2	72	11	0	2	60	14	0	2	69	N	15	0.471	1
-Contig103_chr25_38891221_38892140	407	G	A	131.0	chr25	38891644	G	8	0	2	51	14	0	2	69	18	0	2	81	8	0	2	51	8	0	2	51	11	0	2	60	Y	149	0.167	4
-Contig64_chr27_34654435_34654621	132	C	A	115.0	chr27	34654567	T	2	0	2	33	2	0	2	33	5	0	2	42	3	0	2	36	3	0	2	36	8	0	2	51	N	12	0.297	1
diff -r fdb4240fb565 -r 8ae67e9fb6ff tool-data/gd.heterochromatic.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gd.heterochromatic.loc.sample	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,2 @@
+# ref_species   heterochromatic_file
+#canFam2	/galaxy/local_data/genome_diversity/dpmix/canFam2_heterochrom.txt
diff -r fdb4240fb565 -r 8ae67e9fb6ff tool-data/gd.oscar.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gd.oscar.loc.sample	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,4 @@
+#<species>	<data_file>
+#hg19	/galaxy/local_data/genome_diversity/oscar/hsa_ENSEMBLcKEGGctpthw.tsv
+#bosTau4	/galaxy/local_data/genome_diversity/oscar/bta_ENSEMBLcKEGGctpthw.tsv
+#canFam2	/galaxy/local_data/genome_diversity/oscar/cfa_ENSEMBLcKEGGctpthw.tsv
diff -r fdb4240fb565 -r 8ae67e9fb6ff tool-data/gd.pathways.txt.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gd.pathways.txt.sample	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,721 @@
+hg19	hsa00010	hsa00010 - Glycolysis/ Gluconeogenesis
+hg19	hsa00020	hsa00020 - Citratecycle (TCA cycle)
+hg19	hsa00030	hsa00030 - Pentosephosphate pathway
+hg19	hsa00040	hsa00040 - Pentoseand glucuronate interconversions
+hg19	hsa00051	hsa00051 - Fructoseand mannose metabolism
+hg19	hsa00052	hsa00052 - Galactosemetabolism
+hg19	hsa00053	hsa00053 - Ascorbateand aldarate metabolism
+hg19	hsa00061	hsa00061 - Fattyacid biosynthesis
+hg19	hsa00062	hsa00062 - Fattyacid elongation
+hg19	hsa00071	hsa00071 - Fattyacid metabolism
+hg19	hsa00072	hsa00072 - Synthesisand degradation of ketone bodies
+hg19	hsa00100	hsa00100 - Steroidbiosynthesis
+hg19	hsa00120	hsa00120 - Primarybile acid biosynthesis
+hg19	hsa00130	hsa00130 - Ubiquinoneand other terpenoid-quinone biosynthesis
+hg19	hsa00140	hsa00140 - Steroidhormone biosynthesis
+hg19	hsa00190	hsa00190 - Oxidativephosphorylation
+hg19	hsa00230	hsa00230 - Purinemetabolism
+hg19	hsa00232	hsa00232 - Caffeinemetabolism
+hg19	hsa00240	hsa00240 - Pyrimidinemetabolism
+hg19	hsa00250	hsa00250 - Alanine,aspartate and glutamate metabolism
+hg19	hsa00260	hsa00260 - Glycine,serine and threonine metabolism
+hg19	hsa00270	hsa00270 - Cysteineand methionine metabolism
+hg19	hsa00280	hsa00280 - Valine,leucine and isoleucine degradation
+hg19	hsa00290	hsa00290 - Valine,leucine and isoleucine biosynthesis
+hg19	hsa00300	hsa00300 - Lysinebiosynthesis
+hg19	hsa00310	hsa00310 - Lysinedegradation
+hg19	hsa00330	hsa00330 - Arginineand proline metabolism
+hg19	hsa00340	hsa00340 - Histidinemetabolism
+hg19	hsa00350	hsa00350 - Tyrosinemetabolism
+hg19	hsa00360	hsa00360 - Phenylalaninemetabolism
+hg19	hsa00380	hsa00380 - Tryptophanmetabolism
+hg19	hsa00400	hsa00400 - Phenylalanine,tyrosine and tryptophan biosynthesis
+hg19	hsa00410	hsa00410 - beta-Alaninemetabolism
+hg19	hsa00430	hsa00430 - Taurineand hypotaurine metabolism
+hg19	hsa00450	hsa00450 - Selenocompoundmetabolism
+hg19	hsa00460	hsa00460 - Cyanoaminoacid metabolism
+hg19	hsa00480	hsa00480 - Glutathionemetabolism
+hg19	hsa00500	hsa00500 - Starchand sucrose metabolism
+hg19	hsa00510	hsa00510 - N-Glycanbiosynthesis
+hg19	hsa00511	hsa00511 - Otherglycan degradation
+hg19	hsa00512	hsa00512 - Mucintype O-Glycan biosynthesis
+hg19	hsa00514	hsa00514 - Othertypes of O-glycan biosynthesis
+hg19	hsa00520	hsa00520 - Aminosugar and nucleotide sugar metabolism
+hg19	hsa00524	hsa00524 - Butirosinand neomycin biosynthesis
+hg19	hsa00531	hsa00531 - Glycosaminoglycandegradation
+hg19	hsa00532	hsa00532 - Glycosaminoglycanbiosynthesis - chondroitin sulfate
+hg19	hsa00533	hsa00533 - Glycosaminoglycanbiosynthesis - keratan sulfate
+hg19	hsa00534	hsa00534 - Glycosaminoglycanbiosynthesis - heparan sulfate
+hg19	hsa00561	hsa00561 - Glycerolipidmetabolism
+hg19	hsa00562	hsa00562 - Inositolphosphate metabolism
+hg19	hsa00563	hsa00563 - Glycosylphosphatidylinositol(GPI)-anchorbiosynthesis
+hg19	hsa00564	hsa00564 - Glycerophospholipidmetabolism
+hg19	hsa00565	hsa00565 - Etherlipid metabolism
+hg19	hsa00590	hsa00590 - Arachidonicacid metabolism
+hg19	hsa00591	hsa00591 - Linoleicacid metabolism
+hg19	hsa00592	hsa00592 - alpha-Linolenicacid metabolism
+hg19	hsa00600	hsa00600 - Sphingolipidmetabolism
+hg19	hsa00601	hsa00601 - Glycosphingolipidbiosynthesis - lacto and neolacto series
+hg19	hsa00603	hsa00603 - Glycosphingolipidbiosynthesis - globo series
+hg19	hsa00604	hsa00604 - Glycosphingolipidbiosynthesis - ganglio series
+hg19	hsa00620	hsa00620 - Pyruvatemetabolism
+hg19	hsa00630	hsa00630 - Glyoxylateand dicarboxylate metabolism
+hg19	hsa00640	hsa00640 - Propanoatemetabolism
+hg19	hsa00650	hsa00650 - Butanoatemetabolism
+hg19	hsa00670	hsa00670 - Onecarbon pool by folate
+hg19	hsa00730	hsa00730 - Thiaminemetabolism
+hg19	hsa00740	hsa00740 - Riboflavinmetabolism
+hg19	hsa00750	hsa00750 - VitaminB6 metabolism
+hg19	hsa00760	hsa00760 - Nicotinateand nicotinamide metabolism
+hg19	hsa00770	hsa00770 - Pantothenateand CoA biosynthesis
+hg19	hsa00780	hsa00780 - Biotinmetabolism
+hg19	hsa00785	hsa00785 - Lipoicacid metabolism
+hg19	hsa00790	hsa00790 - Folatebiosynthesis
+hg19	hsa00830	hsa00830 - Retinolmetabolism
+hg19	hsa00860	hsa00860 - Porphyrinand chlorophyll metabolism
+hg19	hsa00900	hsa00900 - Terpenoidbackbone biosynthesis
+hg19	hsa00910	hsa00910 - Nitrogenmetabolism
+hg19	hsa00920	hsa00920 - Sulfurmetabolism
+hg19	hsa00970	hsa00970 - Aminoacyl-tRNAbiosynthesis
+hg19	hsa00980	hsa00980 - Metabolismof xenobiotics by cytochrome P450
+hg19	hsa00982	hsa00982 - Drugmetabolism - cytochrome P450
+hg19	hsa00983	hsa00983 - Drugmetabolism - other enzymes
+hg19	hsa01040	hsa01040 - Biosynthesisof unsaturated fatty acids
+hg19	hsa01100	hsa01100 - Metabolicpathways
+hg19	hsa02010	hsa02010 - ABCtransporters
+hg19	hsa03008	hsa03008 - Ribosomebiogenesis in eukaryotes
+hg19	hsa03010	hsa03010 - Ribosome
+hg19	hsa03013	hsa03013 - RNAtransport
+hg19	hsa03015	hsa03015 - mRNAsurveillance pathway
+hg19	hsa03018	hsa03018 - RNAdegradation
+hg19	hsa03020	hsa03020 - RNApolymerase
+hg19	hsa03022	hsa03022 - Basaltranscription factors
+hg19	hsa03030	hsa03030 - DNAreplication
+hg19	hsa03040	hsa03040 - Spliceosome
+hg19	hsa03050	hsa03050 - Proteasome
+hg19	hsa03060	hsa03060 - Proteinexport
+hg19	hsa03320	hsa03320 - PPARsignaling pathway
+hg19	hsa03410	hsa03410 - Baseexcision repair
+hg19	hsa03420	hsa03420 - Nucleotideexcision repair
+hg19	hsa03430	hsa03430 - Mismatchrepair
+hg19	hsa03440	hsa03440 - Homologousrecombination
+hg19	hsa03450	hsa03450 - Non-homologousend-joining
+hg19	hsa03460	hsa03460 - Fanconianemia pathway
+hg19	hsa04010	hsa04010 - MAPKsignaling pathway
+hg19	hsa04012	hsa04012 - ErbBsignaling pathway
+hg19	hsa04020	hsa04020 - Calciumsignaling pathway
+hg19	hsa04060	hsa04060 - Cytokine-cytokinereceptor interaction
+hg19	hsa04062	hsa04062 - Chemokinesignaling pathway
+hg19	hsa04070	hsa04070 - Phosphatidylinositolsignaling system
+hg19	hsa04080	hsa04080 - Neuroactiveligand-receptor interaction
+hg19	hsa04110	hsa04110 - Cellcycle
+hg19	hsa04114	hsa04114 - Oocytemeiosis
+hg19	hsa04115	hsa04115 - p53signaling pathway
+hg19	hsa04120	hsa04120 - Ubiquitinmediated proteolysis
+hg19	hsa04122	hsa04122 - Sulfurrelay system
+hg19	hsa04130	hsa04130 - SNAREinteractions in vesicular transport
+hg19	hsa04140	hsa04140 - Regulationof autophagy
+hg19	hsa04141	hsa04141 - Proteinprocessing in endoplasmic reticulum
+hg19	hsa04142	hsa04142 - Lysosome
+hg19	hsa04144	hsa04144 - Endocytosis
+hg19	hsa04145	hsa04145 - Phagosome
+hg19	hsa04146	hsa04146 - Peroxisome
+hg19	hsa04150	hsa04150 - mTORsignaling pathway
+hg19	hsa04210	hsa04210 - Apoptosis
+hg19	hsa04260	hsa04260 - Cardiacmuscle contraction
+hg19	hsa04270	hsa04270 - Vascularsmooth muscle contraction
+hg19	hsa04310	hsa04310 - Wntsignaling pathway
+hg19	hsa04320	hsa04320 - Dorso-ventralaxis formation
+hg19	hsa04330	hsa04330 - Notchsignaling pathway
+hg19	hsa04340	hsa04340 - Hedgehogsignaling pathway
+hg19	hsa04350	hsa04350 - TGF-betasignaling pathway
+hg19	hsa04360	hsa04360 - Axonguidance
+hg19	hsa04370	hsa04370 - VEGFsignaling pathway
+hg19	hsa04380	hsa04380 - Osteoclastdifferentiation
+hg19	hsa04510	hsa04510 - Focaladhesion
+hg19	hsa04512	hsa04512 - ECM-receptorinteraction
+hg19	hsa04514	hsa04514 - Celladhesion molecules (CAMs)
+hg19	hsa04520	hsa04520 - Adherensjunction
+hg19	hsa04530	hsa04530 - Tightjunction
+hg19	hsa04540	hsa04540 - Gapjunction
+hg19	hsa04610	hsa04610 - Complementand coagulation cascades
+hg19	hsa04612	hsa04612 - Antigenprocessing and presentation
+hg19	hsa04614	hsa04614 - Renin-angiotensinsystem
+hg19	hsa04620	hsa04620 - Toll-likereceptor signaling pathway
+hg19	hsa04621	hsa04621 - NOD-likereceptor signaling pathway
+hg19	hsa04622	hsa04622 - RIG-I-likereceptor signaling pathway
+hg19	hsa04623	hsa04623 - CytosolicDNA-sensing pathway
+hg19	hsa04630	hsa04630 - Jak-STATsignaling pathway
+hg19	hsa04640	hsa04640 - Hematopoieticcell lineage
+hg19	hsa04650	hsa04650 - Naturalkiller cell mediated cytotoxicity
+hg19	hsa04660	hsa04660 - Tcell receptor signaling pathway
+hg19	hsa04662	hsa04662 - Bcell receptor signaling pathway
+hg19	hsa04664	hsa04664 - Fcepsilon RI signaling pathway
+hg19	hsa04666	hsa04666 - Fcgamma R-mediated phagocytosis
+hg19	hsa04670	hsa04670 - Leukocytetransendothelial migration
+hg19	hsa04672	hsa04672 - Intestinalimmune network for IgA production
+hg19	hsa04710	hsa04710 - Circadianrhythm - mammal
+hg19	hsa04720	hsa04720 - Long-termpotentiation
+hg19	hsa04721	hsa04721 - Synapticvesicle cycle
+hg19	hsa04722	hsa04722 - Neurotrophinsignaling pathway
+hg19	hsa04724	hsa04724 - Glutamatergicsynapse
+hg19	hsa04725	hsa04725 - Cholinergicsynapse
+hg19	hsa04727	hsa04727 - GABAergicsynapse
+hg19	hsa04728	hsa04728 - Dopaminergicsynapse
+hg19	hsa04730	hsa04730 - Long-termdepression
+hg19	hsa04740	hsa04740 - Olfactorytransduction
+hg19	hsa04742	hsa04742 - Tastetransduction
+hg19	hsa04744	hsa04744 - Phototransduction
+hg19	hsa04810	hsa04810 - Regulationof actin cytoskeleton
+hg19	hsa04910	hsa04910 - Insulinsignaling pathway
+hg19	hsa04912	hsa04912 - GnRHsignaling pathway
+hg19	hsa04914	hsa04914 - Progesterone-mediatedoocyte maturation
+hg19	hsa04916	hsa04916 - Melanogenesis
+hg19	hsa04920	hsa04920 - Adipocytokinesignaling pathway
+hg19	hsa04930	hsa04930 - TypeII diabetes mellitus
+hg19	hsa04940	hsa04940 - TypeI diabetes mellitus
+hg19	hsa04950	hsa04950 - Maturityonset diabetes of the young
+hg19	hsa04960	hsa04960 - Aldosterone-regulatedsodium reabsorption
+hg19	hsa04961	hsa04961 - Endocrineand other factor-regulated calcium reabsorption
+hg19	hsa04962	hsa04962 - Vasopressin-regulatedwater reabsorption
+hg19	hsa04964	hsa04964 - Proximaltubule bicarbonate reclamation
+hg19	hsa04966	hsa04966 - Collectingduct acid secretion
+hg19	hsa04970	hsa04970 - Salivarysecretion
+hg19	hsa04971	hsa04971 - Gastricacid secretion
+hg19	hsa04972	hsa04972 - Pancreaticsecretion
+hg19	hsa04973	hsa04973 - Carbohydratedigestion and absorption
+hg19	hsa04974	hsa04974 - Proteindigestion and absorption
+hg19	hsa04975	hsa04975 - Fatdigestion and absorption
+hg19	hsa04976	hsa04976 - Bilesecretion
+hg19	hsa04977	hsa04977 - Vitamindigestion and absorption
+hg19	hsa04978	hsa04978 - Mineralabsorption
+hg19	hsa05010	hsa05010 - Alzheimer'sdisease
+hg19	hsa05012	hsa05012 - Parkinson'sdisease
+hg19	hsa05014	hsa05014 - Amyotrophiclateral sclerosis (ALS)
+hg19	hsa05016	hsa05016 - Huntington'sdisease
+hg19	hsa05020	hsa05020 - Priondiseases
+hg19	hsa05100	hsa05100 - Bacterialinvasion of epithelial cells
+hg19	hsa05132	hsa05132 - Salmonellainfection
+hg19	hsa05133	hsa05133 - Pertussis
+hg19	hsa05134	hsa05134 - Legionellosis
+hg19	hsa05140	hsa05140 - Leishmaniasis
+hg19	hsa05142	hsa05142 - Chagasdisease (American trypanosomiasis)
+hg19	hsa05143	hsa05143 - Africantrypanosomiasis
+hg19	hsa05144	hsa05144 - Malaria
+hg19	hsa05145	hsa05145 - Toxoplasmosis
+hg19	hsa05146	hsa05146 - Amoebiasis
+hg19	hsa05150	hsa05150 - Staphylococcusaureus infection
+hg19	hsa05152	hsa05152 - Tuberculosis
+hg19	hsa05160	hsa05160 - HepatitisC
+hg19	hsa05162	hsa05162 - Measles
+hg19	hsa05164	hsa05164 - InfluenzaA
+hg19	hsa05166	hsa05166 - HTLV-Iinfection
+hg19	hsa05168	hsa05168 - Herpessimplex infection
+hg19	hsa05200	hsa05200 - Pathwaysin cancer
+hg19	hsa05202	hsa05202 - Transcriptionalmisregulation in cancers
+hg19	hsa05210	hsa05210 - Colorectalcancer
+hg19	hsa05211	hsa05211 - Renalcell carcinoma
+hg19	hsa05212	hsa05212 - Pancreaticcancer
+hg19	hsa05213	hsa05213 - Endometrialcancer
+hg19	hsa05214	hsa05214 - Glioma
+hg19	hsa05215	hsa05215 - Prostatecancer
+hg19	hsa05216	hsa05216 - Thyroidcancer
+hg19	hsa05217	hsa05217 - Basalcell carcinoma
+hg19	hsa05218	hsa05218 - Melanoma
+hg19	hsa05219	hsa05219 - Bladdercancer
+hg19	hsa05220	hsa05220 - Chronicmyeloid leukemia
+hg19	hsa05221	hsa05221 - Acutemyeloid leukemia
+hg19	hsa05222	hsa05222 - Smallcell lung cancer
+hg19	hsa05223	hsa05223 - Non-smallcell lung cancer
+hg19	hsa05310	hsa05310 - Asthma
+hg19	hsa05320	hsa05320 - Autoimmunethyroid disease
+hg19	hsa05322	hsa05322 - Systemiclupus erythematosus
+hg19	hsa05323	hsa05323 - Rheumatoidarthritis
+hg19	hsa05330	hsa05330 - Allograftrejection
+hg19	hsa05332	hsa05332 - Graft-versus-hostdisease
+hg19	hsa05340	hsa05340 - Primaryimmunodeficiency
+hg19	hsa05410	hsa05410 - Hypertrophiccardiomyopathy (HCM)
+hg19	hsa05412	hsa05412 - Arrhythmogenicright ventricular cardiomyopathy (ARVC)
+hg19	hsa05414	hsa05414 - Dilatedcardiomyopathy
+hg19	hsa05416	hsa05416 - Viralmyocarditis
+canFam2	cfa00010	cfa00010 - Glycolysis/ Gluconeogenesis
+canFam2	cfa00020	cfa00020 - Citratecycle (TCA cycle)
+canFam2	cfa00030	cfa00030 - Pentosephosphate pathway
+canFam2	cfa00040	cfa00040 - Pentoseand glucuronate interconversions
+canFam2	cfa00051	cfa00051 - Fructoseand mannose metabolism
+canFam2	cfa00052	cfa00052 - Galactosemetabolism
+canFam2	cfa00053	cfa00053 - Ascorbateand aldarate metabolism
+canFam2	cfa00061	cfa00061 - Fattyacid biosynthesis
+canFam2	cfa00062	cfa00062 - Fattyacid elongation in mitochondria
+canFam2	cfa00071	cfa00071 - Fattyacid metabolism
+canFam2	cfa00072	cfa00072 - Synthesisand degradation of ketone bodies
+canFam2	cfa00100	cfa00100 - Steroidbiosynthesis
+canFam2	cfa00120	cfa00120 - Primarybile acid biosynthesis
+canFam2	cfa00130	cfa00130 - Ubiquinoneand other terpenoid-quinone biosynthesis
+canFam2	cfa00140	cfa00140 - Steroidhormone biosynthesis
+canFam2	cfa00190	cfa00190 - Oxidativephosphorylation
+canFam2	cfa00230	cfa00230 - Purinemetabolism
+canFam2	cfa00232	cfa00232 - Caffeinemetabolism
+canFam2	cfa00240	cfa00240 - Pyrimidinemetabolism
+canFam2	cfa00250	cfa00250 - Alanine,aspartate and glutamate metabolism
+canFam2	cfa00260	cfa00260 - Glycine,serine and threonine metabolism
+canFam2	cfa00270	cfa00270 - Cysteineand methionine metabolism
+canFam2	cfa00280	cfa00280 - Valine,leucine and isoleucine degradation
+canFam2	cfa00290	cfa00290 - Valine,leucine and isoleucine biosynthesis
+canFam2	cfa00300	cfa00300 - Lysinebiosynthesis
+canFam2	cfa00310	cfa00310 - Lysinedegradation
+canFam2	cfa00330	cfa00330 - Arginineand proline metabolism
+canFam2	cfa00340	cfa00340 - Histidinemetabolism
+canFam2	cfa00350	cfa00350 - Tyrosinemetabolism
+canFam2	cfa00360	cfa00360 - Phenylalaninemetabolism
+canFam2	cfa00380	cfa00380 - Tryptophanmetabolism
+canFam2	cfa00400	cfa00400 - Phenylalanine,tyrosine and tryptophan biosynthesis
+canFam2	cfa00410	cfa00410 - beta-Alaninemetabolism
+canFam2	cfa00430	cfa00430 - Taurineand hypotaurine metabolism
+canFam2	cfa00450	cfa00450 - Selenocompoundmetabolism
+canFam2	cfa00460	cfa00460 - Cyanoaminoacid metabolism
+canFam2	cfa00472	cfa00472 - D-Arginineand D-ornithine metabolism
+canFam2	cfa00480	cfa00480 - Glutathionemetabolism
+canFam2	cfa00500	cfa00500 - Starchand sucrose metabolism
+canFam2	cfa00510	cfa00510 - N-Glycanbiosynthesis
+canFam2	cfa00511	cfa00511 - Otherglycan degradation
+canFam2	cfa00512	cfa00512 - Mucintype O-Glycan biosynthesis
+canFam2	cfa00514	cfa00514 - Othertypes of O-glycan biosynthesis
+canFam2	cfa00520	cfa00520 - Aminosugar and nucleotide sugar metabolism
+canFam2	cfa00531	cfa00531 - Glycosaminoglycandegradation
+canFam2	cfa00532	cfa00532 - Glycosaminoglycanbiosynthesis - chondroitin sulfate
+canFam2	cfa00533	cfa00533 - Glycosaminoglycanbiosynthesis - keratan sulfate
+canFam2	cfa00534	cfa00534 - Glycosaminoglycanbiosynthesis - heparan sulfate
+canFam2	cfa00561	cfa00561 - Glycerolipidmetabolism
+canFam2	cfa00562	cfa00562 - Inositolphosphate metabolism
+canFam2	cfa00563	cfa00563 - Glycosylphosphatidylinositol(GPI)-anchorbiosynthesis
+canFam2	cfa00564	cfa00564 - Glycerophospholipidmetabolism
+canFam2	cfa00565	cfa00565 - Etherlipid metabolism
+canFam2	cfa00590	cfa00590 - Arachidonicacid metabolism
+canFam2	cfa00591	cfa00591 - Linoleicacid metabolism
+canFam2	cfa00592	cfa00592 - alpha-Linolenicacid metabolism
+canFam2	cfa00600	cfa00600 - Sphingolipidmetabolism
+canFam2	cfa00601	cfa00601 - Glycosphingolipidbiosynthesis - lacto and neolacto series
+canFam2	cfa00603	cfa00603 - Glycosphingolipidbiosynthesis - globo series
+canFam2	cfa00604	cfa00604 - Glycosphingolipidbiosynthesis - ganglio series
+canFam2	cfa00620	cfa00620 - Pyruvatemetabolism
+canFam2	cfa00630	cfa00630 - Glyoxylateand dicarboxylate metabolism
+canFam2	cfa00640	cfa00640 - Propanoatemetabolism
+canFam2	cfa00650	cfa00650 - Butanoatemetabolism
+canFam2	cfa00670	cfa00670 - Onecarbon pool by folate
+canFam2	cfa00730	cfa00730 - Thiaminemetabolism
+canFam2	cfa00740	cfa00740 - Riboflavinmetabolism
+canFam2	cfa00750	cfa00750 - VitaminB6 metabolism
+canFam2	cfa00760	cfa00760 - Nicotinateand nicotinamide metabolism
+canFam2	cfa00770	cfa00770 - Pantothenateand CoA biosynthesis
+canFam2	cfa00780	cfa00780 - Biotinmetabolism
+canFam2	cfa00785	cfa00785 - Lipoicacid metabolism
+canFam2	cfa00790	cfa00790 - Folatebiosynthesis
+canFam2	cfa00830	cfa00830 - Retinolmetabolism
+canFam2	cfa00860	cfa00860 - Porphyrinand chlorophyll metabolism
+canFam2	cfa00900	cfa00900 - Terpenoidbackbone biosynthesis
+canFam2	cfa00910	cfa00910 - Nitrogenmetabolism
+canFam2	cfa00920	cfa00920 - Sulfurmetabolism
+canFam2	cfa00970	cfa00970 - Aminoacyl-tRNAbiosynthesis
+canFam2	cfa00980	cfa00980 - Metabolismof xenobiotics by cytochrome P450
+canFam2	cfa00982	cfa00982 - Drugmetabolism - cytochrome P450
+canFam2	cfa00983	cfa00983 - Drugmetabolism - other enzymes
+canFam2	cfa01040	cfa01040 - Biosynthesisof unsaturated fatty acids
+canFam2	cfa01100	cfa01100 - Metabolicpathways
+canFam2	cfa02010	cfa02010 - ABCtransporters
+canFam2	cfa03008	cfa03008 - Ribosomebiogenesis in eukaryotes
+canFam2	cfa03010	cfa03010 - Ribosome
+canFam2	cfa03013	cfa03013 - RNAtransport
+canFam2	cfa03015	cfa03015 - mRNAsurveillance pathway
+canFam2	cfa03018	cfa03018 - RNAdegradation
+canFam2	cfa03020	cfa03020 - RNApolymerase
+canFam2	cfa03022	cfa03022 - Basaltranscription factors
+canFam2	cfa03030	cfa03030 - DNAreplication
+canFam2	cfa03040	cfa03040 - Spliceosome
+canFam2	cfa03050	cfa03050 - Proteasome
+canFam2	cfa03060	cfa03060 - Proteinexport
+canFam2	cfa03320	cfa03320 - PPARsignaling pathway
+canFam2	cfa03410	cfa03410 - Baseexcision repair
+canFam2	cfa03420	cfa03420 - Nucleotideexcision repair
+canFam2	cfa03430	cfa03430 - Mismatchrepair
+canFam2	cfa03440	cfa03440 - Homologousrecombination
+canFam2	cfa03450	cfa03450 - Non-homologousend-joining
+canFam2	cfa03460	cfa03460 - Fanconianemia pathway
+canFam2	cfa04010	cfa04010 - MAPKsignaling pathway
+canFam2	cfa04012	cfa04012 - ErbBsignaling pathway
+canFam2	cfa04020	cfa04020 - Calciumsignaling pathway
+canFam2	cfa04060	cfa04060 - Cytokine-cytokinereceptor interaction
+canFam2	cfa04062	cfa04062 - Chemokinesignaling pathway
+canFam2	cfa04070	cfa04070 - Phosphatidylinositolsignaling system
+canFam2	cfa04080	cfa04080 - Neuroactiveligand-receptor interaction
+canFam2	cfa04110	cfa04110 - Cellcycle
+canFam2	cfa04114	cfa04114 - Oocytemeiosis
+canFam2	cfa04115	cfa04115 - p53signaling pathway
+canFam2	cfa04120	cfa04120 - Ubiquitinmediated proteolysis
+canFam2	cfa04122	cfa04122 - Sulfurrelay system
+canFam2	cfa04130	cfa04130 - SNAREinteractions in vesicular transport
+canFam2	cfa04140	cfa04140 - Regulationof autophagy
+canFam2	cfa04141	cfa04141 - Proteinprocessing in endoplasmic reticulum
+canFam2	cfa04142	cfa04142 - Lysosome
+canFam2	cfa04144	cfa04144 - Endocytosis
+canFam2	cfa04145	cfa04145 - Phagosome
+canFam2	cfa04146	cfa04146 - Peroxisome
+canFam2	cfa04150	cfa04150 - mTORsignaling pathway
+canFam2	cfa04210	cfa04210 - Apoptosis
+canFam2	cfa04260	cfa04260 - Cardiacmuscle contraction
+canFam2	cfa04270	cfa04270 - Vascularsmooth muscle contraction
+canFam2	cfa04310	cfa04310 - Wntsignaling pathway
+canFam2	cfa04320	cfa04320 - Dorso-ventralaxis formation
+canFam2	cfa04330	cfa04330 - Notchsignaling pathway
+canFam2	cfa04340	cfa04340 - Hedgehogsignaling pathway
+canFam2	cfa04350	cfa04350 - TGF-betasignaling pathway
+canFam2	cfa04360	cfa04360 - Axonguidance
+canFam2	cfa04370	cfa04370 - VEGFsignaling pathway
+canFam2	cfa04380	cfa04380 - Osteoclastdifferentiation
+canFam2	cfa04510	cfa04510 - Focaladhesion
+canFam2	cfa04512	cfa04512 - ECM-receptorinteraction
+canFam2	cfa04514	cfa04514 - Celladhesion molecules (CAMs)
+canFam2	cfa04520	cfa04520 - Adherensjunction
+canFam2	cfa04530	cfa04530 - Tightjunction
+canFam2	cfa04540	cfa04540 - Gapjunction
+canFam2	cfa04610	cfa04610 - Complementand coagulation cascades
+canFam2	cfa04612	cfa04612 - Antigenprocessing and presentation
+canFam2	cfa04614	cfa04614 - Renin-angiotensinsystem
+canFam2	cfa04620	cfa04620 - Toll-likereceptor signaling pathway
+canFam2	cfa04621	cfa04621 - NOD-likereceptor signaling pathway
+canFam2	cfa04622	cfa04622 - RIG-I-likereceptor signaling pathway
+canFam2	cfa04623	cfa04623 - CytosolicDNA-sensing pathway
+canFam2	cfa04630	cfa04630 - Jak-STATsignaling pathway
+canFam2	cfa04640	cfa04640 - Hematopoieticcell lineage
+canFam2	cfa04650	cfa04650 - Naturalkiller cell mediated cytotoxicity
+canFam2	cfa04660	cfa04660 - Tcell receptor signaling pathway
+canFam2	cfa04662	cfa04662 - Bcell receptor signaling pathway
+canFam2	cfa04664	cfa04664 - Fcepsilon RI signaling pathway
+canFam2	cfa04666	cfa04666 - Fcgamma R-mediated phagocytosis
+canFam2	cfa04670	cfa04670 - Leukocytetransendothelial migration
+canFam2	cfa04672	cfa04672 - Intestinalimmune network for IgA production
+canFam2	cfa04710	cfa04710 - Circadianrhythm - mammal
+canFam2	cfa04720	cfa04720 - Long-termpotentiation
+canFam2	cfa04721	cfa04721 - Synapticvesicle cycle
+canFam2	cfa04722	cfa04722 - Neurotrophinsignaling pathway
+canFam2	cfa04724	cfa04724 - Glutamatergicsynapse
+canFam2	cfa04725	cfa04725 - Cholinergicsynapse
+canFam2	cfa04727	cfa04727 - GABAergicsynapse
+canFam2	cfa04728	cfa04728 - Dopaminergicsynapse
+canFam2	cfa04730	cfa04730 - Long-termdepression
+canFam2	cfa04740	cfa04740 - Olfactorytransduction
+canFam2	cfa04742	cfa04742 - Tastetransduction
+canFam2	cfa04744	cfa04744 - Phototransduction
+canFam2	cfa04810	cfa04810 - Regulationof actin cytoskeleton
+canFam2	cfa04910	cfa04910 - Insulinsignaling pathway
+canFam2	cfa04912	cfa04912 - GnRHsignaling pathway
+canFam2	cfa04914	cfa04914 - Progesterone-mediatedoocyte maturation
+canFam2	cfa04916	cfa04916 - Melanogenesis
+canFam2	cfa04920	cfa04920 - Adipocytokinesignaling pathway
+canFam2	cfa04930	cfa04930 - TypeII diabetes mellitus
+canFam2	cfa04940	cfa04940 - TypeI diabetes mellitus
+canFam2	cfa04950	cfa04950 - Maturityonset diabetes of the young
+canFam2	cfa04960	cfa04960 - Aldosterone-regulatedsodium reabsorption
+canFam2	cfa04961	cfa04961 - Endocrineand other factor-regulated calcium reabsorption
+canFam2	cfa04962	cfa04962 - Vasopressin-regulatedwater reabsorption
+canFam2	cfa04964	cfa04964 - Proximaltubule bicarbonate reclamation
+canFam2	cfa04966	cfa04966 - Collectingduct acid secretion
+canFam2	cfa04970	cfa04970 - Salivarysecretion
+canFam2	cfa04971	cfa04971 - Gastricacid secretion
+canFam2	cfa04972	cfa04972 - Pancreaticsecretion
+canFam2	cfa04973	cfa04973 - Carbohydratedigestion and absorption
+canFam2	cfa04974	cfa04974 - Proteindigestion and absorption
+canFam2	cfa04975	cfa04975 - Fatdigestion and absorption
+canFam2	cfa04976	cfa04976 - Bilesecretion
+canFam2	cfa04977	cfa04977 - Vitamindigestion and absorption
+canFam2	cfa04978	cfa04978 - Mineralabsorption
+canFam2	cfa05010	cfa05010 - Alzheimer'sdisease
+canFam2	cfa05012	cfa05012 - Parkinson'sdisease
+canFam2	cfa05014	cfa05014 - Amyotrophiclateral sclerosis (ALS)
+canFam2	cfa05016	cfa05016 - Huntington'sdisease
+canFam2	cfa05020	cfa05020 - Priondiseases
+canFam2	cfa05100	cfa05100 - Bacterialinvasion of epithelial cells
+canFam2	cfa05132	cfa05132 - Salmonellainfection
+canFam2	cfa05133	cfa05133 - Pertussis
+canFam2	cfa05134	cfa05134 - Legionellosis
+canFam2	cfa05140	cfa05140 - Leishmaniasis
+canFam2	cfa05142	cfa05142 - Chagasdisease (American trypanosomiasis)
+canFam2	cfa05143	cfa05143 - Africantrypanosomiasis
+canFam2	cfa05144	cfa05144 - Malaria
+canFam2	cfa05145	cfa05145 - Toxoplasmosis
+canFam2	cfa05146	cfa05146 - Amoebiasis
+canFam2	cfa05150	cfa05150 - Staphylococcusaureus infection
+canFam2	cfa05152	cfa05152 - Tuberculosis
+canFam2	cfa05160	cfa05160 - HepatitisC
+canFam2	cfa05162	cfa05162 - Measles
+canFam2	cfa05164	cfa05164 - InfluenzaA
+canFam2	cfa05166	cfa05166 - HTLV-Iinfection
+canFam2	cfa05168	cfa05168 - Herpessimplex infection
+canFam2	cfa05200	cfa05200 - Pathwaysin cancer
+canFam2	cfa05210	cfa05210 - Colorectalcancer
+canFam2	cfa05211	cfa05211 - Renalcell carcinoma
+canFam2	cfa05212	cfa05212 - Pancreaticcancer
+canFam2	cfa05213	cfa05213 - Endometrialcancer
+canFam2	cfa05214	cfa05214 - Glioma
+canFam2	cfa05215	cfa05215 - Prostatecancer
+canFam2	cfa05216	cfa05216 - Thyroidcancer
+canFam2	cfa05217	cfa05217 - Basalcell carcinoma
+canFam2	cfa05218	cfa05218 - Melanoma
+canFam2	cfa05219	cfa05219 - Bladdercancer
+canFam2	cfa05220	cfa05220 - Chronicmyeloid leukemia
+canFam2	cfa05221	cfa05221 - Acutemyeloid leukemia
+canFam2	cfa05222	cfa05222 - Smallcell lung cancer
+canFam2	cfa05223	cfa05223 - Non-smallcell lung cancer
+canFam2	cfa05310	cfa05310 - Asthma
+canFam2	cfa05320	cfa05320 - Autoimmunethyroid disease
+canFam2	cfa05322	cfa05322 - Systemiclupus erythematosus
+canFam2	cfa05323	cfa05323 - Rheumatoidarthritis
+canFam2	cfa05330	cfa05330 - Allograftrejection
+canFam2	cfa05332	cfa05332 - Graft-versus-hostdisease
+canFam2	cfa05340	cfa05340 - Primaryimmunodeficiency
+canFam2	cfa05410	cfa05410 - Hypertrophiccardiomyopathy (HCM)
+canFam2	cfa05412	cfa05412 - Arrhythmogenicright ventricular cardiomyopathy (ARVC)
+canFam2	cfa05414	cfa05414 - Dilatedcardiomyopathy
+canFam2	cfa05416	cfa05416 - Viralmyocarditis
+bosTau4	bta00010	bta00010 - Glycolysis/ Gluconeogenesis
+bosTau4	bta00020	bta00020 - Citratecycle (TCA cycle)
+bosTau4	bta00030	bta00030 - Pentosephosphate pathway
+bosTau4	bta00040	bta00040 - Pentoseand glucuronate interconversions
+bosTau4	bta00051	bta00051 - Fructoseand mannose metabolism
+bosTau4	bta00052	bta00052 - Galactosemetabolism
+bosTau4	bta00053	bta00053 - Ascorbateand aldarate metabolism
+bosTau4	bta00061	bta00061 - Fattyacid biosynthesis
+bosTau4	bta00062	bta00062 - Fattyacid elongation
+bosTau4	bta00071	bta00071 - Fattyacid metabolism
+bosTau4	bta00072	bta00072 - Synthesisand degradation of ketone bodies
+bosTau4	bta00100	bta00100 - Steroidbiosynthesis
+bosTau4	bta00120	bta00120 - Primarybile acid biosynthesis
+bosTau4	bta00130	bta00130 - Ubiquinoneand other terpenoid-quinone biosynthesis
+bosTau4	bta00140	bta00140 - Steroidhormone biosynthesis
+bosTau4	bta00190	bta00190 - Oxidativephosphorylation
+bosTau4	bta00230	bta00230 - Purinemetabolism
+bosTau4	bta00232	bta00232 - Caffeinemetabolism
+bosTau4	bta00240	bta00240 - Pyrimidinemetabolism
+bosTau4	bta00250	bta00250 - Alanine,aspartate and glutamate metabolism
+bosTau4	bta00260	bta00260 - Glycine,serine and threonine metabolism
+bosTau4	bta00270	bta00270 - Cysteineand methionine metabolism
+bosTau4	bta00280	bta00280 - Valine,leucine and isoleucine degradation
+bosTau4	bta00290	bta00290 - Valine,leucine and isoleucine biosynthesis
+bosTau4	bta00300	bta00300 - Lysinebiosynthesis
+bosTau4	bta00310	bta00310 - Lysinedegradation
+bosTau4	bta00330	bta00330 - Arginineand proline metabolism
+bosTau4	bta00340	bta00340 - Histidinemetabolism
+bosTau4	bta00350	bta00350 - Tyrosinemetabolism
+bosTau4	bta00360	bta00360 - Phenylalaninemetabolism
+bosTau4	bta00380	bta00380 - Tryptophanmetabolism
+bosTau4	bta00400	bta00400 - Phenylalanine,tyrosine and tryptophan biosynthesis
+bosTau4	bta00410	bta00410 - beta-Alaninemetabolism
+bosTau4	bta00430	bta00430 - Taurineand hypotaurine metabolism
+bosTau4	bta00450	bta00450 - Selenocompoundmetabolism
+bosTau4	bta00460	bta00460 - Cyanoaminoacid metabolism
+bosTau4	bta00471	bta00471 - D-Glutamineand D-glutamate metabolism
+bosTau4	bta00472	bta00472 - D-Arginineand D-ornithine metabolism
+bosTau4	bta00480	bta00480 - Glutathionemetabolism
+bosTau4	bta00500	bta00500 - Starchand sucrose metabolism
+bosTau4	bta00510	bta00510 - N-Glycanbiosynthesis
+bosTau4	bta00511	bta00511 - Otherglycan degradation
+bosTau4	bta00512	bta00512 - Mucintype O-Glycan biosynthesis
+bosTau4	bta00514	bta00514 - Othertypes of O-glycan biosynthesis
+bosTau4	bta00520	bta00520 - Aminosugar and nucleotide sugar metabolism
+bosTau4	bta00524	bta00524 - Butirosinand neomycin biosynthesis
+bosTau4	bta00531	bta00531 - Glycosaminoglycandegradation
+bosTau4	bta00532	bta00532 - Glycosaminoglycanbiosynthesis - chondroitin sulfate
+bosTau4	bta00533	bta00533 - Glycosaminoglycanbiosynthesis - keratan sulfate
+bosTau4	bta00534	bta00534 - Glycosaminoglycanbiosynthesis - heparan sulfate
+bosTau4	bta00561	bta00561 - Glycerolipidmetabolism
+bosTau4	bta00562	bta00562 - Inositolphosphate metabolism
+bosTau4	bta00563	bta00563 - Glycosylphosphatidylinositol(GPI)-anchorbiosynthesis
+bosTau4	bta00564	bta00564 - Glycerophospholipidmetabolism
+bosTau4	bta00565	bta00565 - Etherlipid metabolism
+bosTau4	bta00590	bta00590 - Arachidonicacid metabolism
+bosTau4	bta00591	bta00591 - Linoleicacid metabolism
+bosTau4	bta00592	bta00592 - alpha-Linolenicacid metabolism
+bosTau4	bta00600	bta00600 - Sphingolipidmetabolism
+bosTau4	bta00601	bta00601 - Glycosphingolipidbiosynthesis - lacto and neolacto series
+bosTau4	bta00603	bta00603 - Glycosphingolipidbiosynthesis - globo series
+bosTau4	bta00604	bta00604 - Glycosphingolipidbiosynthesis - ganglio series
+bosTau4	bta00620	bta00620 - Pyruvatemetabolism
+bosTau4	bta00630	bta00630 - Glyoxylateand dicarboxylate metabolism
+bosTau4	bta00640	bta00640 - Propanoatemetabolism
+bosTau4	bta00650	bta00650 - Butanoatemetabolism
+bosTau4	bta00670	bta00670 - Onecarbon pool by folate
+bosTau4	bta00730	bta00730 - Thiaminemetabolism
+bosTau4	bta00740	bta00740 - Riboflavinmetabolism
+bosTau4	bta00750	bta00750 - VitaminB6 metabolism
+bosTau4	bta00760	bta00760 - Nicotinateand nicotinamide metabolism
+bosTau4	bta00770	bta00770 - Pantothenateand CoA biosynthesis
+bosTau4	bta00780	bta00780 - Biotinmetabolism
+bosTau4	bta00785	bta00785 - Lipoicacid metabolism
+bosTau4	bta00790	bta00790 - Folatebiosynthesis
+bosTau4	bta00830	bta00830 - Retinolmetabolism
+bosTau4	bta00860	bta00860 - Porphyrinand chlorophyll metabolism
+bosTau4	bta00900	bta00900 - Terpenoidbackbone biosynthesis
+bosTau4	bta00910	bta00910 - Nitrogenmetabolism
+bosTau4	bta00920	bta00920 - Sulfurmetabolism
+bosTau4	bta00970	bta00970 - Aminoacyl-tRNAbiosynthesis
+bosTau4	bta00980	bta00980 - Metabolismof xenobiotics by cytochrome P450
+bosTau4	bta00982	bta00982 - Drugmetabolism - cytochrome P450
+bosTau4	bta00983	bta00983 - Drugmetabolism - other enzymes
+bosTau4	bta01040	bta01040 - Biosynthesisof unsaturated fatty acids
+bosTau4	bta01100	bta01100 - Metabolicpathways
+bosTau4	bta02010	bta02010 - ABCtransporters
+bosTau4	bta03008	bta03008 - Ribosomebiogenesis in eukaryotes
+bosTau4	bta03010	bta03010 - Ribosome
+bosTau4	bta03013	bta03013 - RNAtransport
+bosTau4	bta03015	bta03015 - mRNAsurveillance pathway
+bosTau4	bta03018	bta03018 - RNAdegradation
+bosTau4	bta03020	bta03020 - RNApolymerase
+bosTau4	bta03022	bta03022 - Basaltranscription factors
+bosTau4	bta03030	bta03030 - DNAreplication
+bosTau4	bta03040	bta03040 - Spliceosome
+bosTau4	bta03050	bta03050 - Proteasome
+bosTau4	bta03060	bta03060 - Proteinexport
+bosTau4	bta03320	bta03320 - PPARsignaling pathway
+bosTau4	bta03410	bta03410 - Baseexcision repair
+bosTau4	bta03420	bta03420 - Nucleotideexcision repair
+bosTau4	bta03430	bta03430 - Mismatchrepair
+bosTau4	bta03440	bta03440 - Homologousrecombination
+bosTau4	bta03450	bta03450 - Non-homologousend-joining
+bosTau4	bta03460	bta03460 - Fanconianemia pathway
+bosTau4	bta04010	bta04010 - MAPKsignaling pathway
+bosTau4	bta04012	bta04012 - ErbBsignaling pathway
+bosTau4	bta04020	bta04020 - Calciumsignaling pathway
+bosTau4	bta04060	bta04060 - Cytokine-cytokinereceptor interaction
+bosTau4	bta04062	bta04062 - Chemokinesignaling pathway
+bosTau4	bta04070	bta04070 - Phosphatidylinositolsignaling system
+bosTau4	bta04080	bta04080 - Neuroactiveligand-receptor interaction
+bosTau4	bta04110	bta04110 - Cellcycle
+bosTau4	bta04114	bta04114 - Oocytemeiosis
+bosTau4	bta04115	bta04115 - p53signaling pathway
+bosTau4	bta04120	bta04120 - Ubiquitinmediated proteolysis
+bosTau4	bta04122	bta04122 - Sulfurrelay system
+bosTau4	bta04130	bta04130 - SNAREinteractions in vesicular transport
+bosTau4	bta04140	bta04140 - Regulationof autophagy
+bosTau4	bta04141	bta04141 - Proteinprocessing in endoplasmic reticulum
+bosTau4	bta04142	bta04142 - Lysosome
+bosTau4	bta04144	bta04144 - Endocytosis
+bosTau4	bta04145	bta04145 - Phagosome
+bosTau4	bta04146	bta04146 - Peroxisome
+bosTau4	bta04150	bta04150 - mTORsignaling pathway
+bosTau4	bta04210	bta04210 - Apoptosis
+bosTau4	bta04260	bta04260 - Cardiacmuscle contraction
+bosTau4	bta04270	bta04270 - Vascularsmooth muscle contraction
+bosTau4	bta04310	bta04310 - Wntsignaling pathway
+bosTau4	bta04320	bta04320 - Dorso-ventralaxis formation
+bosTau4	bta04330	bta04330 - Notchsignaling pathway
+bosTau4	bta04340	bta04340 - Hedgehogsignaling pathway
+bosTau4	bta04350	bta04350 - TGF-betasignaling pathway
+bosTau4	bta04360	bta04360 - Axonguidance
+bosTau4	bta04370	bta04370 - VEGFsignaling pathway
+bosTau4	bta04380	bta04380 - Osteoclastdifferentiation
+bosTau4	bta04510	bta04510 - Focaladhesion
+bosTau4	bta04512	bta04512 - ECM-receptorinteraction
+bosTau4	bta04514	bta04514 - Celladhesion molecules (CAMs)
+bosTau4	bta04520	bta04520 - Adherensjunction
+bosTau4	bta04530	bta04530 - Tightjunction
+bosTau4	bta04540	bta04540 - Gapjunction
+bosTau4	bta04610	bta04610 - Complementand coagulation cascades
+bosTau4	bta04612	bta04612 - Antigenprocessing and presentation
+bosTau4	bta04614	bta04614 - Renin-angiotensinsystem
+bosTau4	bta04620	bta04620 - Toll-likereceptor signaling pathway
+bosTau4	bta04621	bta04621 - NOD-likereceptor signaling pathway
+bosTau4	bta04622	bta04622 - RIG-I-likereceptor signaling pathway
+bosTau4	bta04623	bta04623 - CytosolicDNA-sensing pathway
+bosTau4	bta04630	bta04630 - Jak-STATsignaling pathway
+bosTau4	bta04640	bta04640 - Hematopoieticcell lineage
+bosTau4	bta04650	bta04650 - Naturalkiller cell mediated cytotoxicity
+bosTau4	bta04660	bta04660 - Tcell receptor signaling pathway
+bosTau4	bta04662	bta04662 - Bcell receptor signaling pathway
+bosTau4	bta04664	bta04664 - Fcepsilon RI signaling pathway
+bosTau4	bta04666	bta04666 - Fcgamma R-mediated phagocytosis
+bosTau4	bta04670	bta04670 - Leukocytetransendothelial migration
+bosTau4	bta04672	bta04672 - Intestinalimmune network for IgA production
+bosTau4	bta04710	bta04710 - Circadianrhythm - mammal
+bosTau4	bta04720	bta04720 - Long-termpotentiation
+bosTau4	bta04721	bta04721 - Synapticvesicle cycle
+bosTau4	bta04722	bta04722 - Neurotrophinsignaling pathway
+bosTau4	bta04724	bta04724 - Glutamatergicsynapse
+bosTau4	bta04725	bta04725 - Cholinergicsynapse
+bosTau4	bta04727	bta04727 - GABAergicsynapse
+bosTau4	bta04728	bta04728 - Dopaminergicsynapse
+bosTau4	bta04730	bta04730 - Long-termdepression
+bosTau4	bta04740	bta04740 - Olfactorytransduction
+bosTau4	bta04742	bta04742 - Tastetransduction
+bosTau4	bta04744	bta04744 - Phototransduction
+bosTau4	bta04810	bta04810 - Regulationof actin cytoskeleton
+bosTau4	bta04910	bta04910 - Insulinsignaling pathway
+bosTau4	bta04912	bta04912 - GnRHsignaling pathway
+bosTau4	bta04914	bta04914 - Progesterone-mediatedoocyte maturation
+bosTau4	bta04916	bta04916 - Melanogenesis
+bosTau4	bta04920	bta04920 - Adipocytokinesignaling pathway
+bosTau4	bta04930	bta04930 - TypeII diabetes mellitus
+bosTau4	bta04940	bta04940 - TypeI diabetes mellitus
+bosTau4	bta04950	bta04950 - Maturityonset diabetes of the young
+bosTau4	bta04960	bta04960 - Aldosterone-regulatedsodium reabsorption
+bosTau4	bta04961	bta04961 - Endocrineand other factor-regulated calcium reabsorption
+bosTau4	bta04962	bta04962 - Vasopressin-regulatedwater reabsorption
+bosTau4	bta04964	bta04964 - Proximaltubule bicarbonate reclamation
+bosTau4	bta04966	bta04966 - Collectingduct acid secretion
+bosTau4	bta04970	bta04970 - Salivarysecretion
+bosTau4	bta04971	bta04971 - Gastricacid secretion
+bosTau4	bta04972	bta04972 - Pancreaticsecretion
+bosTau4	bta04973	bta04973 - Carbohydratedigestion and absorption
+bosTau4	bta04974	bta04974 - Proteindigestion and absorption
+bosTau4	bta04975	bta04975 - Fatdigestion and absorption
+bosTau4	bta04976	bta04976 - Bilesecretion
+bosTau4	bta04977	bta04977 - Vitamindigestion and absorption
+bosTau4	bta04978	bta04978 - Mineralabsorption
+bosTau4	bta05010	bta05010 - Alzheimer'sdisease
+bosTau4	bta05012	bta05012 - Parkinson'sdisease
+bosTau4	bta05014	bta05014 - Amyotrophiclateral sclerosis (ALS)
+bosTau4	bta05016	bta05016 - Huntington'sdisease
+bosTau4	bta05020	bta05020 - Priondiseases
+bosTau4	bta05100	bta05100 - Bacterialinvasion of epithelial cells
+bosTau4	bta05132	bta05132 - Salmonellainfection
+bosTau4	bta05133	bta05133 - Pertussis
+bosTau4	bta05134	bta05134 - Legionellosis
+bosTau4	bta05140	bta05140 - Leishmaniasis
+bosTau4	bta05142	bta05142 - Chagasdisease (American trypanosomiasis)
+bosTau4	bta05143	bta05143 - Africantrypanosomiasis
+bosTau4	bta05144	bta05144 - Malaria
+bosTau4	bta05145	bta05145 - Toxoplasmosis
+bosTau4	bta05146	bta05146 - Amoebiasis
+bosTau4	bta05150	bta05150 - Staphylococcusaureus infection
+bosTau4	bta05152	bta05152 - Tuberculosis
+bosTau4	bta05160	bta05160 - HepatitisC
+bosTau4	bta05162	bta05162 - Measles
+bosTau4	bta05164	bta05164 - InfluenzaA
+bosTau4	bta05166	bta05166 - HTLV-Iinfection
+bosTau4	bta05168	bta05168 - Herpessimplex infection
+bosTau4	bta05200	bta05200 - Pathwaysin cancer
+bosTau4	bta05202	bta05202 - Transcriptionalmisregulation in cancers
+bosTau4	bta05210	bta05210 - Colorectalcancer
+bosTau4	bta05211	bta05211 - Renalcell carcinoma
+bosTau4	bta05212	bta05212 - Pancreaticcancer
+bosTau4	bta05213	bta05213 - Endometrialcancer
+bosTau4	bta05214	bta05214 - Glioma
+bosTau4	bta05215	bta05215 - Prostatecancer
+bosTau4	bta05216	bta05216 - Thyroidcancer
+bosTau4	bta05217	bta05217 - Basalcell carcinoma
+bosTau4	bta05218	bta05218 - Melanoma
+bosTau4	bta05219	bta05219 - Bladdercancer
+bosTau4	bta05220	bta05220 - Chronicmyeloid leukemia
+bosTau4	bta05221	bta05221 - Acutemyeloid leukemia
+bosTau4	bta05222	bta05222 - Smallcell lung cancer
+bosTau4	bta05223	bta05223 - Non-smallcell lung cancer
+bosTau4	bta05310	bta05310 - Asthma
+bosTau4	bta05320	bta05320 - Autoimmunethyroid disease
+bosTau4	bta05322	bta05322 - Systemiclupus erythematosus
+bosTau4	bta05323	bta05323 - Rheumatoidarthritis
+bosTau4	bta05330	bta05330 - Allograftrejection
+bosTau4	bta05332	bta05332 - Graft-versus-hostdisease
+bosTau4	bta05340	bta05340 - Primaryimmunodeficiency
+bosTau4	bta05410	bta05410 - Hypertrophiccardiomyopathy (HCM)
+bosTau4	bta05412	bta05412 - Arrhythmogenicright ventricular cardiomyopathy (ARVC)
+bosTau4	bta05414	bta05414 - Dilatedcardiomyopathy
+bosTau4	bta05416	bta05416 - Viralmyocarditis
diff -r fdb4240fb565 -r 8ae67e9fb6ff tool-data/gd.primers.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gd.primers.loc.sample	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,6 @@
+#<species>  <primers_file_path>
+#aye-aye	/galaxy/local_data/genome_diversity/primers/aye-aye_Galaxy_primers.txt
+#bear	/galaxy/local_data/genome_diversity/primers/bear_Galaxy_primers.txt
+#bighorn	/galaxy/local_data/genome_diversity/primers/bighorn_Galaxy_primers.txt
+#tasmanian_devil	/galaxy/local_data/genome_diversity/primers/devil_Galaxy_primers.txt
+#tick	/galaxy/local_data/genome_diversity/primers/tick_Galaxy_primers.txt
diff -r fdb4240fb565 -r 8ae67e9fb6ff tool-data/gd.rank.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gd.rank.loc.sample	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,4 @@
+#<species> <prefix> <kxml_dir_path> <path_to_dict_file>
+#hg19	hsa	/galaxy/local_data/genome_diversity/rank/KXML_hsa.d	/galaxy/local_data/genome_diversity/rank/hsa_dict.txt
+#canFam2	cfa	/galaxy/local_data/genome_diversity/rank/KXML_cfa.d	/galaxy/local_data/genome_diversity/rank/cfa_dict.txt
+#bosTau4	bta	/galaxy/local_data/genome_diversity/rank/KXML_bta.d	/galaxy/local_data/genome_diversity/rank/bta_dict.txt
diff -r fdb4240fb565 -r 8ae67e9fb6ff tool-data/gd.ref_species.txt.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gd.ref_species.txt.sample	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,4 @@
+# genome diversity species
+cow	cow
+hg19	hg19
+dog	dog
diff -r fdb4240fb565 -r 8ae67e9fb6ff tool-data/gd.restriction_enzymes.txt.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gd.restriction_enzymes.txt.sample	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,99 @@
+Acc65I - GGTACC	Acc65I
+AccB7I - CCANNNNNTGG	AccB7I
+AccI - GT(A/C)(G/T)AC	AccI
+AccIII - TCCGGA	AccIII
+AcyI - G(A/G)CG(C/T)C	AcyI
+AgeI - ACCGGT	AgeI
+AluI - AGCT	AluI
+Alw44I - GTGCAC	Alw44I
+ApaI - GGGCCC	ApaI
+AvaI - C(C/T)CG(A/G)G	AvaI
+AvaII - GG(A/T)CC	AvaII
+BalI - TGGCCA	BalI
+BamHI - GGATCC	BamHI
+BanI - GG(C/T)(A/G)CC	BanI
+BanII - G(A/G)GC(C/T)C	BanII
+BbuI - GCATGC	BbuI
+BclI - TGATCA	BclI
+BglI - GCCNNNNNGGC	BglI
+BglII - AGATCT	BglII
+BsaMI - GAATGC	BsaMI
+BsaOI - CG(A/G)(C/T)CG	BsaOI
+Bsp1286I - G(A/G/T)GC(A/C/T)C	Bsp1286I
+BsrBRI - GATNNNNATC	BsrBRI
+BsrSI - ACTGG	BsrSI
+BssHII - GCGCGC	BssHII
+Bst98I - CTTAAG	Bst98I
+BstEII - GGTNACC	BstEII
+BstOI - CC(A/T)GG	BstOI
+BstXI - CCANNNNNNTGG	BstXI
+BstZI - CGGCCG	BstZI
+Bsu36I - CCTNAGG	Bsu36I
+CfoI - GCGC	CfoI
+ClaI - ATCGAT	ClaI
+Csp45I - TTCGAA	Csp45I
+CspI - CGG(A/T)CCG	CspI
+DdeI - CTNAG	DdeI
+DpnI - GATC	DpnI
+DraI - TTTAAA	DraI
+EclHKI - GACNNNNNGTC	EclHKI
+Eco47III - AGCGCT	Eco47III
+Eco52I - CGGCCG	Eco52I
+Eco72I - CACGTG	Eco72I
+EcoRI - GAATTC	EcoRI
+EcoRV - GATATC	EcoRV
+HaeII - (A/G)GCGC(C/T)	HaeII
+HaeIII - GGCC	HaeIII
+HhaI - GCGC	HhaI
+HincII - GT(C/T)(A/G)AC	HincII
+HindIII - AAGCTT	HindIII
+HinfI - GANTC	HinfI
+HpaI - GTTAAC	HpaI
+HpaII - CCGG	HpaII
+Hsp92I - G(A/G)CG(C/T)C	Hsp92I
+Hsp92II - CATG	Hsp92II
+I-PpoI - TAACTATGACTCTCTTAAGGTAGCCAAAT	I-PpoI
+KpnI - GGTACC	KpnI
+MboI - GATC	MboI
+MluI - ACGCGT	MluI
+MspA1I - C(A/C)GC(G/T)G	MspA1I
+MspI - CCGG	MspI
+NaeI - GCCGGC	NaeI
+NarI - GGCGCC	NarI
+NciI - CC(C/G)GG	NciI
+NcoI - CCATGG	NcoI
+NdeI - CATATG	NdeI
+NgoMIV - GCCGGC	NgoMIV
+NheI - GCTAGC	NheI
+NotI - GCGGCCGC	NotI
+NruI - TCGCGA	NruI
+NsiI - ATGCAT	NsiI
+PstI - CTGCAG	PstI
+PvuI - CGATCG	PvuI
+PvuII - CAGCTG	PvuII
+RsaI - GTAC	RsaI
+SacI - GAGCTC	SacI
+SacII - CCGCGG	SacII
+SalI - GTCGAC	SalI
+Sau3AI - GATC	Sau3AI
+Sau96I - GGNCC	Sau96I
+ScaI - AGTACT	ScaI
+SfiI - GGCCNNNNNGGCC	SfiI
+SgfI - GCGATCGC	SgfI
+SinI - GG(A/T)CC	SinI
+SmaI - CCCGGG	SmaI
+SnaBI - TACGTA	SnaBI
+SpeI - ACTAGT	SpeI
+SphI - GCATGC	SphI
+SspI - AATATT	SspI
+StuI - AGGCCT	StuI
+StyI - CC(A/T)(A/T)GG	StyI
+TaqI - TCGA	TaqI
+Tru9I - TTAA	Tru9I
+Tth111I - GACNNNGTC	Tth111I
+VspI - ATTAAT	VspI
+XbaI - TCTAGA	XbaI
+XhoI - CTCGAG	XhoI
+XhoII - (A/G)GATC(C/T)	XhoII
+XmaI - CCCGGG	XmaI
+XmnI - GAANNNNTTC	XmnI
diff -r fdb4240fb565 -r 8ae67e9fb6ff tool-data/gd.snps.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gd.snps.loc.sample	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,3 @@
+#<species>  <SNP_call_file_path>
+#bighorn	/galaxy/local_data/genome_diversity/snps/bighorn_snps.txt
+#tasmanian_devil	/galaxy/local_data/genome_diversity/snps/devil_snps.txt
diff -r fdb4240fb565 -r 8ae67e9fb6ff tool-data/gd.species.txt.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gd.species.txt.sample	Fri Sep 28 11:35:56 2012 -0400
@@ -0,0 +1,6 @@
+# genome diversity species
+aye-aye aye-aye
+bear bear
+bighorn bighorn
+tasmanian_devil Tasmanian devil
+tick tick